git.openstreetmap.org Git - chef.git/blobdiff - cookbooks/prometheus/templates/default/alert_rules.yml.erb
apache: use new combined_extended log format instead of combined
[chef.git] / cookbooks / prometheus / templates / default / alert_rules.yml.erb
index 598179d5cb4a6f5303d64f601766a61b5f65e61f..1fac68869072fa423a1f2c1e9e8affdb42a00976 100644 (file)
@@ -105,13 +105,20 @@ groups:
           pressure: "{{ $value | humanizePercentage }}"
   - name: database
     rules:
-      - alert: postgres replication delay
-        expr: pg_replication_lag_seconds > 30
-        for: 15m
+      - alert: active rails queries  # too many concurrently active rails queries on the db-master
+        expr: sum(pg_stat_activity_count{datname="openstreetmap",usename="rails",state="active"}) by (instance) > 50 and on (instance) chef_role{name="db-master"}
+        for: 5m  # threshold must be exceeded continuously for 5 minutes
         labels:
           alertgroup: database
         annotations:
-          delay: "{{ $value | humanizeDuration }}"
+          queries: "{{ $value }}"  # per-instance sum that exceeded the threshold
+      - alert: active cgimap queries  # too many concurrently active cgimap queries on the db-master
+        expr: sum(pg_stat_activity_count{datname="openstreetmap",usename="cgimap",state="active"}) by (instance) > 30 and on (instance) chef_role{name="db-master"}
+        for: 5m  # threshold must be exceeded continuously for 5 minutes
+        labels:
+          alertgroup: database
+        annotations:
+          queries: "{{ $value }}"  # value is a query count, so use the same key as the rails rule
   - name: discourse
     rules:
       - alert: discourse job failure rate