git.openstreetmap.org Git - chef.git/blobdiff - cookbooks/prometheus/templates/default/alert_rules.yml.erb
Add alerts for equinix uplink interfaces
[chef.git] / cookbooks / prometheus / templates / default / alert_rules.yml.erb
index dfd885bb8055252fd72a99df4807eeba1f6c956c..305afbd903a44e3745ce3d3eb5dc62d51895a2b7 100644 (file)
@@ -3,13 +3,20 @@
 groups:
   - name: amsterdam
     rules:
-      - alert: uplink
+      - alert: he uplink  # renamed from "uplink" to distinguish it from the "equinix uplink" rule; "he" presumably = Hurricane Electric (confirm)
         expr: junos_interface_up{site="amsterdam",name=~"ge-[01]/2/2"} != 1
         for: 6m
         labels:
           alertgroup: "amsterdam"
         annotations:
           status: "{{ $value }}"
+      - alert: equinix uplink  # watches the amsterdam interfaces named xe-0/2/0 and xe-1/2/0
+        expr: junos_interface_up{site="amsterdam",name=~"xe-[01]/2/0"} != 1  # matches any series whose value is not 1 (presumably 1 = up; confirm against the exporter)
+        for: 6m  # condition must hold continuously for 6 minutes before the alert fires
+        labels:
+          alertgroup: "amsterdam"  # same alertgroup as the other amsterdam uplink rule
+        annotations:
+          status: "{{ $value }}"  # value of the matching series at evaluation time
       - alert: pdu current draw
         expr: rPDU2PhaseStatusCurrent{site="amsterdam",rPDU2PhaseStatusIndex="1"} / 10 > 28
         for: 6m
@@ -130,13 +137,20 @@ groups:
           failure_rate: "{{ $value }} jobs/s"
   - name: dublin
     rules:
-      - alert: uplink
+      - alert: he uplink  # renamed from "uplink" to distinguish it from the "equinix uplink" rule; "he" presumably = Hurricane Electric (confirm)
         expr: junos_interface_up{site="dublin",name=~"ge-[01]/2/2"} != 1
         for: 6m
         labels:
           alertgroup: "dublin"
         annotations:
           status: "{{ $value }}"
+      - alert: equinix uplink  # watches the dublin interfaces named xe-0/2/0 and xe-1/2/0
+        expr: junos_interface_up{site="dublin",name=~"xe-[01]/2/0"} != 1  # matches any series whose value is not 1 (presumably 1 = up; confirm against the exporter)
+        for: 6m  # condition must hold continuously for 6 minutes before the alert fires
+        labels:
+          alertgroup: "dublin"  # same alertgroup as the other dublin uplink rule
+        annotations:
+          status: "{{ $value }}"  # value of the matching series at evaluation time
       - alert: pdu current draw
         expr: rPDU2PhaseStatusCurrent{site="dublin",rPDU2PhaseStatusIndex="1"} / 10 > 28
         for: 6m
@@ -669,6 +683,13 @@ groups:
           alertgroup: "{{ $labels.instance }}"
         annotations:
           new_errors: "{{ $value }}"
+  - name: resolved  # rule group for resolved_* metrics (presumably systemd-resolved; confirm)
+    rules:
+      - alert: dnssec validation failures
+        expr: rate(resolved_dnssec_verdicts_total{result="bogus"}[1m]) > 1  # per-second rate of "bogus" verdicts, averaged over 1 minute, above 1
+        for: 5m  # condition must hold continuously for 5 minutes before the alert fires
+        labels:
+          alertgroup: "{{ $labels.instance }}"  # grouped by the instance label of the firing series; NOTE(review): no annotations block here, unlike the uplink rules - confirm intentional
   - name: smart
     rules:
       - alert: smart failure