]> git.openstreetmap.org Git - chef.git/blobdiff - cookbooks/prometheus/templates/default/alert_rules.yml.erb
cloudwatch: add eu-north-1 for osm-main to collect replication metrics
[chef.git] / cookbooks / prometheus / templates / default / alert_rules.yml.erb
index 77400969edb72db718778b5737f97c3228b78d53..6dc966050a50d043d9ad5515ed19bf78ab364f18 100644 (file)
@@ -52,6 +52,13 @@ groups:
           alertgroup: "{{ $labels.instance }}"
         annotations:
           busy_workers: "{{ $value | humanizePercentage }}"
+      - alert: apache connection limit
+        expr: (apache_connections{state="total"} - on (instance) apache_connections{state="closing"}) / on (instance) (apache_server_limit * on (instance) (apache_threads_per_child + on (instance) (apache_async_request_worker_factor * on (instance) apache_workers{state="idle"} / on(instance) apache_processes{state="all"}))) > 0.8
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.instance }}"
+        annotations:
+          connections: "{{ $value | humanizePercentage }}"
   - name: chef
     rules:
       - alert: chef client not running
@@ -303,6 +310,20 @@ groups:
         for: 5m
         labels:
           alertgroup: "{{ $labels.site }}"
+      - alert: juniper laser receive power
+        expr: junos_interface_diagnostics_laser_rx_dbm < -12 and on (site, instance, name) junos_interface_admin_up == 1
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.site }}"
+        annotations:
+          power: "{{ $value }} dBm"
+      - alert: juniper laser transmit power
+        expr: junos_interface_diagnostics_laser_output_dbm < -8 and on (site, instance, name) junos_interface_admin_up == 1
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.site }}"
+        annotations:
+          power: "{{ $value }} dBm"
   - name: mail
     rules:
       - alert: exim down