X-Git-Url: https://git.openstreetmap.org./chef.git/blobdiff_plain/ca5c17ee5e469cd376b3a5546adf1efb5bca7513..39eecb953618c3ec9c1164468e3221f6c8e794b7:/cookbooks/prometheus/templates/default/alert_rules.yml.erb diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb index 77400969e..6dc966050 100644 --- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb +++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb @@ -52,6 +52,13 @@ groups: alertgroup: "{{ $labels.instance }}" annotations: busy_workers: "{{ $value | humanizePercentage }}" + - alert: apache connection limit + expr: (apache_connections{state="total"} - on (instance) apache_connections{state="closing"}) / on (instance) (apache_server_limit * on (instance) (apache_threads_per_child + on (instance) (apache_async_request_worker_factor * on (instance) apache_workers{state="idle"} / on(instance) apache_processes{state="all"}))) > 0.8 + for: 5m + labels: + alertgroup: "{{ $labels.instance }}" + annotations: + connections: "{{ $value | humanizePercentage }}" - name: chef rules: - alert: chef client not running @@ -303,6 +310,20 @@ groups: for: 5m labels: alertgroup: "{{ $labels.site }}" + - alert: juniper laser receive power + expr: junos_interface_diagnostics_laser_rx_dbm < -12 and on (site, instance, name) junos_interface_admin_up == 1 + for: 5m + labels: + alertgroup: "{{ $labels.site }}" + annotations: + power: "{{ $value }} dBm" + - alert: juniper laser transmit power + expr: junos_interface_diagnostics_laser_output_dbm < -8 and on (site, instance, name) junos_interface_admin_up == 1 + for: 5m + labels: + alertgroup: "{{ $labels.site }}" + annotations: + power: "{{ $value }} dBm" - name: mail rules: - alert: exim down