X-Git-Url: https://git.openstreetmap.org./chef.git/blobdiff_plain/2c1479b9b228b234ea3514060ac2071e4caa1e4a..135b0061b49861418cfb01083c1fe42e9f11c1db:/cookbooks/prometheus/templates/default/alert_rules.yml.erb?ds=sidebyside

Summary of this patch to the Prometheus alert rules template:
  * amsterdam and dublin power/environment alerts: "for" raised from 5m to 6m.
  * amsterdam and dublin "site temperature": upper threshold raised from 25 to 25.5.
  * "readonly filesystem": the expression now compares node_filesystem_readonly
    with its own 7-day minimum instead of the constant 1.
  * new "exim down" alert in the mail group (exim_up == 0 for 5m).

NOTE(review): line breaks and YAML indentation below were reconstructed from
the hunk headers' line counts; the collapsed export did not preserve the
original whitespace. Verify against the upstream commit before applying.

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index 3dd1af873..16496c12d 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -5,28 +5,28 @@ groups:
     rules:
       - alert: pdu current draw
         expr: rPDU2PhaseStatusCurrent{site="amsterdam",rPDU2PhaseStatusIndex="1"} / 10 > 10
-        for: 5m
+        for: 6m
         labels:
           alertgroup: "amsterdam"
         annotations:
           current: "{{ $value | humanize }}A"
       - alert: site current draw
         expr: sum(rPDU2PhaseStatusCurrent{site="amsterdam",rPDU2PhaseStatusIndex="1"} / 10) > 13
-        for: 5m
+        for: 6m
         labels:
           alertgroup: "amsterdam"
         annotations:
           current: "{{ $value | humanize }}A"
       - alert: site temperature
-        expr: min(rPDU2SensorTempHumidityStatusTempC{site="amsterdam"}) / 10 < 18 or min(rPDU2SensorTempHumidityStatusTempC{site="amsterdam"}) / 10 > 25
-        for: 5m
+        expr: min(rPDU2SensorTempHumidityStatusTempC{site="amsterdam"}) / 10 < 18 or min(rPDU2SensorTempHumidityStatusTempC{site="amsterdam"}) / 10 > 25.5
+        for: 6m
         labels:
           alertgroup: "amsterdam"
         annotations:
           temperature: "{{ $value | humanize }}C"
       - alert: site humidity
         expr: max(rPDU2SensorTempHumidityStatusRelativeHumidity{site="amsterdam"}) / 100 < 0.25 or max(rPDU2SensorTempHumidityStatusRelativeHumidity{site="amsterdam"}) / 100 > 0.65
-        for: 5m
+        for: 6m
         labels:
           alertgroup: "amsterdam"
         annotations:
@@ -109,28 +109,28 @@ groups:
     rules:
       - alert: pdu current draw
         expr: rPDU2PhaseStatusCurrent{site="dublin",rPDU2PhaseStatusIndex="1"} / 10 > 13
-        for: 5m
+        for: 6m
         labels:
           alertgroup: "dublin"
         annotations:
           current: "{{ $value | humanize }}A"
       - alert: site current draw
         expr: sum(rPDU2PhaseStatusCurrent{site="dublin",rPDU2PhaseStatusIndex="1"} / 10) > 17
-        for: 5m
+        for: 6m
         labels:
           alertgroup: "dublin"
         annotations:
           current: "{{ $value | humanize }}A"
       - alert: site temperature
-        expr: min(rPDU2SensorTempHumidityStatusTempC{site="dublin"}) / 10 < 18 or min(rPDU2SensorTempHumidityStatusTempC{site="dublin"}) / 10 > 25
-        for: 5m
+        expr: min(rPDU2SensorTempHumidityStatusTempC{site="dublin"}) / 10 < 18 or min(rPDU2SensorTempHumidityStatusTempC{site="dublin"}) / 10 > 25.5
+        for: 6m
         labels:
           alertgroup: "dublin"
         annotations:
           temperature: "{{ $value | humanize }}C"
       - alert: site humidity
         expr: max(rPDU2SensorTempHumidityStatusRelativeHumidity{site="dublin"}) / 100 < 0.25 or max(rPDU2SensorTempHumidityStatusRelativeHumidity{site="dublin"}) / 100 > 0.65
-        for: 5m
+        for: 6m
         labels:
           alertgroup: "dublin"
         annotations:
@@ -157,7 +157,7 @@ groups:
   - name: filesystem
     rules:
       - alert: readonly filesystem
-        expr: node_filesystem_readonly == 1
+        expr: node_filesystem_readonly > min_over_time(node_filesystem_readonly[7d])
         for: 0m
         labels:
           alertgroup: "{{ $labels.instance }}"
@@ -263,6 +263,11 @@ groups:
           alertgroup: "{{ $labels.site }}"
   - name: mail
     rules:
+      - alert: exim down
+        expr: exim_up == 0
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.instance }}"
       - alert: exim queue length
         expr: exim_queue > exim_queue_limit
         for: 60m