X-Git-Url: https://git.openstreetmap.org./chef.git/blobdiff_plain/e442c6a6365b6be91e350d9fe79d3a6227e7295c..49cdf148a0562915edfd17e9b4ab19c9cd7aa721:/cookbooks/prometheus/templates/default/alert_rules.yml.erb

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index 23bb157e4..28a5b3113 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -1,13 +1,6 @@
 # DO NOT EDIT - This file is being maintained by Chef
 
 groups:
-  - name: alertmanager
-    rules:
-      - alert: prometheus target missing
-        expr: up == 0
-        for: 10m
-        labels:
-          alertgroup: "prometheus"
   - name: amsterdam
     rules:
       - alert: pdu current draw
@@ -122,7 +115,12 @@ groups:
         annotations:
           error_rate: "{{ $value | humanizePercentage }}"
       - alert: fastly healthcheck failing
-        expr: fastly_healthcheck_status = 0
+        expr: count(fastly_healthcheck_status == 0) > 0
+        for: 15m
+        labels:
+          alertgroup: fastly
+      - alert: fastly healthcheck failing
+        expr: count(fastly_healthcheck_status == 0) > 4
         for: 5m
         labels:
           alertgroup: fastly
@@ -422,6 +420,18 @@ groups:
           alertgroup: "{{ $labels.instance }}"
         annotations:
           queries: "{{ $value }}"
+  - name: prometheus
+    rules:
+      - alert: prometheus configuration error
+        expr: prometheus_config_last_reload_successful == 0
+        for: 10m
+        labels:
+          alertgroup: "prometheus"
+      - alert: prometheus target missing
+        expr: up == 0
+        for: 10m
+        labels:
+          alertgroup: "prometheus"
   - name: raid
     rules:
       - alert: raid array degraded
@@ -442,7 +452,7 @@ groups:
         labels:
           alertgroup: "{{ $labels.instance }}"
       - alert: smart ssd wearout approaching
-        expr: smart_percentage_used >= 90
+        expr: smart_percentage_used >= 80
         for: 60m
         labels:
           alertgroup: "{{ $labels.instance }}"