From: Tom Hughes
Date: Mon, 25 Jul 2022 11:52:28 +0000 (+0100)
Subject: Add alerts for degraded raid arrays and failed raid disks
X-Git-Url: https://git.openstreetmap.org./chef.git/commitdiff_plain/e442c6a6365b6be91e350d9fe79d3a6227e7295c?ds=inline

Add alerts for degraded raid arrays and failed raid disks
---
Reviewer notes (free text between the "---" line and the first
"diff --git" line; it is not part of the patch payload):

  * The single hunk inserts a new rule group named "raid" directly
    before the existing "smart" group.
  * "raid array degraded" selects ohai_array_info series labelled
    status="degraded" whose value is greater than 0.
  * "raid disk failed" selects ohai_disk_info series labelled
    status="failed" whose value is greater than 0.
  * Both rules use "for: 5m" and set the alertgroup label from the
    instance label; neither rule defines annotations.
  * NOTE(review): line breaks and YAML indentation were reconstructed
    from a whitespace-collapsed copy of this patch. The two-space
    nesting below is assumed from the rule-file structure -- confirm it
    against the target file before applying.

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index c108451ab..23bb157e4 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -422,6 +422,18 @@ groups:
           alertgroup: "{{ $labels.instance }}"
         annotations:
           queries: "{{ $value }}"
+  - name: raid
+    rules:
+      - alert: raid array degraded
+        expr: ohai_array_info{status="degraded"} > 0
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.instance }}"
+      - alert: raid disk failed
+        expr: ohai_disk_info{status="failed"} > 0
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.instance }}"
   - name: smart
     rules:
       - alert: smart failure