From: Tom Hughes
Date: Fri, 24 Feb 2023 14:28:18 +0000 (+0000)
Subject: Add an alert for RAID controller battery failures
X-Git-Url: https://git.openstreetmap.org./chef.git/commitdiff_plain/4a1a4f51747e0919a67e081b03ae6f853669c9c0

Add an alert for RAID controller battery failures
---

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index 1736da60c..01d8ae00f 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -530,6 +530,11 @@ groups:
           alertgroup: "prometheus"
   - name: raid
     rules:
+      - alert: raid controller battery failed
+        expr: ohai_controller_info{battery_status="failed"} > 0
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.instance }}"
       - alert: raid array degraded
         expr: ohai_array_info{status="degraded"} > 0
         for: 5m