From: Tom Hughes
Date: Thu, 21 Jul 2022 16:30:14 +0000 (+0100)
Subject: Add alert rules for cisco switches
X-Git-Url: https://git.openstreetmap.org./chef.git/commitdiff_plain/ab63149db6f2c9739f9dc39e0f0b18891f865690?ds=sidebyside

Add alert rules for cisco switches
---

Note (not part of the commit): this hunk inserts a new rule group named
"cisco" between the preceding rule and the "cpu" group. It defines four
alerts (fan, temperature, main power, redundant power). Each expression
selects series whose same-named status label differs from "normal" (or
"ok" for the temperature sensor), must hold for 5m before firing, and sets
the alertgroup label from the series' "site" label.

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index 0d045af71..289b33f57 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -68,6 +68,28 @@ groups:
           alertgroup: "{{ $labels.instance }}"
         annotations:
           down_time: "{{ $value | humanizeDuration }}"
+  - name: cisco
+    rules:
+      - alert: cisco fan alarm
+        expr: rlPhdUnitEnvParamFan1Status{rlPhdUnitEnvParamFan1Status!="normal"} > 0 or rlPhdUnitEnvParamFan2Status{rlPhdUnitEnvParamFan2Status!="normal"} > 0
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.site }}"
+      - alert: cisco temperature alarm
+        expr: rlPhdUnitEnvParamTempSensorStatus{rlPhdUnitEnvParamTempSensorStatus!="ok"} > 0
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.site }}"
+      - alert: cisco main power alarm
+        expr: rlPhdUnitEnvParamMainPSStatus{rlPhdUnitEnvParamMainPSStatus!="normal"} > 0
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.site }}"
+      - alert: cisco redundant power alarm
+        expr: rlPhdUnitEnvParamRedundantPSStatus{rlPhdUnitEnvParamRedundantPSStatus!="normal"} > 0
+        for: 5m
+        labels:
+          alertgroup: "{{ $labels.site }}"
   - name: cpu
     rules:
       - alert: cpu pressure