git.openstreetmap.org Git - chef.git/commitdiff
Fix alerting for failed chef runs
author: Tom Hughes <tom@compton.nu>
Tue, 28 Feb 2023 08:26:55 +0000 (08:26 +0000)
committer: Tom Hughes <tom@compton.nu>
Tue, 28 Feb 2023 08:26:55 +0000 (08:26 +0000)
cookbooks/prometheus/templates/default/alert_rules.yml.erb

index 1083f74f77b547722d48fcaad22d60021c60a798..7a535337264c8a90d42bd00653ecc5ff64ddb12a 100644 (file)
@@ -623,7 +623,7 @@ groups:
         labels:
           alertgroup: "{{ $labels.instance }}"
       - alert: systemd failed chef client service
         labels:
           alertgroup: "{{ $labels.instance }}"
       - alert: systemd failed chef client service
-        expr: node_systemd_unit_state{state="inactive",name="chef-client.service"}[6h] == 0
+        expr: sum_over_time(node_systemd_unit_state{state="inactive",name="chef-client.service"}[6h]) == 0
         for: 0m
         labels:
           alertgroup: "{{ $labels.instance }}"
         for: 0m
         labels:
           alertgroup: "{{ $labels.instance }}"