git.openstreetmap.org Git - chef.git/commitdiff
Fix alerting for failed chef runs
author: Tom Hughes <tom@compton.nu>
Mon, 27 Feb 2023 22:33:53 +0000 (22:33 +0000)
committer: Tom Hughes <tom@compton.nu>
Mon, 27 Feb 2023 22:33:53 +0000 (22:33 +0000)
cookbooks/prometheus/templates/default/alert_rules.yml.erb

index 01d8ae00f654fce5a0120e3427928fd340c4d8df..1083f74f77b547722d48fcaad22d60021c60a798 100644 (file)
@@ -623,8 +623,8 @@ groups:
         labels:
           alertgroup: "{{ $labels.instance }}"
       - alert: systemd failed chef client service
-        expr: node_systemd_unit_state{state="failed",name="chef-client.service"} == 1
-        for: 6h
+        expr: node_systemd_unit_state{state="inactive",name="chef-client.service"}[6h] == 0
+        for: 0m
         labels:
           alertgroup: "{{ $labels.instance }}"
   - name: tile