From a4777dddfb2c769864094ec928d5bdcfdd411cc2 Mon Sep 17 00:00:00 2001
From: Tom Hughes
Date: Tue, 28 Feb 2023 08:26:55 +0000
Subject: [PATCH] Fix alerting for failed chef runs

---
Note: the old expression compared a range vector selector ([6h]) directly
with 0; wrapping it in sum_over_time() yields an instant vector, so the
"== 0" comparison is applied to one summed value per series.

 cookbooks/prometheus/templates/default/alert_rules.yml.erb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index 1083f74f7..7a5353372 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -623,7 +623,7 @@ groups:
         labels:
           alertgroup: "{{ $labels.instance }}"
       - alert: systemd failed chef client service
-        expr: node_systemd_unit_state{state="inactive",name="chef-client.service"}[6h] == 0
+        expr: sum_over_time(node_systemd_unit_state{state="inactive",name="chef-client.service"}[6h]) == 0
         for: 0m
         labels:
           alertgroup: "{{ $labels.instance }}"
-- 
2.39.5