From a052eb992b55917d502d1301f2c1b7d6f5fb81c9 Mon Sep 17 00:00:00 2001
From: Tom Hughes
Date: Mon, 27 Feb 2023 22:33:53 +0000
Subject: [PATCH] Fix alerting for failed chef runs

Alert when chef-client.service has not reported the "inactive" state in
any sample over the last 6 hours, instead of requiring the "failed"
state to hold continuously for 6 hours.

The 6h range selector is wrapped in max_over_time() because PromQL
comparison operators only accept scalars and instant vectors; comparing
a bare range vector with 0 is rejected when the rule file is loaded.
---
Reviewer notes (ignored by git am):
- Hunk indentation was reconstructed from the usual Prometheus rule
  layout; verify it against the target file before applying.
- The post-image blob id on the index line predates the max_over_time()
  change; regenerate the patch if an exact id is required.

 cookbooks/prometheus/templates/default/alert_rules.yml.erb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index 01d8ae00f..1083f74f7 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -623,8 +623,8 @@ groups:
         labels:
           alertgroup: "{{ $labels.instance }}"
       - alert: systemd failed chef client service
-        expr: node_systemd_unit_state{state="failed",name="chef-client.service"} == 1
-        for: 6h
+        expr: max_over_time(node_systemd_unit_state{state="inactive",name="chef-client.service"}[6h]) == 0
+        for: 0m
         labels:
           alertgroup: "{{ $labels.instance }}"
   - name: tile
-- 
2.39.5