From: Tom Hughes Date: Wed, 26 Oct 2022 21:17:54 +0000 (+0100) Subject: Resolve duplicate alert names X-Git-Url: https://git.openstreetmap.org./chef.git/commitdiff_plain/2f2297f6e834712d6dd28e00ff1d9bc6a6450009 Resolve duplicate alert names --- diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb index 6a9ad6120..417641cf0 100644 --- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb +++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb @@ -119,7 +119,7 @@ groups: for: 15m labels: alertgroup: fastly - - alert: fastly healthcheck failing + - alert: multiple fastly healthchecks failing expr: count(fastly_healthcheck_status == 0) > 4 for: 5m labels: @@ -320,7 +320,7 @@ groups: alertgroup: "{{ $labels.instance }}" annotations: error_rate: "{{ $value | humanizePercentage }}" - - alert: interface transmit errors + - alert: wireguard interface transmit errors expr: rate(node_network_transmit_errs_total{device=~"wg.*"}[1m]) / rate(node_network_transmit_packets_total{device=~"wg.*"}[1m]) > 0.05 for: 1h labels: @@ -537,7 +537,7 @@ groups: for: 5m labels: alertgroup: "{{ $labels.instance }}" - - alert: systemd failed service + - alert: systemd failed chef client service expr: node_systemd_unit_state{state="failed",name="chef-client.service"} == 1 for: 6h labels: