X-Git-Url: https://git.openstreetmap.org./chef.git/blobdiff_plain/9cb8207449f58817c59494a59f6a6f6a894c5088..2f2297f6e834712d6dd28e00ff1d9bc6a6450009:/cookbooks/prometheus/templates/default/alert_rules.yml.erb?ds=sidebyside diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb index 6a9ad6120..417641cf0 100644 --- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb +++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb @@ -119,7 +119,7 @@ groups: for: 15m labels: alertgroup: fastly - - alert: fastly healthcheck failing + - alert: multiple fastly healthchecks failing expr: count(fastly_healthcheck_status == 0) > 4 for: 5m labels: @@ -320,7 +320,7 @@ groups: alertgroup: "{{ $labels.instance }}" annotations: error_rate: "{{ $value | humanizePercentage }}" - - alert: interface transmit errors + - alert: wireguard interface transmit errors expr: rate(node_network_transmit_errs_total{device=~"wg.*"}[1m]) / rate(node_network_transmit_packets_total{device=~"wg.*"}[1m]) > 0.05 for: 1h labels: @@ -537,7 +537,7 @@ groups: for: 5m labels: alertgroup: "{{ $labels.instance }}" - - alert: systemd failed service + - alert: systemd failed chef client service expr: node_systemd_unit_state{state="failed",name="chef-client.service"} == 1 for: 6h labels: