From: Tom Hughes Date: Wed, 26 Jun 2024 19:34:15 +0000 (+0100) Subject: Set a minimum threshold for error rate alerts X-Git-Url: https://git.openstreetmap.org./chef.git/commitdiff_plain/bbbe7474976e33c5471f7ecdabd4cf2a44c25ee2 Set a minimum threshold for error rate alerts This avoids very small numbers of errors on idle machines triggering an error rate alert. --- diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb index dfb83891d..b78bbaddd 100644 --- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb +++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb @@ -798,7 +798,7 @@ groups: - name: web rules: - alert: web error rate - expr: sum(rate(api_call_count_total{status=~"50[0-8]|5[1-9][0-9]"}[5m])) by (instance) / sum(rate(api_call_count_total[5m])) by (instance) > 0.002 + expr: sum(rate(api_call_count_total{status=~"50[0-8]|5[1-9][0-9]"}[5m])) by (instance) / sum(rate(api_call_count_total[5m])) by (instance) > 0.002 and sum(rate(api_call_count_total{status=~"50[0-8]|5[1-9][0-9]"}[5m])) by (instance) > 0.01 for: 5m labels: alertgroup: web