git.openstreetmap.org Git - chef.git/commitdiff
Set a minimum threshold for error rate alerts
author Tom Hughes <tom@compton.nu>
Wed, 26 Jun 2024 19:34:15 +0000 (20:34 +0100)
committer Tom Hughes <tom@compton.nu>
Wed, 26 Jun 2024 19:34:15 +0000 (20:34 +0100)
This avoids very small numbers of errors on idle machines
triggering an error rate alert.

cookbooks/prometheus/templates/default/alert_rules.yml.erb

index dfb83891d33b18040190d67359e071e0fd29a78f..b78bbaddd51fb8a54cc2834c4815f28a45c85af4 100644 (file)
@@ -798,7 +798,7 @@ groups:
   - name: web
     rules:
       - alert: web error rate
   - name: web
     rules:
       - alert: web error rate
-        expr: sum(rate(api_call_count_total{status=~"50[0-8]|5[1-9][0-9]"}[5m])) by (instance) / sum(rate(api_call_count_total[5m])) by (instance) > 0.002
+        expr: sum(rate(api_call_count_total{status=~"50[0-8]|5[1-9][0-9]"}[5m])) by (instance) / sum(rate(api_call_count_total[5m])) by (instance) > 0.002 and sum(rate(api_call_count_total{status=~"50[0-8]|5[1-9][0-9]"}[5m])) by (instance) > 0.01
         for: 5m
         labels:
           alertgroup: web
         for: 5m
         labels:
           alertgroup: web