X-Git-Url: https://git.openstreetmap.org./chef.git/blobdiff_plain/96e2ac0bfb678221faaa0feba7039deeb853ddf0..a379aac9967d9c28dae172c6e3141cc559cc63fb:/cookbooks/prometheus/templates/default/alert_rules.yml.erb diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb index fdb90765b..b78bbaddd 100644 --- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb +++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb @@ -488,6 +488,11 @@ groups: alertgroup: nominatim annotations: delay: "{{ $value | humanizeDuration }}" + - alert: nominatim connections + expr: sum(nginx_connections_writing and on (instance) chef_role{name="nominatim"}) > 2500 + for: 15m + labels: + alertgroup: nominatim - name: overpass rules: - alert: overpass osm database age @@ -793,7 +798,7 @@ groups: - name: web rules: - alert: web error rate - expr: sum(rate(api_call_count_total{status=~"50[0-8]|5[1-9][0-9]"}[5m])) by (instance) / sum(rate(api_call_count_total[5m])) by (instance) > 0.002 + expr: sum(rate(api_call_count_total{status=~"50[0-8]|5[1-9][0-9]"}[5m])) by (instance) / sum(rate(api_call_count_total[5m])) by (instance) > 0.002 and sum(rate(api_call_count_total{status=~"50[0-8]|5[1-9][0-9]"}[5m])) by (instance) > 0.01 for: 5m labels: alertgroup: web