alertgroup: database
annotations:
delay: "{{ $value | humanizeDuration }}"
+ - name: fastly
+ rules:
+ - alert: error rate
+ expr: sum(rate(fastly_rt_status_group_total{status_group="5xx"}[5m])) by (service_name, datacenter) / sum(rate(fastly_rt_status_group_total[5m])) by (service_name, datacenter) > 0.005
+ for: 15m
+ labels:
+ alertgroup: fastly
+ annotations:
+ error_rate: "{{ $value | humanizePercentage }}"
- name: filesystem
rules:
- alert: readonly filesystem
alertgroup: web
annotations:
error_rate: "{{ $value | humanizePercentage }}"
+ - alert: job processing rate
+ expr: rate(pg_stat_user_tables_n_tup_del{datname="openstreetmap",relname="delayed_jobs"}[5m]) / rate(pg_stat_user_tables_n_tup_ins{datname="openstreetmap",relname="delayed_jobs"}[5m]) < 0.9 and ignoring(job, name, datname, relname, schemaname, server) chef_role{name="db-master"} == 1
+ for: 5m
+ labels:
+ alertgroup: web
+ annotations:
+ job_processing_rate: "{{ $value | humanizePercentage }}"