delay: "{{ $value | humanizeDuration }}"
# Rule group "fastly": alerts built from the fastly_* metrics referenced below.
# NOTE(review): the leading "- " / "+ " markers look like removed/added diff
# lines with the original indentation stripped — confirm before parsing as YAML.
- name: fastly
rules:
# The alert name changes from "error rate" to "fastly error rate".
- - alert: error rate
+ - alert: fastly error rate
# Ratio of the 5-minute 5xx rate to the 5-minute total rate, grouped by
# (service_name, datacenter); matches when the ratio exceeds 0.005 (0.5%).
expr: sum(rate(fastly_rt_status_group_total{status_group="5xx"}[5m])) by (service_name, datacenter) / sum(rate(fastly_rt_status_group_total[5m])) by (service_name, datacenter) > 0.005
# The condition has to hold for 15 minutes before the alert fires.
for: 15m
labels:
alertgroup: fastly
annotations:
# $value is the ratio computed by expr, rendered as a percentage.
error_rate: "{{ $value | humanizePercentage }}"
# Matches when fastly_healthcheck_status equals 0, sustained for 5 minutes.
# NOTE(review): presumably 0 means the healthcheck is failing — confirm against the exporter.
+ - alert: fastly healthcheck failing
+ expr: fastly_healthcheck_status == 0
+ for: 5m
+ labels:
+ alertgroup: fastly
# Rule group "filesystem".
# NOTE(review): only part of this group is visible here — the "readonly
# filesystem" rule shows no expr, and the alertgroup/annotations lines below
# may belong to a later rule whose header is not shown. Confirm against the
# full file before editing.
- name: filesystem
rules:
- alert: readonly filesystem
# alertgroup is templated from the alerting series' "instance" label.
alertgroup: "{{ $labels.instance }}"
annotations:
# Renders the raw value of the alert expression.
queries: "{{ $value }}"
# Rule group "raid": two alerts over the ohai_array_info and ohai_disk_info metrics.
# NOTE(review): every line carries a leading "+ ", which looks like an added-line
# diff marker with the original indentation stripped — confirm before parsing as YAML.
+ - name: raid
+ rules:
# Matches ohai_array_info series labelled status="degraded" whose value is above 0,
# sustained for 5 minutes.
+ - alert: raid array degraded
+ expr: ohai_array_info{status="degraded"} > 0
+ for: 5m
+ labels:
# alertgroup is templated from the alerting series' "instance" label.
+ alertgroup: "{{ $labels.instance }}"
# Matches ohai_disk_info series labelled status="failed" whose value is above 0,
# sustained for 5 minutes.
+ - alert: raid disk failed
+ expr: ohai_disk_info{status="failed"} > 0
+ for: 5m
+ labels:
# alertgroup is templated from the alerting series' "instance" label.
+ alertgroup: "{{ $labels.instance }}"
- name: smart
rules:
- alert: smart failure