+ # Alert rules for the "tile" group; both rules carry the alertgroup label "tile".
+ - name: tile
+   rules:
+     # Fires once renderd_replication_delay has stayed above 120 for 5 minutes.
+     - alert: renderd replication delay
+       expr: renderd_replication_delay > 120
+       for: 5m
+       labels:
+         alertgroup: tile
+       annotations:
+         # humanizeDuration formats the alert value (taken as seconds) for display.
+         delay: '{{ $value | humanizeDuration }}'
+     # Fires once, per instance, the share of HTTP 404 responses among all
+     # modtile responses (5m rates) has stayed above 0.05 for 5 minutes.
+     - alert: missed tile rate
+       # Folded scalar: the lines below join with single spaces into one expression.
+       expr: >-
+         sum(rate(modtile_http_response_total{code="404"}[5m])) by (instance)
+         / sum(rate(modtile_http_response_total[5m])) by (instance)
+         > 0.05
+       for: 5m
+       labels:
+         alertgroup: tile
+       annotations:
+         # humanizePercentage formats the ratio as a percentage.
+         miss_rate: '{{ $value | humanizePercentage }}'
+ # Alert rules for the "time" group; each alert's alertgroup label is templated
+ # from the instance label of the firing series.
+ - name: time
+   rules:
+     # Fires once the minimum of node_timex_sync_status over the last minute is 0
+     # while node_timex_maxerror_seconds is at least 16, sustained for 5 minutes.
+     - alert: clock not synchronising
+       expr: min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16
+       for: 5m
+       labels:
+         alertgroup: "{{ $labels.instance }}"
+     # Fires once the clock offset has been beyond +/-0.05 for 5 minutes without
+     # trending back toward zero (5m derivative has the same sign as the offset,
+     # or is zero).
+     - alert: clock skew detected
+       # Folded scalar: the lines below join with single spaces into one expression.
+       expr: >-
+         (node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0)
+         or
+         (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0)
+       for: 5m
+       labels:
+         alertgroup: "{{ $labels.instance }}"
+       annotations:
+         # `query` returns a list of samples, not a number, so the first sample's
+         # value must be extracted before humanizeDuration can format it.
+         skew: "{{ with printf \"node_timex_offset_seconds{instance='%s'}\" $labels.instance | query }} {{ . | first | value | humanizeDuration }}{{ end }}"