+ # Rule group "memory": alerts on low available memory, sustained major page
+ # faulting, and OOM kills. Each rule copies the triggering series' `instance`
+ # label into an `alertgroup` label and reports the measured value in a
+ # single annotation.
+ - name: memory
+   rules:
+     # Available memory has stayed below 10% of total memory for 5 minutes.
+     - alert: low memory
+       expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
+       for: 5m
+       labels:
+         alertgroup: '{{ $labels.instance }}'
+       annotations:
+         # One decimal place instead of the raw float64 rendering.
+         memory_free: '{{ $value | printf "%.1f" }}%'
+     # Per-second rate of major page faults has stayed above 1000 for 5 minutes.
+     # NOTE(review): rate() needs at least two samples inside the 1m window;
+     # this assumes a scrape interval well below 1m - confirm against the
+     # scrape configuration.
+     - alert: memory pressure
+       expr: rate(node_vmstat_pgmajfault[1m]) > 1000
+       for: 5m
+       labels:
+         alertgroup: '{{ $labels.instance }}'
+       annotations:
+         # Whole faults per second; the fraction is noise at this magnitude.
+         major_page_faults: '{{ $value | printf "%.0f" }} faults/s'
+     # The OOM-kill counter increased within the last minute.
+     # NOTE(review): same 1m-window assumption about the scrape interval as
+     # the rate() rule above - confirm.
+     - alert: oom kill detected
+       expr: increase(node_vmstat_oom_kill[1m]) > 0
+       # No pending period: fire on the first evaluation that matches.
+       for: 0m
+       labels:
+         alertgroup: '{{ $labels.instance }}'
+       annotations:
+         # increase() extrapolates and can return a fractional count; round
+         # it to a whole number of kills.
+         new_oom_kills: '{{ $value | printf "%.0f" }}'