annotations:
current: "{{ $value | humanize }}A"
# Amsterdam site temperature: takes the minimum of
# rPDU2SensorTempHumidityStatusTempC over all series with site="amsterdam",
# divides it by 10, and fires when the result stays below 18 or above the
# upper bound (26, previously 25.5) for 6 minutes.
# Presumably the raw metric is in tenths of a degree Celsius, hence the
# division by 10 -- TODO confirm against the PDU MIB.
# NOTE(review): the upper bound is also compared against min(), so it only
# trips once every sensor reads above the threshold -- confirm that this is
# intended rather than max().
- alert: site temperature
- expr: min(rPDU2SensorTempHumidityStatusTempC{site="amsterdam"}) / 10 < 18 or min(rPDU2SensorTempHumidityStatusTempC{site="amsterdam"}) / 10 > 25.5
+ expr: min(rPDU2SensorTempHumidityStatusTempC{site="amsterdam"}) / 10 < 18 or min(rPDU2SensorTempHumidityStatusTempC{site="amsterdam"}) / 10 > 26
for: 6m
labels:
alertgroup: "amsterdam"
annotations:
current: "{{ $value | humanize }}A"
# Dublin site temperature: takes the minimum of
# rPDU2SensorTempHumidityStatusTempC over all series with site="dublin",
# divides it by 10, and fires when the result stays below 18 or above the
# upper bound (26, previously 25.5) for 6 minutes.
# Presumably the raw metric is in tenths of a degree Celsius, hence the
# division by 10 -- TODO confirm against the PDU MIB.
# NOTE(review): the upper bound is also compared against min(), so it only
# trips once every sensor reads above the threshold -- confirm that this is
# intended rather than max().
- alert: site temperature
- expr: min(rPDU2SensorTempHumidityStatusTempC{site="dublin"}) / 10 < 18 or min(rPDU2SensorTempHumidityStatusTempC{site="dublin"}) / 10 > 25.5
+ expr: min(rPDU2SensorTempHumidityStatusTempC{site="dublin"}) / 10 < 18 or min(rPDU2SensorTempHumidityStatusTempC{site="dublin"}) / 10 > 26
for: 6m
labels:
alertgroup: "dublin"
alertgroup: "{{ $labels.instance }}"
annotations:
new_oom_kills: "{{ $value }}"
+ # MySQL server health alerts; each alert is grouped per scraped instance.
+ - name: mysql
+ rules:
+ # Fires when an instance has reported mysql_up == 0 for 1 minute.
+ - alert: mysql down
+ expr: mysql_up == 0
+ for: 1m
+ labels:
+ alertgroup: "{{ $labels.instance }}"
+ # Fires when the number of currently open connections has exceeded 80% of
+ # the configured max_connections for 1 minute.
+ # Uses threads_connected (a live gauge) rather than max_used_connections:
+ # the latter is a high-water mark since server start, so an alert built on
+ # it would never resolve until the server is restarted or its status
+ # counters are flushed.
+ - alert: mysql connection limit
+ expr: mysql_global_status_threads_connected / mysql_global_variables_max_connections > 0.8
+ for: 1m
+ labels:
+ alertgroup: "{{ $labels.instance }}"
+ annotations:
+ # $value is the used/limit ratio, rendered as a percentage.
+ connections_used: "{{ $value | humanizePercentage }}"
- name: network
rules:
- alert: interface transmit rate
labels:
alertgroup: "{{ $labels.instance }}"
annotations:
- new_ercrors: "{{ $value }}"
+ new_errors: "{{ $value }}"
- name: smart
rules:
- alert: smart failure
alertgroup: "{{ $labels.instance }}"
annotations:
percentage_used: "{{ $value | humanizePercentage }}"
+ # SNMP scrape completeness alerts.
+ - name: snmp
+ rules:
+ # Fires when the current snmp_scrape_pdus_returned has been lower than its
+ # own maximum over the past day for 15 minutes.
+ - alert: snmp pdus missing
+ expr: max_over_time(snmp_scrape_pdus_returned[1d]) - snmp_scrape_pdus_returned > 0
+ for: 15m
+ labels:
+ alertgroup: "snmp"
+ annotations:
+ # $value is the shortfall relative to the 1-day maximum.
+ missing_pdus: "{{ $value }}"
- name: ssl
rules:
- alert: ssl certificate probe failed