X-Git-Url: https://git.openstreetmap.org./chef.git/blobdiff_plain/52d0c6357c9d5afe687c8302d02a6e14cb6cecf4..44b52281d387a2a26df9cd9ac5e228c0976089fd:/cookbooks/prometheus/templates/default/alert_rules.yml.erb?ds=inline

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index ab8cfe3ec..23b94727c 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -444,6 +444,25 @@ groups:
         for: 5m
         labels:
           alertgroup: "{{ $labels.instance }}"
+  # Alerts on new error events reported via the rasdaemon_* counters.
+  - name: rasdaemon
+    rules:
+      # Fires at once (for: 0m) when the counter rose in the last minute.
+      - alert: memory controller errors
+        expr: increase(rasdaemon_mc_events_total[1m]) > 0
+        for: 0m
+        labels:
+          alertgroup: "{{ $labels.instance }}"
+        annotations:
+          new_errors: "{{ $value }}"
+      # Same immediate check, applied to the PCIe AER event counter.
+      - alert: pcie aer errors
+        expr: increase(rasdaemon_aer_events_total[1m]) > 0
+        for: 0m
+        labels:
+          alertgroup: "{{ $labels.instance }}"
+        annotations:
+          new_errors: "{{ $value }}"
   - name: smart
     rules:
       - alert: smart failure