From 5bbe6932cd6068a287a109cf811c075ab7d3bb47 Mon Sep 17 00:00:00 2001
From: Tom Hughes
Date: Sun, 28 Aug 2022 22:25:54 +0100
Subject: [PATCH] Add alerts for rasdaemon events

---
 .../templates/default/alert_rules.yml.erb | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index ab8cfe3ec..2920d2b16 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -444,6 +444,24 @@ groups:
         for: 5m
         labels:
           alertgroup: "{{ $labels.instance }}"
+  - name: rasdaemon
+    rules:
+      # increase() only accepts a range vector, so each counter needs a [window].
+      # NOTE(review): 5m is an assumed window; it must span at least two scrapes.
+      - alert: memory controller errors
+        expr: increase(rasdaemon_mc_events_total[5m]) > 0
+        for: 0m
+        labels:
+          alertgroup: "{{ $labels.instance }}"
+        annotations:
+          new_errors: "{{ $value }}"
+      - alert: pcie aer errors
+        expr: increase(rasdaemon_aer_events_total[5m]) > 0
+        for: 0m
+        labels:
+          alertgroup: "{{ $labels.instance }}"
+        annotations:
+          new_errors: "{{ $value }}"
   - name: smart
     rules:
       - alert: smart failure
-- 
2.39.5