From a45c1200e1b7b9ffdefe5fef3af9b1a93f5de31d Mon Sep 17 00:00:00 2001
From: Tom Hughes
Date: Mon, 6 Mar 2023 14:39:13 +0000
Subject: [PATCH] Add alert for failing discourse jobs

---
 .../prometheus/templates/default/alert_rules.yml.erb | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index 848123602..239731607 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -105,6 +105,15 @@ groups:
           alertgroup: database
         annotations:
           delay: "{{ $value | humanizeDuration }}"
+  - name: discourse
+    rules:
+      - alert: discourse job failure rate
+        expr: rate(discourse_job_failures[5m]) > 0
+        for: 5m
+        labels:
+          alertgroup: discourse
+        annotations:
+          failure_rate: "{{ $value }} jobs/s"
   - name: dublin
     rules:
       - alert: pdu current draw
-- 
2.39.5