X-Git-Url: https://git.openstreetmap.org./chef.git/blobdiff_plain/8a772a64629eb2e7fe271191daea7579a35cdf80..88f820a038a839a687db7374235eca3003790296:/cookbooks/prometheus/templates/default/alert_rules.yml.erb?ds=inline

Summary of this patch to the Prometheus alert rules template:
  * adds an "uplink" alert to the amsterdam and dublin groups, firing when
    ifOperStatus for ifName ge-[01]/2/2 has been != 1 for 6m;
  * changes the "juniper cpu alarm" expression from jnxOperatingCPU to
    jnxOperating5MinLoadAvg / 200 > 0.5 and adds a load_average annotation;
  * makes the "exim queue length" comparison ignore the job label when
    matching exim_queue against exim_queue_limit.
NOTE(review): line breaks and leading whitespace were reconstructed from the
hunk line counts and the YAML nesting - confirm against the upstream blob
before applying.

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index 2ab437c30..396de8de4 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -3,6 +3,13 @@
 groups:
   - name: amsterdam
     rules:
+      - alert: uplink
+        expr: ifOperStatus{site="amsterdam",ifName=~"ge-[01]/2/2"} != 1
+        for: 6m
+        labels:
+          alertgroup: "amsterdam"
+        annotations:
+          status: "{{ $value }}"
       - alert: pdu current draw
         expr: rPDU2PhaseStatusCurrent{site="amsterdam",rPDU2PhaseStatusIndex="1"} / 10 > 28
         for: 6m
@@ -109,6 +116,13 @@ groups:
           failure_rate: "{{ $value }} jobs/s"
   - name: dublin
     rules:
+      - alert: uplink
+        expr: ifOperStatus{site="dublin",ifName=~"ge-[01]/2/2"} != 1
+        for: 6m
+        labels:
+          alertgroup: "dublin"
+        annotations:
+          status: "{{ $value }}"
       - alert: pdu current draw
         expr: rPDU2PhaseStatusCurrent{site="dublin",rPDU2PhaseStatusIndex="1"} / 10 > 28
         for: 6m
@@ -249,10 +263,12 @@ groups:
   - name: juniper
     rules:
       - alert: juniper cpu alarm
-        expr: jnxOperatingCPU{jnxOperatingContentsIndex="7"} > 30
+        expr: jnxOperating5MinLoadAvg{jnxOperatingContentsIndex="9"} / 200 > 0.5
         for: 5m
         labels:
           alertgroup: "{{ $labels.site }}"
+        annotations:
+          load_average: "{{ $value | humanizePercentage }}"
       - alert: juniper fan alarm
         expr: jnxOperatingState{jnxOperatingContentsIndex="4",jnxOperatingState!~"running.*"} > 0
         for: 5m
@@ -271,7 +287,7 @@ groups:
         labels:
           alertgroup: "{{ $labels.instance }}"
       - alert: exim queue length
-        expr: exim_queue > exim_queue_limit
+        expr: exim_queue > ignoring(job) exim_queue_limit
         for: 60m
         labels:
           alertgroup: mail