X-Git-Url: https://git.openstreetmap.org./chef.git/blobdiff_plain/a5ea7539a310ae6e5a975c4f15266d815e833f88..88f820a038a839a687db7374235eca3003790296:/cookbooks/prometheus/templates/default/alert_rules.yml.erb

diff --git a/cookbooks/prometheus/templates/default/alert_rules.yml.erb b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
index c7c3a1bac..396de8de4 100644
--- a/cookbooks/prometheus/templates/default/alert_rules.yml.erb
+++ b/cookbooks/prometheus/templates/default/alert_rules.yml.erb
@@ -3,6 +3,13 @@
 groups:
   - name: amsterdam
     rules:
+      - alert: uplink
+        expr: ifOperStatus{site="amsterdam",ifName=~"ge-[01]/2/2"} != 1
+        for: 6m
+        labels:
+          alertgroup: "amsterdam"
+        annotations:
+          status: "{{ $value }}"
       - alert: pdu current draw
         expr: rPDU2PhaseStatusCurrent{site="amsterdam",rPDU2PhaseStatusIndex="1"} / 10 > 28
         for: 6m
@@ -109,6 +116,13 @@ groups:
           failure_rate: "{{ $value }} jobs/s"
   - name: dublin
     rules:
+      - alert: uplink
+        expr: ifOperStatus{site="dublin",ifName=~"ge-[01]/2/2"} != 1
+        for: 6m
+        labels:
+          alertgroup: "dublin"
+        annotations:
+          status: "{{ $value }}"
       - alert: pdu current draw
         expr: rPDU2PhaseStatusCurrent{site="dublin",rPDU2PhaseStatusIndex="1"} / 10 > 28
         for: 6m
@@ -249,10 +263,12 @@ groups:
   - name: juniper
     rules:
       - alert: juniper cpu alarm
-        expr: jnxOperatingCPU{jnxOperatingContentsIndex="7"} > 30
+        expr: jnxOperating5MinLoadAvg{jnxOperatingContentsIndex="9"} / 200 > 0.5
         for: 5m
         labels:
           alertgroup: "{{ $labels.site }}"
+        annotations:
+          load_average: "{{ $value | humanizePercentage }}"
       - alert: juniper fan alarm
         expr: jnxOperatingState{jnxOperatingContentsIndex="4",jnxOperatingState!~"running.*"} > 0
         for: 5m