]> git.openstreetmap.org Git - chef.git/commitdiff
Reduce sensitivity of alert for wireguard interface transmit errors
authorTom Hughes <tom@compton.nu>
Thu, 21 Jul 2022 23:30:17 +0000 (00:30 +0100)
committerTom Hughes <tom@compton.nu>
Thu, 21 Jul 2022 23:30:17 +0000 (00:30 +0100)
cookbooks/prometheus/templates/default/alert_rules.yml.erb

index 94a2323d358a03ddc25213ad7c2e04c0945b0b17..78f72e2a1ac642bcd47d81ec6463c33afc1d959d 100644 (file)
@@ -311,12 +311,19 @@ groups:
         annotations:
           bandwidth_used: "{{ $value | humanizePercentage }}"
       - alert: interface transmit errors
         annotations:
           bandwidth_used: "{{ $value | humanizePercentage }}"
       - alert: interface transmit errors
-        expr: rate(node_network_transmit_errs_total[1m]) / rate(node_network_transmit_packets_total[1m]) > 0.01
+        expr: rate(node_network_transmit_errs_total{device!~"wg.*"}[1m]) / rate(node_network_transmit_packets_total{device!~"wg.*"}[1m]) > 0.01
         for: 5m
         labels:
           alertgroup: "{{ $labels.instance }}"
         annotations:
           error_rate: "{{ $value | humanizePercentage }}"
         for: 5m
         labels:
           alertgroup: "{{ $labels.instance }}"
         annotations:
           error_rate: "{{ $value | humanizePercentage }}"
+      - alert: interface transmit errors
+        expr: rate(node_network_transmit_errs_total{device=~"wg.*"}[1m]) / rate(node_network_transmit_packets_total{device=~"wg.*"}[1m]) > 0.05
+        for: 1h
+        labels:
+          alertgroup: "{{ $labels.instance }}"
+        annotations:
+          error_rate: "{{ $value | humanizePercentage }}"
       - alert: interface receive errors
         expr: rate(node_network_receive_errs_total[1m]) / rate(node_network_receive_packets_total[1m]) > 0.01
         for: 5m
       - alert: interface receive errors
         expr: rate(node_network_receive_errs_total[1m]) / rate(node_network_receive_packets_total[1m]) > 0.01
         for: 5m