git.openstreetmap.org Git - chef.git/blob - cookbooks/prometheus/templates/default/prometheus.yml.erb
Add alerts for degraded raid arrays and failed raid disks
[chef.git] / cookbooks / prometheus / templates / default / prometheus.yml.erb
1 # DO NOT EDIT - This file is being maintained by Chef
2
3 global:  # file-wide defaults; several scrape jobs below set their own scrape_interval
4   scrape_interval: 15s
5   evaluation_interval: 15s
6
7 alerting:
8   alertmanagers:
9     - path_prefix: /alertmanager  # same prefix that starts the alertmanager job's metrics_path
10       static_configs:
11         - targets:
12             - localhost:9093  # same address the alertmanager scrape job targets
13
14 rule_files:
15   - /etc/prometheus/*_rules.yml  # glob: any file in /etc/prometheus whose name ends in _rules.yml
16
17 scrape_configs:  # static jobs first, then one templated job per @jobs entry, then snmp
18   - job_name: prometheus
19     scrape_interval: 5s  # shorter than the 15s value under global
20     scrape_timeout: 5s  # set equal to this job's scrape_interval
21     metrics_path: /prometheus/metrics
22     static_configs:
23       - targets:
24           - localhost:9090
25   - job_name: alertmanager  # no scrape_interval or scrape_timeout is set for this job
26     metrics_path: /alertmanager/metrics
27     static_configs:
28       - targets:
29           - localhost:9093  # same address as the alerting.alertmanagers target
30   - job_name: promscale  # no metrics_path set; NOTE(review): presumably the Prometheus default path applies
31     static_configs:
32       - targets:
33           - localhost:9201  # same host:port as the remote_write url at the end of this file
34   - job_name: ssl
35     scrape_interval: 15m
36     metrics_path: /probe
37     static_configs:  # one target group per certificate, ordered by the certificate's first domain
38 <% @certificates.values.sort_by { |c| c[:domains].first }.each do |certificate| -%>
39       - targets:  # one first-domain/node-name:443 entry per node, ordered by node name
40 <% certificate[:nodes].sort_by { |h| h[:name] }.each do |host| -%>
41           - <%= certificate[:domains].first %>/<%= host[:name] %>:443
42 <% end -%>
43         labels:
44           domain: <%= certificate[:domains].first %>
45 <% end -%>
46     relabel_configs:  # split the combined domain/host:443 target back into separate values
47       - source_labels: [__address__]
48         regex: "([^/]+)/.*"  # captures the part before the first slash (the domain)
49         target_label: __param_module
50       - source_labels: [__address__]
51         regex: "[^/]+/(.*)"  # captures everything after the first slash (host:443)
52         target_label: __param_target
53       - source_labels: [__param_target]
54         regex: "([^.]+)\\..*"  # captures the text before the first dot
55         target_label: instance
56       - target_label: __address__  # no source_labels: the address is always overwritten
57         replacement: 127.0.0.1:9219  # NOTE(review): presumably the local exporter serving /probe - confirm
58 <% @jobs.sort.each do |name, targets| -%>
59   - job_name: <%= name %>
60 <% if targets.first[:scrape_interval] -%>
61     scrape_interval: <%= targets.first[:scrape_interval] %>
62 <% end -%>
63 <% if targets.first[:sni] -%>
64     tls_config:  # scrape_interval and sni are both read from the job's first target only
65       server_name: <%= targets.first[:sni] %>
66     relabel_configs:  # emitted only inside the sni branch
67       - target_label: __scheme__
68         replacement: https  # sets the scheme label to https for every target of an sni job
69 <% end -%>
70     static_configs:  # one target group per target, each carrying its own instance label
71 <% targets.each do |target| -%>
72       - targets:
73           - "<%= target[:address] %>"
74         labels:
75           instance: <%= target[:instance] %>
76 <% end -%>
77     metric_relabel_configs:  # this key is emitted for every job, even when no target adds a rule
78 <% targets.each do |target| -%>
79 <% target[:metric_relabel].each do |relabel| -%>
80       - source_labels: [instance,<%= relabel[:source_labels] %>]  # instance goes first so the regex below can pin the rule to one target
81         regex: "<%= target[:instance] %>;<%= relabel[:regex] %>"
82         action: <%= relabel[:action] %>
83 <% end -%>
84 <% end -%>
85 <% end -%>
86   - job_name: snmp
87     scrape_interval: 5m
88     scrape_timeout: 2m
89     metrics_path: /snmp
90     static_configs:  # one target group per SNMP target, ordered by instance
91 <% @snmp_targets.sort_by { |t| t[:instance] }.each do |target| -%>
92       - targets:  # one target/module/address entry per module listed for the target
93 <% target[:modules].each do |module_name| -%>
94           - "<%= target[:target] %>/<%= module_name %>/<%= target[:address] %>"
95 <% end -%>
96         labels:  # instance first, then the target's own labels sorted by name
97           instance: <%= target[:instance] %>
98 <% target[:labels].sort.each do |name, value| -%>
99           <%= name %>: <%= value %>
100 <% end -%>
101 <% end -%>
102     relabel_configs:  # split the combined target/module/address value into its three parts
103       - source_labels: [__address__]
104         regex: "([^/]+)/[^/]+/.*"  # captures the first segment
105         target_label: __param_target
106       - source_labels: [__address__]
107         regex: "[^/]+/([^/]+)/.*"  # captures the second segment
108         target_label: __param_module
109       - source_labels: [__address__]
110         regex: "[^/]+/[^/]+/(.*)"  # captures everything after the second slash
111         target_label: __address__  # the address is rewritten last, after both params were read from it
112
113 remote_write:
114   - url: "http://localhost:9201/write"  # same host:port as the promscale scrape job
115     write_relabel_configs:  # drop rules keyed on the metric name
116       - source_labels: [__name__]
117         regex: "go_.*"  # metric names starting with go_
118         action: drop
119       - source_labels: [__name__]
120         regex: "promhttp_.*"  # metric names starting with promhttp_
121         action: drop
122     queue_config:  # NOTE(review): tuning values; rationale is not visible in this file
123       capacity: 10000
124       min_shards: 4
125       batch_send_deadline: 30s
126       max_backoff: 1s