1 # DO NOT EDIT - This file is being maintained by Chef
3 # Configure alert targets
4 contact.admins.command mail -s "Munin Notification" admins@openstreetmap.org
5 contact.null.command cat > /dev/null
7 # Send alerts to the admins by default
10 # Ignore uncontactable hosts for twelve hours
<%# Emit one host section per entry in @clients, ordered by hostname. -%>
12 <% @clients.sort { |a,b| a[:hostname] <=> b[:hostname] }.each do |client| -%>
14 # Configure monitoring for <%= client[:fqdn] %>
15 [<%= client[:hostname] %>.openstreetmap]
<%# Tests whether the time elapsed since the client's ohai_time exceeds @expiry_time. -%>
<%# NOTE(review): the lines guarded by this test are not visible in this excerpt - confirm what it emits. -%>
16 <% if Time.now - Time.at(client[:ohai_time]) > @expiry_time -%>
<%# Pick the address to poll from the zone of the client's external network role: -%>
<%# "ucl" uses the internal address; "ic" uses the internal address, or the external one when the internal one is nil. -%>
19 <% if client[:networking][:roles][:external][:zone] == "ucl" -%>
20 address <%= client.internal_ipaddress %>
21 <% elsif client[:networking][:roles][:external][:zone] == "ic" -%>
22 address <%= client.internal_ipaddress || client.external_ipaddress %>
<%# NOTE(review): presumably the fallback for every other zone - confirm against the full template. -%>
24 address <%= client.external_ipaddress %>
<%# Per-plugin settings from client[:munin][:plugins], walked in sorted key order. -%>
27 <% if client[:munin][:plugins] -%>
28 <% client[:munin][:plugins].keys.sort.each do |plugin| -%>
29 <% client[:munin][:plugins][plugin].keys.sort.each do |value| -%>
<%# A Hash setting can carry :graph, :warning and :critical; each one that is set becomes its own directive. -%>
30 <% if client[:munin][:plugins][plugin][value].kind_of?(Hash) -%>
31 <% if client[:munin][:plugins][plugin][value][:graph] -%>
32 <%= plugin %>.<%= value %>.graph <%= client[:munin][:plugins][plugin][value][:graph] %>
34 <% if client[:munin][:plugins][plugin][value][:warning] -%>
35 <%= plugin %>.<%= value %>.warning <%= client[:munin][:plugins][plugin][value][:warning] %>
37 <% if client[:munin][:plugins][plugin][value][:critical] -%>
38 <%= plugin %>.<%= value %>.critical <%= client[:munin][:plugins][plugin][value][:critical] %>
<%# NOTE(review): presumably the non-Hash case, which writes the setting's value directly - confirm. -%>
41 <%= plugin %>.<%= value %> <%= client[:munin][:plugins][plugin][value] %>
<%# Extra graphs from client[:munin][:graphs], in sorted key order; title, vlabel and category are each optional. -%>
46 <% if client[:munin][:graphs] -%>
47 <% client[:munin][:graphs].keys.sort.each do |graph| -%>
48 <% if client[:munin][:graphs][graph][:title] -%>
49 <%= graph %>.graph_title <%= client[:munin][:graphs][graph][:title] %>
51 <% if client[:munin][:graphs][graph][:vlabel] -%>
52 <%= graph %>.graph_vlabel <%= client[:munin][:graphs][graph][:vlabel] %>
54 <% if client[:munin][:graphs][graph][:category] -%>
55 <%= graph %>.graph_category <%= client[:munin][:graphs][graph][:category] %>
<%# NOTE(review): :values is dereferenced without a presence check, unlike :title/:vlabel/:category - confirm every graph defines it. -%>
57 <% client[:munin][:graphs][graph][:values].keys.sort.each do |value| -%>
<%# :sum is written as a space-separated list; :label is written as-is. -%>
58 <% if client[:munin][:graphs][graph][:values][value][:sum] -%>
59 <%= graph %>.<%= value %>.sum <%= client[:munin][:graphs][graph][:values][value][:sum].join(" ") %>
61 <% if client[:munin][:graphs][graph][:values][value][:label] -%>
62 <%= graph %>.<%= value %>.label <%= client[:munin][:graphs][graph][:values][value][:label] %>
69 # Configure compound graphs for www.openstreetmap.org
<%# Every .sum value in this group is produced by Chef::Munin.expand from a pattern string and @frontends. -%>
<%# NOTE(review): presumably the double-percent placeholder is replaced with each frontend's name, giving one source per frontend - confirm against Chef::Munin.expand. -%>
72 apache_accesses.graph_title Apache accesses
73 apache_accesses.graph_vlabel accesses / ${graph_period}
74 apache_accesses.graph_category apache
75 apache_accesses.accesses80.sum <%= Chef::Munin.expand "%%.openstreetmap:apache_accesses.accesses80", @frontends %>
76 apache_accesses.accesses80.label port 80
77 apache_volume.graph_title Apache volume
78 apache_volume.graph_vlabel bytes per ${graph_period}
79 apache_volume.graph_category apache
80 apache_volume.volume80.sum <%= Chef::Munin.expand "%%.openstreetmap:apache_volume.volume80", @frontends %>
81 apache_volume.volume80.label port 80
<%# The interface graphs multiply each summed series by 8 via cdef; given the "bits" vlabel this is presumably a bytes-to-bits conversion. -%>
82 if_eth0.graph_title eth0 traffic
83 if_eth0.graph_vlabel bits in (-) / out (+) per ${graph_period}
84 if_eth0.graph_category network
85 if_eth0.down.sum <%= Chef::Munin.expand "%%.openstreetmap:if_eth0.down", @frontends %>
86 if_eth0.down.label received
87 if_eth0.down.cdef down,8,*
88 if_eth0.up.sum <%= Chef::Munin.expand "%%.openstreetmap:if_eth0.up", @frontends %>
90 if_eth0.up.cdef up,8,*
91 if_eth1.graph_title eth1 traffic
92 if_eth1.graph_vlabel bits in (-) / out (+) per ${graph_period}
93 if_eth1.graph_category network
94 if_eth1.down.sum <%= Chef::Munin.expand "%%.openstreetmap:if_eth1.down", @frontends %>
95 if_eth1.down.label received
96 if_eth1.down.cdef down,8,*
97 if_eth1.up.sum <%= Chef::Munin.expand "%%.openstreetmap:if_eth1.up", @frontends %>
99 if_eth1.up.cdef up,8,*
<%# API call graphs summed over @frontends via Chef::Munin.expand. -%>
<%# The api_calls_www patterns use the placeholder twice: once in the host part and once in the plugin name. -%>
<%# NOTE(review): presumably each frontend exposes a plugin named after itself (api_calls_<frontend>) - confirm. -%>
100 api_calls_www.graph_title Active requests
101 api_calls_www.graph_vlabel Number of requests
102 api_calls_www.graph_category api
103 api_calls_www.web.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_%%.web", @frontends %>
104 api_calls_www.web.label Web site traffic
105 api_calls_www.upload.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_%%.upload", @frontends %>
106 api_calls_www.upload.label Changeset diff uploads
107 api_calls_www.other.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_%%.other", @frontends %>
108 api_calls_www.other.label Other API calls
109 api_calls_www.amf.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_%%.amf", @frontends %>
110 api_calls_www.amf.label AMF API calls
111 api_calls_www.history.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_%%.history", @frontends %>
112 api_calls_www.history.label Element history fetches
113 api_calls_www.full.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_%%.full", @frontends %>
114 api_calls_www.full.label Full element fetches
115 api_calls_www.map.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_%%.map", @frontends %>
116 api_calls_www.map.label Map API calls
117 api_calls_www.trkpts.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_%%.trkpts", @frontends %>
118 api_calls_www.trkpts.label GPX trackpoints calls
<%# api_calls_num reads a fixed plugin name (api_calls_num) on every frontend, so the placeholder appears only in the host part. -%>
119 api_calls_num.graph_title Requests processed
120 api_calls_num.graph_vlabel Number of requests per minute
121 api_calls_num.graph_category api
122 api_calls_num.web.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_num.web", @frontends %>
123 api_calls_num.web.label Web site traffic
124 api_calls_num.upload.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_num.upload", @frontends %>
125 api_calls_num.upload.label Changeset diff uploads
126 api_calls_num.other.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_num.other", @frontends %>
127 api_calls_num.other.label Other API calls
128 api_calls_num.amf.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_num.amf", @frontends %>
129 api_calls_num.amf.label AMF API calls
130 api_calls_num.history.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_num.history", @frontends %>
131 api_calls_num.history.label Element history fetches
132 api_calls_num.full.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_num.full", @frontends %>
133 api_calls_num.full.label Full element fetches
134 api_calls_num.map.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_num.map", @frontends %>
135 api_calls_num.map.label Map API calls
136 api_calls_num.trkpts.sum <%= Chef::Munin.expand "%%.openstreetmap:api_calls_num.trkpts", @frontends %>
137 api_calls_num.trkpts.label GPX trackpoints calls
<%# api_waits_www plots an average wait time: each series is summed over @frontends and the cdef divides that sum by the number of frontends. -%>
<%# The divisor is derived from @frontends rather than a fixed constant so the average stays correct when frontends are added or removed. -%>
<%# NOTE(review): assumes @frontends is the enumerable that Chef::Munin.expand iterates (one element per frontend) - confirm. -%>
138 api_waits_www.graph_title Wait times for active requests
139 api_waits_www.graph_vlabel Average time of requests
140 api_waits_www.graph_category api
141 api_waits_www.web.sum <%= Chef::Munin.expand "%%.openstreetmap:api_waits_%%.web", @frontends %>
142 api_waits_www.web.label Web site traffic
143 api_waits_www.web.cdef web,<%= @frontends.count %>,/
144 api_waits_www.upload.sum <%= Chef::Munin.expand "%%.openstreetmap:api_waits_%%.upload", @frontends %>
145 api_waits_www.upload.label Changeset diff uploads
146 api_waits_www.upload.cdef upload,<%= @frontends.count %>,/
147 api_waits_www.other.sum <%= Chef::Munin.expand "%%.openstreetmap:api_waits_%%.other", @frontends %>
148 api_waits_www.other.label Other API calls
149 api_waits_www.other.cdef other,<%= @frontends.count %>,/
150 api_waits_www.amf.sum <%= Chef::Munin.expand "%%.openstreetmap:api_waits_%%.amf", @frontends %>
151 api_waits_www.amf.label AMF API calls
152 api_waits_www.amf.cdef amf,<%= @frontends.count %>,/
153 api_waits_www.history.sum <%= Chef::Munin.expand "%%.openstreetmap:api_waits_%%.history", @frontends %>
154 api_waits_www.history.label Element history fetches
155 api_waits_www.history.cdef history,<%= @frontends.count %>,/
156 api_waits_www.full.sum <%= Chef::Munin.expand "%%.openstreetmap:api_waits_%%.full", @frontends %>
157 api_waits_www.full.label Full element fetches
158 api_waits_www.full.cdef full,<%= @frontends.count %>,/
159 api_waits_www.map.sum <%= Chef::Munin.expand "%%.openstreetmap:api_waits_%%.map", @frontends %>
160 api_waits_www.map.label Map API calls
161 api_waits_www.map.cdef map,<%= @frontends.count %>,/
162 api_waits_www.trkpts.sum <%= Chef::Munin.expand "%%.openstreetmap:api_waits_%%.trkpts", @frontends %>
163 api_waits_www.trkpts.label GPX trackpoints calls
164 api_waits_www.trkpts.cdef trkpts,<%= @frontends.count %>,/
<%# memcached graphs: every .sum value is produced by Chef::Munin.expand over @backends (not @frontends). -%>
<%# The traffic series are multiplied by 8 via cdef; given the "bits" vlabel this is presumably a bytes-to-bits conversion. -%>
165 memcached_multi_bytes.graph_title Network Traffic
166 memcached_multi_bytes.graph_vlabel bits in (-) / out (+)
167 memcached_multi_bytes.graph_category memcached
168 memcached_multi_bytes.bytes_read.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_multi_bytes.bytes_read", @backends %>
169 memcached_multi_bytes.bytes_read.label Network Traffic coming in (-)
170 memcached_multi_bytes.bytes_read.cdef bytes_read,8,*
171 memcached_multi_bytes.bytes_written.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_multi_bytes.bytes_written", @backends %>
172 memcached_multi_bytes.bytes_written.label Traffic in (-) / out (+)
173 memcached_multi_bytes.bytes_written.cdef bytes_written,8,*
174 memcached_commands.graph_title Commands
175 memcached_commands.graph_vlabel Commands per ${graph_period}
176 memcached_commands.graph_category memcached
177 memcached_commands.cmd_get.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.cmd_get", @backends %>
178 memcached_commands.cmd_get.label Gets
179 memcached_commands.cmd_set.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.cmd_set", @backends %>
180 memcached_commands.cmd_set.label Sets
181 memcached_commands.get_hits.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.get_hits", @backends %>
182 memcached_commands.get_hits.label Get Hits
183 memcached_commands.get_misses.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.get_misses", @backends %>
184 memcached_commands.get_misses.label Get Misses
185 memcached_commands.delete_hits.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.delete_hits", @backends %>
186 memcached_commands.delete_hits.label Delete Hits
187 memcached_commands.delete_misses.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.delete_misses", @backends %>
188 memcached_commands.delete_misses.label Delete Misses
189 memcached_commands.incr_hits.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.incr_hits", @backends %>
190 memcached_commands.incr_hits.label Increment Hits
191 memcached_commands.incr_misses.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.incr_misses", @backends %>
192 memcached_commands.incr_misses.label Increment Misses
193 memcached_commands.decr_hits.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.decr_hits", @backends %>
194 memcached_commands.decr_hits.label Decrement Hits
195 memcached_commands.decr_misses.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_commands.decr_misses", @backends %>
196 memcached_commands.decr_misses.label Decrement Misses
197 memcached_multi_conns.graph_title Connections
198 memcached_multi_conns.graph_vlabel Connections per ${graph_period}
199 memcached_multi_conns.graph_category memcached
200 memcached_multi_conns.curr_conns.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_multi_conns.curr_conns", @backends %>
201 memcached_multi_conns.curr_conns.label Current Connections
202 memcached_multi_conns.max_conns.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_multi_conns.max_conns", @backends %>
203 memcached_multi_conns.max_conns.label Max Connections
204 memcached_multi_conns.avg_conns.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_multi_conns.avg_conns", @backends %>
205 memcached_multi_conns.avg_conns.label Avg Connections
206 memcached_evictions.graph_title Evictions
207 memcached_evictions.graph_vlabel Evictions per ${graph_period}
208 memcached_evictions.graph_category memcached
209 memcached_evictions.evictions.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_evictions.evictions", @backends %>
210 memcached_evictions.evictions.label Evictions
211 memcached_evictions.evicted_nonzero.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_evictions.evicted_nonzero", @backends %>
212 memcached_evictions.evicted_nonzero.label Evictions prior to Expire
213 memcached_evictions.reclaimed.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_evictions.reclaimed", @backends %>
214 memcached_evictions.reclaimed.label Reclaimed Items
215 memcached_items.graph_title Items
216 memcached_items.graph_vlabel Items in Memcached
217 memcached_items.graph_category memcached
218 memcached_items.curr_items.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_items.curr_items", @backends %>
219 memcached_items.curr_items.label Current Items
220 memcached_items.total_items.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_items.total_items", @backends %>
221 memcached_items.total_items.label New Items
222 memcached_memory.graph_title Memory Usage
223 memcached_memory.graph_vlabel Bytes Used
224 memcached_memory.graph_category memcached
225 memcached_memory.limit_maxbytes.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_memory.limit_maxbytes", @backends %>
226 memcached_memory.limit_maxbytes.label Maximum Bytes Allocated
227 memcached_memory.bytes.sum <%= Chef::Munin.expand "%%.openstreetmap:memcached_memory.bytes", @backends %>
228 memcached_memory.bytes.label Current Bytes Used
230 # Configure compound graphs for tile.openstreetmap.org
<%# graph_order is produced by Chef::Munin.expand over @tilecaches using name and interface placeholders. -%>
<%# NOTE(review): presumably each @tilecaches entry is a Hash with :name and :interface keys; only tc[:name] is read directly here - confirm. -%>
233 network_in.graph_title Inbound network traffic
234 network_in.graph_vlabel bits in per ${graph_period}
235 network_in.graph_category network
236 network_in.graph_order <%= Chef::Munin.expand "%%name%%=%%name%%.openstreetmap:if_%%interface%%.down", @tilecaches %>
237 network_in.graph_total total
<%# One series per tile cache: labelled with its name, multiplied by 8 via cdef, and drawn as AREASTACK. -%>
238 <% @tilecaches.each do |tc| -%>
239 network_in.<%= tc[:name] %>.label <%= tc[:name] %>
240 network_in.<%= tc[:name] %>.cdef <%= tc[:name] %>,8,*
241 network_in.<%= tc[:name] %>.draw AREASTACK
243 network_out.graph_title Outbound network traffic
244 network_out.graph_vlabel bits out per ${graph_period}
245 network_out.graph_category network
246 network_out.graph_order <%= Chef::Munin.expand "%%name%%=%%name%%.openstreetmap:if_%%interface%%.up", @tilecaches %>
247 network_out.graph_total total
<%# Same per-tile-cache series layout as network_in, reading the "up" field instead of "down". -%>
248 <% @tilecaches.each do |tc| -%>
249 network_out.<%= tc[:name] %>.label <%= tc[:name] %>
250 network_out.<%= tc[:name] %>.cdef <%= tc[:name] %>,8,*
251 network_out.<%= tc[:name] %>.draw AREASTACK
<%# Squid graphs aggregated over @tilecaches: every .sum value is produced by Chef::Munin.expand using the name placeholder. -%>
253 squid_delay_pools.graph_title IPs being delayed
254 squid_delay_pools.graph_args --base 1000 -l 0
255 squid_delay_pools.graph_vlabel IPs
256 squid_delay_pools.graph_order squid_delay1
257 squid_delay_pools.graph_category squid
258 squid_delay_pools.squid_delay1.sum <%= Chef::Munin.expand "%%name%%.openstreetmap:squid_delay_pools.squid_delay1", @tilecaches %>
259 squid_delay_pools.squid_delay1.label IPs
260 squid_delay_pools.squid_delay1.min 0
261 squid_delay_pools.squid_delay1.draw AREA
<%# hits is drawn as the base AREA with errors and requests stacked on top; note the "requests" field is labelled "misses". -%>
262 squid_requests.graph_title Squid client requests
263 squid_requests.graph_args --base 1000 -l 0
264 squid_requests.graph_vlabel requests / ${graph_period}
265 squid_requests.graph_order hits errors requests
266 squid_requests.graph_total total
267 squid_requests.graph_category squid
268 squid_requests.hits.sum <%= Chef::Munin.expand "%%name%%.openstreetmap:squid_requests.hits", @tilecaches %>
269 squid_requests.hits.label hits
270 squid_requests.hits.draw AREA
271 squid_requests.errors.sum <%= Chef::Munin.expand "%%name%%.openstreetmap:squid_requests.errors", @tilecaches %>
272 squid_requests.errors.label errors
273 squid_requests.errors.draw STACK
274 squid_requests.requests.sum <%= Chef::Munin.expand "%%name%%.openstreetmap:squid_requests.requests", @tilecaches %>
275 squid_requests.requests.label misses
276 squid_requests.requests.draw STACK
<%# The traffic series are scaled by 8096 via cdef and plotted with a "bits" vlabel. -%>
<%# NOTE(review): 8096 is neither 8 * 1000 nor 8 * 1024 (8192) for a kilobytes-to-bits conversion - possibly a typo; check what the per-host squid_traffic plugin uses before changing it, so aggregate and per-host graphs stay comparable. -%>
277 squid_traffic.graph_title Squid traffic status
278 squid_traffic.graph_args --base 1000
279 squid_traffic.graph_vlabel bits per ${graph_period}
280 squid_traffic.graph_order kbytes_in kbytes_out hit_kbytes_out
281 squid_traffic.graph_category squid
282 squid_traffic.kbytes_in.sum <%= Chef::Munin.expand "%%name%%.openstreetmap:squid_traffic.kbytes_in", @tilecaches %>
283 squid_traffic.kbytes_in.label received
284 squid_traffic.kbytes_in.cdef kbytes_in,8096,*
285 squid_traffic.kbytes_out.sum <%= Chef::Munin.expand "%%name%%.openstreetmap:squid_traffic.kbytes_out", @tilecaches %>
286 squid_traffic.kbytes_out.label sent
287 squid_traffic.kbytes_out.cdef kbytes_out,8096,*
288 squid_traffic.hit_kbytes_out.sum <%= Chef::Munin.expand "%%name%%.openstreetmap:squid_traffic.hit_kbytes_out", @tilecaches %>
289 squid_traffic.hit_kbytes_out.label from cache
290 squid_traffic.hit_kbytes_out.cdef hit_kbytes_out,8096,*
<%# Squid service-time graphs: one graph per timing category, with one series per tile cache. -%>
<%# graph_order is produced by Chef::Munin.expand over @tilecaches; each loop then labels every series with the tile cache name. -%>
<%# NOTE(review): the axis label says "median" while the source fields are named mean_* - confirm which statistic the plugin reports. -%>
291 squid_times_http.graph_title Squid Http Service Times
292 squid_times_http.graph_category squid
293 squid_times_http.graph_args --lower-limit 0
294 squid_times_http.graph_vlabel median response times (s)
295 squid_times_http.graph_order <%= Chef::Munin.expand "%%name%%=%%name%%.openstreetmap:squid_times.mean_http", @tilecaches %>
296 <% @tilecaches.each do |tc| -%>
297 squid_times_http.<%= tc[:name] %>.label <%= tc[:name] %>
299 squid_times_cmis.graph_title Squid Cache Miss Service Times
300 squid_times_cmis.graph_category squid
301 squid_times_cmis.graph_args --lower-limit 0
302 squid_times_cmis.graph_vlabel median response times (s)
303 squid_times_cmis.graph_order <%= Chef::Munin.expand "%%name%%=%%name%%.openstreetmap:squid_times.mean_cmis", @tilecaches %>
304 <% @tilecaches.each do |tc| -%>
305 squid_times_cmis.<%= tc[:name] %>.label <%= tc[:name] %>
307 squid_times_chits.graph_title Squid Cache Hit Service Times
308 squid_times_chits.graph_category squid
309 squid_times_chits.graph_args --lower-limit 0
310 squid_times_chits.graph_vlabel median response times (s)
311 squid_times_chits.graph_order <%= Chef::Munin.expand "%%name%%=%%name%%.openstreetmap:squid_times.mean_chits", @tilecaches %>
312 <% @tilecaches.each do |tc| -%>
313 squid_times_chits.<%= tc[:name] %>.label <%= tc[:name] %>
315 squid_times_nhits.graph_title Squid Cache Near Hit Service Times
316 squid_times_nhits.graph_category squid
317 squid_times_nhits.graph_args --lower-limit 0
318 squid_times_nhits.graph_vlabel median response times (s)
319 squid_times_nhits.graph_order <%= Chef::Munin.expand "%%name%%=%%name%%.openstreetmap:squid_times.mean_nhits", @tilecaches %>
320 <% @tilecaches.each do |tc| -%>
321 squid_times_nhits.<%= tc[:name] %>.label <%= tc[:name] %>
323 squid_times_nmr.graph_title Squid Cache Not Modified Service Times
324 squid_times_nmr.graph_category squid
325 squid_times_nmr.graph_args --lower-limit 0
326 squid_times_nmr.graph_vlabel median response times (s)
327 squid_times_nmr.graph_order <%= Chef::Munin.expand "%%name%%=%%name%%.openstreetmap:squid_times.mean_nmr", @tilecaches %>
328 <% @tilecaches.each do |tc| -%>
329 squid_times_nmr.<%= tc[:name] %>.label <%= tc[:name] %>
331 squid_times_dnsl.graph_title Squid Cache DNS Lookup Service Times
332 squid_times_dnsl.graph_category squid
333 squid_times_dnsl.graph_args --lower-limit 0
334 squid_times_dnsl.graph_vlabel median response times (s)
335 squid_times_dnsl.graph_order <%= Chef::Munin.expand "%%name%%=%%name%%.openstreetmap:squid_times.mean_dnsl", @tilecaches %>
336 <% @tilecaches.each do |tc| -%>
337 squid_times_dnsl.<%= tc[:name] %>.label <%= tc[:name] %>