end
end
# Pick the first of the candidate watchdog drivers (hpwdt, sp5100_tco) that
# some entry in node[:hardware][:pci] lists among its :modules.
# The result is nil when no entry matches, or when the PCI data (or an
# entry's :modules list) is absent -- the &. calls short-circuit to nil.
+watchdog_module = %w[hpwdt sp5100_tco].find do |module_name|
+ node[:hardware][:pci]&.any? { |_, pci| pci[:modules]&.any?(module_name) }
+end
+
# IPMI handling: applies only when "ipmi_si" is listed in
# node[:kernel][:modules].
if node[:kernel][:modules].include?("ipmi_si")
package "ipmitool"
package "freeipmi-tools"
# NOTE(review): the resource that owns the two attributes below is not
# visible in this excerpt -- confirm against the full recipe.
options "--config.file=/etc/prometheus/ipmi_local.yml"
subscribes :restart, "template[/etc/prometheus/ipmi_local.yml]"
end
+
# Fall back to the IPMI watchdog only if no watchdog module has been chosen
# yet; ||= leaves an already-selected driver untouched.
+ watchdog_module ||= "ipmi_watchdog"
end
package "irqbalance"
if controller && controller[:device]
device = controller[:device].sub("/dev/", "")
smart = disk[:smart_device]
-
- if device.start_with?("cciss/") && smart =~ /^cciss,(\d+)$/
- array = node[:hardware][:disk][:arrays][disk[:arrays].first]
- munin = "cciss-3#{array[:wwn]}-#{Regexp.last_match(1)}"
- elsif smart =~ /^.*,(\d+)$/
- munin = "#{device}-#{Regexp.last_match(1)}"
- elsif smart =~ %r{^.*,(\d+)/(\d+)$}
- munin = "#{device}-#{Regexp.last_match(1)}:#{Regexp.last_match(2)}"
- end
elsif disk[:device]
device = disk[:device].sub("/dev/", "")
smart = disk[:smart_device]
-
- if smart =~ /^.*,(\d+),(\d+),(\d+)$/
- munin = "#{device}-#{Regexp.last_match(1)}:#{Regexp.last_match(2)}:#{Regexp.last_match(3)}"
- end
end
elsif disk[:device] =~ %r{^/dev/(nvme\d+)n\d+$}
device = Regexp.last_match(1)
- munin = device
elsif disk[:device]
device = disk[:device].sub("/dev/", "")
- munin = device
end
- next if device.nil? || munin.nil?
+ next if device.nil?
Hash[
:device => device,
- :smart => smart,
- :munin => munin,
- :hddtemp => munin.tr("-:", "_")
+ :smart => smart
]
end
private_users false
protect_clock false
end
-
- # Don't try and do munin monitoring of disks behind
- # an Areca controller as they only allow one thing to
- # talk to the controller at a time and smartd will
- # throw errors if it clashes with munin
- disks = disks.reject { |disk| disk[:smart]&.start_with?("areca,") }
-
- disks.each do |disk|
- munin_plugin "smart_#{disk[:munin]}" do
- target "smart_"
- conf "munin.smart.erb"
- conf_variables :disk => disk
- end
- end
else
service "smartd" do
action [:stop, :disable]
end
end
-if disks.count.positive?
- munin_plugin "hddtemp_smartctl" do
- conf "munin.hddtemp.erb"
- conf_variables :disks => disks
- end
-else
- munin_plugin "hddtemp_smartctl" do
- action :delete
- conf "munin.hddtemp.erb"
- end
-end
-
-plugins = Dir.glob("/etc/munin/plugins/smart_*").map { |p| File.basename(p) } -
- disks.map { |d| "smart_#{d[:munin]}" }
-
-plugins.each do |plugin|
- munin_plugin plugin do
- action :delete
- conf "munin.smart.erb"
- end
-end
-
# Only touch the mdadm configuration when the file already exists.
if File.exist?("/etc/mdadm/mdadm.conf")
# Replace any "MAILADDR ..." line with the admins address; the value
# returned by edit_file is kept in mdadm_conf.
# NOTE(review): edit_file is defined elsewhere -- presumably it yields each
# line of the file to the block; confirm how the block's result is used.
mdadm_conf = edit_file "/etc/mdadm/mdadm.conf" do |line|
line.gsub!(/^MAILADDR .*$/, "MAILADDR admins@openstreetmap.org")
end
end
# Watchdog configuration. The "watchdog" package/service setup (- lines) is
# replaced by installing the selected kernel module and writing a
# /etc/systemd/system.conf.d drop-in (+ lines). Nothing is configured when
# watchdog_module is nil.
-if node[:hardware][:watchdog]
- package "watchdog"
+if watchdog_module
+ kernel_module watchdog_module do
+ action :install
+ end
+
# Declared with action :nothing, so "systemctl daemon-reload" runs only when
# another resource notifies it (the watchdog.conf template below does).
+ execute "systemctl-reload" do
+ action :nothing
+ command "systemctl daemon-reload"
+ user "root"
+ group "root"
+ end
- template "/etc/default/watchdog" do
- source "watchdog.erb"
# Directory that holds the drop-in file rendered by the template below.
+ directory "/etc/systemd/system.conf.d" do
owner "root"
group "root"
- mode "644"
- variables :module => node[:hardware][:watchdog]
+ mode "755"
end
- service "watchdog" do
- action [:enable, :start]
# Render the drop-in from watchdog.conf.erb and trigger the reload resource
# via notifies (no explicit timing given, so Chef's default applies).
+ template "/etc/systemd/system.conf.d/watchdog.conf" do
+ source "watchdog.conf.erb"
+ owner "root"
+ group "root"
+ mode "644"
+ notifies :run, "execute[systemctl-reload]"
end
end