git.openstreetmap.org Git - chef.git/commitdiff
Add local implementation of cciss-vol-statusd
author Tom Hughes <tom@compton.nu>
Tue, 2 Apr 2019 17:27:36 +0000 (18:27 +0100)
committer Tom Hughes <tom@compton.nu>
Tue, 2 Apr 2019 17:37:16 +0000 (18:37 +0100)
cookbooks/hardware/recipes/default.rb
cookbooks/hardware/templates/default/cciss-vol-statusd.erb [new file with mode: 0755]

index 2fa629adfa09c5a674a07aeae25e6c8466add21e..66babcc94d6f1c6082e56362442326460cf345b6 100644 (file)
@@ -247,6 +247,26 @@ else
   end
 end
 
+if status_packages.include?("cciss-vol-status") # only when cciss-vol-status is in status_packages
+  template "/usr/local/bin/cciss-vol-statusd" do # install the daemon script, root-owned and executable
+    source "cciss-vol-statusd.erb"
+    owner "root"
+    group "root"
+    mode 0o755
+    notifies :restart, "service[cciss-vol-statusd]" # restart the daemon when the script is updated
+  end
+  # Unit that runs the script; systemd_service is defined outside this hunk (presumably a custom resource - confirm).
+  systemd_service "cciss-vol-statusd" do
+    description "Check cciss_vol_status values in the background"
+    exec_start "/usr/local/bin/cciss-vol-statusd"
+    private_tmp true
+    protect_system "full"
+    protect_home true
+    no_new_privileges true
+    notifies :restart, "service[cciss-vol-statusd]" # NOTE(review): this service resource is not declared in this hunk - confirm it exists
+  end
+end
+
 ["cciss-vol-status", "mpt-status", "sas2ircu-status", "megaraid-status", "megaclisas-status", "aacraid-status"].each do |status_package|
   if status_packages.include?(status_package)
     package status_package
diff --git a/cookbooks/hardware/templates/default/cciss-vol-statusd.erb b/cookbooks/hardware/templates/default/cciss-vol-statusd.erb
new file mode 100755 (executable)
index 0000000..8f25650
--- /dev/null
@@ -0,0 +1,77 @@
+#!/bin/sh
+# Poll cciss_vol_status in a loop and mail $MAILTO whenever the reported RAID state changes.
+NAME="cciss-vol-statusd"
+STATUSFILE=/var/run/$NAME.status  # Report text last mailed while the RAID was bad; absent while OK
+
+# Defaults. Do not edit them here; override them in /etc/default/cciss-vol-statusd
+MAILTO=root   # Where to report problems
+PERIOD=600    # Seconds between each check    (default 10 minutes)
+REMIND=7200   # Seconds between each reminder (default 2 hours)
+ID=/dev/cciss/c0d0  # Argument passed to cciss_vol_status
+
+[ -e /etc/default/cciss-vol-statusd ] && . /etc/default/cciss-vol-statusd  # Source local overrides if present
+
+# Gracefully exit if the package has been removed.
+test -x /usr/bin/cciss_vol_status || exit 0
+
+while true ; do
+    # Check every $PERIOD seconds, send email on every status
+    # change and repeat every $REMIND seconds if the RAID is still
+    # bad.
+    if (cciss_vol_status $ID); then  # A non-zero exit status is treated as a bad RAID
+        BADRAID=false
+    else
+        BADRAID=true
+        logger -t cciss-vol-statusd "detected non-optimal RAID status"
+    fi
+    STATUSCHANGE=false
+    if [ true = "$BADRAID" ] ; then
+        # RAID not OK
+        (cciss_vol_status $ID) > $STATUSFILE.new  # Second invocation, this time capturing the report text
+        if [ ! -f $STATUSFILE ] ; then # No stored report yet: RAID just became broken
+            STATUSCHANGE=true
+            mv $STATUSFILE.new $STATUSFILE
+        elif cmp -s $STATUSFILE $STATUSFILE.new ; then
+            # No change.  Should we send a reminder?
+            LASTTIME="`stat -c '%Z' $STATUSFILE`"  # %Z: last status-change time of the stored report (epoch seconds)
+            NOW="`date +%s`"
+            SINCELAST="`expr $NOW - $LASTTIME`"
+            if [ $REMIND -le "$SINCELAST" ]; then
+                # Time to send a reminder; replacing the stored report restarts the reminder clock
+                STATUSCHANGE=true
+                mv $STATUSFILE.new $STATUSFILE
+            else
+                rm $STATUSFILE.new  # Reminder not due yet: discard the identical report
+            fi
+        else  # Still bad, but the report text differs from the stored one
+            STATUSCHANGE=true
+            mv $STATUSFILE.new $STATUSFILE
+        fi
+    else
+        # RAID OK
+        if [ -f $STATUSFILE ] ; then  # A stored report exists, so the RAID was bad on an earlier check
+            rm $STATUSFILE
+            STATUSCHANGE=true
+        fi
+    fi
+
+    if [ true = "$STATUSCHANGE" ]; then  # Mail the report for this change or reminder
+        hostname="`uname -n`"
+        (
+            cat <<EOF
+This is a RAID status update from cciss-vol-statusd.  The cciss_vol_status
+program reports that one of the RAIDs changed state:
+
+EOF
+            if [ -f $STATUSFILE ] ; then  # Bad state: include the stored report
+                cat $STATUSFILE
+            else  # Stored report was removed above (RAID OK again): include fresh output
+                (cciss_vol_status $ID)
+            fi
+            echo
+            echo "Report from $0 on $hostname"
+        ) | mail -s "info: CCISS raid status change on $hostname" $MAILTO
+    fi
+
+    sleep $PERIOD
+done