Based on this article, I came up with a script to monitor a CCISS HP Smart Array hardware RAID controller:
#!/bin/bash
#
# hpacucli_mon - check an HP Smart Array (CCISS) controller with hpacucli and
# mail a report when the number of logical or physical drives reporting "OK"
# differs from the expected counts.
#
# Usage:
# hpacucli_mon <NUM OF ARRAYS> <NUM OF PHISYCAL DISKS> <MAIL TO>
#
# Arguments:
#   $1 - expected number of logical drives (arrays) in state "OK"
#   $2 - expected number of physical disks in state "OK"
#   $3 - address the failure report is mailed to
#
# Exit status:
#   1 on a usage error (wrong argument count or non-numeric counts);
#   otherwise 0 when all counts match, or the status of `mail` when a
#   report is sent.
#
# `set -e` / `pipefail` are deliberately NOT enabled: grep exits non-zero when
# it finds no "OK" at all, which is exactly the situation this script has to
# report rather than abort on.

readonly HPACUCLI=/usr/sbin/hpacucli
readonly SMARTCTL=/usr/sbin/smartctl

# Slot number of the controller: the word following "Slot" in the controller
# status output. Resolved before argument checking because the usage text
# embeds it in the commands it suggests.
# NOTE(review): only two characters after "Slot" are kept, so a two-digit slot
# number would be truncated, and several controllers would yield several
# values - confirm against your hardware.
HP_SLOT=$("$HPACUCLI" ctrl all show status | grep -o -P 'Slot.{0,2}' | awk -F" " '{print $2}')

# Print the usage text to stderr and terminate with a failure status, so that
# cron and calling scripts can tell a misconfiguration from a healthy run.
usage() {
  {
    echo ""
    echo "ERROR: hpacucli_mon requires number of arrays, disks and valid email"
    echo ""
    echo "Usage: hpacucli_mon <NUM OF ARRAYS> <NUM OF PHISYCAL DISKS> <MAIL TO>"
    echo ""
    echo "To find amount of arrays and disks you do have run:"
    echo "/usr/sbin/hpacucli ctrl slot=$HP_SLOT ld all show status"
    echo "and"
    echo "/usr/sbin/hpacucli ctrl slot=$HP_SLOT pd all show status"
  } >&2
  exit 1
}

# Succeed only when $1 is a non-empty string of decimal digits.
is_count() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

# Write the failure report to stdout.
# Arguments: $1 - local IP shown in the report
#            $2 - number of physical disks to query through smartctl
build_report() {
  local ip=$1
  local disks=$2
  local i

  echo "We have encountered a problem at $ip"
  echo "Take look at this: "
  "$HPACUCLI" ctrl "slot=$HP_SLOT" ld all show status
  "$HPACUCLI" ctrl "slot=$HP_SLOT" pd all show status
  echo "==============================================="
  echo "INFORMATION PROVIDED BY SMARTCTL:"
  echo ""
  # Disks are addressed by index behind the first CCISS device node.
  # NOTE(review): /dev/cciss/c0d0 is hard-coded - confirm it matches the
  # controller selected by HP_SLOT.
  for (( i = 0; i < disks; i++ )); do
    "$SMARTCTL" -a -d "cciss,$i" /dev/cciss/c0d0 | grep -E '(Serial|Health)'
    echo ""
  done
}

main() {
  # Require a non-empty third argument and no fourth one.
  if [ "${3:-}" = "" ] || [ "${4:-}" != "" ]; then
    usage
  fi
  # A non-numeric count would make the comparison below error out and be
  # treated as "no problem", silently disabling the alert.
  if ! is_count "${1:-}" || ! is_count "${2:-}"; then
    usage
  fi

  local expected_arrays expected_disks mail_to
  # 10# forces base 10 so a value such as "08" is not parsed as octal.
  expected_arrays=$((10#$1))
  expected_disks=$((10#$2))
  mail_to=$3

  local local_ip msg_subject ok_array_cnt ok_disks_cnt
  # Dotted quads from eth0, minus any line containing ".255" (presumably
  # meant to drop the broadcast address and netmask).
  local_ip=$(/sbin/ifconfig eth0 | grep -Eo '(([0-9]{1,3}\.){3}[0-9]{1,3})' | grep -v ".255")
  msg_subject="Smart HP array failure at $local_ip"

  # Count every "OK" occurrence in the logical / physical drive listings.
  ok_array_cnt=$("$HPACUCLI" ctrl "slot=$HP_SLOT" ld all show status | grep -o "OK" | wc -l)
  ok_disks_cnt=$("$HPACUCLI" ctrl "slot=$HP_SLOT" pd all show status | grep -o "OK" | wc -l)

  if [ "$ok_array_cnt" -ne "$expected_arrays" ] || [ "$ok_disks_cnt" -ne "$expected_disks" ]; then
    # Stream the report straight into mail: no predictable file in /tmp that
    # another local user could pre-create or symlink.
    build_report "$local_ip" "$expected_disks" | mail -s "$msg_subject" "$mail_to"
  fi
}

main "$@"
Create it as /usr/local/sbin/hpacucli_mon, then add it to your crontab (don't forget to set the right arguments):
00 */1 * * * /usr/local/sbin/hpacucli_mon 2 4 yourname@yourwebsite.com
Once a "logicaldrive" or HDD fails, a report will be mailed to yourname@yourwebsite.com:
We have encountered a problem at 192.168.12.13
Take look at this:
logicaldrive 1 (136.7 GB, RAID 1): OK
physicaldrive 2I:1:1 (port 2I:box 1:bay 1, 146 GB): OK
physicaldrive 2I:1:2 (port 2I:box 1:bay 2, 146 GB): OK
===============================================
INFORMATION PROVIDED BY SMARTCTL:
Serial number: 3NM51VPY000098372XJX
SMART Health Status: OK
Serial number: 3NM1LPG100009740XJX5
SMART Health Status: OK
No comments:
Post a Comment