#!/bin/sh
#
# Sample postscript to check appstatus for GPFS, LAPI and LoadL on Linux system.
# Report the appstatus to xcatd on MN.
#
# The script polls six application states every INTERVAL seconds and reports
# each one that is available; it stops once all of them have been reported in
# the same pass or once the timeout has elapsed.

# Set the default timeout to 5 mins (value is in seconds).
TIMEOUT=300

# Seconds to sleep between two polling passes.
INTERVAL=15

# Seconds spent sleeping so far; increased by INTERVAL after every pass.
PASSED=0

# The number of items to check, currently including:
# gpfs-daemon, gpfs-quorum, gpfs-filesystem, lapi-pnsd, loadl-schedd, loadl-startd
ITEMNUM=6

# Fixed status messages reported for the GPFS daemon and the GPFS quorum.
gpfsd="hpcbootstatus gpfs-daemon=up"
gpfsq="hpcbootstatus gpfs-quorum=achieved"

# Get server IP from xcatinfo file.
#
# When /opt/xcat/xcatinfo exists and yields a non-empty value, MASTER is set to
# that value; when it exists but yields nothing, MASTER is left untouched
# (presumably inherited from the caller's environment -- verify).  When the
# file is missing the script cannot report anything and exits with status 1.
if [ -f /opt/xcat/xcatinfo ]; then
    # Read exactly one value so MASTER never ends up holding several lines.
    # NOTE(review): the management server is expected to be stored as an
    # XCATSERVER=<ip> entry -- confirm against the deployed xcatinfo format.
    # If no such entry exists, fall back to the first non-empty line, taking
    # the text after the first '=' (or the whole line when it has no '=').
    SIP=$(awk -F= '
        $1 == "XCATSERVER" { picked = $2; exit }
        fallback == "" && $0 != "" { fallback = (NF > 1 ? $2 : $0) }
        END { print (picked != "" ? picked : fallback) }
    ' /opt/xcat/xcatinfo)
    if [ -n "$SIP" ]; then
        MASTER=$SIP
    fi
else
    # printf instead of echo: whether echo expands "\n" differs between shells.
    printf '%s\n%s\n' "xCAT management server IP can't be determined." "exiting..." >&2
    exit 1
fi

# Add HPCbootstatus.linuxinit to /etc/init.d for diskful node reboot.
#
# The init script is only made executable and registered with chkconfig when
# it was actually copied; a failure here is reported but does not stop the
# status polling that follows.
initsrc=/xcatpost/HPCbootstatus.linuxinit
initdst=/etc/init.d/HPCbootstatus.linuxinit
if [ ! -r "$initsrc" ]; then
    # Checked before redirecting so a missing source never leaves an empty
    # file behind in /etc/init.d.
    echo "HPCbootstatus.linux: $initsrc is not readable, init script not installed" >&2
elif ! cat "$initsrc" >"$initdst"; then
    echo "HPCbootstatus.linux: failed to write $initdst, init script not installed" >&2
else
    # Change permissions
    chmod 755 "$initdst"
    chkconfig --add HPCbootstatus.linuxinit
fi

# report_status MESSAGE [LABEL]
#
# Send MESSAGE to xcatd on $MASTER (port 3002) through updateflag.awk, log the
# report on stdout and count the item as reported for the current pass.
# LABEL is the text shown in the log line; it defaults to MESSAGE.
# Globals: MASTER (read), COUNT (incremented).
report_status() {
    updateflag.awk "$MASTER" 3002 "$1"
    echo "HPCbootstatus.linux: Reporting ${2:-$1} to xcatd on $MASTER!"
    COUNT=$((COUNT + 1))
}

# Lower-cased host name used for the LoadLeveler queries; it does not change
# between passes, so it is computed once.
hn=$(hostname | tr '[:upper:]' '[:lower:]')

# Poll all items until every one of them was reported in a single pass or the
# timeout has elapsed.  TIMEOUT, INTERVAL, PASSED, ITEMNUM, gpfsd, gpfsq and
# MASTER are expected to be set earlier in the script.
while true; do
    # Number of items reported during this pass.
    COUNT=0

    # Query gpfs-daemon, mmfsd: check 1191 port.
    # The pattern requires ":1191" or ".1191" at the end of an address field so
    # that other numbers merely containing "1191" do not count.
    if netstat -an | grep -Eq '[:.]1191([[:space:]]|$)'; then
        report_status "$gpfsd"
    fi

    # Query gpfs-quorum
    if /usr/lpp/mmfs/bin/mmgetstate -s | grep -q achieved; then
        report_status "$gpfsq"
    fi

    # Query gpfs-filesystem: every mount point of type gpfs, each followed by
    # a '!' separator.
    fs=$(/bin/mount | awk '/type gpfs/ { printf "%s!", $3 }')
    if [ -n "$fs" ]; then
        report_status "hpcbootstatus gpfs-filesystem=$fs" "gpfs-filesystem=$fs"
    fi

    # Query lapi-pnsd: third field of the second line printed by lssrc.
    lapi=$(/usr/bin/lssrc -s pnsd | /usr/bin/awk '(NR==2){print $3}')
    if [ -n "$lapi" ]; then
        report_status "hpcbootstatus lapi-pnsd=$lapi" "lapi-pnsd=$lapi"
    fi

    # Query loadl-schedd
    llsc=$(/usr/bin/llrstatus -h "$hn" -r %sca %scs)
    if [ -n "$llsc" ]; then
        report_status "hpcbootstatus loadl-schedd=$llsc" "loadl-schedd=$llsc"
    fi

    # Query loadl-startd
    llst=$(/usr/bin/llrstatus -h "$hn" -r %sta %sts)
    if [ -n "$llst" ]; then
        report_status "hpcbootstatus loadl-startd=$llst" "loadl-startd=$llst"
    fi

    # Done when everything was reported in this pass, or when the time spent
    # waiting has reached the timeout.
    if [ "$COUNT" -eq "$ITEMNUM" ] || [ "$PASSED" -ge "$TIMEOUT" ]; then
        exit 0
    fi

    # POSIX arithmetic expansion; "let" is not available in every /bin/sh.
    PASSED=$((PASSED + INTERVAL))
    sleep "$INTERVAL"
    echo "HPCbootstatus.linux: passed $PASSED"
done