diff --git a/xCAT/postscripts/HPCbootstatus b/xCAT/postscripts/HPCbootstatus
new file mode 100644
index 000000000..983629105
--- /dev/null
+++ b/xCAT/postscripts/HPCbootstatus
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+# Sample postscript to check appstatus for GPFS, LAPI and LoadL.
+# Report the appstatus to xcatd on MN.
+#
+# This wrapper only picks the platform-specific script based on `uname`.
+# See HPCbootstatus.aix and HPCbootstatus.linux for more information.
+
+OS=`uname`
+
+# Quote $OS so that an empty `uname` result cannot break the test expression.
+if [ "$OS" = "AIX" ]; then
+    echo "HPCbootstatus: query appstatus on AIX system."
+    HPCbootstatus.aix
+else
+    echo "HPCbootstatus: query appstatus on Linux system."
+    HPCbootstatus.linux
+fi
diff --git a/xCAT/postscripts/HPCbootstatus.aix b/xCAT/postscripts/HPCbootstatus.aix
new file mode 100644
index 000000000..41b21aa59
--- /dev/null
+++ b/xCAT/postscripts/HPCbootstatus.aix
@@ -0,0 +1,189 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use IO::Socket;
+
+# Sample postscript to check appstatus for GPFS, LAPI and LoadL on AIX system.
+# Report the appstatus to xcatd on MN.
+#
+# Every $interval seconds each item below is probed, and every item that
+# answers is reported to xcatd.  The loop ends once all $itemnum items
+# answered in the same pass, or once $timeout seconds have passed.
+
+my $timeout = 300;
+# The number of items to check, currently including:
+# gpfs-daemon, gpfs-quorum, gpfs-filesystem, lapi-pnsd, loadl-schedd, loadl-startd
+my $itemnum = 6;
+my $passed = 0;
+# Set sleep time
+my $interval = 15;
+
+# Management node to report to.  Use MASTER from the environment when it is
+# set (HPCbootstatus.linux reports to $MASTER as well); otherwise fall back
+# to the sample address.
+my $master = "192.168.5.128";
+if (defined $ENV{MASTER} && $ENV{MASTER} ne "")
+{
+    $master = $ENV{MASTER};
+}
+
+my $gpfsd = "hpcbootstatus gpfs-daemon=up";
+my $gpfsq = "hpcbootstatus gpfs-quorum=achieved";
+
+while (1)
+{
+    my $count = 0;
+
+    # Check gpfs-daemon via port 1191.
+    my $cmd = "netstat -nA|grep 1191";
+    my $rc = `$cmd`;
+
+    if ($rc)
+    {
+        report($gpfsd);
+        $count++;
+    }
+
+    # Check gpfs-quorum via /usr/lpp/mmfs/bin/mmgetstate
+    $cmd = "/usr/lpp/mmfs/bin/mmgetstate -s|grep achieved";
+    $rc = `$cmd`;
+
+    if ($rc)
+    {
+        report($gpfsq);
+        $count++;
+    }
+
+    # Check gpfs-filesystem via /usr/sbin/mount.  The live mount table is
+    # queried here; reading a saved copy from /tmp/mount would never see
+    # file systems mounted after that copy was taken.
+    $cmd = "/usr/sbin/mount |grep mmfs |awk '{print \$2}'";
+    my @fs = `$cmd`;
+
+    if (@fs)
+    {
+        chomp @fs;
+        # Every mount point is followed by "!", e.g. "/gpfs1!/gpfs2!".
+        my $gpfsf = "hpcbootstatus gpfs-filesystem=" . join("", map { "$_!" } @fs);
+        report($gpfsf);
+        $count++;
+    }
+
+    # Check LAPI via lssrc
+    $cmd = "/usr/bin/lssrc -s pnsd |/usr/bin/awk '(NR==2){print \$3}'";
+    $rc = `$cmd`;
+
+    if ($rc)
+    {
+        chomp $rc;
+        report("hpcbootstatus lapi-pnsd=$rc");
+        $count++;
+    }
+
+    # Check loadl-schedd via llrstatus
+    $cmd = "/usr/bin/llrstatus -h `hostname` -r %sca %scs";
+    $rc = `$cmd`;
+
+    if ($rc)
+    {
+        chomp $rc;
+        report("hpcbootstatus loadl-schedd=$rc");
+        $count++;
+    }
+
+    # Check loadl-startd via llrstatus
+    $cmd = "/usr/bin/llrstatus -h `hostname` -r %sta %sts";
+    $rc = `$cmd`;
+
+    if ($rc)
+    {
+        chomp $rc;
+        report("hpcbootstatus loadl-startd=$rc");
+        $count++;
+    }
+
+    # All is done or timeout?
+    if (($count == $itemnum) || ($passed >= $timeout))
+    {
+        last;
+    }
+
+    $passed += $interval;
+    sleep $interval;
+}
+
+############################################################
+#
+# report
+#   Sends one status string to xcatd on $master through updateflag and
+#   prints a message naming that status when the report fails.
+#
+############################################################
+sub report {
+    my ($status) = @_;
+
+    if (updateflag($master, $status) != 0)
+    {
+        print "HPCbootstatus.aix: Failed to report $status to xcatd on $master!\n";
+    }
+    return;
+}
+
+############################################################
+#
+# updateflag
+#   Tells xCAT on the server about the appstatus.
+#   e.g. $status = "hpcbootstatus loadl-schedd=1!Avail";
+#
+#   Arguments: $server - name or address of the xcatd host
+#              $status - status line to send
+#   Returns:   0 when the status line was sent, 1 when no connection could
+#              be made or the server never answered "ready".
+#
+############################################################
+sub updateflag {
+    my $server = shift;
+    my $status = shift;
+
+    my $port = "3002";    #by default
+
+    my $servip = `host $server | awk '{print \$3}'`;
+    $servip = "" unless defined $servip;
+    chomp($servip);
+    $servip =~ s/,$//;    # remove trailing comma
+    # Let IO::Socket::INET resolve $server itself if "host" printed nothing.
+    $servip = $server if $servip eq "";
+
+    my $remote = IO::Socket::INET->new( Proto => "tcp", PeerAddr => $servip, PeerPort => $port, Timeout => 0);
+
+    # Retry up to 10 times, one second apart.  The message is printed before
+    # each retry so that it is never printed for an attempt that succeeded.
+    my $try = 10;
+    while (!$remote && $try-- > 0) {
+        print "HPCbootstatus.aix: Cannot connect to host '$servip'\n";
+        sleep 1;
+        $remote = IO::Socket::INET->new( Proto => "tcp", PeerAddr => $servip, PeerPort => $port, Timeout => 0);
+    }
+    unless ($remote) {
+        print "HPCbootstatus.aix: Cannot connect to host '$servip'\n";
+        return 1;
+    }
+    $remote->autoflush(1);
+
+    # Send the status when the server says "ready"; stop reading at "done"
+    # or when the connection is closed.
+    my $sent = 0;
+    my $line;
+    while (defined ($line = <$remote>)) {
+        chomp $line;
+        if ($line eq "ready") {
+            print $remote "$status\n";
+            $sent = 1;
+        } elsif ($line eq "done") {
+            last;
+        }
+    }
+    close $remote;
+    return $sent ? 0 : 1;
+}
diff --git a/xCAT/postscripts/HPCbootstatus.linux b/xCAT/postscripts/HPCbootstatus.linux
new file mode 100644
index 000000000..a01da9495
--- /dev/null
+++ b/xCAT/postscripts/HPCbootstatus.linux
@@ -0,0 +1,96 @@
+#!/bin/sh
+
+#
+# Sample postscript to check appstatus for GPFS, LAPI and LoadL on Linux system.
+# Report the appstatus to xcatd on MN.
+#
+# MASTER is not set in this script; it is taken from the caller's environment.
+
+# Set the default timeout to 5 mins.
+TIMEOUT=300
+INTERVAL=15
+PASSED=0
+
+# The number of items to check, currently including:
+# gpfs-daemon, gpfs-quorum, gpfs-filesystem, lapi-pnsd, loadl-schedd, loadl-startd
+ITEMNUM=6
+
+gpfsd="hpcbootstatus gpfs-daemon=up"
+gpfsq="hpcbootstatus gpfs-quorum=achieved"
+
+# The host name does not change between passes, so look it up once.
+hn=`hostname`
+
+# Use a loop.  Arithmetic uses $(( )) rather than "let", which is not
+# available in every /bin/sh.
+while true; do
+
+    COUNT=0
+    # Query gpfs-daemon,mmfsd
+    # Check 1191 port.  On Linux "-A" takes an address family argument, so
+    # "-an" is used to list all sockets numerically.
+    port=`netstat -an|grep 1191`
+    if [ "$port" != "" ]; then
+        # Send message to xcatd
+        updateflag.awk "$MASTER" 3002 "$gpfsd"
+        echo "HPCbootstatus.linux: Reporting $gpfsd to xcatd on $MASTER!"
+        COUNT=$(($COUNT + 1))
+    fi
+
+    # Query gpfs-quorum
+    quorum=`/usr/lpp/mmfs/bin/mmgetstate -s|grep achieved`
+    if [ "$quorum" != "" ]; then
+        # Send message to xcatd
+        updateflag.awk "$MASTER" 3002 "$gpfsq"
+        echo "HPCbootstatus.linux: Reporting $gpfsq to xcatd on $MASTER!"
+        COUNT=$(($COUNT + 1))
+    fi
+
+    # Query gpfs-filesystem
+    # Linux mount prints "<device> on <mount point> type <fstype> (<options>)",
+    # so the file system type is field 5 and the mount point is field 3.
+    fs=`mount |awk '$5 == "gpfs" || $5 == "mmfs" {print $3}'|tr '\n' '!'`
+    if [ "$fs" != "" ]; then
+        # Send message to xcatd
+        updateflag.awk "$MASTER" 3002 "hpcbootstatus gpfs-filesystem=$fs"
+        echo "HPCbootstatus.linux: Reporting gpfs-filesystem=$fs to xcatd on $MASTER!"
+        COUNT=$(($COUNT + 1))
+    fi
+
+    # Query lapi-pnsd
+    lapi=`/usr/bin/lssrc -s pnsd |/usr/bin/awk '(NR==2){print $3}'`
+    if [ "$lapi" != "" ]; then
+        # Send message to xcatd
+        updateflag.awk "$MASTER" 3002 "hpcbootstatus lapi-pnsd=$lapi"
+        echo "HPCbootstatus.linux: Reporting lapi-pnsd=$lapi to xcatd on $MASTER!"
+        COUNT=$(($COUNT + 1))
+    fi
+
+    # Query loadl-schedd
+    llsc=`/usr/bin/llrstatus -h $hn -r %sca %scs`
+    if [ "$llsc" != "" ]; then
+        # Send message to xcatd
+        updateflag.awk "$MASTER" 3002 "hpcbootstatus loadl-schedd=$llsc"
+        echo "HPCbootstatus.linux: Reporting loadl-schedd=$llsc to xcatd on $MASTER!"
+        COUNT=$(($COUNT + 1))
+    fi
+
+    # Query loadl-startd
+    llst=`/usr/bin/llrstatus -h $hn -r %sta %sts`
+    if [ "$llst" != "" ]; then
+        # Send message to xcatd
+        updateflag.awk "$MASTER" 3002 "hpcbootstatus loadl-startd=$llst"
+        echo "HPCbootstatus.linux: Reporting loadl-startd=$llst to xcatd on $MASTER!"
+        COUNT=$(($COUNT + 1))
+    fi
+
+    # All is done or timeout?  ">=" matches the check in HPCbootstatus.aix.
+    if [ "$COUNT" -eq "$ITEMNUM" ] || [ "$PASSED" -ge "$TIMEOUT" ]; then
+        exit 0
+    fi
+
+    PASSED=$(($PASSED + $INTERVAL))
+    sleep $INTERVAL
+    echo "HPCbootstatus.linux: passed $PASSED"
+
+done