support xcatmon push model for HPC software.
git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@8254 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
parent
226a22d1fe
commit
d8e50df636
16
xCAT/postscripts/HPCbootstatus
Normal file
16
xCAT/postscripts/HPCbootstatus
Normal file
@ -0,0 +1,16 @@
|
||||
#!/bin/sh

# Sample postscript to check appstatus for GPFS, LAPI and LoadL
# and report the appstatus to xcatd on MN.
#
# This wrapper only selects the platform-specific implementation based on
# the output of uname; the exit status is that of the script it runs.
# See HPCbootstatus.aix and HPCbootstatus.linux for more information.

OS=`uname`

# "$OS" is quoted so that an empty uname result falls through to the
# Linux branch instead of producing a malformed test expression.
if [ "$OS" = "AIX" ]; then
    echo "HPCbootstatus: query appstatus on AIX system."
    HPCbootstatus.aix
else
    echo "HPCbootstatus: query appstatus on Linux system."
    HPCbootstatus.linux
fi
|
175
xCAT/postscripts/HPCbootstatus.aix
Normal file
175
xCAT/postscripts/HPCbootstatus.aix
Normal file
@ -0,0 +1,175 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use strict;
use warnings;

use IO::Socket;
|
||||
|
||||
# Sample postscript to check appstatus for GPFS,LAPI and LoadL on AIX system
|
||||
# Report the appstatus to xcatd on MN.
|
||||
|
||||
# Give up after this many seconds of waiting, even if some items never
# reported.
my $timeout = 300;

# The number of items to check, currently including:
# gpfs-daemon, gpfs-quorum, gpfs-filesystem, lapi-pnsd, loadl-schedd, loadl-startd
my $itemnum = 6;

# Seconds spent sleeping between rounds so far.
my $passed = 0;

# Seconds to sleep between two rounds of checks.
my $interval = 15;

# Node running xcatd.  MASTER from the environment wins (the Linux variant
# of this script relies on the same variable); the literal address is only
# kept as a fallback for environments that do not set it.
my $master = $ENV{MASTER} || "192.168.5.128";

my $gpfsd = "hpcbootstatus gpfs-daemon=up";
my $gpfsq = "hpcbootstatus gpfs-quorum=achieved";

# Items whose reported value is the (chomped) output of a command:
# [ key sent to xcatd, shell command producing the value ].
# `hostname` is expanded by the shell that runs the command, not by Perl.
my @value_checks = (
    [ 'lapi-pnsd',    q{/usr/bin/lssrc -s pnsd |/usr/bin/awk '(NR==2){print $3}'} ],
    [ 'loadl-schedd', q{/usr/bin/llrstatus -h `hostname` -r %sca %scs} ],
    [ 'loadl-startd', q{/usr/bin/llrstatus -h `hostname` -r %sta %sts} ],
);

# Poll until every item produced output in the same round, or until the
# timeout expires.  Items that are up are re-reported on every round.
while (1) {

    # Number of items that produced output in this round.
    my $count = 0;

    # Check gpfs-daemon via port 1191.
    my $listening = `netstat -nA|grep 1191`;
    if ($listening) {
        _report_status($master, $gpfsd);
        $count++;
    }

    # Check gpfs-quorum via /usr/lpp/mmfs/bin/mmgetstate
    my $quorum = `/usr/lpp/mmfs/bin/mmgetstate -s|grep achieved`;
    if ($quorum) {
        _report_status($master, $gpfsq);
        $count++;
    }

    # Check gpfs-filesystem via /usr/sbin/mount.  The second column of each
    # line mentioning mmfs is reported; every entry is followed by "!".
    my @fs = `/usr/sbin/mount |grep mmfs |awk '{print \$2}'`;
    if (@fs) {
        chomp @fs;
        my $mounted = join '', map { "$_!" } @fs;

        # Key spelled as in the item list above.
        _report_status($master, "hpcbootstatus gpfs-filesystem=$mounted");
        $count++;
    }

    # Check LAPI via lssrc, loadl-schedd and loadl-startd via llrstatus.
    foreach my $check (@value_checks) {
        my ($key, $cmd) = @{$check};
        my $value = `$cmd`;
        next unless $value;
        chomp $value;
        _report_status($master, "hpcbootstatus $key=$value");
        $count++;
    }

    # All is done or timeout?
    last if ($count == $itemnum) || ($passed >= $timeout);

    $passed += $interval;
    sleep $interval;
}

# Send one status line to xcatd on $server through updateflag() and log a
# message on stdout when updateflag() reports a failure (non-zero return).
sub _report_status {
    my ($server, $status) = @_;

    if (updateflag($server, $status) != 0) {
        print "HPCbootstatus.aix: Failed to report $status to xcatd on $server!\n";
    }
    return;
}
|
||||
|
||||
############################################################
#
# updateflag
#    Tells xCAT on the server about the appstatus.
#    e.g. $status = "hpcbootstatus loadl-schedd=1!Avail";
#
#    Arguments:
#       $server - host name or IPv4 address of the node running xcatd
#       $status - status line to send (a newline is appended here)
#    Returns:
#       0 - the server sent "ready" and the status line was written
#       1 - no connection could be made, or the connection ended
#           before the server sent "ready"
#
############################################################
sub updateflag {
    my ($server, $status) = @_;

    my $port = "3002";    #by default

    # Resolve a host name with host(1) and use the third field of its first
    # output line when that field is an IPv4 address (optionally followed by
    # a comma).  An IPv4 literal is used as is, and anything host(1) cannot
    # resolve falls back to $server so the socket layer can try it.
    # The list form of open keeps $server away from the shell.
    my $ipv4   = qr/\d{1,3}(?:\.\d{1,3}){3}/;
    my $servip = $server;
    if ($server !~ /\A$ipv4\z/ && open(my $lookup, '-|', 'host', $server)) {
        my $answer = <$lookup>;
        close $lookup;
        my @fields = defined $answer ? split(' ', $answer) : ();
        if (defined $fields[2] && $fields[2] =~ /\A($ipv4),?\z/) {
            $servip = $1;
        }
    }

    my %peer = (
        Proto    => "tcp",
        PeerAddr => $servip,
        PeerPort => $port,
        Timeout  => 0,
    );

    # One initial attempt plus up to 10 retries, one second apart.  The
    # message is printed only for attempts that actually failed.
    my $remote = IO::Socket::INET->new(%peer);
    my $try    = 10;
    while (!$remote && $try-- > 0) {
        print "HPCbootstatus.aix: Cannot connect to host \'$servip\'\n";
        sleep 1;
        $remote = IO::Socket::INET->new(%peer);
    }
    unless ($remote) {
        print "HPCbootstatus.aix: Cannot connect to host \'$servip\'\n";
        return 1;
    }
    $remote->autoflush(1);

    # Wait for "ready", answer with the status line, stop at "done" or when
    # the server closes the connection.
    my $sent = 0;
    while (defined(my $line = <$remote>)) {
        chomp $line;
        if ($line eq "ready") {
            print {$remote} "$status\n";
            $sent = 1;
        }
        elsif ($line eq "done") {
            last;
        }
    }
    close $remote;

    # Without a "ready" from the server nothing was delivered.
    return $sent ? 0 : 1;
}
|
88
xCAT/postscripts/HPCbootstatus.linux
Normal file
88
xCAT/postscripts/HPCbootstatus.linux
Normal file
@ -0,0 +1,88 @@
|
||||
#!/bin/sh

#
# Sample postscript to check appstatus for GPFS, LAPI and LoadL on Linux system.
# Report the appstatus to xcatd on MN.
#
# Every INTERVAL seconds each item is probed; an item whose probe prints
# something is reported to xcatd on $MASTER (port 3002) via updateflag.awk.
# The script ends when all ITEMNUM items were reported in the same pass or
# when TIMEOUT seconds have been slept.
#
# NOTE(review): several probes (netstat -An, /usr/sbin/mount | grep mmfs,
# lssrc) mirror the AIX variant; confirm they behave as intended on the
# target Linux distribution.

# Set the default timeout to 5 mins.
TIMEOUT=300
INTERVAL=15
PASSED=0

# The number of items to check, currently including:
# gpfs-daemon, gpfs-quorum, gpfs-filesystem, lapi-pnsd, loadl-schedd, loadl-startd
ITEMNUM=6

gpfsd="hpcbootstatus gpfs-daemon=up"
gpfsq="hpcbootstatus gpfs-quorum=achieved"

# The host name does not change between passes, so look it up once.
hn=$(hostname)

# report STATUS LABEL
#   Send STATUS to xcatd, log LABEL and count the item for this pass.
#   POSIX arithmetic expansion is used because "let" is not available in
#   every /bin/sh.
report() {
    updateflag.awk "$MASTER" 3002 "$1"
    echo "HPCbootstatus.linux: Reporting $2 to xcatd on $MASTER!"
    COUNT=$((COUNT + 1))
}

# Use a loop
while :; do

    COUNT=0

    # Query gpfs-daemon,mmfsd
    # Check 1191 port
    port=$(netstat -An|grep 1191)
    if [ -n "$port" ]; then
        report "$gpfsd" "$gpfsd"
    fi

    # Query gpfs-quorum
    quorum=$(/usr/lpp/mmfs/bin/mmgetstate -s|grep achieved)
    if [ -n "$quorum" ]; then
        report "$gpfsq" "$gpfsq"
    fi

    # Query gpfs-filesystem; every entry is followed by "!".
    # The key is spelled as in the item list above.
    fs=$(/usr/sbin/mount |grep mmfs |awk '{print $2}'|tr '\n' '!')
    if [ -n "$fs" ]; then
        report "hpcbootstatus gpfs-filesystem=$fs" "gpfs-filesystem=$fs"
    fi

    # Query lapi-pnsd
    lapi=$(/usr/bin/lssrc -s pnsd |/usr/bin/awk '(NR==2){print $3}')
    if [ -n "$lapi" ]; then
        report "hpcbootstatus lapi-pnsd=$lapi" "lapi-pnsd=$lapi"
    fi

    # Query loadl-schedd
    llsc=$(/usr/bin/llrstatus -h "$hn" -r %sca %scs)
    if [ -n "$llsc" ]; then
        report "hpcbootstatus loadl-schedd=$llsc" "loadl-schedd=$llsc"
    fi

    # Query loadl-startd
    llst=$(/usr/bin/llrstatus -h "$hn" -r %sta %sts)
    if [ -n "$llst" ]; then
        report "hpcbootstatus loadl-startd=$llst" "loadl-startd=$llst"
    fi

    # All is done or timeout?  -ge stops after TIMEOUT seconds of sleeping,
    # the same condition the AIX variant uses.
    if [ "$COUNT" -eq "$ITEMNUM" ] || [ "$PASSED" -ge "$TIMEOUT" ]; then
        exit 0
    fi

    PASSED=$((PASSED + INTERVAL))
    sleep "$INTERVAL"
    echo "HPCbootstatus.linux: passed $PASSED"

done
|
||||
|
Loading…
Reference in New Issue
Block a user