support xcatmon push model for HPC software.
git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@8254 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
		
							
								
								
									
										16
									
								
								xCAT/postscripts/HPCbootstatus
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								xCAT/postscripts/HPCbootstatus
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,16 @@
 | 
			
		||||
#!/bin/sh

# Sample postscript: checks the appstatus for GPFS, LAPI and LoadL and
# reports it to xcatd on MN.
#
# This wrapper only selects the platform-specific checker; the actual work
# is done by HPCbootstatus.aix and HPCbootstatus.linux (see those scripts
# for more information).

OS=`uname`

# AIX has its own checker; every other platform is handled as Linux.
case "$OS" in
    AIX)
        echo "HPCbootstatus: query appstatus on AIX system."
        HPCbootstatus.aix
        ;;
    *)
        echo "HPCbootstatus: query appstatus on Linux system."
        HPCbootstatus.linux
        ;;
esac
 | 
			
		||||
							
								
								
									
										175
									
								
								xCAT/postscripts/HPCbootstatus.aix
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										175
									
								
								xCAT/postscripts/HPCbootstatus.aix
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,175 @@
 | 
			
		||||
#!/usr/bin/perl

use strict;
use warnings;
use IO::Socket;

# Sample postscript to check appstatus for GPFS, LAPI and LoadL on AIX system.
# Report the appstatus to xcatd on MN.
#
# All items are probed once per pass.  Every item that looks healthy is
# reported to xcatd through updateflag().  The script stops as soon as all
# items passed in the same pass, or once $timeout seconds have elapsed.

# Give up after this many seconds.
my $timeout = 300;
# The number of items to check, currently including:
# gpfs-daemon, gpfs-quorum, gpfs-filesystem, lapi-pnsd, loadl-schedd, loadl-startd
my $itemnum = 6;
# Seconds slept so far.
my $passed = 0;
# Set sleep time
my $interval = 15;

# Management node to report to.  Prefer the MASTER environment variable
# (the same variable HPCbootstatus.linux reads); fall back to the sample
# address when it is not set.
my $master = $ENV{MASTER} || "192.168.5.128";

my $gpfsd = "hpcbootstatus gpfs-daemon=up";
my $gpfsq = "hpcbootstatus gpfs-quorum=achieved";

# Send one status string to xcatd and complain on stdout when that fails.
# The failure message always names the status that was actually sent.
my $report = sub {
    my ($status) = @_;
    if (updateflag($master, $status) != 0)
    {
        print "HPCbootstatus.aix: Failed to report $status to xcatd on $master!\n";
    }
    return;
};

while (1)
{
    # Number of items that passed during this pass.
    my $count = 0;

    # Check gpfs-daemon via port 1191.
    my $cmd = "netstat -nA|grep 1191";
    my $rc = `$cmd`;

    if ($rc)
    {
        $report->($gpfsd);
        $count++;
    }

    # Check gpfs-quorum via /usr/lpp/mmfs/bin/mmgetstate
    $cmd = "/usr/lpp/mmfs/bin/mmgetstate -s|grep achieved";
    $rc = `$cmd`;

    if ($rc)
    {
        $report->($gpfsq);
        $count++;
    }

    # Check gpfs-filesystem via /usr/sbin/mount: the second column of every
    # line mentioning mmfs is collected.
    $cmd = "/usr/sbin/mount |grep mmfs |awk '{print \$2}'";
    my @fs = `$cmd`;

    if (scalar @fs)
    {
        chomp @fs;
        # Every entry is terminated by "!", e.g. "/gpfs1!/gpfs2!".
        my $new = join("", map { "$_!" } @fs);
        $report->("hpcbootstatus gfsp-filesystem=$new");
        $count++;
    }

    # Check LAPI via lssrc: third field of the second output line.
    $cmd = "/usr/bin/lssrc -s pnsd |/usr/bin/awk '(NR==2){print \$3}'";
    $rc = `$cmd`;

    if ($rc)
    {
        chomp $rc;
        $report->("hpcbootstatus lapi-pnsd=$rc");
        $count++;
    }

    # Check loadl-schedd via llrstatus
    $cmd = "/usr/bin/llrstatus -h `hostname` -r %sca %scs";
    $rc = `$cmd`;

    if ($rc)
    {
        chomp $rc;
        $report->("hpcbootstatus loadl-schedd=$rc");
        $count++;
    }

    # Check loadl-startd via llrstatus
    $cmd = "/usr/bin/llrstatus -h `hostname` -r %sta %sts";
    $rc = `$cmd`;

    if ($rc)
    {
        chomp $rc;
        $report->("hpcbootstatus loadl-startd=$rc");
        $count++;
    }

    # All is done or timeout?
    if (($count == $itemnum) || ($passed >= $timeout))
    {
        last;
    }

    $passed += $interval;
    sleep $interval;
}
 | 
			
		||||
 | 
			
		||||
############################################################
 | 
			
		||||
#
 | 
			
		||||
#   updateflag
 | 
			
		||||
#     Tells xCAT on the server about the appstatus. 
 | 
			
		||||
#     e.g. $status = "hpcbootstatus loadl-schedd=1!Avail";
 | 
			
		||||
#
 | 
			
		||||
############################################################
 | 
			
		||||
sub  updateflag {
    # Arguments:
    #   $server - host name or address of the node running xcatd
    #   $status - status line to send, without the trailing newline
    # Returns 0 when the status line was sent, 1 when the server could not
    # be reached or never announced "ready".
    my $server = shift;
    my $status = shift;

    my $port = "3002";  #by default

    # Resolve the server through host(1); the third output field is used.
    my $servip = `host $server | awk '{print \$3}'`;
    $servip = "" unless defined $servip;
    chomp($servip);
    $servip =~ s/,$//; # remove trailing comma
    # If the lookup produced nothing, connect to what the caller gave us
    # rather than to an empty peer address.
    $servip = $server if $servip eq "";

    # One initial attempt plus up to 10 retries, one second apart.  The
    # failure message is printed only for attempts that really failed.
    my $remote;
    for my $attempt (0 .. 10) {
        sleep 1 if $attempt > 0;
        $remote = IO::Socket::INET->new( Proto => "tcp", PeerAddr  => $servip, PeerPort  => $port, Timeout => 0);
        last if $remote;
        print "HPCbootstatus.aix: Cannot connect to host \'$servip\'\n";
    }
    return 1 unless $remote;

    $remote->autoflush(1);

    # Protocol: wait for "ready", send the status line, then read until the
    # server answers "done" or closes the connection.
    my $sent = 0;
    my $line;
    while (defined ($line = <$remote>)) {
        chomp $line;
        if ($line eq "ready") {
            print $remote "$status\n";
            $sent = 1;
        } elsif ($line eq "done") {
            last;
        }
    }
    close $remote;

    # Nothing was sent if the server never said "ready"; report that as a
    # failure instead of silently claiming success.
    return $sent ? 0 : 1;
}
 | 
			
		||||
							
								
								
									
										88
									
								
								xCAT/postscripts/HPCbootstatus.linux
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								xCAT/postscripts/HPCbootstatus.linux
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,88 @@
 | 
			
		||||
#!/bin/sh

#
# Sample postscript to check appstatus for GPFS,LAPI and LoadL on Linux system.
# Report the appstatus to xcatd on MN.
#
# All items are probed once per pass.  Every item that looks healthy is
# reported through updateflag.awk to port 3002 on $MASTER.  The script exits
# as soon as all items passed in the same pass, or once TIMEOUT seconds have
# elapsed.

# Set the default timeout to 5 mins.
TIMEOUT=300
# Seconds to sleep between two passes.
INTERVAL=15
# Seconds slept so far.
PASSED=0

# The number of items to check, currently including:
# gpfs-daemon, gpfs-quorum, gpfs-filesystem, lapi-pnsd, loadl-schedd, loadl-startd
ITEMNUM=6

gpfsd="hpcbootstatus gpfs-daemon=up"
gpfsq="hpcbootstatus gpfs-quorum=achieved"

# Use a loop
while true; do

    # Number of items that passed during this pass.
    COUNT=0

    # Query gpfs-daemon,mmfsd
    # Check 1191 port.  On Linux "-A" expects an address family argument,
    # so "-an" (all sockets, numeric) is used here.
    port=`netstat -an|grep 1191`
    if [ "$port" != "" ]; then
        # Sent message to xcatd
        updateflag.awk "$MASTER" 3002 "$gpfsd"
        echo "HPCbootstatus.linux: Reporting $gpfsd to xcatd on $MASTER!"
        COUNT=$((COUNT+1))
    fi

    # Query gpfs-quorum
    quorum=`/usr/lpp/mmfs/bin/mmgetstate -s|grep achieved`
    if [ "$quorum" != "" ]; then
        # Sent message to xcatd
        updateflag.awk "$MASTER" 3002 "$gpfsq"
        echo "HPCbootstatus.linux: Reporting $gpfsq to xcatd on $MASTER!"
        COUNT=$((COUNT+1))
    fi

    # Query gpfs-filesystem; entries are joined with "!".
    # NOTE(review): path, "mmfs" pattern and column 2 mirror the AIX script;
    # confirm they match the mount output on the target Linux systems.
    fs=`/usr/sbin/mount |grep mmfs |awk '{print $2}'|tr '\n' '!'`
    if [ "$fs" != "" ]; then
        # Sent message to xcatd
        updateflag.awk "$MASTER" 3002 "hpcbootstatus gfsp-filesystem=$fs"
        echo "HPCbootstatus.linux: Reporting gfsp-filesystem=$fs to xcatd on $MASTER!"
        COUNT=$((COUNT+1))
    fi

    # Query lapi-pnsd: third field of the second lssrc output line.
    lapi=`/usr/bin/lssrc -s pnsd |/usr/bin/awk '(NR==2){print $3}'`
    if [ "$lapi" != "" ]; then
        # Sent message to xcatd
        updateflag.awk "$MASTER" 3002 "hpcbootstatus lapi-pnsd=$lapi"
        echo "HPCbootstatus.linux: Reporting lapi-pnsd=$lapi to xcatd on $MASTER!"
        COUNT=$((COUNT+1))
    fi

    # Query loadl-schedd
    hn=`hostname`
    llsc=`/usr/bin/llrstatus -h $hn -r %sca %scs`
    if [ "$llsc" != "" ]; then
        # Sent message to xcatd
        updateflag.awk "$MASTER" 3002 "hpcbootstatus loadl-schedd=$llsc"
        echo "HPCbootstatus.linux: Reporting loadl-schedd=$llsc to xcatd on $MASTER!"
        COUNT=$((COUNT+1))
    fi

    # Query loadl-startd
    llst=`/usr/bin/llrstatus -h $hn -r %sta %sts`
    if [ "$llst" != "" ]; then
        # Sent message to xcatd
        updateflag.awk "$MASTER" 3002 "hpcbootstatus loadl-startd=$llst"
        echo "HPCbootstatus.linux: Reporting loadl-startd=$llst to xcatd on $MASTER!"
        COUNT=$((COUNT+1))
    fi

    # All is done or timeout?  (-ge so the loop stops at TIMEOUT itself,
    # like the ">=" test in HPCbootstatus.aix.)
    if [ $COUNT -eq $ITEMNUM ] || [ $PASSED -ge $TIMEOUT ]; then
        exit
    fi

    # POSIX arithmetic expansion instead of "let", which /bin/sh need not
    # provide.
    PASSED=$((PASSED+INTERVAL))
    sleep $INTERVAL
    echo "HPCbootstatus.linux: passed $PASSED"

done
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user