mirror of
				https://github.com/xcat2/xcat-core.git
				synced 2025-10-31 11:22:27 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			411 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			411 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/perl
 | |
| # IBM(c) 2016 EPL license http://www.eclipse.org/legal/epl-v10.html
 | |
| 
 | |
#--------------------------------------------------------
#This is a template for developing a probe sub_command.
#This template mainly implements the sub_command dispatch in a hierarchical structure.
#Developers only need to focus on the main probe job (by implementing the do_main_job function) and on friendly output for the user (by implementing the summary_all_jobs_output function).
#This template can also be used in a flat structure, but if the developer thinks it is too heavy for a flat structure, it is fine to develop the sub command directly.
#But in a hierarchical structure, we strongly recommend using this template.
#The main dispatch policies are:
#1. If there is no noderange input from the command line and there are service nodes defined in the current MN,
#   we dispatch the exact same command input from STDIN to all SNs and the current MN. If there are no service nodes defined,
#   we just handle the command input from STDIN in the current MN.
#2. If there is noderange input from the command line by option "-n", we will dispatch the command input from STDIN to the SNs which can handle these nodes.
#   For example, if we got a command from STDIN like "probecommand -n test[1-15] -V" and test[1-5]'s SN is SN1, test[6-10]'s SN is SN2,
#   the dispatch result will be:
#   For MN run:  probecommand -n test[11-15] -V
#   For SN1 run: probecommand -n test[1-5] -V
#   For SN2 run: probecommand -n test[6-10] -V
#3. All the return messages from SNs and MN will be saved in hash %summaryoutput; developers can use it while implementing the summary_all_jobs_output function.
#--------------------------------------------------------
 | |
| 
 | |
| 
 | |
| BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; }
 | |
| 
 | |
use lib "$::XCATROOT/probe/lib/perl";
use probe_utils;
use xCAT::ServiceNodeUtils;
use File::Basename;
use Getopt::Long qw(:config no_ignore_case);
use IO::Select;
use POSIX qw(:sys_wait_h);
use Data::Dumper;
 | |
| 
 | |
# Name under which the current sub_command was invoked (directory part stripped).
my $program_name = basename($0);

# Values of the command line options, filled in by GetOptions in the main part.
my ($help, $test, $verbose) = (0, 0, 0);    # '-h' (usage), '-T', '-V'
my $noderange;                              # '-n', stays undefined when not given

# Target passed to probe_utils->send_msg("$output", "o", "xxxxxxxxxx") by the main process.
my $output = "stdout";

my $is_sn    = 0;    # set to 1 when the current server is a SN
my $rst      = 0;    # exit code of the current command
my $terminal = 0;    # set to 1 once an INT or TERM signal has been received

# All output from the commands running on SNs and MN, one array per source.
# For example:
#   $summaryoutput{mn}  = @mn_output_history
#   $summaryoutput{SN1} = @SN1_output_history
my %summaryoutput;

# Map of SN => command which will be dispatched to that SN.
# For example:
#   $dispatchcmd{SN1} = "probecommand -n test[1-5] -V"
#   $dispatchcmd{SN2} = "probecommand -n test[6-10] -V"
my %dispatchcmd;

# Copy of the command line attributes as they were received.
my @tmpargv;
 | |
| 
 | |
#--------------------------------
# Option rules used by default:
#     -h : Get usage information of the current sub command
#     -V : Output more information for debug
#     -T : To verify if $program_name can work; reserved for the probe framework, not used by customers
#     -n : In xCAT probe, -n is used uniformly to specify a node range
#--------------------------------
$::USAGE = <<"END_OF_USAGE";
Usage:
    $program_name -h
    $program_name [-V]

Description:
    This is a template for developing a probe sub_command.
    <# ADD DESCRIPTION FOR YOUR COMMAND #>

Options:
    -h : Get usage information of $program_name
    -V : Output more information for debug
END_OF_USAGE
 | |
| 
 | |
#------------------------------------
# do_main_job: implement the main checking job of the current command here.
#
# Argument:
#   $outputtarget - where messages are sent. The job forked on MN passes the
#                   write end of a pipe; when nothing (or a false value) is
#                   passed, as done on SN, the target "stdout" is used.
# Returns:
#   the exit code of the job (always 0 in this sample implementation).
#
# It is recommended to use probe_utils->send_msg() for every message which
# should be printed. The body below is only a simple example.
#------------------------------------
sub do_main_job {
    my ($outputtarget) = @_;
    $outputtarget ||= "stdout";
    my $exit_code = 0;

    probe_utils->send_msg($outputtarget, "o", "I reveive node range is $noderange");

    #<#DO YOUR OWN CHECKING JOB1#>
    probe_utils->send_msg($outputtarget, "o", "first checking point");

    #<#DO YOUR OWN CHECKING JOB2#>
    probe_utils->send_msg($outputtarget, "f", "second checking point");

    return $exit_code;
}
 | |
| 
 | |
#-------------------------------------
# summary_all_jobs_output: generate the summary once this command has returned
# from all SNs and MN.
#
# All history output from SNs and MN is saved in the global hash %summaryoutput:
#   $summaryoutput{mn}      = @mnhistory
#   $summaryoutput{snname1} = @snname1history
# The entries of each history array are not categorized; a message which
# arrived earlier is placed before one which arrived later.
#
# Returns:
#   the exit code of the summary step (always 0 in this sample implementation).
#
# The body below is a simple example which dumps %summaryoutput.
#-------------------------------------
sub summary_all_jobs_output {
    my $rst = 0;

    #DO SUMMARY DEPENDING ON YOUR SUB_COMMAND NEED
    probe_utils->send_msg("$output", "d", "======================do summary=====================");

    # Walk the sources in sorted order: plain "keys" order differs from run to
    # run, which made two summaries of the same data hard to compare.
    foreach my $sn (sort keys %summaryoutput) {
        probe_utils->send_msg("$output", "d", "[$sn]");
        foreach my $log (@{ $summaryoutput{$sn} }) {
            probe_utils->send_msg("$output", "d", "$log");
        }
    }
    return $rst;
}
 | |
| 
 | |
#-------------------------------------
# Each probe sub command is supposed to support hierarchical clusters.
# calculate_dispatch_cmd works out which command has to be dispatched to
# which SN and stores the result in the global hash %dispatchcmd.
#
# Without a noderange ("-n"), the unchanged command line is dispatched to all
# service nodes (one %dispatchcmd entry keyed by the comma separated SN list).
# With a noderange, the nodes are grouped by the SN serving them and every SN
# gets the command line with its own node list as the "-n" value; the nodes
# without SN are written back to the global $noderange for the job on MN.
#
# Returns:
#   0 on success, 1 when the noderange can not be resolved by nodels.
#-------------------------------------
sub calculate_dispatch_cmd {
    my @snlist = xCAT::ServiceNodeUtils->getAllSN();

    if (!$noderange) {

        #there isn't noderange input from STDIN, dispatch command to all SN if there are SN defined in MN
        if (@snlist) {
            my $args = join(" ", @tmpargv);
            my $sns  = join(",", @snlist);

            # NOTE(review): nothing in this branch runs a child process, so $?
            # is whatever was left by earlier code; the guard is kept only to
            # preserve the existing behaviour - confirm whether it can go.
            $dispatchcmd{$sns} = "$::XCATROOT/probe/subcmds/$program_name $args 2>&1" if (!$?);
        }
        return 0;
    }

    # Single-quote the noderange for the shell: an unquoted "test[1-5]" is a
    # glob pattern and could be expanded against files in the current directory.
    (my $quoted_noderange = $noderange) =~ s/'/'\\''/g;
    my @nodes = `nodels '$quoted_noderange' 2>&1`;

    #if there is error in noderange
    if ($?) {
        my $error = join(" ", @nodes);
        if ($error =~ /Error: Invalid nodes and\/or groups in noderange: (.+)/) {
            probe_utils->send_msg("$output", "f", "There are invaild nodes ($1) in command line attribute node range");
        } else {
            probe_utils->send_msg("$output", "f", "There is error in command line attribute node range, please using nodels to check");
        }
        return 1;
    }

    #calculate the mapping between SN and the nodes which belong to it.
    chomp(@nodes);
    my $snnodemap = xCAT::ServiceNodeUtils->get_ServiceNode(\@nodes, "xcat", "MN");

    # Exact lookup instead of interpolating names into a regex, where "." and
    # other metacharacters of a host name would be treated as pattern syntax.
    my %known_sn = map { $_ => 1 } @snlist;

    my %newsnnodemap;
    foreach my $sn (keys %$snnodemap) {
        if ($known_sn{$sn}) {    # the node just belong to one SN
            push(@{ $newsnnodemap{$sn} }, @{ $snnodemap->{$sn} });
        } elsif ($sn =~ /^\s*([^,\s]+)\s*,.+/) {    # the node belong to more than one SN, count it into first SN
            # Take everything up to the first comma: the former (\w+) stopped
            # at "-" and "." and so picked only a fragment of such names.
            my $first_sn = $1;
            if ($known_sn{$first_sn}) {
                push(@{ $newsnnodemap{$first_sn} }, @{ $snnodemap->{$sn} });
            } else {
                probe_utils->send_msg("$output", "f", "The value $first_sn  of 'servicenode' isn't a service node");
            }
        } else {    # the nodes don't belong to any SN will be handled by MN
            push(@{ $newsnnodemap{mn} }, @{ $snnodemap->{$sn} });
        }
    }

    #generate new command for each SN, replace noderange
    foreach my $sn (keys %newsnnodemap) {
        my $nodes = join(",", @{ $newsnnodemap{$sn} });
        if ($sn eq "mn") {
            $noderange = $nodes;
            next;
        }

        # Work on a copy so that the saved command line stays untouched.
        my @args = @tmpargv;
        foreach my $i (0 .. $#args) {
            if ($args[$i] =~ /^--?n$/) {    # "-n value" / "--n value"
                $args[ $i + 1 ] = $nodes;
                last;
            }
            if ($args[$i] =~ /^(--?n=)/) {    # "-n=value" / "--n=value"
                $args[$i] = $1 . $nodes;
                last;
            }
        }
        my $args = join(" ", @args);
        $dispatchcmd{$sn} = "$::XCATROOT/probe/subcmds/$program_name $args 2>&1";
    }

    return 0;
}
 | |
| 
 | |
| 
 | |
| 
 | |
#-------------------------------------
# main process start
#-------------------------------------
# Save the command line before it is parsed; the saved copy is what gets
# dispatched to the service nodes later on.
@tmpargv = @ARGV;

my $options_ok = GetOptions(
    "--help|h" => \$help,
    "T"        => \$test,
    "n=s"      => \$noderange,
    "V"        => \$verbose
);

unless ($options_ok) {
    probe_utils->send_msg("$output", "f", "Invalid parameter for $program_name");
    probe_utils->send_msg("$output", "d", "$::USAGE");
    exit 1;
}

# '-h': show the usage and stop.
if ($help) {
    if ($output eq "stdout") {
        print "$::USAGE";
    } else {
        probe_utils->send_msg("$output", "d", "$::USAGE");
    }
    exit 0;
}

# '-T': this template only reports that it is not a real probe tool.
if ($test) {
    probe_utils->send_msg("$output", "o", "This isn't a probe tool, this is just a template for sub command coding. Using it to develop sub command which need to cover hierarchical cluster");
    exit 0;
}

#Handle the interrupt signal from STDIN
my $mark_terminated = sub {
    $terminal = 1;
};
$SIG{INT}  = $mark_terminated;
$SIG{TERM} = $mark_terminated;

#--------------------------------------------
#  To confirm what current node is, MN or SN
#--------------------------------------------
if (-e "/etc/xCATSN") {
    $is_sn = 1;
}

#if this node is SN, just run job, not to do dispatch
if ($is_sn) {
    $rst = do_main_job();
    exit $rst;
}

#--------------------------------------------
# calculate which command should be dispatched to which SN
#--------------------------------------------
$rst = calculate_dispatch_cmd();

#print Dumper \%dispatchcmd;
#print "nodes left to mn : $noderange\n";
#print "========================\n";
if ($rst) {
    exit $rst;
}
 | |
| 
 | |
#--------------------------------------------
# dispatch job to MN and SN
#
# The MN job runs in a forked child and reports through a pipe; every entry of
# %dispatchcmd is started through xdsh with its output read from a pipe.
# The parent collects all lines in %summaryoutput until every pipe reached end
# of file or a termination signal was received.
#--------------------------------------------
my $mnjobpid   = 0;
my @snsjobpids = ();
my @snsjobfds  = ();
my $pipe_parent_read;
my $pipe_child_write;
unless (pipe($pipe_parent_read, $pipe_child_write)) {
    probe_utils->send_msg("$output", "f", "create pipe to handle MN job failed: $!");
    exit 1;
}
{
    #handle job in MN
    $mnjobpid = fork();
    if (!defined($mnjobpid)) {
        probe_utils->send_msg("$output", "f", "fork process to handle MN job failed: $!");
        $rst = 1;
        last;
    } elsif ($mnjobpid == 0) {
        $SIG{TERM} = $SIG{INT} = sub {
            exit 1;
        };

        close $pipe_parent_read;
        $rst = do_main_job($pipe_child_write);
        exit $rst;
    }

    # Reap the MN job as soon as it exits; $! and $? are localized so that the
    # handler can not disturb an error check it happens to interrupt.
    $SIG{CHLD} = sub {
        local ($!, $?);
        waitpid($mnjobpid, WNOHANG);
    };
    close $pipe_child_write;

    #handle job dispatch to SN
    foreach my $sn (keys %dispatchcmd) {
        my $snjobcmd = "xdsh $sn -s \"$dispatchcmd{$sn}\" 2>&1";

        #print "$sn = $snjobcmd\n";
        my $snjobfd;
        my $snjobpid = open($snjobfd, '-|', $snjobcmd);
        if (!$snjobpid) {
            probe_utils->send_msg("$output", "f", "fork process to dispatch cmd $snjobcmd to $sn failed: $!");
            next;
        }
        push(@snsjobpids, $snjobpid);
        push(@snsjobfds,  $snjobfd);
    }

    my $select = IO::Select->new();
    $select->add($pipe_parent_read);
    $select->add($_) foreach (@snsjobfds);
    $| = 1;

    # A host name: word characters, optionally joined by "." or "-".
    my $hostname_re = qr/\w+(?:[.-]\w+)*/;

    # A line from the MN job is shown at once and kept for the summary.
    my $record_mn_line = sub {
        my ($line) = @_;
        print "$line\n";
        push @{ $summaryoutput{mn} }, $line;
    };

    # A line from xdsh is filed under the host name it carries.
    my $record_sn_line = sub {
        my ($line) = @_;
        if ($line =~ /(Error:)\s+($hostname_re)\s+(.+)/i) {
            push @{ $summaryoutput{$2} }, $line;

            #if need to print to STDIN in real time, can make below line available
            #print "[failed] :$2: $line\n";
        } elsif ($line =~ /^($hostname_re)\s*:\s(.*)/) {
            push @{ $summaryoutput{$1} }, $2;

            #if need to print to STDIN in real time, can make below lines available
            #$line = "$2:$1: $3" if ($line =~ /^(\w+)\s*:\s*(\[\w+\]\s*):\s*(.*)/);
            #print "$line\n";
        }
    };

    # Read with sysread and split the lines here: buffered readline hides
    # already received lines from select(), and the former "one line per
    # handle, then sleep 1" loop needed one second for every line.
    my %pending;    # not yet terminated data per handle
    while ($select->count() && !$terminal) {

        # wait at most one second so that $terminal is checked regularly
        foreach my $hdl ($select->can_read(1)) {
            my $record_line = ($hdl == $pipe_parent_read) ? $record_mn_line : $record_sn_line;

            my $chunk;
            my $nread = sysread($hdl, $chunk, 4096);
            if (!defined($nread)) {
                next if ($!{EINTR} || $!{EAGAIN});    # interrupted, try again later
                $nread = 0;                           # any other error ends this pipe
            }

            $pending{$hdl} = '' if (!defined($pending{$hdl}));
            $pending{$hdl} .= $chunk if ($nread);
            while ($pending{$hdl} =~ s/^([^\n]*)\n//) {
                $record_line->("$1");
            }

            if (!$nread) {

                # end of file: hand over a last line without newline, if any
                my $rest = delete $pending{$hdl};
                $record_line->($rest) if (length($rest));
                $select->remove($hdl);
            }
        }
    }
}
close($pipe_child_write) if ($pipe_child_write);
close($pipe_parent_read) if ($pipe_parent_read);
close($_) foreach (@snsjobfds);
 | |
| 
 | |
#start to clear up all sub processes
# Every child which is still alive is asked to stop: INT up to 5 times, then
# TERM up to 5 times, then KILL once, with one second between two attempts.
my %runningpid;
$runningpid{$mnjobpid} = 1 if ($mnjobpid);
$runningpid{$_} = 1 foreach (@snsjobpids);

my $try = 0;
while (%runningpid) {

    #To check how many process exit, forget the processes which are gone.
    #This is done before any signal is sent: a child which has already been
    #reaped must not be signalled, and no attempt is needed when all are gone.
    #waitpid returns 0 only while the child is still running.
    foreach my $pid (keys %runningpid) {
        delete $runningpid{$pid} if (waitpid($pid, WNOHANG) != 0);
    }
    last if (!%runningpid);

    #just try 11 times, if still can't kill some process, give up
    if ($try > 10) {
        my $leftpid = '';
        $leftpid .= "$_ " foreach (keys %runningpid);
        print "Can't stop process $leftpid, please handle manually.\n";
        last;
    }

    #send terminal signal to all running process at same time
    my $signal = ($try < 5) ? 'INT' : ($try < 10) ? 'TERM' : 'KILL';
    kill $signal, $_ foreach (keys %runningpid);
    ++$try;

    sleep 1;
}
 | |
| 
 | |
#-------------------------------------
# summary all jobs output to display
#-------------------------------------
# $rst may already carry a failure (it is set to 1 when the MN job could not be
# forked); only a failing summary is allowed to replace it, so that an earlier
# failure is still reported through the exit code.
my $summary_rst = summary_all_jobs_output();
$rst = $summary_rst if ($summary_rst);

exit $rst;
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 |