diff --git a/xCAT-probe/subcmds/code_template b/xCAT-probe/subcmds/code_template new file mode 100755 index 000000000..a681e9649 --- /dev/null +++ b/xCAT-probe/subcmds/code_template @@ -0,0 +1,410 @@ +#!/usr/bin/perl +# IBM(c) 2016 EPL license http://www.eclipse.org/legal/epl-v10.html + +#-------------------------------------------------------- +#This is a template for developing a probe sub_command. +#This template mainly implement the sub_comamd dispatch in hierarchical structure +#Developer only need to focus on main probe job (by implement do_main_job function) and friendly output (by implement summary_all_jobs_output function) for user +#This template can also be used in flat structure. but if developer think it's too heavy in flat, it's fine to develop sub command directly. +#But in hierarchical structure, we strongly recommand using this template. +#The main dispatch policy are: +#1. if there isn't noderange input from commmand line. if there are service nodes defined in current MN, +# we dispatch exact same command input from STDIN to all SNs and current MN. if there isn't service nodes defined, +# just hanld command input from STDIN in current MN +#2. If there is noderange input from command line by opion "-n", we will dispatch the command input from STDIN to SN which can hanle these ndoes +# For example, if we got command from STDIN like "probecommand -n test[1-15] -V" and test[1-5] 's SN is SN1, test[6-10]'s SN is SN2 +# The dispatch result will be: +# For MN run: probecommand -n test[11-15] -V +# For SN1 run: probecommand -n test[1-5] -V +# For SN2 run: probecommand -n test[6-10] -V +#3. All the return message from SNs and MN will be saved in hash %summaryoutput, develper can use it while implement summary_all_jobs_output function +#-------------------------------------------------------- + + +BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; } + +use lib "$::XCATROOT/probe/lib/perl"; +use probe_utils; +use xCAT::ServiceNodeUtils; +use File::Basename; +use Getopt::Long qw(:config no_ignore_case); +use IO::Select; +use Data::Dumper; + +my $program_name = basename("$0"); #current sub_command name +my $help = 0; #command line attribute '-h', get usage information +my $test = 0; #command line attribute '-T' +my $verbose = 0; #command line attribute '-V' +my $noderange; #command line attribute '-n' +my $output = "stdout"; #used by probe_utils->send_msg("$output", "o", "xxxxxxxxxx"); print output to STDOUT +my $is_sn = 0; #flag current server is SN +my $rst = 0; #the exit code of current command +my $terminal = 0; #means get INT signal from STDIN + +#save all output from commands running on SNs and MN +# one example: +# $summaryoutput{mn} = @mn_output_history +# $summaryoutput{SN1} = @SN1_output_history +my %summaryoutput; + +#a map of SNs and command which will be dispatched to current SN +# one example: +# $dispatchcmd{SN1} = "probecommand -n test[1-5] -V" +# $dispatchcmd{SN2} = "probecommand -n test[6-10] -V" +my %dispatchcmd; + +#save command line attributes from STDIN +my @tmpargv; + +#-------------------------------- +# below are some options rules used by default +# -h : Get usage information of current sub command +# -V : Output more information for debug +# -T : To verify if $program_name can work, reserve option for probe framework, dosen't use by customer +# -n : In xCAT probe, -n is uesd to specify node range uniformly +#-------------------------------- +$::USAGE = "Usage: + $program_name -h + $program_name [-V] + +Description: + This is a template for developing a probe sub_command. + <# ADD DESCRIPTION FOR YOUR COMMAND #> + +Options: + -h : Get usage information of $program_name + -V : Output more information for debug +"; + +#------------------------------------ +# Please implement the main checking job of current command in do_main_job function +# If $outputtarget has input value, that means do_main_job is running on MN, so every message needed to print on STDOUT should be written into pipe $outputtarget. +# If $outputtarget has no value, that means do_main_job is running on SN, all message just need to print on STDOUT +# Recommand to use probe_utils->send_msg() to handle message you plan to print out +# A simple example has been written in funciton. +#------------------------------------ +sub do_main_job { + my $outputtarget = shift; + $outputtarget = "stdout" if (!$outputtarget); + my $rst = 0; + + probe_utils->send_msg($outputtarget, "o", "I reveive node range is $noderange"); + + #<#DO YOUR OWN CHECKING JOB1#> + probe_utils->send_msg($outputtarget, "o", "first checking point"); + + #<#DO YOUR OWN CHECKING JOB2#> + probe_utils->send_msg($outputtarget, "f", "second checking point"); + + return $rst; +} + +#------------------------------------- +# When this command return from all SNs and MN, you need to generate a summary +# All history outpout from SNs and MN are saved in globle hash %summaryoutput. +# $summaryoutput{mn} = @mnhistory +# $summaryoutput{snname1} = @snname1history; +# The entry in each histroy array isn't categorized, the message coming early is arranged before the one coming later. +# A simple example of how to dump %summaryoutput has been written in function +#------------------------------------- +sub summary_all_jobs_output { + my $rst = 0; + + #DO SUMMARY DEPENDING ON YOUR SUB_COMMAND NEED + probe_utils->send_msg("$output", "d", "======================do summary====================="); + foreach my $sn (keys %summaryoutput) { + probe_utils->send_msg("$output", "d", "[$sn]"); + foreach my $log (@{ $summaryoutput{$sn} }) { + probe_utils->send_msg("$output", "d", "$log"); + } + } + return $rst; +} + +#------------------------------------- +# Each probe sub command is supposed to support hierarchical. +# This funtion is used to calclulate which SN should be dispatched which command +#------------------------------------- +sub calculate_dispatch_cmd { + my @snlist = xCAT::ServiceNodeUtils->getAllSN(); + if ($noderange) { + my @nodes = `nodels $noderange 2>&1`; + + #if there is error in noderange + if ($?) { + my $error = join(" ", @nodes); + if ($error =~ /Error: Invalid nodes and\/or groups in noderange: (.+)/) { + probe_utils->send_msg("$output", "f", "There are invaild nodes ($1) in command line attribute node range"); + } else { + probe_utils->send_msg("$output", "f", "There is error in command line attribute node range, please using nodels to check"); + } + return 1; + } else { + + #calculate the mapping between SN and the nodes which belong to it. + chomp foreach (@nodes); + my $snnodemap = xCAT::ServiceNodeUtils->get_ServiceNode(\@nodes, "xcat", "MN"); + + #print Dumper $snnodemap; + my %newsnnodemap; + foreach my $sn (keys %$snnodemap) { + if (grep(/^$sn$/, @snlist)) { # the node just belong to one SN + push(@{ $newsnnodemap{$sn} }, @{ $snnodemap->{$sn} }); + } elsif ($sn =~ /(\w+),.+/) { # the node belong to more than one SN, count it into first SN + if(grep(/^$1$/, @snlist)){ + push(@{ $newsnnodemap{$1} }, @{ $snnodemap->{$sn} }); + }else{ + probe_utils->send_msg("$output", "f", "The value $1 of 'servicenode' isn't a service node"); + } + } else { # the nodes don't belong to any SN will be handled by MN + push(@{ $newsnnodemap{mn} }, @{ $snnodemap->{$sn} }); + } + } + + #print Dumper \%newsnnodemap; + #generate new command for each SN, replace noderange + foreach my $sn (keys %newsnnodemap) { + my $nodes = join(",", @{ $newsnnodemap{$sn} }); + if ($sn eq "mn") { + $noderange = $nodes; + } else { + for (my $i = 0 ; $i <= $#tmpargv ; $i++) { + if ($tmpargv[$i] eq "-n") { + $tmpargv[ $i + 1 ] = $nodes; + last; + } + } + my $args = join(" ", @tmpargv); + $dispatchcmd{$sn} = "$::XCATROOT/probe/subcmds/$program_name $args 2>&1"; + } + } + } + } else { + + #there isn't noderange input from STDIN, dispatch command to all SN if there are SN defined in MN + if (@snlist) { + my $args = join(" ", @tmpargv); + my $sns = join(",", @snlist); + $dispatchcmd{$sns} = "$::XCATROOT/probe/subcmds/$program_name $args 2>&1" if (!$?); + } + } + return 0; +} + + + +#------------------------------------- +# main process start +#------------------------------------- +@tmpargv = @ARGV; +if ( + !GetOptions("--help|h" => \$help, + "T" => \$test, + "n=s" => \$noderange, + "V" => \$verbose)) +{ + probe_utils->send_msg("$output", "f", "Invalid parameter for $program_name"); + probe_utils->send_msg("$output", "d", "$::USAGE"); + exit 1; +} + +if ($help) { + if ($output ne "stdout") { + probe_utils->send_msg("$output", "d", "$::USAGE"); + } else { + print "$::USAGE"; + } + exit 0; +} + +if ($test) { + probe_utils->send_msg("$output", "o", "This isn't a probe tool, this is just a template for sub command coding. Using it to develop sub command which need to cover hierarchical cluster"); + exit 0; +} + +#Handle the interrupt signal from STDIN +$SIG{TERM} = $SIG{INT} = sub { + $terminal = 1; +}; + +#-------------------------------------------- +# To confirm what current node is, MN or SN +#-------------------------------------------- +$is_sn = 1 if (-e "/etc/xCATSN"); + +#if this node is SN, just run job, not to do dispatch +if ($is_sn) { + $rst = do_main_job(); + exit $rst; +} + +#-------------------------------------------- +# calculate which command should be dispatched to which SN +#-------------------------------------------- +$rst = calculate_dispatch_cmd(); + +#print Dumper \%dispatchcmd; +#print "nodes left to mn : $noderange\n"; +#print "========================\n"; +exit $rst if ($rst); + +#-------------------------------------------- +# dispatch job to MN and SN +#-------------------------------------------- +my $mnjobpid = 0; +my @snsjobpids = (); +my @snsjobfds = (); +my $pipe_parent_read; +my $pipe_child_write; +pipe $pipe_parent_read, $pipe_child_write; +{ + #handle job in MN + $mnjobpid = fork(); + if (!defined($mnjobpid)) { + probe_utils->send_msg("$output", "f", "fork process to handle MN job failed: $!"); + $rst = 1; + last; + } elsif ($mnjobpid == 0) { + $SIG{TERM} = $SIG{INT} = sub { + exit 1; + }; + + close $pipe_parent_read; + $rst = do_main_job($pipe_child_write); + exit $rst; + } + $SIG{CHLD} = sub { waitpid($mnjobpid, WNOHANG) }; + close $pipe_child_write; + + #handle job dispatch to SN + foreach my $sn (keys %dispatchcmd) { + my $snjobcmd = "xdsh $sn -s \"$dispatchcmd{$sn}\" 2>&1"; + + #print "$sn = $snjobcmd\n"; + my $snjobfd; + my $snjobpid; + if (!($snjobpid = open($snjobfd, "$snjobcmd |"))) { + probe_utils->send_msg("$output", "f", "fork process to dispatch cmd $snjobcmd to $sn failed: $!"); + next; + } + push(@snsjobpids, $snjobpid); + push(@snsjobfds, $snjobfd); + } + + my $select = new IO::Select; + $select->add(\*$pipe_parent_read) if ($pipe_parent_read); + $select->add(\*$_) foreach (@snsjobfds); + $| = 1; + + my $line; + my %pipeisnonull; + $pipeisnonull{mn} = 1; + $pipeisnonull{$_} = 1 foreach (@snsjobfds); + my $onepipeisnonull = 1; + while ($onepipeisnonull) { + if (@hdls = $select->can_read(0)) { + foreach $hdl (@hdls) { + if ($pipeisnonull{mn} && $hdl == \*$pipe_parent_read) { + if (eof($pipe_parent_read)) { + $pipeisnonull{mn} = 0; + } else { + chomp($line = <$pipe_parent_read>); + print "$line\n"; + push @{ $summaryoutput{mn} }, $line; + } + } else { + foreach my $fd (@snsjobfds) { + if ($pipeisnonull{$fd} && $hdl == \*$fd) { + if (eof($fd)) { + $pipeisnonull{$fd} = 0; + } else { + chomp($line = <$fd>); + if ($line =~ /(Error:)\s+(\w+)\s+(.+)/i) { + push @{ $summaryoutput{$2} }, $line; + #if need to print to STDIN in real time, can make below line available + #print "[failed] :$2: $line\n"; + } elsif ($line =~ /^(\w+)\s*:\s(.*)/) { + push @{ $summaryoutput{$1} }, $2; + #if need to print to STDIN in real time, can make below lines available + #$line = "$2:$1: $3" if ($line =~ /^(\w+)\s*:\s*(\[\w+\]\s*):\s*(.*)/); + #print "$line\n"; + } + } + } + } + } + } + $onepipeisnonull = 0; + $onepipeisnonull |= $pipeisnonull{$_} foreach (keys %pipeisnonull); + } + last if ($terminal); + sleep 1; + } +} +close($pipe_child_write) if ($pipe_child_write); +close($pipe_parent_read) if ($pipe_parent_read); +close($_) foreach (@snsjobfds); + +#start to clear up all sub processes +my %runningpid; +$runningpid{$mnjobpid} = 1 if ($mnjobpid); +$runningpid{$_} = 1 foreach (@snsjobpids); +my $existrunningpid = 0; +$existrunningpid = 1 if (%runningpid); + +my $trytime = 0; +while ($existrunningpid) { + + #send terminal signal to all running process at same time + #try INT 5 up to 5 times + if ($try < 5) { + foreach my $pid (keys %runningpid) { + kill 'INT', $pid if ($runningpid{$pid}); + } + + #try TERM 5 up to 5 times + } elsif ($try < 10) { + foreach my $pid (keys %runningpid) { + kill 'TERM', $pid if ($runningpid{$pid}); + } + + #try KILL 1 time + } else { + foreach my $pid (keys %runningpid) { + kill 'KILL', $pid if ($runningpid{$pid}); + } + } + ++$try; + + sleep 1; + + #To check how many process exit, set the flag of exited process to 0 + foreach my $pid (keys %runningpid) { + $runningpid{$pid} = 0 if (waitpid($pid, WNOHANG)); + } + + #To check if there are processes still running, if there are, try kill again in next loop + $existrunningpid = 0; + $existrunningpid |= $runningpid{$_} foreach (keys %runningpid); + + #just try 10 times, if still can't kill some process, give up + if ($try > 10) { + my $leftpid; + foreach my $pid (keys %runningpid) { + $leftpid .= "$pid " if ($runningpid{$pid}); + } + print "Can't stop process $leftpid, please handle manually.\n"; + last; + } +} + +#------------------------------------- +# summary all jobs output to display +#------------------------------------- +$rst = summary_all_jobs_output(); + +exit $rst; + + + +