mirror of
https://github.com/xcat2/xcat-core.git
synced 2025-06-03 03:50:08 +00:00
Merge pull request #2069 from hu-weihua/code_template
New template of probe command dispatch in hierarchy environment
This commit is contained in:
commit
13ea03e9d9
268
xCAT-probe/lib/perl/hierarchy.pm
Normal file
268
xCAT-probe/lib/perl/hierarchy.pm
Normal file
@ -0,0 +1,268 @@
|
||||
package hierarchy;
|
||||
|
||||
# IBM(c) 2016 EPL license http://www.eclipse.org/legal/epl-v10.html
|
||||
|
||||
BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; }
|
||||
use lib "$::XCATROOT/probe/lib/perl";
|
||||
use probe_utils;
|
||||
use xCAT::ServiceNodeUtils;
|
||||
|
||||
use strict;
|
||||
use Data::Dumper;
|
||||
use IO::Select;
|
||||
use File::Basename;
|
||||
use POSIX ":sys_wait_h";
|
||||
|
||||
#-------------------------------------
# Constructor of the hierarchy dispatcher.
# Can be called on the class name or on an existing instance.
# The object keeps the following state:
#   program_name  : basename of the running script ($0)
#   dispatchcmd   : hash, target server ("mn", a SN or a SN list) => command line
#   subjobpids    : pids of the sub jobs started by dispatch_cmd
#   subjobfds     : read handles of the sub jobs, same order as subjobpids
#   subjobstates  : hash, read handle => 1 once the handle reached EOF, else 0
#   allsubjobdone : 1 once every sub job handle reached EOF
#   fdnodemap     : hash, read handle => target server the handle belongs to
#   select        : IO::Select object used to poll the read handles
# Return: the blessed object
#-------------------------------------
sub new {
    my $class = shift;

    my $self = {
        program_name  => basename("$0"),
        dispatchcmd   => {},
        subjobpids    => [],
        subjobfds     => [],
        subjobstates  => {},
        allsubjobdone => 0,
        fdnodemap     => {},

        # Explicit method call instead of the indirect-object form
        # "new IO::Select", which is ambiguous in a package that
        # defines its own sub new.
        select => IO::Select->new(),
    };

    bless($self, ref($class) || $class);
    return $self;
}
|
||||
|
||||
#-------------------------------------
# Calculate which command has to be run on which server and save the
# result in $self->{dispatchcmd} (target server => command line).
# Arguments:
#   $noderange : node range given on the command line, may be empty
#   $argv_ref  : reference of the original command line attributes
#   $error_ref : reference of an array which receives error messages
# Policy:
#   * without node range the same command is dispatched to "mn" and, if
#     service nodes are defined, to all service nodes in one go (the key
#     is the comma separated SN list)
#   * with node range the nodes are grouped by their service node; nodes
#     with several service nodes are counted into the first one, nodes
#     without a known service node are handled by "mn". The value after
#     "-n" is replaced by the nodes of each group.
# The array behind $argv_ref is not modified.
# Return: 0 on success, 1 on error (details are in @$error_ref)
#-------------------------------------
sub calculate_dispatch_cmd {
    my $self      = shift;
    my $noderange = shift;
    my $argv_ref  = shift;
    my $error_ref = shift;

    @{$error_ref} = ();

    my @snlist    = xCAT::ServiceNodeUtils->getAllSN();
    my $cmdprefix = "$::XCATROOT/probe/subcmds/$self->{program_name}";

    if (!$noderange) {

        #there isn't noderange input from STDIN, dispatch command to all SN if there are SN defined in MN
        #if there isn't SN defined in MN, just dispatch command to MN itself
        my $args = join(" ", @$argv_ref);
        my $cmd  = "$cmdprefix $args -H 2>&1";
        $self->{dispatchcmd}->{mn} = $cmd;
        if (@snlist) {
            my $sns = join(",", @snlist);
            $self->{dispatchcmd}->{$sns} = $cmd;
        }
        return 0;
    }

    my @nodes = probe_utils->parse_node_range($noderange);

    #if there is error in noderange ($? is expected to be set by parse_node_range)
    if ($?) {
        my $error = join(" ", @nodes);
        if ($error =~ /Error: Invalid nodes and\/or groups in noderange: (.+)/) {
            push @{$error_ref}, "There are invaild nodes ($1) in command line attribute node range";
        } else {
            push @{$error_ref}, "There is error in command line attribute node range, please using nodels to check";
        }
        return 1;
    }

    #calculate the mapping between SN and the nodes which belong to it.
    chomp foreach (@nodes);
    my $snnodemap = xCAT::ServiceNodeUtils->get_ServiceNode(\@nodes, "xcat", "MN");

    # Exact name lookup: host names may contain '-' or '.', so they must
    # neither be used as a regular expression nor be parsed with \w+.
    my %is_sn = map { $_ => 1 } @snlist;

    my %newsnnodemap;
    my $rst = 0;
    foreach my $sn (keys %$snnodemap) {
        if ($is_sn{$sn}) {    # the node just belong to one SN
            push(@{ $newsnnodemap{$sn} }, @{ $snnodemap->{$sn} });
        } elsif ($sn =~ /^([^,]+),/) {    # the node belong to more than one SN, count it into first SN
            my $first = $1;    # keep a copy, $1 is not reliable after another match
            if ($is_sn{$first}) {
                push(@{ $newsnnodemap{$first} }, @{ $snnodemap->{$sn} });
            } else {
                push @{$error_ref}, "The value $first of 'servicenode' isn't a service node";
                $rst = 1;
            }
        } else {    # the nodes don't belong to any SN will be handled by MN
            push(@{ $newsnnodemap{mn} }, @{ $snnodemap->{$sn} });
        }
    }

    return 1 if ($rst);

    #generate new command for each SN, replace noderange
    foreach my $sn (keys %newsnnodemap) {
        my $nodes = join(",", @{ $newsnnodemap{$sn} });

        # work on a private copy so that the caller's argument list stays intact
        my @argv = @$argv_ref;
        foreach my $i (0 .. $#argv) {
            if ($argv[$i] eq "-n") {
                $argv[ $i + 1 ] = $nodes;
                last;
            }
        }
        my $args = join(" ", @argv);
        $self->{dispatchcmd}->{$sn} = "$cmdprefix $args -H 2>&1";
    }

    return 0;
}
|
||||
|
||||
#-------------------------------------
# Calculate the command of each target server and start all of them as
# sub jobs. The command for "mn" is run directly, the command for any
# other target is wrapped into xdsh. The output of every sub job is read
# through a pipe; pid and read handle of each sub job are saved in the
# object and the handles are added to $self->{select}.
# Arguments:
#   $noderange : node range given on the command line, may be empty
#   $argv_ref  : reference of the original command line attributes
#   $error_ref : reference of an array which receives error messages
# Side effect: turns on autoflush ($| = 1) of the selected output handle
# Return: 0 on success, 1 on error (details are in @$error_ref).
#         On error the sub jobs started so far stay registered, so the
#         caller is expected to call destory.
#-------------------------------------
sub dispatch_cmd {
    my $self      = shift;
    my $noderange = shift;
    my $argv_ref  = shift;
    my $error_ref = shift;

    @$error_ref = ();

    my $rst = $self->calculate_dispatch_cmd($noderange, $argv_ref, $error_ref);
    return $rst if ($rst);

    foreach my $target_server (keys %{ $self->{dispatchcmd} }) {
        my $subjobcmd = $self->{dispatchcmd}->{$target_server};
        if ($target_server ne "mn") {
            $subjobcmd = "xdsh $target_server -s \"$subjobcmd\" 2>&1";
        }

        #print "$subjobcmd\n";

        # three-argument open: the open mode can't be influenced by the
        # content of the command string
        my $subjobfd;
        my $subjobpid = open($subjobfd, '-|', $subjobcmd);
        if (!$subjobpid) {
            push @{$error_ref}, "Fork process to dispatch cmd $subjobcmd to $target_server failed: $!";
            $rst = 1;
            last;
        }

        # subjobpids and subjobfds are kept in the same order
        push(@{ $self->{subjobpids} }, $subjobpid);
        push(@{ $self->{subjobfds} },  $subjobfd);
        $self->{fdnodemap}->{$subjobfd} = $target_server;
    }

    if (@{ $self->{subjobpids} }) {
        $| = 1;
        foreach my $fd (@{ $self->{subjobfds} }) {
            $self->{select}->add(\*$fd);
            $self->{subjobstates}->{$fd} = 0;    # 0 means the handle hasn't reached EOF yet
        }
    }

    return $rst;
}
|
||||
|
||||
#-------------------------------------
# Read the next output of the sub jobs started by dispatch_cmd.
# Polls all sub job handles until at least one line was read or until
# every handle reached EOF.
# Arguments:
#   $reply_cache_ref : reference of a hash, it is emptied first and then
#                      filled with: target server => array of lines read.
#                      Lines coming from the "mn" job get the prefix "mn:".
# Return: 1 if there is something in the hash, 0 if all sub jobs are done
#         and nothing more was read.
# Note: at most one line per readable handle is read in one poll, and
#       lines are read with buffered I/O.
#-------------------------------------
sub read_reply {
    my $self            = shift;
    my $reply_cache_ref = shift;

    %$reply_cache_ref = ();

    while (!$self->{allsubjobdone} && !%$reply_cache_ref) {
        my @hdls = $self->{select}->can_read(0);
        foreach my $hdl (@hdls) {
            foreach my $fd (@{ $self->{subjobfds} }) {
                next if ($self->{subjobstates}->{$fd});    # this sub job is already done
                next unless ($hdl == \*$fd);

                if (eof($fd)) {
                    $self->{subjobstates}->{$fd} = 1;

                    # a handle at EOF is always reported as readable, so stop polling it
                    $self->{select}->remove($hdl);
                } else {
                    my $line = <$fd>;
                    chomp($line);

                    #print ">>>$line\n";
                    my $server = $self->{fdnodemap}->{$fd};
                    $line = "mn:$line" if ($server eq "mn");
                    push @{ $reply_cache_ref->{$server} }, $line;
                }
            }
        }

        # Nothing to read right now: wait 0.1 second before polling again.
        # The built-in sleep only supports whole seconds (sleep 0.1 doesn't
        # wait at all), so the 4-argument select is used as timer.
        select(undef, undef, undef, 0.1) unless (@hdls);

        #check if all sub job have done
        my @unfinished = grep { !$_ } values %{ $self->{subjobstates} };
        $self->{allsubjobdone} = @unfinished ? 0 : 1;
    }

    return %$reply_cache_ref ? 1 : 0;
}
|
||||
|
||||
#-------------------------------------
# Stop all sub jobs started by dispatch_cmd and close their handles.
# Sub jobs which already exited are only reaped. The rest get INT (up to
# 5 rounds), then TERM (up to 5 rounds), then KILL (1 round); after each
# signal the sub jobs get up to one second to exit.
# The handles are closed after the sub jobs were stopped, because closing
# the handle of a piped open waits for the process behind it. That would
# block as long as the sub job is running and the signals would come too late.
# Arguments:
#   $error_ref : reference of an array which receives error messages
# Return: 0 if all sub jobs are gone, 1 if some of them couldn't be stopped
#-------------------------------------
sub destory {
    my $self      = shift;
    my $error_ref = shift;

    my $rst = 0;
    @$error_ref = ();

    my %runningpid = map { $_ => 1 } @{ $self->{subjobpids} };

    # Drop every pid which isn't a running child any more (waitpid returns
    # the pid for an exited child and -1 if there is no such child) and
    # return the number of pids still running.
    my $reap = sub {
        foreach my $pid (keys %runningpid) {
            delete $runningpid{$pid} if (waitpid($pid, WNOHANG));
        }
        return scalar(keys %runningpid);
    };

    my $try = 0;
    while ($reap->()) {

        #just try 11 times, if still can't kill some process, give up
        if ($try > 10) {
            my $leftpid = join("", map { "$_ " } sort { $a <=> $b } keys %runningpid);
            push @{$error_ref}, "Can't stop process $leftpid, please handle manually.";
            $rst = 1;
            last;
        }

        #send terminal signal to all running process at same time
        my $signal = ($try < 5) ? 'INT' : ($try < 10) ? 'TERM' : 'KILL';
        kill $signal, keys %runningpid;
        ++$try;

        # wait up to 1 second for the processes to exit, stop waiting as soon as all are gone
        foreach (1 .. 10) {
            select(undef, undef, undef, 0.1);
            last unless ($reap->());
        }
    }

    # subjobpids and subjobfds are in the same order. Don't close the handle
    # of a process which couldn't be stopped: close would wait for it.
    my $pids = $self->{subjobpids};
    my $fds  = $self->{subjobfds};
    foreach my $i (0 .. $#{$fds}) {
        my $pid = $pids->[$i];
        next if (defined($pid) && $runningpid{$pid});
        close($fds->[$i]);
    }

    return $rst;
}
|
||||
|
||||
|
||||
|
||||
|
||||
1;
|
@ -2,14 +2,15 @@
|
||||
# IBM(c) 2016 EPL license http://www.eclipse.org/legal/epl-v10.html
|
||||
|
||||
#--------------------------------------------------------
|
||||
#This is a template for developing a probe sub_command.
|
||||
#This template mainly implement the sub_comamd dispatch in hierarchical structure
|
||||
#Developer only need to focus on main probe job (by implement do_main_job function) and friendly output (by implement summary_all_jobs_output function) for user
|
||||
#This is a template for developing a new probe sub_command. Especially in hierarchical environment.
|
||||
#This template mainly implements sub_command dispatch in a hierarchical structure and the basic framework of a new sub_command.
|
||||
#Developers only need to focus on the main probe job (by implementing the do_main_job function) and on friendly output for the user (by implementing the summary_all_jobs_output function).
|
||||
#This template can also be used in a flat structure, but if a developer thinks it is too heavy for a flat environment, it is fine to develop the sub command directly.
|
||||
#But in a hierarchical structure, we strongly recommend using this template.
|
||||
#
|
||||
#The main dispatch policy are:
|
||||
#1. if there isn't noderange input from commmand line. if there are service nodes defined in current MN,
|
||||
# we dispatch exact same command input from STDIN to all SNs and current MN. if there isn't service nodes defined,
|
||||
#1. If there is no noderange input from the command line and there are service nodes defined in the current MN,
|
||||
# dispatch exactly the same command input from STDIN to all SNs and the current MN. If there are no service nodes defined,
|
||||
# just handle the command input from STDIN on the current MN
|
||||
#2. If there is noderange input from the command line by option "-n", we will dispatch the command input from STDIN to the SNs which can handle these nodes
|
||||
# For example, if we got command from STDIN like "probecommand -n test[1-15] -V" and test[1-5] 's SN is SN1, test[6-10]'s SN is SN2
|
||||
@ -17,25 +18,24 @@
|
||||
# For MN run: probecommand -n test[11-15] -V
|
||||
# For SN1 run: probecommand -n test[1-5] -V
|
||||
# For SN2 run: probecommand -n test[6-10] -V
|
||||
#3. All the return message from SNs and MN will be saved in hash %summaryoutput, develper can use it while implement summary_all_jobs_output function
|
||||
#3. All the return messages from SNs and MN will be saved in hash %summaryoutput; developers can use it when implementing the summary_all_jobs_output function
|
||||
#--------------------------------------------------------
|
||||
|
||||
|
||||
BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; }
|
||||
|
||||
use lib "$::XCATROOT/probe/lib/perl";
|
||||
use probe_utils;
|
||||
use xCAT::ServiceNodeUtils;
|
||||
use hierarchy;
|
||||
use File::Basename;
|
||||
use Getopt::Long qw(:config no_ignore_case);
|
||||
use IO::Select;
|
||||
use Data::Dumper;
|
||||
use Getopt::Long qw(:config no_ignore_case);
|
||||
|
||||
my $program_name = basename("$0"); #current sub_command name
|
||||
my $help = 0; #command line attribute '-h', get usage information
|
||||
my $test = 0; #command line attribute '-T'
|
||||
my $verbose = 0; #command line attribute '-V'
|
||||
my $noderange; #command line attribute '-n'
|
||||
|
||||
my $help = 0; #command line attribute '-h', get usage information
|
||||
my $test = 0; #command line attribute '-T'
|
||||
my $hierarchy = 0;
|
||||
my $verbose = 0; #command line attribute '-V'
|
||||
my $noderange; #command line attribute '-n'
|
||||
my $output = "stdout"; #used by probe_utils->send_msg("$output", "o", "xxxxxxxxxx"); print output to STDOUT
|
||||
my $is_sn = 0; #flag current server is SN
|
||||
my $rst = 0; #the exit code of current command
|
||||
@ -47,22 +47,19 @@ my $terminal = 0; #means get INT signal from STDIN
|
||||
# $summaryoutput{SN1} = @SN1_output_history
|
||||
my %summaryoutput;
|
||||
|
||||
#a map of SNs and command which will be dispatched to current SN
|
||||
# one example:
|
||||
# $dispatchcmd{SN1} = "probecommand -n test[1-5] -V"
|
||||
# $dispatchcmd{SN2} = "probecommand -n test[6-10] -V"
|
||||
my %dispatchcmd;
|
||||
my $is_sn;
|
||||
$is_sn = 1 if (-e "/etc/xCATSN");
|
||||
|
||||
#save command line attributes from STDIN
|
||||
my @tmpargv;
|
||||
|
||||
#--------------------------------
|
||||
#-------------------------------------
|
||||
# Usage
|
||||
#-------------------------------------
|
||||
# below are some options rules used by default
|
||||
# -h : Get usage information of current sub command
|
||||
# -V : Output more information for debug
|
||||
# -T : To verify if $program_name can work; option reserved for the probe framework, not to be used by customers
|
||||
# -n : In xCAT probe, -n is used uniformly to specify the node range
|
||||
#--------------------------------
|
||||
#-------------------------------------
|
||||
my $program_name = basename("$0"); #current sub_command name
|
||||
$::USAGE = "Usage:
|
||||
$program_name -h
|
||||
$program_name [-V]
|
||||
@ -77,24 +74,20 @@ Options:
|
||||
";
|
||||
|
||||
#------------------------------------
|
||||
# Please implement the main checking job of current command in do_main_job function
|
||||
# If $outputtarget has input value, that means do_main_job is running on MN, so every message needed to print on STDOUT should be written into pipe $outputtarget.
|
||||
# If $outputtarget has no value, that means do_main_job is running on SN, all message just need to print on STDOUT
|
||||
# Recommand to use probe_utils->send_msg() to handle message you plan to print out
|
||||
# Please implement the main job of current command in do_main_job function
|
||||
# It is recommended to use probe_utils->send_msg() to handle messages you plan to print to STDOUT
|
||||
# A simple example has been written in the function.
|
||||
#------------------------------------
|
||||
sub do_main_job {
|
||||
my $outputtarget = shift;
|
||||
$outputtarget = "stdout" if (!$outputtarget);
|
||||
my $rst = 0;
|
||||
|
||||
probe_utils->send_msg($outputtarget, "o", "I reveive node range is $noderange");
|
||||
probe_utils->send_msg("$output", "o", "Received node range: $noderange");
|
||||
|
||||
#<#DO YOUR OWN CHECKING JOB1#>
|
||||
probe_utils->send_msg($outputtarget, "o", "first checking point");
|
||||
#<#DO YOUR OWN JOB1#>
|
||||
probe_utils->send_msg("$output", "o", "Do the first job");
|
||||
|
||||
#<#DO YOUR OWN CHECKING JOB2#>
|
||||
probe_utils->send_msg($outputtarget, "f", "second checking point");
|
||||
#<#DO YOUR OWN JOB2#>
|
||||
probe_utils->send_msg("$output", "f", "Do the second job");
|
||||
|
||||
return $rst;
|
||||
}
|
||||
@ -112,94 +105,27 @@ sub summary_all_jobs_output {
|
||||
|
||||
#DO SUMMARY DEPENDING ON YOUR SUB_COMMAND NEED
|
||||
probe_utils->send_msg("$output", "d", "======================do summary=====================");
|
||||
|
||||
#print "summaryoutput:\n";
|
||||
#print Dumper \%summaryoutput;
|
||||
|
||||
foreach my $sn (keys %summaryoutput) {
|
||||
probe_utils->send_msg("$output", "d", "[$sn]");
|
||||
foreach my $log (@{ $summaryoutput{$sn} }) {
|
||||
probe_utils->send_msg("$output", "d", "$log");
|
||||
probe_utils->send_msg("$output", "d", "\t$log");
|
||||
}
|
||||
}
|
||||
return $rst;
|
||||
}
|
||||
|
||||
#-------------------------------------
|
||||
# Each probe sub command is supposed to support hierarchical.
|
||||
# This funtion is used to calclulate which SN should be dispatched which command
|
||||
# main process
|
||||
#-------------------------------------
|
||||
sub calculate_dispatch_cmd {
|
||||
my @snlist = xCAT::ServiceNodeUtils->getAllSN();
|
||||
if ($noderange) {
|
||||
my @nodes = `nodels $noderange 2>&1`;
|
||||
|
||||
#if there is error in noderange
|
||||
if ($?) {
|
||||
my $error = join(" ", @nodes);
|
||||
if ($error =~ /Error: Invalid nodes and\/or groups in noderange: (.+)/) {
|
||||
probe_utils->send_msg("$output", "f", "There are invaild nodes ($1) in command line attribute node range");
|
||||
} else {
|
||||
probe_utils->send_msg("$output", "f", "There is error in command line attribute node range, please using nodels to check");
|
||||
}
|
||||
return 1;
|
||||
} else {
|
||||
|
||||
#calculate the mapping between SN and the nodes which belong to it.
|
||||
chomp foreach (@nodes);
|
||||
my $snnodemap = xCAT::ServiceNodeUtils->get_ServiceNode(\@nodes, "xcat", "MN");
|
||||
|
||||
#print Dumper $snnodemap;
|
||||
my %newsnnodemap;
|
||||
foreach my $sn (keys %$snnodemap) {
|
||||
if (grep(/^$sn$/, @snlist)) { # the node just belong to one SN
|
||||
push(@{ $newsnnodemap{$sn} }, @{ $snnodemap->{$sn} });
|
||||
} elsif ($sn =~ /(\w+),.+/) { # the node belong to more than one SN, count it into first SN
|
||||
if(grep(/^$1$/, @snlist)){
|
||||
push(@{ $newsnnodemap{$1} }, @{ $snnodemap->{$sn} });
|
||||
}else{
|
||||
probe_utils->send_msg("$output", "f", "The value $1 of 'servicenode' isn't a service node");
|
||||
}
|
||||
} else { # the nodes don't belong to any SN will be handled by MN
|
||||
push(@{ $newsnnodemap{mn} }, @{ $snnodemap->{$sn} });
|
||||
}
|
||||
}
|
||||
|
||||
#print Dumper \%newsnnodemap;
|
||||
#generate new command for each SN, replace noderange
|
||||
foreach my $sn (keys %newsnnodemap) {
|
||||
my $nodes = join(",", @{ $newsnnodemap{$sn} });
|
||||
if ($sn eq "mn") {
|
||||
$noderange = $nodes;
|
||||
} else {
|
||||
for (my $i = 0 ; $i <= $#tmpargv ; $i++) {
|
||||
if ($tmpargv[$i] eq "-n") {
|
||||
$tmpargv[ $i + 1 ] = $nodes;
|
||||
last;
|
||||
}
|
||||
}
|
||||
my $args = join(" ", @tmpargv);
|
||||
$dispatchcmd{$sn} = "$::XCATROOT/probe/subcmds/$program_name $args 2>&1";
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
#there isn't noderange input from STDIN, dispatch command to all SN if there are SN defined in MN
|
||||
if (@snlist) {
|
||||
my $args = join(" ", @tmpargv);
|
||||
my $sns = join(",", @snlist);
|
||||
$dispatchcmd{$sns} = "$::XCATROOT/probe/subcmds/$program_name $args 2>&1" if (!$?);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#-------------------------------------
|
||||
# main process start
|
||||
#-------------------------------------
|
||||
@tmpargv = @ARGV;
|
||||
my @tmpargv = @ARGV;
|
||||
if (
|
||||
!GetOptions("--help|h" => \$help,
|
||||
"T" => \$test,
|
||||
"H" => \$hierarchy,
|
||||
"n=s" => \$noderange,
|
||||
"V" => \$verbose))
|
||||
{
|
||||
@ -227,177 +153,77 @@ $SIG{TERM} = $SIG{INT} = sub {
|
||||
$terminal = 1;
|
||||
};
|
||||
|
||||
#--------------------------------------------
|
||||
# To confirm what current node is, MN or SN
|
||||
#--------------------------------------------
|
||||
$is_sn = 1 if (-e "/etc/xCATSN");
|
||||
|
||||
#if this node is SN, just run job, not to do dispatch
|
||||
if ($is_sn) {
|
||||
#if it is called by hierarchy template, just run job, not to do dispatch
|
||||
if ($hierarchy || $is_sn) {
|
||||
$rst = do_main_job();
|
||||
exit $rst;
|
||||
}
|
||||
|
||||
#--------------------------------------------
|
||||
# calculate which command should be dispatched to which SN
|
||||
#--------------------------------------------
|
||||
$rst = calculate_dispatch_cmd();
|
||||
my $hierarchy_instance = hierarchy->new();
|
||||
|
||||
#print Dumper \%dispatchcmd;
|
||||
#print "nodes left to mn : $noderange\n";
|
||||
#print "========================\n";
|
||||
exit $rst if ($rst);
|
||||
|
||||
#--------------------------------------------
|
||||
# dispatch job to MN and SN
|
||||
#--------------------------------------------
|
||||
my $mnjobpid = 0;
|
||||
my @snsjobpids = ();
|
||||
my @snsjobfds = ();
|
||||
my $pipe_parent_read;
|
||||
my $pipe_child_write;
|
||||
pipe $pipe_parent_read, $pipe_child_write;
|
||||
{
|
||||
#handle job in MN
|
||||
$mnjobpid = fork();
|
||||
if (!defined($mnjobpid)) {
|
||||
probe_utils->send_msg("$output", "f", "fork process to handle MN job failed: $!");
|
||||
$rst = 1;
|
||||
last;
|
||||
} elsif ($mnjobpid == 0) {
|
||||
$SIG{TERM} = $SIG{INT} = sub {
|
||||
exit 1;
|
||||
};
|
||||
|
||||
close $pipe_parent_read;
|
||||
$rst = do_main_job($pipe_child_write);
|
||||
exit $rst;
|
||||
#-------starting to dispatch_cmd--------
|
||||
my @error;
|
||||
$rst = $hierarchy_instance->dispatch_cmd($noderange, \@tmpargv, \@error);
|
||||
if ($rst) {
|
||||
probe_utils->send_msg("$output", "f", "Calculate dispatch command failed");
|
||||
foreach (@error) {
|
||||
probe_utils->send_msg("$output", "", "$_");
|
||||
}
|
||||
$SIG{CHLD} = sub { waitpid($mnjobpid, WNOHANG) };
|
||||
close $pipe_child_write;
|
||||
|
||||
#handle job dispatch to SN
|
||||
foreach my $sn (keys %dispatchcmd) {
|
||||
my $snjobcmd = "xdsh $sn -s \"$dispatchcmd{$sn}\" 2>&1";
|
||||
|
||||
#print "$sn = $snjobcmd\n";
|
||||
my $snjobfd;
|
||||
my $snjobpid;
|
||||
if (!($snjobpid = open($snjobfd, "$snjobcmd |"))) {
|
||||
probe_utils->send_msg("$output", "f", "fork process to dispatch cmd $snjobcmd to $sn failed: $!");
|
||||
next;
|
||||
}
|
||||
push(@snsjobpids, $snjobpid);
|
||||
push(@snsjobfds, $snjobfd);
|
||||
if ($hierarchy_instance->destory(\@error)) {
|
||||
probe_utils->send_msg("$output", "", "$_") foreach (@error);
|
||||
}
|
||||
exit $rst;
|
||||
}
|
||||
|
||||
my $select = new IO::Select;
|
||||
$select->add(\*$pipe_parent_read) if ($pipe_parent_read);
|
||||
$select->add(\*$_) foreach (@snsjobfds);
|
||||
$| = 1;
|
||||
#----------start to read reply-------
|
||||
my %reply_cache;
|
||||
while ($hierarchy_instance->read_reply(\%reply_cache)) {
|
||||
foreach my $servers (keys %reply_cache) { #Dispatch_cmd may use SN range to dispatch cms to SNs at one time
|
||||
my @server_array = split(",", $servers);
|
||||
foreach my $server (@server_array) {
|
||||
foreach (@{ $reply_cache{$servers} }) {
|
||||
my $msg = "";
|
||||
my $logmsg = "";
|
||||
|
||||
my $line;
|
||||
my %pipeisnonull;
|
||||
$pipeisnonull{mn} = 1;
|
||||
$pipeisnonull{$_} = 1 foreach (@snsjobfds);
|
||||
my $onepipeisnonull = 1;
|
||||
while ($onepipeisnonull) {
|
||||
if (@hdls = $select->can_read(0)) {
|
||||
foreach $hdl (@hdls) {
|
||||
if ($pipeisnonull{mn} && $hdl == \*$pipe_parent_read) {
|
||||
if (eof($pipe_parent_read)) {
|
||||
$pipeisnonull{mn} = 0;
|
||||
} else {
|
||||
chomp($line = <$pipe_parent_read>);
|
||||
print "$line\n";
|
||||
push @{ $summaryoutput{mn} }, $line;
|
||||
#For cases like below:
|
||||
#c910f02c04p04: [ok] :All xCAT deamons are running
|
||||
if ($reply_cache{$servers}->[$_] =~ /^(\w+)\s*:\s*(\[\w+\]\s*):\s*(.*)/) {
|
||||
if ("$1" eq "$server") {
|
||||
$logmsg = "$2: $3";
|
||||
$msg = "$2:<$server>: $3";
|
||||
}
|
||||
|
||||
#For cases like below:
|
||||
#c910f02c04p05: IT IS POSSIBLE THAT SOMEONE IS DOING SOMETHING NASTY!
|
||||
} elsif ($reply_cache{$servers}->[$_] =~ /^(\w+)\s*:\s*(.*)/) {
|
||||
if ("$1" eq "$server") {
|
||||
$logmsg = "$2";
|
||||
$msg = "<$server>: $2";
|
||||
}
|
||||
|
||||
#For cases like below:
|
||||
#Unable to open socket connection to xcatd daemon on localhost:3001.
|
||||
} else {
|
||||
foreach my $fd (@snsjobfds) {
|
||||
if ($pipeisnonull{$fd} && $hdl == \*$fd) {
|
||||
if (eof($fd)) {
|
||||
$pipeisnonull{$fd} = 0;
|
||||
} else {
|
||||
chomp($line = <$fd>);
|
||||
if ($line =~ /(Error:)\s+(\w+)\s+(.+)/i) {
|
||||
push @{ $summaryoutput{$2} }, $line;
|
||||
#if need to print to STDIN in real time, can make below line available
|
||||
#print "[failed] :$2: $line\n";
|
||||
} elsif ($line =~ /^(\w+)\s*:\s(.*)/) {
|
||||
push @{ $summaryoutput{$1} }, $2;
|
||||
#if need to print to STDIN in real time, can make below lines available
|
||||
#$line = "$2:$1: $3" if ($line =~ /^(\w+)\s*:\s*(\[\w+\]\s*):\s*(.*)/);
|
||||
#print "$line\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
if (length($reply_cache{$servers}->[$_])) {
|
||||
$logmsg = $reply_cache{$servers}->[$_];
|
||||
$msg = "[failed] :[$server]: $reply_cache{$servers}->[$_]";
|
||||
}
|
||||
}
|
||||
probe_utils->send_msg("$output", "", "$msg") if (length($msg));
|
||||
push @{ $summaryoutput{$server} }, $logmsg if (length($logmsg));
|
||||
}
|
||||
$onepipeisnonull = 0;
|
||||
$onepipeisnonull |= $pipeisnonull{$_} foreach (keys %pipeisnonull);
|
||||
}
|
||||
last if ($terminal);
|
||||
sleep 1;
|
||||
}
|
||||
}
|
||||
close($pipe_child_write) if ($pipe_child_write);
|
||||
close($pipe_parent_read) if ($pipe_parent_read);
|
||||
close($_) foreach (@snsjobfds);
|
||||
|
||||
#start to clear up all sub processes
|
||||
my %runningpid;
|
||||
$runningpid{$mnjobpid} = 1 if ($mnjobpid);
|
||||
$runningpid{$_} = 1 foreach (@snsjobpids);
|
||||
my $existrunningpid = 0;
|
||||
$existrunningpid = 1 if (%runningpid);
|
||||
|
||||
my $trytime = 0;
|
||||
while ($existrunningpid) {
|
||||
|
||||
#send terminal signal to all running process at same time
|
||||
#try INT 5 up to 5 times
|
||||
if ($try < 5) {
|
||||
foreach my $pid (keys %runningpid) {
|
||||
kill 'INT', $pid if ($runningpid{$pid});
|
||||
}
|
||||
|
||||
#try TERM 5 up to 5 times
|
||||
} elsif ($try < 10) {
|
||||
foreach my $pid (keys %runningpid) {
|
||||
kill 'TERM', $pid if ($runningpid{$pid});
|
||||
}
|
||||
|
||||
#try KILL 1 time
|
||||
} else {
|
||||
foreach my $pid (keys %runningpid) {
|
||||
kill 'KILL', $pid if ($runningpid{$pid});
|
||||
}
|
||||
}
|
||||
++$try;
|
||||
|
||||
sleep 1;
|
||||
|
||||
#To check how many process exit, set the flag of exited process to 0
|
||||
foreach my $pid (keys %runningpid) {
|
||||
$runningpid{$pid} = 0 if (waitpid($pid, WNOHANG));
|
||||
}
|
||||
|
||||
#To check if there are processes still running, if there are, try kill again in next loop
|
||||
$existrunningpid = 0;
|
||||
$existrunningpid |= $runningpid{$_} foreach (keys %runningpid);
|
||||
|
||||
#just try 10 times, if still can't kill some process, give up
|
||||
if ($try > 10) {
|
||||
my $leftpid;
|
||||
foreach my $pid (keys %runningpid) {
|
||||
$leftpid .= "$pid " if ($runningpid{$pid});
|
||||
}
|
||||
print "Can't stop process $leftpid, please handle manually.\n";
|
||||
if ($terminal) {
|
||||
last;
|
||||
}
|
||||
}
|
||||
|
||||
#----------hierarchy_instance->destory-----------
|
||||
if ($hierarchy_instance->destory(\@error)) {
|
||||
probe_utils->send_msg("$output", "", "$_") foreach (@error);
|
||||
}
|
||||
|
||||
#-------------------------------------
|
||||
# summary all jobs output to display
|
||||
#-------------------------------------
|
||||
@ -408,3 +234,5 @@ exit $rst;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user