rollupdate Linux support

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@8025 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
mellor 2010-11-03 22:20:19 +00:00
parent 253b7d4ae7
commit e9efa7b2dc
2 changed files with 335 additions and 227 deletions

View File

@ -371,17 +371,29 @@ sub readFileInput {
# Convert the following values to lowercase
if ( ($attr eq 'scheduler') ||
($attr eq 'updateall') ||
($attr eq 'shutdownrequired') ) {
($attr eq 'skipshutdown') ) {
$val =~ tr/A-Z/a-z/;
}
# set the value in the hash for this entry
push( @{ $::FILEATTRS{$attr} }, $val );
# Set some required defaults if not specified
if (($prev_attr eq "prescript") && ($attr ne "prescriptnodes")) {
push ( @{ $::FILEATTRS{'prescriptnodes'} }, 'ALL_NODES_IN_UPDATEGROUP' );
}
if (($prev_attr eq "outofbandcmd") && ($attr ne "outofbandnodes")) {
push ( @{ $::FILEATTRS{'outofbandnodes'} }, 'ALL_NODES_IN_UPDATEGROUP' );
}
if (($prev_attr eq "mutex") && ($attr ne "mutex_count")) {
push ( @{ $::FILEATTRS{'mutex_count'} }, '1' );
}
if (($prev_attr eq "nodegroup_mutex") && ($attr eq "mutex_count")) {
$attr = "nodegroup_mutex_count";
}
if (($prev_attr eq "nodegroup_mutex") && ($attr ne "nodegroup_mutex_count")) {
push ( @{ $::FILEATTRS{'nodegroup_mutex_count'} }, '1' );
}
# set the value in the hash for this entry
push( @{ $::FILEATTRS{$attr} }, $val );
$prev_attr = $attr;
}
} # end while - go to next line
@ -573,21 +585,23 @@ sub ll_jobs {
}
$::updateall=0;
$::updateall_numperupdate=1;
$::updateall_nodecount=1;
if ( defined($::FILEATTRS{updateall}[0]) &&
( ($::FILEATTRS{updateall}[0] eq 'yes') ||
($::FILEATTRS{updateall}[0] eq 'y' ) ) ) {
$::updateall=1;
if ( defined($::FILEATTRS{updateall_numperupdate}[0]) ) {
$::updateall_numperupdate=$::FILEATTRS{updateall_numperupdate}[0];
if ( defined($::FILEATTRS{updateall_nodecount}[0]) ) {
$::updateall_nodecount=$::FILEATTRS{updateall_nodecount}[0];
}
}
# Create LL floating resources for mutual exclusion support
# and max_updates
if (&create_LL_mutex_resources > 0) {
return 1;
if (!$::updateall) {
if (&create_LL_mutex_resources($updategroup) > 0) {
return 1;
}
}
#
@ -622,9 +636,7 @@ sub ll_jobs {
close $TMPL_FILE;
# Query LL for list of machines and their status
### LL BUG WORKAROUND -- MUST SET LOADL_STATUS_LEVEL=MACHINE
my $cmd = "LOADL_STATUS_LEVEL=MACHINE llstatus -r %n %sta 2>/dev/null";
# my $cmd = "llstatus -r %n %sta 2>/dev/null";
my $cmd = "llstatus -r %n %sta 2>/dev/null";
if ($::VERBOSE) {
my $rsp;
push @{ $rsp->{data} }, "Running command: $cmd ";
@ -801,6 +813,9 @@ sub ll_jobs {
if (defined($::FILEATTRS{bringuptimeout}[0])){
push (@ugdflines, "bringuptimeout=$::FILEATTRS{bringuptimeout}[0]\n");
}
if (defined($::FILEATTRS{skipshutdown}[0])){
push (@ugdflines, "skipshutdown=$::FILEATTRS{skipshutdown}[0]\n");
}
my $ugdf_file = $lljobs_dir . "/rollupdate_" . $ugname . ".data";
my $UGDFFILE;
unless ( open( $UGDFFILE, ">$ugdf_file" ) ) {
@ -838,7 +853,7 @@ sub ll_jobs {
xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK );
return 1;
}
if ($::VRBOSE) {
if ($::VERBOSE) {
my $rsp;
push @{ $rsp->{data} }, "Writing LL hostlist file $llhl_file ";
xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK );
@ -887,8 +902,8 @@ sub ll_jobs {
my $lastcount = 0;
my $llcount = $machinecount;
if ($::updateall) {
$lastcount = $machinecount % $::updateall_numperupdate;
$llcount = $::updateall_numperupdate;
$lastcount = $machinecount % $::updateall_nodecount;
$llcount = $::updateall_nodecount;
}
my @jclines;
my @jclines2;
@ -1003,7 +1018,7 @@ sub ll_jobs {
} else {
my $submit_count = 1;
if ($::updateall){
$submit_count = $machinecount / $::updateall_numperupdate;
$submit_count = $machinecount / $::updateall_nodecount;
}
for (1..$submit_count) {
@llsubmit = xCAT::Utils->runcmd( "$cmd", 0 );
@ -1460,9 +1475,9 @@ sub get_mutex {
return $mutex_string;
}
my $mutex_count = scalar @::MUTEX;
if ( $mutex_count > 0 ) {
foreach my $row (0..($mutex_count-1)) {
my $num_mutexes = scalar @::MUTEX;
if ( $num_mutexes > 0 ) {
foreach my $row (0..($num_mutexes-1)) {
foreach my $ugi (0..(@{$::MUTEX[$row]} - 1)) {
if ( defined($::MUTEX[$row][$ugi]) && ($ugname eq $::MUTEX[$row][$ugi]) ) {
$mutex_string .= "XCATROLLINGUPDATE_MUTEX".$row."(1) ";
@ -1502,10 +1517,13 @@ sub get_mutex {
#-----------------------------------------------------------------------------
sub create_LL_mutex_resources {
my $updategroup=shift;
$::LL_MUTEX_RESOURCES_CREATED = 0;
my $mxindex=0;
my $fileattrs_index=0;
foreach my $mxline ( @{ $::FILEATTRS{'mutex'} } ) {
my $mx_count = $::FILEATTRS{'mutex_count'}[$fileattrs_index];
my @mxparts = split(/,/,$mxline);
if ( scalar @mxparts < 2 ) {
my $rsp;
@ -1526,12 +1544,62 @@ sub create_LL_mutex_resources {
$::MUTEX[$mxindex2][$mxpi] = $ugname;
$mxindex2++;
}
$mxindexmax = ($mxindex2 > $mxindexmax) ? $mxindex : $mxindexmax;
$mxindexmax = ($mxindex2 > $mxindexmax) ? $mxindex2 : $mxindexmax;
$mxpi++;
}
my $mxc;
for ($mxc=$mxindex; $mxc < $mxindexmax; $mxc++) {
$::MUTEX_COUNT[$mxc] = $mx_count;
}
$mxindex = $mxindexmax;
$fileattrs_index++;
}
# If nodegroup_mutex entries are specified, we need to use the
# list of all the nodes in each updategroup for this entire run.
# Then we need to get a list of all the nodes in the specified
# nodegroup and look for any intersections to create mutexes.
$fileattrs_index=0;
foreach my $mxnodegrp_range ( @{ $::FILEATTRS{'nodegroup_mutex'} } ) {
my $mx_count = $::FILEATTRS{'nodegroup_mutex_count'}[$fileattrs_index];
foreach my $mxnodegroup ( xCAT::NameRange::namerange( $mxnodegrp_range, 0 ) ) {
my $mxpi = 0;
mxnode_loop: foreach my $mxnode ( xCAT::NodeRange::noderange($mxnodegroup) ) {
foreach my $ugname ( keys %{$updategroup} ) {
foreach my $node ( @{ $updategroup->{$ugname} } ) {
if ($mxnode eq $node) {
# found a match, add updategroup to this mutex if we
# don't already have it listed
my $chk = 0;
while ( $chk < $mxpi ){
if ($::MUTEX[$mxindex][$chk] eq $ugname) {
# already have this one, skip to next
next mxnode_loop;
}
$chk++;
}
$::MUTEX[$mxindex][$mxpi] = $ugname;
$mxpi++;
next mxnode_loop;
} # end if found match
}
}
} # end mxnode_loop
if ($mxpi == 1) {
# only one updategroup in this mutex, not valid -- ignore it
undef $::MUTEX[$mxindex];
} elsif ( $mxpi > 1 ) {
$::MUTEX_COUNT[$mxindex] = $mx_count;
$mxindex++;
}
}
$fileattrs_index++;
}
# Build the actual FLOATING_RESOURCES and SCHEDULE_BY_RESOURCES
# strings to write into the LL database
my $resource_string = "";
my $max_updates = $::FILEATTRS{'maxupdates'}[0];
if ( ! defined($max_updates) || ($max_updates eq 'all') ) {
@ -1540,13 +1608,12 @@ sub create_LL_mutex_resources {
$resource_string .= "XCATROLLINGUPDATE_MAXUPDATES($max_updates) ";
}
my $mutex_count = scalar @::MUTEX;
if ( $mutex_count > 0 ) {
foreach my $row (0..($mutex_count-1)) {
# TODO -- UNCOMMENT/REPLACE WHEN LL PROVIDES RESERVATION RESOURCES
$resource_string .= "XCATROLLINGUPDATE_MUTEX".$row."(1) ";
my $num_mutexes = scalar @::MUTEX;
if ( $num_mutexes > 0 ) {
foreach my $row (0..($num_mutexes-1)) {
$resource_string .= "XCATROLLINGUPDATE_MUTEX".$row."($::MUTEX_COUNT[$row]) ";
}
}
}
if ( $resource_string ) {
my $cmd = "llconfig -d FLOATING_RESOURCES SCHEDULE_BY_RESOURCES CENTRAL_MANAGER_LIST RESOURCE_MGR_LIST";
@ -1568,68 +1635,47 @@ sub create_LL_mutex_resources {
$llrms = $llval; }
}
$cmd = "llconfig -c ";
my $updateFLOAT = 0;
my $updateSCHED = 0;
foreach my $float (split(/\s+/,$resource_string)) {
$float =~ s/\(/./g;
$float =~ s/\)/./g;
if ( $curFLOAT !~ /$float/ ) {
$updateFLOAT = 1;
last;
}
}
if ($updateFLOAT) {
$curFLOAT =~ s/XCATROLLINGUPDATE_MUTEX(\d)*\((\d)*\)//g;
$curFLOAT =~ s/XCATROLLINGUPDATE_MAXUPDATES(\d)*\((\d)*\)//g;
$curFLOAT .= $resource_string;
$cmd .= "FLOATING_RESOURCES=\"$curFLOAT\" ";
}
$curFLOAT =~ s/XCATROLLINGUPDATE_MUTEX(\d)*\((\d)*\)//g;
$curFLOAT =~ s/XCATROLLINGUPDATE_MAXUPDATES(\d)*\((\d)*\)//g;
$curFLOAT .= $resource_string;
$cmd .= "FLOATING_RESOURCES=\"$curFLOAT\" ";
$resource_string =~ s/\((\d)*\)//g;
foreach my $sched (split(/\s+/,$resource_string)) {
if ( $curSCHED !~ /$sched/ ) {
$updateSCHED = 1;
last;
}
}
if ($updateSCHED) {
$curSCHED =~ s/XCATROLLINGUPDATE_MUTEX(\d)*//g;
$curSCHED =~ s/XCATROLLINGUPDATE_MAXUPDATES(\d)*//g;
$curSCHED .= $resource_string;
$cmd .= "SCHEDULE_BY_RESOURCES=\"$curSCHED\" ";
}
if ( $updateFLOAT || $updateSCHED ) {
$curSCHED =~ s/XCATROLLINGUPDATE_MUTEX(\d)*//g;
$curSCHED =~ s/XCATROLLINGUPDATE_MAXUPDATES(\d)*//g;
$curSCHED .= $resource_string;
$cmd .= "SCHEDULE_BY_RESOURCES=\"$curSCHED\" ";
# TODO -- WAITING ON LLCONFIG OPTION TO NOT SEND CFG CMD TO ALL
#### NODES. NEED TO CHANGE CMD WHEN AVAILABLE.
my @llcfg_c;
if ($::TEST) {
my $rsp;
push @{ $rsp->{data} }, "In TEST mode. Will NOT run command: $cmd ";
xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK );
$::RUNCMD_RC = 0;
} else {
@llcfg_c = xCAT::Utils->runcmd( $cmd, 0 );
}
$cmd = "llrctl reconfig";
my @llms = split(/\s+/,$llcms." ".$llrms);
my %have = ();
my @llnodes;
foreach my $m (@llms) {
my ($sm,$rest) = split(/\./,$m);
push(@llnodes, $sm) unless $have{$sm}++;
}
if ($::TEST) {
my $rsp;
push @{ $rsp->{data} }, "In TEST mode. Will NOT run command: xdsh <llcm,llrm> $cmd ";
xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK );
$::RUNCMD_RC = 0;
} else {
xCAT::Utils->runxcmd(
{ command => ['xdsh'],
node => \@llnodes,
arg => [ "-v", $cmd ]
},
$::SUBREQ, -1);
}
my @llcfg_c;
if ($::TEST) {
my $rsp;
push @{ $rsp->{data} }, "In TEST mode. Will NOT run command: $cmd ";
xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK );
$::RUNCMD_RC = 0;
} else {
@llcfg_c = xCAT::Utils->runcmd( $cmd, 0 );
}
$cmd = "llrctl reconfig";
my @llms = split(/\s+/,$llcms." ".$llrms);
my %have = ();
my @llnodes;
foreach my $m (@llms) {
my ($sm,$rest) = split(/\./,$m);
push(@llnodes, $sm) unless $have{$sm}++;
}
if ($::TEST) {
my $rsp;
push @{ $rsp->{data} }, "In TEST mode. Will NOT run command: xdsh <llcm,llrm> $cmd ";
xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK );
$::RUNCMD_RC = 0;
} else {
xCAT::Utils->runxcmd(
{ command => ['xdsh'],
node => \@llnodes,
arg => [ "-v", $cmd ]
},
$::SUBREQ, -1);
}
}
@ -1720,6 +1766,7 @@ sub runrollupdate {
# Load the datafile
&readDataFile($::datafile);
# set some defaults
$::ug_name = $::DATAATTRS{updategroup}[0];
my ($statusattr,$statusval,$statustimeout);
if (defined($::DATAATTRS{bringupappstatus}[0])) {
@ -1737,6 +1784,13 @@ sub runrollupdate {
} else {
$statustimeout = 10;
}
my $skipshutdown = 0;
if ((defined($::DATAATTRS{skipshutdown}[0])) &&
( ($::DATAATTRS{skipshutdown}[0] eq "yes") ||
($::DATAATTRS{skipshutdown}[0] eq "y") ||
($::DATAATTRS{skipshutdown}[0] eq "1") ) ) {
$skipshutdown = 1;
}
# make sure nodes are in correct state
my $hostlist = &get_hostlist;
@ -1804,102 +1858,107 @@ sub runrollupdate {
# Shutdown the nodes
# FUTURE: Replace if we ever develop cluster shutdown function
xCAT::Utils->setAppStatus(\@nodes,"RollingUpdate","shutting_down");
my $shutdown_cmd = "shutdown -F &";
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running command \'xdsh $hostlist -v $shutdown_cmd\' \n";
close (RULOG);
}
if ($::TEST) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: In TEST mode. Will NOT run command \'xdsh $hostlist -v $shutdown_cmd\' \n";
close (RULOG);
} else {
xCAT::Utils->runxcmd( { command => ['xdsh'],
node => \@nodes,
arg => [ "-v", $shutdown_cmd ]
}, $::SUBREQ, -1);
}
my $slept = 0;
my $alldown = 1;
my $nodelist = join( ',', @nodes );
if (! $::TEST) {
do {
$alldown = 1;
my $shutdownmax = 5;
if ( defined($::DATAATTRS{shutdowntimeout}[0] ) ) {
$shutdownmax = $::DATAATTRS{shutdowntimeout}[0];
}
my $pwrstat_cmd = "rpower $nodelist stat";
if ( ! $skipshutdown ) {
xCAT::Utils->setAppStatus(\@nodes,"RollingUpdate","shutting_down");
my $shutdown_cmd;
if (xCAT::Utils->isAIX()) { $shutdown_cmd = "shutdown -F &"; }
else { $shutdown_cmd = "shutdown -h now &"; }
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running command \'$pwrstat_cmd\' \n";
print RULOG localtime()." $::ug_name: Running command \'xdsh $hostlist -v $shutdown_cmd\' \n";
close (RULOG);
}
my $pwrstat = xCAT::Utils->runxcmd( $pwrstat_cmd, $::SUBREQ, -1, 1 );
foreach my $pline (@{$pwrstat}) {
my ( $pnode, $pstat, $rest ) = split( /\s+/, $pline );
if ( ( $pstat eq "Running" )
|| ( $pstat eq "Shutting" )
|| ( $pstat eq "on" ) )
{
if ($::TEST) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: In TEST mode. Will NOT run command \'xdsh $hostlist -v $shutdown_cmd\' \n";
close (RULOG);
} else {
xCAT::Utils->runxcmd( { command => ['xdsh'],
node => \@nodes,
arg => [ "-v", $shutdown_cmd ]
}, $::SUBREQ, -1);
}
my $slept = 0;
my $alldown = 1;
my $nodelist = join( ',', @nodes );
if (! $::TEST) {
do {
$alldown = 1;
my $shutdownmax = 5;
if ( defined($::DATAATTRS{shutdowntimeout}[0] ) ) {
$shutdownmax = $::DATAATTRS{shutdowntimeout}[0];
}
my $pwrstat_cmd = "rpower $nodelist stat";
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running command \'$pwrstat_cmd\' \n";
close (RULOG);
}
my $pwrstat = xCAT::Utils->runxcmd( $pwrstat_cmd, $::SUBREQ, -1, 1 );
foreach my $pline (@{$pwrstat}) {
my ( $pnode, $pstat, $rest ) = split( /\s+/, $pline );
if ( ( $pstat eq "Running" )
|| ( $pstat eq "Shutting" )
|| ( $pstat eq "on" ) )
{
# give up on shutdown after requested wait time and force the
# node off
if ( $slept >= ($shutdownmax * 60) ) {
$pnode =~ s/://g;
my $pwroff_cmd = "rpower $pnode off";
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running command \'$pwroff_cmd\' \n";
close (RULOG);
# give up on shutdown after requested wait time and force the
# node off
if ( $slept >= ($shutdownmax * 60) ) {
$pnode =~ s/://g;
my $pwroff_cmd = "rpower $pnode off";
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running command \'$pwroff_cmd\' \n";
close (RULOG);
}
xCAT::Utils->runxcmd( $pwroff_cmd, $::SUBREQ, -1 );
}
else {
$alldown = 0;
last;
}
xCAT::Utils->runxcmd( $pwroff_cmd, $::SUBREQ, -1 );
}
else {
$alldown = 0;
last;
}
}
}
# If all nodes are not down yet, wait some more
unless ($alldown) {
sleep(20);
$slept += 20;
}
} until ($alldown);
} # end not TEST
# If all nodes are not down yet, wait some more
unless ($alldown) {
sleep(20);
$slept += 20;
}
} until ($alldown);
} # end not TEST
# Run out-of-band commands for this update group
xCAT::Utils->setAppStatus(\@nodes,"RollingUpdate","running_outofbandcmds");
foreach my $obline ( @{ $::DATAATTRS{'outofbandcmd'} } ) {
$obline =~ s/\$NODELIST/$hostlist/g;
# Run the command
if ($::VERBOSE) {
# Run out-of-band commands for this update group
xCAT::Utils->setAppStatus(\@nodes,"RollingUpdate","running_outofbandcmds");
foreach my $obline ( @{ $::DATAATTRS{'outofbandcmd'} } ) {
$obline =~ s/\$NODELIST/$hostlist/g;
# Run the command
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running out-of-band command \'$obline\'\n";
close (RULOG);
}
my @oboutput;
if ($::TEST) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: In TEST mode. Will NOT run out-of-band command \'$obline\'\n";
close (RULOG);
} else {
@oboutput = xCAT::Utils->runcmd( $obline, 0 );
}
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Out-of-band command output:\n";
foreach my $oboline (@oboutput) {
print RULOG $oboline."\n";
}
close (RULOG);
my @oboutput;
if ($::TEST) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: In TEST mode. Will NOT run out-of-band command \'$obline\'\n";
close (RULOG);
} else {
@oboutput = xCAT::Utils->runcmd( $obline, 0 );
}
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Out-of-band command output:\n";
foreach my $oboline (@oboutput) {
print RULOG $oboline."\n";
}
close (RULOG);
}
}
}
} # end !$skipshutdown
@ -1910,10 +1969,14 @@ sub runrollupdate {
if ( defined($::DATAATTRS{bringuporder}[0]) ) {
$numboots = scalar( @{$::DATAATTRS{bringuporder}} );
}
if ( $skipshutdown ) {
$numboots = 0;
}
my @remaining_nodes = @nodes;
foreach my $bootindex (0..$numboots){
my @bootnodes;
if (defined($::DATAATTRS{bringuporder}[$bootindex])) {
if ((!$skipshutdown) &&
(defined($::DATAATTRS{bringuporder}[$bootindex]))) {
@bootnodes = split(/,/,$::DATAATTRS{bringuporder}[$bootindex]);
foreach my $rn (@remaining_nodes) {
if ((defined($rn)) && (grep(/^$rn$/,@bootnodes))) {
@ -1930,69 +1993,71 @@ sub runrollupdate {
if (!scalar (@bootnodes)) { next; }
# reboot command determined by nodehm power/mgt attributes
my $hmtab = xCAT::Table->new('nodehm');
my @rpower_nodes;
my @rnetboot_nodes;
my $hmtab_entries =
$hmtab->getNodesAttribs( \@bootnodes, [ 'node', 'mgt', 'power' ] );
foreach my $node (@bootnodes) {
my $pwr = $hmtab_entries->{$node}->[0]->{power};
unless ( defined($pwr) ) { $pwr = $hmtab_entries->{$node}->[0]->{mgt}; }
if ( $pwr eq 'hmc' ) {
push( @rnetboot_nodes, $node );
}
else {
push( @rpower_nodes, $node );
}
}
if (!$skipshutdown) {
my $hmtab = xCAT::Table->new('nodehm');
my @rpower_nodes;
my @rnetboot_nodes;
my $hmtab_entries =
$hmtab->getNodesAttribs( \@bootnodes, [ 'node', 'mgt', 'power' ] );
foreach my $node (@bootnodes) {
my $pwr = $hmtab_entries->{$node}->[0]->{power};
unless ( defined($pwr) ) { $pwr = $hmtab_entries->{$node}->[0]->{mgt}; }
if ( $pwr eq 'hmc' ) {
push( @rnetboot_nodes, $node );
}
else {
push( @rpower_nodes, $node );
}
}
xCAT::Utils->setAppStatus(\@bootnodes,"RollingUpdate","rebooting");
if ( scalar(@rnetboot_nodes) > 0 ) {
my $rnb_nodelist = join( ',', @rnetboot_nodes );
# my $cmd = "rnetboot $rnb_nodelist -f";
xCAT::Utils->setAppStatus(\@bootnodes,"RollingUpdate","rebooting");
if ( scalar(@rnetboot_nodes) > 0 ) {
my $rnb_nodelist = join( ',', @rnetboot_nodes );
# my $cmd = "rnetboot $rnb_nodelist -f";
#### TODO: DO WE STILL NEED 2 LISTS?
#### RUNNING rpower FOR ALL BOOTS NOW! MUCH FASTER!!!
my $cmd = "rpower $rnb_nodelist on";
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running command \'$cmd\' \n";
close (RULOG);
}
if ($::TEST) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: In TEST mode. Will NOT run command \'$cmd\' \n";
close (RULOG);
} else {
xCAT::Utils->runxcmd( $cmd, $::SUBREQ, 0 );
}
} elsif ( scalar(@rpower_nodes) > 0 ) {
my $rp_nodelist = join( ',', @rpower_nodes );
my $cmd = "rpower $rp_nodelist boot";
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running command \'$cmd\' \n";
close (RULOG);
}
if ($::TEST) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: In TEST mode. Will NOT run command \'$cmd\' \n";
close (RULOG);
} else {
xCAT::Utils->runxcmd( $cmd, $::SUBREQ, 0 );
}
}
my $cmd = "rpower $rnb_nodelist on";
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running command \'$cmd\' \n";
close (RULOG);
}
if ($::TEST) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: In TEST mode. Will NOT run command \'$cmd\' \n";
close (RULOG);
} else {
xCAT::Utils->runxcmd( $cmd, $::SUBREQ, 0 );
}
} elsif ( scalar(@rpower_nodes) > 0 ) {
my $rp_nodelist = join( ',', @rpower_nodes );
my $cmd = "rpower $rp_nodelist boot";
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Running command \'$cmd\' \n";
close (RULOG);
}
if ($::TEST) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: In TEST mode. Will NOT run command \'$cmd\' \n";
close (RULOG);
} else {
xCAT::Utils->runxcmd( $cmd, $::SUBREQ, 0 );
}
}
} # end !$skipshutdown
# wait for bringupstatus to be set
my $not_done = 1;
my $totalwait = 0;
while ($not_done && $totalwait < ($statustimeout * 60)) {
if ($::VERBOSE) {
if ($::VERBOSE) {
open (RULOG, ">>$::LOGDIR/$::LOGFILE");
print RULOG localtime()." $::ug_name: Checking xCAT database $statusattr for value $statusval \n";
close (RULOG);
}
my $nltab_stats =
$nltab->getNodesAttribs( \@bootnodes, [ 'node', $statusattr ] );
}
my $nltab_stats =
$nltab->getNodesAttribs( \@bootnodes, [ 'node', $statusattr ] );
my %ll_res;
$not_done = 0;
foreach my $bn (@bootnodes) {

View File

@ -40,7 +40,6 @@ oldfeature=oldvalue
newfeature=newvalue
#
# updategroup: A set of nodes to be updated as a single group
# updategroup = name(noderange)
@ -65,15 +64,26 @@ mapgroups=block[01-10]
# mutex: Identify updategroups that are mutually exclusive and must not be
# mutex:
# mutex_count:
# Identify updategroups that are mutually exclusive and must not be
# updated at the same time in order to maintain active resources within
# the cluster. Only 1 updategroup listed in the entry will be updated at
# a time.
# the cluster.
# If the mutex stanza is immediately followed by a mutex_count stanza,
# up to mutex_count updategroups may be updated at the same time.
# If no mutex_count stanza is specified, only 1 updategroup listed
# in the entry will be updated at a time.
# mutex=updategroup,updategroup,...
# For example, the update jobs for ns1 and for ns2 will not be allowed
# to run at the same time:
mutex=ns1,ns2
# In this example, at most any 2 of the 4 updategroups io1 to io4 may
# be updated at the same time:
mutex=io1,io2,io3,io4
mutex_count=2
# You may list multiple mutex stanzas in this file to identify different
# sets of mutual exclusion.
# Multiple mutually exclusive sets can be specified using updategroup name
@ -84,6 +94,35 @@ mutex=ns1,ns2
#mutex=block2a,block2b,block2c
#mutex=block3a,block3b,block3c
# nodegroup_mutex=<nodegroup_name>
# mutex_count=<count>
# Mutual exclusion based on membership in an xCAT nodegroup. For each
# updategroup listed above, if any node in that updategroup is a
# member of this xCAT nodegroup, that updategroup is added to the
# mutex entry created for this nodegroup.
# For example, if you specify:
# nodegroup_mutex=IOservers
# Where your xCAT nodegroup is defined as:
# IOservers=n4,n8,n12
# And your updategroups specified above are:
# updategroup=CEC1(n1-n4)
# updategroup=CEC2(n5-n8)
# updategroup=CEC3(n9-n12)
# updategroup=CEC4(n13-n16)
# The following mutex will be created:
# mutex=CEC1,CEC2,CEC3
# With mutex_count working the same as above.
#
# And, to make it even more powerful, you can list an xCAT nodegroup range
# to create multiple nodegroup_mutex stanzas.
# For example, this:
# nodegroup_mutex=block[1-3]IO
# would be equivalent to:
# nodegroup_mutex=block1IO
# nodegroup_mutex=block2IO
# nodegroup_mutex=block3IO
# maxupdates: Maximum number of updategroups that can be updated at one time
@ -170,13 +209,16 @@ update_if_down=yes
# NOT IMPLEMENTED YET! FUTURE SUPPORT FOR ROLLING UPDATE OF DISKFULL NODES
# shutdownrequired: Should a shutdown command be sent to the nodes.
# skipshutdown: Should the shutdown command to the nodes be skipped.
# Shutdown is required for diskless nodes. For diskfull nodes, simple
# updates may be applied to the nodes through prescripts, and a node
# reboot may not be required.
# Default is "yes".
shutdownrequired=yes
# If skipshutdown is set to "yes", outofbandcmd scripts will NOT be
# run and bringuporder will be ignored (since the nodes are not shut
# down and rebooted). However, bringupstatus/bringupappstatus will
# still be checked to verify that the node update has completed.
# Default is skipshutdown=no.
skipshutdown=no
@ -215,6 +257,7 @@ shutdowntimeout=5
# This can be used to run operations such as firmware updates.
# Multiple outofbandcmd entries or outofbandcmd/outofbandnodes pairs of
# entries can be specified. Each command will be run in order.
# outofbandcmd scripts will not be run if "skipshutdown" is set to "yes".
#
#outofbandcmd=/u/admin/bin/myfirmwareupdates $NODELIST
#