From 17341def63a73b056a33995036c755d657f009b2 Mon Sep 17 00:00:00 2001 From: mellor Date: Tue, 11 Jan 2011 19:35:07 +0000 Subject: [PATCH] rollupdate defect 3151874 -- cancel reservation if bringuptimeout reached git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@8618 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- xCAT-server/lib/xcat/plugins/rollupdate.pm | 132 +++++++++++++++++++-- 1 file changed, 122 insertions(+), 10 deletions(-) diff --git a/xCAT-server/lib/xcat/plugins/rollupdate.pm b/xCAT-server/lib/xcat/plugins/rollupdate.pm index 459b94094..b4a7a3e73 100644 --- a/xCAT-server/lib/xcat/plugins/rollupdate.pm +++ b/xCAT-server/lib/xcat/plugins/rollupdate.pm @@ -2007,6 +2007,9 @@ sub runrollupdate { } xCAT::Utils->setAppStatus(\@bootnodes,"RollingUpdate","rebooting"); + if ($bootindex < $numboots) { + xCAT::Utils->setAppStatus(\@remaining_nodes,"RollingUpdate","waiting_on_bringuporder"); + } if ( scalar(@rnetboot_nodes) > 0 ) { my $rnb_nodelist = join( ',', @rnetboot_nodes ); # my $cmd = "rnetboot $rnb_nodelist -f"; @@ -2046,6 +2049,7 @@ sub runrollupdate { # wait for bringupstatus to be set my $not_done = 1; my $totalwait = 0; + my %ll_res; while ($not_done && $totalwait < ($statustimeout * 60)) { if ($::VERBOSE) { open (RULOG, ">>$::LOGDIR/$::LOGFILE"); @@ -2054,11 +2058,12 @@ sub runrollupdate { } my $nltab_stats = $nltab->getNodesAttribs( \@bootnodes, [ 'node', $statusattr ] ); - my %ll_res; + %ll_res = (); $not_done = 0; foreach my $bn (@bootnodes) { if ( $nltab_stats->{$bn}->[0]->{$statusattr} !~ /$statusval/ ) { - $not_done = 1; + $ll_res{$bn}{not_done}=1; + $not_done = 1; } else { $ll_res{$bn}{remove}=1; } @@ -2083,11 +2088,27 @@ sub runrollupdate { } } if ($not_done) { - open (RULOG, ">>$::LOGDIR/$::LOGFILE"); - print RULOG "\n"; - print RULOG localtime()." ERROR: Update group $::ug_name: Reached bringuptimeout before all nodes completed bringup. Some nodes may not have been updated. \n"; - print RULOG "\n"; - close (RULOG); + if (($::scheduler eq "loadleveler") && + ($::ll_reservation_id)){ + open (RULOG, ">>$::LOGDIR/$::LOGFILE"); + print RULOG "\n"; + print RULOG localtime()." ERROR: Update group $::ug_name: Reached bringuptimeout before all nodes completed bringup. Some nodes may not have been updated. \n"; + print RULOG "Cancelling LL reservation $::ll_reservation_id \n"; + print RULOG "\n"; + close (RULOG); + + my @remove_res; + $remove_res[0]='CANCEL_DUE_TO_ERROR'; + &remove_LL_reservations(\@remove_res); + my @error_nodes; + foreach my $bn (keys %ll_res) { + if ($ll_res{$bn}{not_done}) { + push (@error_nodes,$bn); + } + } + xCAT::Utils->setAppStatus(\@error_nodes,"RollingUpdate","ERROR_bringuptimeout_reached"); + xCAT::Utils->setAppStatus(\@remaining_nodes,"RollingUpdate","ERROR_bringuptimeout_reached_for_previous_node"); + } last; } @@ -2237,7 +2258,12 @@ sub get_hostlist { #----------------------------------------------------------------------------- sub remove_LL_reservations { - my $nodes = shift; + my $input_nodes = shift; + my $nodes = $input_nodes; + my $CANCEL_DUE_TO_ERROR = 0; + if ( $input_nodes->[0] eq 'CANCEL_DUE_TO_ERROR') { + $CANCEL_DUE_TO_ERROR = 1; + } my $cmd; if ($::VERBOSE) { @@ -2270,11 +2296,18 @@ sub remove_LL_reservations { my $remove_reservation = 0; my $remove_cmd = "llchres -R $::ll_reservation_id -h -"; + if ($CANCEL_DUE_TO_ERROR) { + $nodes = \@llnodes; + } foreach my $n (@{$nodes}) { if ( grep(/^$n$/,@llnodes) ) { $remove_count++; # change features for this node - &change_LL_feature($n); + if ($CANCEL_DUE_TO_ERROR) { + &remove_LL_updatefeature_only($n); + } else { + &change_LL_feature($n); + } if ( $remove_count < $llnode_count ) { $remove_cmd .= " $n"; } else { @@ -2445,6 +2478,86 @@ sub change_LL_feature { +#---------------------------------------------------------------------------- + +=head3 remove_LL_updatefeature_only + + Changes the LL feature for the node to remove only the updatefeature + Will NOT remove oldfeature or set newfeature + + Arguments: + Returns: + 0 - OK + 1 - error + Globals: + Error: + Example: + + Comments: + +=cut + +#----------------------------------------------------------------------------- +sub remove_LL_updatefeature_only { + my $node = shift; + + if (!defined($::DATAATTRS{updatefeature}[0]) ) { + return 0; + } + # Query current feature + my $cmd = "llconfig -h $node -d FEATURE"; + if ($::VERBOSE) { + open (RULOG, ">>$::LOGDIR/$::LOGFILE"); + print RULOG localtime()." $::ug_name: Running command \'$cmd\'\n"; + close (RULOG); + } + my ($llcfgout) = xCAT::Utils->runcmd( $cmd, 0 ); + if ($::VERBOSE) { + open (RULOG, ">>$::LOGDIR/$::LOGFILE"); + print RULOG localtime()." Return code: $::RUNCMD_RC\n"; + close (RULOG); + } + + # Remove old feature + my $newfeature_string = ""; + my @llfeatures; + if ( $llcfgout =~ /:FEATURE =/ ) { + my ($stuff,$curfeature_string) = split(/=/,$llcfgout); + @llfeatures = split(/\s+/,$curfeature_string); + my $updateallfeature = " "; + if (defined($::DATAATTRS{updatefeature}[0])) { + $updateallfeature = $::DATAATTRS{updatefeature}[0]; + } + foreach my $f (@llfeatures) { + if ($f eq $updateallfeature) { + $f = " "; + } + } + $newfeature_string = join(" ",@llfeatures); + } + + # Change in LL database + $cmd = "llconfig -N -h $node -c FEATURE=\"$newfeature_string\""; + if ($::VERBOSE) { + open (RULOG, ">>$::LOGDIR/$::LOGFILE"); + print RULOG localtime()." $::ug_name: Running command \'$cmd\'\n"; + close (RULOG); + } + xCAT::Utils->runcmd( $cmd, 0 ); + if ($::VERBOSE) { + open (RULOG, ">>$::LOGDIR/$::LOGFILE"); + print RULOG localtime()." Return code: $::RUNCMD_RC\n"; + close (RULOG); + } + + # Send LL reconfig to all central mgrs and resource mgrs + llreconfig(); + + return 0; +} + + + #---------------------------------------------------------------------------- @@ -2494,7 +2607,6 @@ sub llreconfig { $runlocal=1; } } - if ($runlocal) { if ($::VERBOSE) { open (RULOG, ">>$::LOGDIR/$::LOGFILE");