From 98fd1a36ac8193d34a5990136efcfeb883e047f4 Mon Sep 17 00:00:00 2001 From: mellor Date: Wed, 22 Oct 2008 16:02:14 +0000 Subject: [PATCH] rollupdate - new files git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2376 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- perl-xCAT/xCAT/NameRange.pm | 271 ++++++ xCAT-client/pods/man1/rollupdate.1.pod | 83 ++ xCAT-server/lib/xcat/plugins/rollupdate.pm | 867 ++++++++++++++++++ xCAT-server/share/xcat/rollupdate/ll.tmpl | 27 + .../xcat/rollupdate/rollupdate.input.sample | 84 ++ .../share/xcat/rollupdate/send_reboot_request | 80 ++ 6 files changed, 1412 insertions(+) create mode 100644 perl-xCAT/xCAT/NameRange.pm create mode 100644 xCAT-client/pods/man1/rollupdate.1.pod create mode 100644 xCAT-server/lib/xcat/plugins/rollupdate.pm create mode 100644 xCAT-server/share/xcat/rollupdate/ll.tmpl create mode 100644 xCAT-server/share/xcat/rollupdate/rollupdate.input.sample create mode 100644 xCAT-server/share/xcat/rollupdate/send_reboot_request diff --git a/perl-xCAT/xCAT/NameRange.pm b/perl-xCAT/xCAT/NameRange.pm new file mode 100644 index 000000000..29a7ae9fc --- /dev/null +++ b/perl-xCAT/xCAT/NameRange.pm @@ -0,0 +1,271 @@ +# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html +package xCAT::NameRange; +require xCAT::Table; +require Exporter; +use strict; + +#Perl implementation of namerange +# NOTE: This is identical to xCAT::NodeRange except that no +# database access occurs, no nodes are verified, and +# no nodegroups are expanded. +# Made a new utility since NodeRange is used EVERYWHERE in +# xCAT code and did not want to risk de-stabilizing existing code. 
our @ISA    = qw(Exporter);
our @EXPORT = qw(namerange);

# Depth of recursive namerange() calls (parenthesized atoms, '@'
# intersections, and '^file' atoms all recurse).
my $recurselevel = 0;


# subnodes(@target, LIST): remove every name in LIST from @target, in place.
# (The \@@ prototype is kept for compatibility with existing callers.)
sub subnodes (\@@) {

    #Subtract set of nodes from the first list
    my $nodes = shift;
    my $node;
    foreach $node (@_) {

        # Compare by exact name.  The original grep(!/^$node$/, ...) treated
        # the node name as a regular expression, which misbehaves when a
        # name contains regex metacharacters.
        @$nodes = ( grep { $_ ne $node } @$nodes );
    }
}

# expandatom($atom): expand one namerange element (bracket range, hyphen or
# colon range, '+' offset, parenthesized group, ...) into a list of names.
# No database access occurs and no names are verified (unlike NodeRange).
sub expandatom {
    my $atom  = shift;
    my @nodes = ();

    # Parenthesized sub-expression: recurse into namerange()
    if ( $atom =~ /^\(.*\)$/ ) {
        $atom =~ s/^\((.*)\)$/$1/;
        $recurselevel++;
        return namerange($atom);
    }

    # Intersection operator inside the atom: recurse
    if ( $atom =~ /@/ ) {
        $recurselevel++;
        return namerange($atom);
    }

    # A regular expression - not supported in namerange
    if ( $atom =~ m/^\// ) {
        return ($atom);
    }

    # Square bracket range, e.g. node[1-20] or node[1,3,5]
    if ( $atom =~ m/(.*)\[(.*)\](.*)/ ) {

        # For the time being, only one [] per atom is considered
        # (xcat 1.2 does no better).
        # Save the captures BEFORE calling split: a capturing split
        # clobbers $1/$3, so the original "$1$subelem$3" interpolation
        # read the wrong values.
        my ( $pre, $inner, $post ) = ( $1, $2, $3 );
        my @subelems = split( /([\,\-\:])/, $inner );
        my $subrange = "";
        while ( my $subelem = shift @subelems ) {
            my $subop = shift @subelems;
            $subop = '' unless defined $subop;    # last element has no operator
            $subrange = $subrange . "$pre$subelem$post$subop";
        }
        foreach ( split /,/, $subrange ) {
            my @newnodes = expandatom($_);
            @nodes = ( @nodes, @newnodes );
        }
        return @nodes;
    }

    # '+' offset operator, e.g. node1+3 expands to node1..node4
    if ( $atom =~ m/\+/ ) {
        $atom =~ m/^([^0-9]*)([0-9]+)([^\+]*)\+([0-9]+)/;
        my $pref     = $1;
        my $startnum = $2;
        my $suf      = $3;
        my $end      = $4 + $startnum;
        my $endnum   = sprintf( "%d", $end );
        if ( length($startnum) > length($endnum) ) {

            # preserve zero padding of the starting number
            $endnum = sprintf( "%0" . length($startnum) . "d", $end );
        }
        foreach ( "$startnum" .. "$endnum" ) {
            my @addnodes = expandatom( $pref . $_ . $suf );
            @nodes = ( @nodes, @addnodes );
        }
        return (@nodes);
    }

    # Hyphen or colon range operator, e.g. node1-node5 or node1:node5
    if ( $atom =~ m/[-:]/ ) {
        my $left;
        my $right;
        if ( $atom =~ m/:/ ) {
            ( $left, $right ) = split /:/, $atom;
        }
        else {
            my $count = ( $atom =~ tr/-// );
            if ( ( $count % 2 ) == 0 ) {

                # Even number of '-' cannot be understood in range context;
                # we might not really be in range context at all.
                return ($atom);
            }
            my $expr = "([^-]+?" . ( "-[^-]*" x ( $count / 2 ) ) . ")-(.*)";
            $atom =~ m/$expr/;
            $left  = $1;
            $right = $2;
        }
        if ( $left eq $right ) {    # e.g. node1-node1 for some strange reason
            return expandatom($left);
        }
        my @leftarr  = split( /(\d+)/, $left );
        my @rightarr = split( /(\d+)/, $right );
        if ( scalar(@leftarr) != scalar(@rightarr) ) {

            # Mismatched formatting - guess it's meant to be a nodename
            return ($atom);
        }
        my $prefix = "";
        foreach my $idx ( 0 .. $#leftarr ) {
            if ( $leftarr[$idx] =~ /^\d+$/ and $rightarr[$idx] =~ /^\d+$/ ) {

                # Pure numeric component
                if ( $leftarr[$idx] ne $rightarr[$idx] ) {

                    # Found the iterator (only one supported for now).
                    # Prefix of the pre-validated parts:
                    my $iterprefix = join( '', @leftarr[ 0 .. ( $idx - 1 ) ] );
                    my $luffix;    # remaining pieces must still match
                    my $ruffix;
                    if ( $idx == $#leftarr ) {
                        $luffix = "";
                        $ruffix = "";
                    }
                    else {
                        $ruffix =
                          join( '', @rightarr[ ( $idx + 1 ) .. $#rightarr ] );
                        $luffix =
                          join( '', @leftarr[ ( $idx + 1 ) .. $#leftarr ] );
                    }
                    if ( $luffix ne $ruffix ) {    # suffixes mismatched
                        return ($atom);
                    }
                    foreach ( $leftarr[$idx] .. $rightarr[$idx] ) {
                        my @addnodes = expandatom( $iterprefix . $_ . $luffix );
                        @nodes = ( @nodes, @addnodes );
                    }
                    return (@nodes);    # the return list is fully built
                }
            }
            elsif ( $leftarr[$idx] ne $rightarr[$idx] ) {
                return ($atom);
            }

            # Pieces were the same so far; more components to come.
            $prefix .= $leftarr[$idx];
        }

        # Should be unreachable; treat whatever this is as a plain name.
        return ($atom);
    }

    return ($atom);
}

# namerange($range): expand a full namerange expression into a sorted list
# of names.  Operations are processed left to right:
#   ','  union,  ',-' exclusion,  '@' intersection,  '^file' read from file.
sub namerange {
    my $range    = shift;
    my %nodes    = ();
    my %delnodes = ();
    my $op       = ",";

    # Split on commas that are outside of [] and ()
    my @elems = split( /(,(?![^[]*?])(?![^\(]*?\)))/, $range );
    if ( scalar(@elems) == 1 ) {

        # Only split on @ when no commas are present (inner recursion)
        @elems = split( /(@(?![^\(]*?\)))/, $range );
    }

    while ( my $atom = shift @elems ) {
        if ( $atom =~ /^-/ ) {

            # Exclusion: strip off the minus, but remember it in the op
            $atom = substr( $atom, 1 );
            $op   = $op . "-";
        }

        if ( $atom =~ /^\^(.*)$/ ) {

            # ^/path/file - read a list of names from a file, one per line.
            # Lines starting with '^' or '#' are skipped; only the leading
            # word (up to a ':' or blank) of each line is used.
            # Missing/unreadable files contribute nothing (best effort).
            my $listfile = $1;
            if ( open( my $nrf, '<', $listfile ) ) {
                while ( my $line = <$nrf> ) {
                    unless ( $line =~ m/^[\^#]/ ) {
                        $line =~ m/^([^: ]*)/;
                        my $newrange = $1;
                        chomp($newrange);
                        $recurselevel++;
                        my @filenodes = namerange($newrange);
                        foreach (@filenodes) {
                            $nodes{$_} = 1;
                        }
                    }
                }
                close($nrf);
            }
            next;
        }

        # Expand the atom; each resulting name becomes a key in %newset
        my %newset = map { $_ => 1 } expandatom($atom);

        if ( $op =~ /@/ ) {

            # Intersect the atom with everything received so far
            foreach ( keys %nodes ) {
                unless ( $newset{$_} ) {
                    delete $nodes{$_};
                }
            }
        }
        elsif ( $op =~ /,-/ ) {

            # Add the atom's names to the exclude list (removal is delayed
            # to the end so later unions cannot resurrect them)
            foreach ( keys %newset ) {
                $delnodes{$_} = 1;
            }
        }
        else {

            # Plain union
            foreach ( keys %newset ) {
                $nodes{$_} = 1;
            }
        }
        $op = shift @elems;

    }    # end of main while loop

    # Now remove all the exclusion nodes
    foreach ( keys %nodes ) {
        if ( $delnodes{$_} ) {
            delete $nodes{$_};
        }
    }
    if ($recurselevel) {
        $recurselevel--;
    }
    return sort ( keys %nodes );

}


1;

=head1 NAME

xCAT::NameRange - Perl module for xCAT namerange expansion

=head1 SYNOPSIS

    use xCAT::NameRange;
    my @nodes=namerange("storage@rack1,node[1-200],^/tmp/nodelist,node300-node400,node401+10,500-550");

=head1 DESCRIPTION

namerange interprets xCAT noderange formatted strings and returns a list of
names.  The following two operations are supported on elements, and
interpreted left to right:

    , union next element with everything to the left.

    @ take intersection of element to the right with everything on the left
    (i.e.
mask out anything to the left not belonging to what is described to + the right) + +Each element can be a number of things: + + A node name, i.e.: + +=item * node1 + +A hyphenated node range (only one group of numbers may differ between the left and right hand side, and those numbers will increment in a base 10 fashion): + +node1-node200 node1-compute-node200-compute +node1:node200 node1-compute:node200-compute + +A namerange denoted by brackets: + +node[1-200] node[001-200] + +A regular expression describing the namerange: + +/d(1.?.?|200) + +A node plus offset (this increments the first number found in nodename): + +node1+199 + +And most of the above substituting groupnames. +3C +3C + +NameRange tries to be intelligent about detecting padding, so you can: +node001-node200 +And it will increment according to the pattern. + + +=head1 COPYRIGHT + +Copyright 2007 IBM Corp. All rights reserved. + + +=cut diff --git a/xCAT-client/pods/man1/rollupdate.1.pod b/xCAT-client/pods/man1/rollupdate.1.pod new file mode 100644 index 000000000..3a84e54b4 --- /dev/null +++ b/xCAT-client/pods/man1/rollupdate.1.pod @@ -0,0 +1,83 @@ +=head1 NAME + +B - performs cluster rolling update + + +=head1 SYNOPSIS + +B I B<|> B [B<-V> | B<--verbose>] + +B [B<-?> | B<-h> | B<--help> | B<-v> | B<--version>] + + +=head1 DESCRIPTION + +The B command creates and submits scheduler jobs that will notify xCAT to shutdown a group of nodes, run optional out-of-band commands from the xCAT management node, and reboot the nodes. Currently, only LoadLeveler is supported as a job scheduler with B. + +Input to the B command is passed in as stanza data through STDIN. Information such as the sets of nodes that will be updated, the name of the job scheduler, a template for generating job command files, and other control data are required. See +/opt/xcat/share/xcat/rollupdate/rollupdate.input.sample +for stanza keywords, usage, and examples. 
+
+The B<rollupdate> command will use the input data to determine each set of nodes that will be managed together as an update group.  For each update group, a job scheduler command file is created and submitted.  When the group of nodes becomes available and the scheduler runs the job, the job will send a message to the xCAT daemon on the management node to begin the update process for all the nodes in the update group.  The nodes will be stopped by the job scheduler (for LoadLeveler, the nodes are drained), an operating system shutdown command will be sent to each node, out-of-band operations can be run on the management node, and the nodes are powered back on.
+
+The B<rollupdate> command assumes that, if the update is to include rebooting stateless nodes to a new operating system image, the image has been created and tested, and that all relevant xCAT commands have been run for the nodes such that the new image will be loaded when xCAT reboots the nodes.
+
+
+=head1 OPTIONS
+
+=over 10
+
+=item B<-v|--version>
+
+Command Version.
+
+=item B<-V|--verbose>
+
+Display additional progress and error messages.
+
+=item B<-?|-h|--help>
+
+Display usage message.
+
+=back
+
+
+=head1 RETURN VALUE
+
+=over 3
+
+=item 0
+
+The command completed successfully.
+
+=item 1
+
+An error has occurred.
+
+=back
+
+
+=head1 EXAMPLES
+
+=over 3
+
+=item 1.
+ +To run a cluster rolling update based on the information provided in the file +/u/admin/rolling_updates/update_all.stanza +enter: + + cat /admin/rolling_updates/update_all.stanza | rollupdate + +=back + + +=head1 FILES + +/opt/xcat/bin/rollupdate + + +=head1 SEE ALSO + +L + diff --git a/xCAT-server/lib/xcat/plugins/rollupdate.pm b/xCAT-server/lib/xcat/plugins/rollupdate.pm new file mode 100644 index 000000000..cbb2126f6 --- /dev/null +++ b/xCAT-server/lib/xcat/plugins/rollupdate.pm @@ -0,0 +1,867 @@ +#!/usr/bin/env perl -w +# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html +##################################################### +# +# xCAT plugin package to handle rolling updates +# +##################################################### + +package xCAT_plugin::rollupdate; + +require xCAT::NodeRange; +require xCAT::NameRange; +require xCAT::Table; +require Data::Dumper; +require Getopt::Long; +require xCAT::MsgUtils; +require File::Path; +use strict; +use warnings; + +# +# Globals +# +# + +#------------------------------------------------------------------------------ + +=head1 rollupdate + +This program module file supports the cluster rolling update functions. + +Supported commands: + rollupdate - Create scheduler job command files and submit the jobs + rebootnodes - Reboot the updategroup in response to request from scheduler + job + +If adding to this file, please take a moment to ensure that: + + 1. Your contrib has a readable pod header describing the purpose and use of + the subroutine. + + 2. Your contrib is under the correct heading and is in alphabetical order + under that heading. + + 3. 
You have run tidypod on this file and saved the html file

=cut

#------------------------------------------------------------------------------

=head2 Cluster Rolling Update

=cut

#------------------------------------------------------------------------------

#----------------------------------------------------------------------------

=head3 handled_commands

    Return the list of commands handled by this plugin.

    Returns:
        hash ref mapping command name => plugin name

=cut

#-----------------------------------------------------------------------------
sub handled_commands {

    # Both commands are serviced by this (rollupdate) plugin module.
    return {
        rollupdate  => "rollupdate",
        rebootnodes => "rollupdate"
    };
}

#----------------------------------------------------------------------------

=head3 preprocess_request

    Route every request to the management node.

    Arguments:
        request hash ref
    Returns:
        array ref containing the (possibly redirected) request
    Comments:
        Any request that arrives on a service node is sent up to the MN.

=cut

#-----------------------------------------------------------------------------
sub preprocess_request {

    my $req = shift;

    # If no destination is set yet, direct the request to the management
    # node (site.master).
    unless ( defined( $req->{_xcatdest} ) ) {
        $req->{_xcatdest} = xCAT::Utils->get_site_Master();
    }
    return [$req];
}

#----------------------------------------------------------------------------

=head3 process_request

    Process the rolling update commands.

    Arguments:
        request hash ref, callback, sub-request handle
    Returns:
        0 - OK
        1 - error

=cut

#-----------------------------------------------------------------------------
sub process_request {
    $::request  = shift;
    $::CALLBACK = shift;
    $::SUBREQ   = shift;
    my $ret;

    # Globals used by all subroutines
    $::command   = $::request->{command}->[0];
    $::args      = $::request->{arg};
    $::stdindata = $::request->{stdin}->[0];

    # Figure out which cmd and call the subroutine to process it
    if ( $::command eq "rollupdate" ) {
        $ret = &rollupdate;
    }
    elsif ( $::command eq "rebootnodes" ) {
        $ret = &rebootnodes;
    }

    return $ret;
}

#----------------------------------------------------------------------------

=head3 rollupdate_usage

    Display the rollupdate command usage message through the callback.

    Returns:
        0 - always

=cut

#-----------------------------------------------------------------------------

# display the usage
sub rollupdate_usage {
    my $rsp;
    push @{ $rsp->{data} },
      "\nUsage: rollupdate - Submit cluster rolling update jobs \n";
    push @{ $rsp->{data} }, " rollupdate [-h | --help | -?] \n";
    push @{ $rsp->{data} },
      " rollupdate [-V | --verbose] [-v | --version] \n ";
    push @{ $rsp->{data} },
" - stanza file, see /opt/xcat/share/xcat/rollupdate/rollupdate.input.sample";
    push @{ $rsp->{data} }, " for example \n";
    xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK );
    return 0;
}

#----------------------------------------------------------------------------

=head3 processArgs

    Process the command line options and the stanza data piped in
    on stdin.

    Returns:
        0 - OK
        1 - just print usage
        2 - error

=cut

#-----------------------------------------------------------------------------
sub processArgs {

    # Copy the daemon-supplied args into @ARGV for Getopt::Long.
    # NOTE: the original "defined( @{$::args} )" is deprecated and is a
    # fatal error in modern perls; test the reference itself instead.
    if ( defined($::args) && @{$::args} ) {
        @ARGV = @{$::args};
    }

    # parse the options
    # options can be bundled up like -vV; flag unsupported options
    Getopt::Long::Configure( "bundling", "no_ignore_case", "no_pass_through" );
    Getopt::Long::GetOptions(
        'help|h|?'  => \$::opt_h,
        'test|t'    => \$::opt_t,
        'verbose|V' => \$::opt_V,
        'version|v' => \$::opt_v,
    );

    # Option -h for Help
    if ( defined($::opt_h) ) {
        return 2;
    }

    # opt_t not yet supported
    if ( defined($::opt_t) ) {
        my $rsp;
        push @{ $rsp->{data} }, "The \'-t\' option is not yet implemented.";
        xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK );
        return 2;
    }

    # Option -v for version
    if ( defined($::opt_v) ) {
        my $rsp;
        my $version = xCAT::Utils->Version();
        push @{ $rsp->{data} }, "$::command - $version\n";
        xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK );
        return 1;    # no usage - just exit
    }

    # Option -V for verbose output
    if ( defined($::opt_V) ) {
        $::verbose = 1;
        $::VERBOSE = 1;
    }

    # Process the stanza input piped in on stdin
    if ( defined($::stdindata) ) {
        my $rc = readFileInput($::stdindata);
        if ($rc) {
            my $rsp;
            push @{ $rsp->{data} }, "Could not process file input data.\n";
            xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK );
            return 1;
        }
    }
    else {

        # No stanza file, print usage
        return 2;
    }

    return 0;
}

#----------------------------------------------------------------------------

=head3 readFileInput

    Process the command line input piped in from a stanza file.
+ + Arguments: + Returns: + 0 - OK + 1 - error + Globals: + Error: + Example: + + Comments: + Set %::FILEATTRS + (i.e.- $::FILEATTRS{attr}=[val]) + +=cut + +#----------------------------------------------------------------------------- +sub readFileInput { + my ($filedata) = @_; + + my @lines = split /\n/, $filedata; + + foreach my $l (@lines) { + + # skip blank and comment lines + next if ( $l =~ /^\s*$/ || $l =~ /^\s*#/ ); + + # process a real line + if ( $l =~ /^\s*(\w+)\s*=\s*(.*)\s*/ ) { + my $attr = $1; + my $val = $2; + $attr =~ s/^\s*//; # Remove any leading whitespace + $attr =~ s/\s*$//; # Remove any trailing whitespace + $attr =~ tr/A-Z/a-z/; # Convert to lowercase + $val =~ s/^\s*//; + $val =~ s/\s*$//; + + # set the value in the hash for this entry + push( @{ $::FILEATTRS{$attr} }, $val ); + } + } # end while - go to next line + + return 0; +} + +#---------------------------------------------------------------------------- + +=head3 rollupdate + + Support for the xCAT rollupdate command. + + Arguments: + Returns: + 0 - OK + 1 - error + Globals: + + Error: + + Example: + + Comments: +=cut + +#----------------------------------------------------------------------------- + +sub rollupdate { + + my $rc = 0; + my $error = 0; + + # process the command line + $rc = &processArgs; + if ( $rc != 0 ) { + + # rc: 0 - ok, 1 - return, 2 - help, 3 - error + if ( $rc != 1 ) { + &rollupdate_usage; + } + return ( $rc - 1 ); + } + + # + # Build updategroup nodelists + # + my %updategroup; + foreach my $ugline ( @{ $::FILEATTRS{'updategroup'} } ) { + my ( $ugname, $ugval ) = split( /\(/, $ugline ); + $ugval =~ s/\)$//; # remove trailing ')' + @{ $updategroup{$ugname} } = xCAT::NodeRange::noderange($ugval); + if ( xCAT::NodeRange::nodesmissed() ) { + my $rsp; + push @{ $rsp->{data} }, "Error processing stanza line: "; + push @{ $rsp->{data} }, "updategroup=" . $ugline; + push @{ $rsp->{data} }, "Invalid nodes in noderange: " + . 
join( ',', xCAT::NodeRange::nodesmissed() ); + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + } + + foreach my $mgline ( @{ $::FILEATTRS{'mapgroups'} } ) { + my @ugnamelist = xCAT::NameRange::namerange( $mgline, 0 ); + foreach my $ugname (@ugnamelist) { + @{ $updategroup{$ugname} } = xCAT::NodeRange::noderange($ugname); + if ( xCAT::NodeRange::nodesmissed() ) { + my $rsp; + push @{ $rsp->{data} }, "Error processing stanza line: "; + push @{ $rsp->{data} }, "mapgroups=" . $mgline; + push @{ $rsp->{data} }, "Invalid nodes in group $ugname: " + . join( ',', xCAT::NodeRange::nodesmissed() ); + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + } + } + unless (%updategroup) { + my $rsp; + push @{ $rsp->{data} }, +"Error processing stanza input: No updategroup or mapgroups entries found. "; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + + # + # Build and submit scheduler jobs + # + my $scheduler = $::FILEATTRS{'scheduler'}[0]; + $scheduler =~ tr/A-Z/a-z/; + if ( ( !$scheduler ) + || ( $scheduler eq "loadleveler" ) ) + { + $rc = ll_jobs( \%updategroup ); + } + else { + + # TODO: support scheduler plugins here + my $rsp; + push @{ $rsp->{data} }, "Error processing stanza line: "; + push @{ $rsp->{data} }, "scheduler=" . 
$::FILEATTRS{'scheduler'}[0]; + push @{ $rsp->{data} }, "Scheduler not supported"; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + + return $rc; +} + +#---------------------------------------------------------------------------- + +=head3 ll_jobs + + Build and submit LoadLeveler jobs + + Arguments: + Returns: + 0 - OK + 1 - error + Globals: + + Error: + + Example: + + Comments: +=cut + +#----------------------------------------------------------------------------- + +sub ll_jobs { + my $updategroup = shift; + my $rc = 0; + + # + # Load job command file template + # + my $tmpl_file_name = $::FILEATTRS{'jobtemplate'}[0]; + unless ( defined($tmpl_file_name) ) { + my $rsp; + push @{ $rsp->{data} }, + "Error processing stanza input: No jobtemplate entries found. "; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + + my $TMPL_FILE; + unless ( open( $TMPL_FILE, "<", $tmpl_file_name ) ) { + my $rsp; + push @{ $rsp->{data} }, +"Error processing stanza input: jobtemplate file $tmpl_file_name not found. 
"; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + my @lines = <$TMPL_FILE>; + close $TMPL_FILE; + + # Query LL for list of machines and their status + my $cmd = "llstatus -r %n %sca 2>/dev/null"; + my @llstatus = xCAT::Utils->runcmd( $cmd, 0 ); + if ( $::RUNCMD_RC != 0 ) { + my $rsp; + push @{ $rsp->{data} }, "Could not run llstatus command."; + push @{ $rsp->{data} }, @llstatus; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + my %machines; + foreach my $machineline (@llstatus) { + my ( $mlong, $mshort, $mstatus ); + ( $mlong, $mstatus ) = split( /\!/, $machineline ); + ($mshort) = split( /\./, $mlong ); + $machines{$mlong} = { mname => $mlong, mstatus => $mstatus }; + if ( !( $mlong eq $mshort ) ) { + $machines{$mshort} = { mname => $mlong, mstatus => $mstatus }; + } + } + + # + # Generate job command file for each updategroup + # + # Get LL userid + my $lluser = $::FILEATTRS{scheduser}[0]; + unless ( defined($lluser) ) { + my $rsp; + push @{ $rsp->{data} }, + "Error processing stanza input: No scheduser entries found. "; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + my ( $login, $pass, $uid, $gid ); + ( $login, $pass, $uid, $gid ) = getpwnam($lluser); + unless ( defined($uid) ) { + my $rsp; + push @{ $rsp->{data} }, +"Error processing stanza input: scheduser userid $lluser not in passwd file. "; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + my $lljobs_dir = $::FILEATTRS{jobdir}[0]; + unless ( defined($lljobs_dir) ) { + my $rsp; + push @{ $rsp->{data} }, + "Error processing stanza input: No jobdir entries found. 
"; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + unless ( -d $lljobs_dir ) { + unless ( mkpath($lljobs_dir) ) { + my $rsp; + push @{ $rsp->{data} }, "Could not create directory $lljobs_dir"; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + unless ( chown( $uid, $gid, $lljobs_dir ) ) { + my $rsp; + push @{ $rsp->{data} }, + "Could not change owner of directory $lljobs_dir to $lluser"; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + } + + my $run_if_down = $::FILEATTRS{update_if_down}[0]; + $run_if_down =~ tr/[A-Z]/[a-z]/; + if ( $run_if_down eq 'y' ) { $run_if_down = 'yes'; } + + # TODO - need to handle hierarchy here + # one idea: build a node-to-server mapping that gets passed to + # the executable script so that it can figure out dynamically + # which service node to contact based on which node LL selects + # as the master node for the parallel job. + # don't forget to handle service node pools. a couple ideas: + # pass in all service nodes on network and just keep trying to + # connect until we get a response from one of them + # OR do something similar to what we do with installs and + # find who the initial DHCP server for this node was (supposed + # to be stored somewhere on the node -- needs investigation) + my $sitetab = xCAT::Table->new('site'); + my ($tmp) = $sitetab->getAttribs( { 'key' => 'master' }, 'value' ); + my $xcatserver = $tmp->{value}; + ($tmp) = $sitetab->getAttribs( { 'key' => 'xcatiport' }, 'value' ); + my $xcatport = $tmp->{value}; + + my @calldirectly; + ugloop: foreach my $ugname ( keys %{$updategroup} ) { + + # Build substitution strings + my ( $nodelist, $machinelist ); + my $machinecount = 0; + foreach my $node ( @{ $updategroup->{$ugname} } ) { + if ( defined( $machines{$node} ) + && ( $machines{$node}{'mstatus'} eq "1" ) ) + { + $machinelist .= " \"$machines{$node}{'mname'}\""; + $machinecount++; + $nodelist .= ",$node"; + } + elsif ( $run_if_down eq 'yes' ) { + 
$nodelist .= ",$node"; + } + elsif ( $run_if_down eq 'cancel' ) { + my $rsp; + push @{ $rsp->{data} }, +"Node $node is not active in LL and \"update_if_down=cancel\". Update for updategroup $ugname is canceled."; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + ++$rc; + next ugloop; + } + } + if ( defined($nodelist) ) { $nodelist =~ s/^\,//; } + + if ( defined($machinelist) ) { + $machinelist =~ s/^\s+//; + + # Build output file + my @jclines; + foreach my $line (@lines) { + my $jcline = $line; + $jcline =~ s/\[\[NODESET\]\]/$ugname/; + $jcline =~ s/\[\[XNODELIST\]\]/$nodelist/; + $jcline =~ s/\[\[XCATSERVER\]\]/$xcatserver/; + $jcline =~ s/\[\[XCATPORT\]\]/$xcatport/; + $jcline =~ s/\[\[LLMACHINES\]\]/$machinelist/; + $jcline =~ s/\[\[LLCOUNT\]\]/$machinecount/; + push( @jclines, $jcline ); + } + my $lljob_file = $lljobs_dir . "/rollupate_" . $ugname . ".cmd"; + my $JOBFILE; + unless ( open( $JOBFILE, ">$lljob_file" ) ) { + my $rsp; + push @{ $rsp->{data} }, "Could not open file $lljob_file"; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + print $JOBFILE @jclines; + close($JOBFILE); + chown( $uid, $gid, $lljob_file ); + + # Submit LL job + my $cmd = qq~su - $lluser "-c llsubmit $lljob_file"~; + my @llsubmit = xCAT::Utils->runcmd( "$cmd", 0 ); + if ( $::RUNCMD_RC != 0 ) { + my $rsp; + push @{ $rsp->{data} }, "Could not run llsubmit command."; + push @{ $rsp->{data} }, @llsubmit; + xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); + return 1; + } + + my $nltab = xCAT::Table->new('nodelist'); + my @nodes = split( /\,/, $nodelist ); + $nltab->setNodesAttribs( + \@nodes, + { + appstatus => + "ROLLUPDATE-update_job_submitted" + } + ); + } + elsif ( defined($nodelist) ) { + + # No nodes in LL to submit job to -- not able to schedule. + # Call xCAT directly for all other nodes. + # TODO - this will serialize updating the updategroups + # is this okay, or do we want forked child processes? 
+ push @calldirectly, $ugname; + } + } + + if ( scalar(@calldirectly) > 0 ) { + foreach my $ugname (@calldirectly) { + my $nodelist = join( ',', @{ $updategroup->{$ugname} } ); + my $rsp; + push @{ $rsp->{data} }, + "No active LL nodes in update group $ugname"; + push @{ $rsp->{data} }, +"These nodes will be updated now. This will take a few minutes..."; + xCAT::MsgUtils->message( "I", $rsp, $::CALLBACK ); + my $nltab = xCAT::Table->new('nodelist'); + my @nodes = split( /\,/, $nodelist ); + $nltab->setNodesAttribs( + \@nodes, + { + appstatus => + "ROLLUPDATE-update_job_submitted" + } + ); + xCAT::Utils->runxcmd( + { + command => ['rebootnodes'], + _xcat_clienthost => [ $nodes[0] ], + arg => [ "loadleveler", $nodelist ] + }, + $::SUBREQ, + 0 + ); + } + } + + return $rc; +} + +#---------------------------------------------------------------------------- + +=head3 rebootnodes + + Reboot updategroup in response to request from scheduler job + + Arguments: + Returns: + 0 - OK + 1 - error + Globals: + + Error: + + Example: + + Comments: + Note that since this command only gets called from the daemon + through a port request from a node, there is no active callback + to return messages to. Log only critical errors to the system log. 
=cut

#-----------------------------------------------------------------------------

sub rebootnodes {

    # Drain, shut down, and reboot an updategroup in response to a request
    # sent by the scheduler job running on one of the nodes.  Invoked only
    # through the xcatd port protocol, so there is no callback; critical
    # errors are written to syslog ("S" messages) instead.
    my $nodes     = $::request->{node};
    my $command   = $::request->{command}->[0];
    my $scheduler = $::request->{arg}->[0];    # e.g. "loadleveler"
    my $hostlist  = $::request->{arg}->[1];    # comma-delimited node names
    my $rc;

    # Identify (and implicitly validate) the requesting client host;
    # silently ignore the request if it cannot be resolved to a node.
    my $client;
    if ( defined( $::request->{'_xcat_clienthost'} ) ) {
        $client = $::request->{'_xcat_clienthost'}->[0];
    }
    if ( defined($client) ) { ($client) = xCAT::NodeRange::noderange($client) }
    unless ( defined($client) ) {    #Not able to do identify the host in question
        return;
    }

    my @nodes = split( /\,/, $hostlist );

    # make sure nodes are in correct state: every node must have been
    # marked by ll_jobs when the update job was submitted
    my $nltab = xCAT::Table->new('nodelist');
    foreach my $node (@nodes) {
        my ($ent) = $nltab->getAttribs( { node => $node }, "appstatus" );
        unless ( defined($ent)
            && ( $ent->{appstatus} eq "ROLLUPDATE-update_job_submitted" ) )
        {
            # NOTE(review): $rsp is declared but never used here
            my $rsp;
            xCAT::MsgUtils->message(
                "S",
"ROLLUPDATE failure: Node $node appstatus not in valid state for rolling update "
            );
            return 1;
        }
    }

    # remove nodes from LL (drain) so no new work is scheduled on them
    $scheduler =~ tr/[A-Z]/[a-z]/;
    if ( $scheduler eq 'loadleveler' ) {

        # Query LL for list of machines and their status; key the hash by
        # both long and short hostnames so either form matches
        my $cmd = "llstatus -r %n %sca 2>/dev/null";
        my @llstatus = xCAT::Utils->runcmd( $cmd, 0 );
        my %machines;
        foreach my $machineline (@llstatus) {
            my ( $mlong, $mshort, $mstatus );
            ( $mlong, $mstatus ) = split( /\!/, $machineline );
            ($mshort) = split( /\./, $mlong );
            $machines{$mlong} = { mname => $mlong, mstatus => $mstatus };
            if ( !( $mlong eq $mshort ) ) {
                $machines{$mshort} = { mname => $mlong, mstatus => $mstatus };
            }
        }
        foreach my $node (@nodes) {
            if ( defined( $machines{$node} )
                && ( $machines{$node}{'mstatus'} eq "1" ) )
            {
                my $cmd = "llctl -h $node drain";
                xCAT::Utils->runcmd( $cmd, 0 );
            }
        }
    }

    # Shutdown the nodes
    # FUTURE: Replace if we ever develop cluster shutdown function
    my $shutdown_cmd = "shutdown -F &";
    xCAT::Utils->runxcmd(
        {
            command => ['xdsh'],
            node    => \@nodes,
            arg     => [ "-v", $shutdown_cmd ]
        },
        $::SUBREQ,
        0
    );
    sleep(60);    # give shutdown 1 minute
    my $slept   = 60;
    my $alldown = 1;
    my $nodelist = join( ',', @nodes );

    # Poll rpower until every node reports down; after 5 minutes total,
    # force any stragglers off
    do {
        $alldown = 1;
        my $pwrstat_cmd = "rpower $nodelist stat";
        my @pwrstat = xCAT::Utils->runxcmd( $pwrstat_cmd, $::SUBREQ, 0 );
        foreach my $pline (@pwrstat) {

            # NOTE(review): rpower output looks like "node: stat", so
            # $pnode likely retains a trailing ':' -- confirm before it is
            # interpolated into the "rpower $pnode off" command below
            my ( $pnode, $pstat ) = split( /\s+/, $pline );
            if (   ( $pstat eq "Running" )
                || ( $pstat eq "Shutting" )
                || ( $pstat eq "on" ) )
            {

                # give up on shutdown after 5 minutes and force the
                # node off
                if ( $slept >= 300 ) {
                    my $pwroff_cmd = "rpower $pnode off";
                    xCAT::Utils->runxcmd( $pwroff_cmd, $::SUBREQ, 0 );
                }
                else {
                    $alldown = 0;
                    last;
                }
            }
        }

        # If all nodes are not down yet, wait some more
        unless ($alldown) {
            sleep(15);
            $slept += 15;
        }
    } until ($alldown);

    # Run any out-of-band commands here
    # TODO - need to figure what to run
    # maybe use custom directory and run script based on updategroup name
    # or something?

    # reboot the nodes
    # reboot command determined by nodehm power/mgt attributes:
    # hmc-managed nodes use rnetboot, everything else uses rpower
    my $hmtab = xCAT::Table->new('nodehm');
    my @rpower_nodes;
    my @rnetboot_nodes;
    my $hmtab_entries =
      $hmtab->getNodesAttribs( \@nodes, [ 'node', 'mgt', 'power' ] );
    foreach my $node (@nodes) {
        my $pwr = $hmtab_entries->{$node}->[0]->{power};
        unless ( defined($pwr) ) { $pwr = $hmtab_entries->{$node}->[0]->{mgt}; }
        if ( $pwr eq 'hmc' ) {
            push( @rnetboot_nodes, $node );
        }
        else {
            push( @rpower_nodes, $node );
        }
    }

    # my $nltab = xCAT::Table->new('nodelist');
    $nltab->setNodesAttribs( \@nodes, { appstatus => "ROLLUPDATE-rebooting" } );

    # NOTE(review): because of the elsif, a mixed updategroup (both hmc and
    # non-hmc nodes) only ever runs rnetboot; the rpower list would be
    # silently skipped -- confirm whether updategroups are homogeneous
    if ( scalar(@rnetboot_nodes) > 0 ) {
        my $rnb_nodelist = join( ',', @rnetboot_nodes );
        my $cmd = "rnetboot $rnb_nodelist -f";
        if ($::VERBOSE) {
            system("date >> /tmp/rollupdate.out");
            system("echo running command: $cmd >> /tmp/rollupdate.out");
        }
        xCAT::Utils->runxcmd( $cmd, $::SUBREQ, 0 );
    }
    elsif ( scalar(@rpower_nodes) > 0 ) {
        my $rp_nodelist = join( ',', @rpower_nodes );
        my $cmd = "rpower $rp_nodelist boot";
        if ($::VERBOSE) {
            system("date >> /tmp/rollupdate.out");
            system("echo running command: $cmd >> /tmp/rollupdate.out");
        }
        xCAT::Utils->runxcmd( $cmd, $::SUBREQ, 0 );
    }

    return;
}

1;
diff --git a/xCAT-server/share/xcat/rollupdate/ll.tmpl b/xCAT-server/share/xcat/rollupdate/ll.tmpl
new file mode 100644
index 000000000..90da42e01
--- /dev/null
+++ b/xCAT-server/share/xcat/rollupdate/ll.tmpl
@@ -0,0 +1,27 @@
#!/bin/sh
#
# Sample job command template file used to generate cluster rolling update
# jobs that will be submitted to LoadLeveler.
# xCAT will substitute the following when creating the LL job command files:
# [[NODESET]] - nodeset name
# [[XNODELIST]] - comma-delimited list of xcat node names
# [[XCATSERVER]] - xcat server name
# [[XCATPORT]] - xcatd port on server
# [[LLMACHINES]] - the LL machine list.
Quoted, space delimited list of LL +# machine names that were available at the time the xCAT +# rollupdate command was run +# [[LLCOUNT]] - number of machines in LLMACHINES list +# +# @ job_name = rollupdate_[[NODESET]] +# @ job_type = parallel +# @ node_usage = not_shared +# @ restart = no +# @ error = /home/lluser/rollupdate_jobs/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).err +# @ output = /home/lluser/rollupdate_jobs/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).out +# @ executable = /opt/xcat/share/xcat/rollupdate/send_reboot_request +# @ arguments = 1 loadleveler [[XCATSERVER]] [[XCATPORT]] [[XNODELIST]] +# @ node = [[LLCOUNT]] +# @ tasks_per_node = 1 +# @ requirements = (Machine == {[[LLMACHINES]]}) +# @ queue + diff --git a/xCAT-server/share/xcat/rollupdate/rollupdate.input.sample b/xCAT-server/share/xcat/rollupdate/rollupdate.input.sample new file mode 100644 index 000000000..9acf9c2bb --- /dev/null +++ b/xCAT-server/share/xcat/rollupdate/rollupdate.input.sample @@ -0,0 +1,84 @@ +# +# +# Sample stanza file used as input to the rollupdate command +# + +# +#scheduler: The job scheduler used to submit the rolling update jobs +# Currently only LoadLeveler is supported +scheduler=loadleveler + +# +#scheduser: The userid with authority to submit scheduler jobs +# Note: LL does not allow jobs to be submitted by root +scheduser=lluser + +# +#updategroup: A set of nodes to be updated as a single group +# updategroup = name(noderange) +# where "name" is the name to be assigned to the updategroup and +# "noderange" is any valid xCAT noderange syntax (see noderange man page) +#updategroup=ns01(c4lpar201-c4lpar204) +updategroup=ns11(c4lpar211-c4lpar214) + +# +#mapgroups: Many updategroups can also be defined through a +# single statement using nodegroup mappings. +# mapgroups=nodegroup_range +# Where nodegroup_range is processed in the same way xCAT handles node name +# ranges to generate a set of nodegroup names. 
Each nodegroup will be +# mapped to an updategroup with the same name. +# For example, the following will create 10 updategroups from the 10 +# nodegroups named block01 to block10. +#mapgroups=block[01-10] +mapgroups=lpar03-lpar04 +mapgroups=lpar05 + +#TODO -- mutex is not supported yet. This will be ignored! +#mutex: identify updategroups that are mutually exclusive and must not be +# updated at the same time in order to maintain active resources within +# the cluster. Only 1 updategroup will be updated at a time +# mutex=updategroup,updategroup,... +# For example, the update jobs for ns1 and for ns2 will not be allowed to run at +# the same time: +#mutex=ns1,ns2 +# +# Multiple mutually exclusive sets can be specified using updategroup name +# ranges. For example, the following: +#mutex=block[1-3]a,block[1-3]b,block[1-3]c +# would be equivalent to: +#mutex=block1a,block1b,block1c +#mutex=block2a,block2b,block2c +#mutex=block3a,block3b,block3c + +# +#jobtemplate: Scheduler job template file. See sample LoadLeveler file for details +# on how the template will be processed: +jobtemplate=/opt/xcat/share/xcat/rollupdate/ll.tmpl + +# +#jobdir: Directory to write the job command files to +# For LL, this directory needs to be on a filesystem available +# to all nodes +jobdir=/u/lluser/rollupdate_jobs + +# +#update_if_down: If set to "yes", also attempt the update for any node in an +# updategroup that is down or not active/available in the scheduler +# (useful if you have nodes that are not part of your scheduler's cluster). +# If set to "no", any node in an updategroup that is not active in the +# scheduler will be skipped. +# If set to "cancel", if any node in an updategroup is not active, +# the entire updategroup will be skipped. +update_if_down=yes +#update_if_down=no +#update_if_down=cancel + +# +#TODO - Out-of-band operations are not supported yet. This will be ignored.
+#oob_op: Out-of-band command to run on the xCAT management node once the node +# has been shutdown but before it is rebooted. +# You may specify the string $NODELIST if you would like the +# comma-delimited list of xCAT nodenames passed into your command. +# This is where we plan to support operations such as firmware updates. +#oob_op=/u/admin/bin/myfirmwareupdates $NODELIST diff --git a/xCAT-server/share/xcat/rollupdate/send_reboot_request b/xCAT-server/share/xcat/rollupdate/send_reboot_request new file mode 100644 index 000000000..a53c77278 --- /dev/null +++ b/xCAT-server/share/xcat/rollupdate/send_reboot_request @@ -0,0 +1,80 @@ +#!/usr/bin/env perl +# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html + +use IO::Socket; + +#----------------------------------------------------------------------------- + +=head1 send_reboot_request + +This program is run by a job command scheduler such as LoadLeveler +to send a rebootnodes request to the xcatiport so the xcatd daemon +can initiate a shutdown and reboot of the nodes for rolling updates. +The request will be ignored if the nodes are not in the correct state. +See the rollupdate man page and documentation for more information. + + +Syntax: + send_reboot_request <sleeptime> <scheduler> <xcatd_server> <xcatiport> <nodelist> +where: + <sleeptime> is a number in minutes this command will sleep after sending + the request to xcatd. For LL, this will give xCAT time to + drain the schedd on the nodelist. Otherwise, when this job + ends, LL may submit new jobs to the nodes and those jobs will + die when the node is shutdown. Default is 10 minutes. + <scheduler> is the job scheduler plugin to be invoked. 'loadleveler' is + currently the only scheduler supported by xCAT. + <xcatd_server> is the xcatd service node for the node running this script. + <xcatiport> is the xcatiport the xcatd is listening on. + <nodelist> is a comma-delimited list of xCAT nodenames in this update group + that will be updated at the same time.
+ +=cut + +#----------------------------------------------------------------------------- +# Main + +# send a request to the xcatd daemon to request a reboot + +$| = 1; # autoflush stdout +my $hostname = `/usr/bin/hostname -s`; +print "running on $hostname \n"; +system "date"; + +my $sleeptime = 60 * ( shift @ARGV ); +if ( $sleeptime <= 0 ) { $sleeptime = 600; } +my $scheduler = shift @ARGV; +my $xcatd_server = shift @ARGV; +my $port = shift @ARGV; +my $nodelist = shift @ARGV; + +print "opening port to $xcatd_server : $port \n"; +my $remote = IO::Socket::INET->new( + Proto => "tcp", + PeerAddr => $xcatd_server, + PeerPort => $port, +); +unless ($remote) { + print "Cannot connect to host \'$xcatd_server \'\n"; + exit 1; +} +$remote->autoflush(1); +while ( defined( $line = <$remote> ) ) { + chomp $line; + system "date"; + print "read from port: $line\n"; + if ( $line eq "ready" ) { + system "date"; + print "sending: rebootnodes $scheduler $nodelist\n"; + print $remote "rebootnodes $scheduler $nodelist\n"; + } + if ( $line eq "done" ) { last; } +} +close $remote; + +print "sleeping for $sleeptime seconds \n"; +sleep $sleeptime; +print "job is done \n"; +system "date"; +exit 0;