Rolling Update support for AIX

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@7453 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
mellor 2010-09-14 22:29:41 +00:00
parent 5307871b16
commit 9b6954d30d
10 changed files with 2323 additions and 529 deletions


@@ -5878,5 +5878,139 @@ sub setupAIXconserver
return $rc;
}
#-------------------------------------------------------------------------------
=head3 setAppStatus
Description:
Set an AppStatus value for a specific application in the nodelist
appstatus attribute for a list of nodes
Arguments:
@nodes
$application
$status
Returns:
Return result of call to setNodesAttribs
Globals:
none
Error:
none
Example:
xCAT::Utils->setAppStatus(\@nodes, $application, $status);
Comments:
=cut
#-----------------------------------------------------------------------------
sub setAppStatus
{
my ($class, $nodes_ref, $application, $status) = @_;
my @nodes = @$nodes_ref;
#get current local time to set in appstatustime attribute
my (
$sec, $min, $hour, $mday, $mon,
$year, $wday, $yday, $isdst
)
= localtime(time);
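    # format the timestamp as MM-DD-YYYY HH:MM:SS, e.g. "09-14-2010 22:29:41"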
my $currtime = sprintf("%02d-%02d-%04d %02d:%02d:%02d",
$mon + 1, $mday, $year + 1900,
$hour, $min, $sec);
my $nltab = xCAT::Table->new('nodelist');
my $nodeappstat = $nltab->getNodesAttribs(\@nodes,['appstatus']);
my %new_nodeappstat;
foreach my $node (keys %$nodeappstat) {
my $new_appstat = "";
my $changed = 0;
# Search current appstatus and change if app entry exists
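        # appstatus holds comma-delimited app=status pairs,
        # e.g. "ll=running,gpfs=ready" (hypothetical values)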
my $cur_appstat = $nodeappstat->{$node}->[0]->{appstatus};
if ($cur_appstat) {
my @appstatus_entries = split(/,/,$cur_appstat);
foreach my $appstat (@appstatus_entries) {
my ($app, $stat) = split(/=/,$appstat);
if ($app eq $application) {
$new_appstat .= ",$app=$status";
$changed = 1;
} else {
$new_appstat .= ",$appstat";
}
}
}
# If no app entry exists, add it
if (!$changed){
$new_appstat .= ",$application=$status";
}
$new_appstat =~ s/^,//;
$new_nodeappstat{$node}->{appstatus} = $new_appstat;
$new_nodeappstat{$node}->{appstatustime} = $currtime;
}
return $nltab->setNodesAttribs(\%new_nodeappstat);
}
#-------------------------------------------------------------------------------
=head3 getAppStatus
Description:
Get an AppStatus value for a specific application from the
nodelist appstatus attribute for a list of nodes
Arguments:
@nodes
$application
Returns:
a hashref mapping each node to its application status value
Globals:
none
Error:
none
Example:
my $appstatus = xCAT::Utils->getAppStatus(\@nodes, $application);
my $node1_status = $appstatus->{node1};
Comments:
=cut
#-----------------------------------------------------------------------------
sub getAppStatus
{
my ($class, $nodes_ref, $application) = @_;
my @nodes = @$nodes_ref;
my $nltab = xCAT::Table->new('nodelist');
my $nodeappstat = $nltab->getNodesAttribs(\@nodes,['appstatus']);
my $ret_nodeappstat;
foreach my $node (keys %$nodeappstat) {
my $cur_appstat = $nodeappstat->{$node}->[0]->{appstatus};
my $found = 0;
if ($cur_appstat) {
my @appstatus_entries = split(/,/,$cur_appstat);
foreach my $appstat (@appstatus_entries) {
my ($app, $stat) = split(/=/,$appstat);
if ($app eq $application) {
$ret_nodeappstat->{$node} = $stat;
$found = 1;
}
}
}
# If no app entry exists, return empty
if (!$found){
$ret_nodeappstat->{$node} = "";
}
}
return $ret_nodeappstat;
}
1;
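
A minimal usage sketch of the two routines above (hypothetical node and application names; assumes the xCAT Perl modules are installed and the nodes exist in the nodelist table):

    #!/usr/bin/env perl
    use strict;
    use warnings;
    use xCAT::Utils;

    # Hypothetical nodes and application name.
    my @nodes = ('node1', 'node2');

    # Record in the nodelist table that the 'gpfs' application is ready;
    # this also stamps nodelist.appstatustime with the current time.
    xCAT::Utils->setAppStatus(\@nodes, 'gpfs', 'ready');

    # Read the per-node status back; returns a hashref of node => status.
    my $appstatus = xCAT::Utils->getAppStatus(\@nodes, 'gpfs');
    print "node1: $appstatus->{node1}\n";    # prints "node1: ready"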


@@ -5,22 +5,24 @@ B<rollupdate> - performs cluster rolling update
=head1 SYNOPSIS
B<cat> I<stanza-file> B<|> B<rollupdate> [B<-V> | B<--verbose>]
B<cat> I<stanza-file> B<|> B<rollupdate> [B<-V> | B<--verbose>] [B<-t> | B<--test>]
B<rollupdate> [B<-?> | B<-h> | B<--help> | B<-v> | B<--version>]
=head1 DESCRIPTION
The B<rollupdate> command creates and submits scheduler jobs that will notify xCAT to shut down a group of nodes, run optional out-of-band commands from the xCAT management node, and reboot the nodes. Currently, only LoadLeveler is supported as a job scheduler with B<rollupdate>.
The B<rollupdate> command creates and submits scheduler reservation jobs that will notify xCAT to shut down a group of nodes, run optional out-of-band commands from the xCAT management node, and reboot the nodes. Currently, only LoadLeveler is supported as a job scheduler with B<rollupdate>.
Input to the B<rollupdate> command is passed in as stanza data through STDIN. Information such as the sets of nodes that will be updated, the name of the job scheduler, a template for generating job command files, and other control data are required. See
/opt/xcat/share/xcat/rollupdate/rollupdate.input.sample
and
/opt/xcat/share/xcat/rollupdate/rollupdate_all.input.sample
for stanza keywords, usage, and examples.
The B<rollupdate> command will use the input data to determine each set of nodes that will be managed together as an update group. For each update group, a job scheduler command file is created and submitted. When the group of nodes becomes available and the scheduler runs the job, the job will send a message to the xCAT daemon on the management node to begin the update process for all the nodes in the update group. The nodes will be stopped by the job scheduler (for LoadLeveler, the nodes are drained), an operating system shutdown command will be sent to each node, out-of-band operations can be run on the management node, and the nodes are powered back on.
The B<rollupdate> command will use the input data to determine each set of nodes that will be managed together as an update group. For each update group, a job scheduler command file is created and a reservation request is submitted. When the group of nodes becomes available and the scheduler activates the reservation, the xcatd daemon on the management node will be notified to begin the update process for all the nodes in the update group. If specified, prescripts will be run, an operating system shutdown command will be sent to each node, out-of-band operations can be run on the management node, and the nodes are powered back on.
The B<rollupdate> command assumes that, if the update is to include rebooting stateless nodes to a new operating system image, the image has been created and tested, and that all relevant xCAT commands have been run for the nodes such that the new image will be loaded when xCAT reboots the nodes.
The B<rollupdate> command assumes that, if the update is to include rebooting stateless or statelite nodes to a new operating system image, the image has been created and tested, and that all relevant xCAT commands have been run for the nodes such that the new image will be loaded when xCAT reboots the nodes.
=head1 OPTIONS
@@ -33,7 +35,11 @@ Command Version.
=item B<-V|--verbose>
Display additional progress and error messages.
Display additional progress and error messages. Output is also logged in /var/log/xcat/rollupdate.log.
=item B<-t|--test>
Run the rollupdate command in test mode only to verify the output files that are created. No scheduler reservation requests will be submitted.
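
For example, to verify the job command files that would be generated from a hypothetical stanza file named I<mynodes.stanza> without submitting any reservations, enter:

 cat mynodes.stanza | rollupdate --verbose --test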
=item B<-?|-h|--help>
@@ -75,6 +81,11 @@ enter:
=head1 FILES
/opt/xcat/bin/rollupdate
/opt/xcat/share/xcat/rollupdate/rollupdate.input.sample
/opt/xcat/share/xcat/rollupdate/ll.tmpl
/opt/xcat/share/xcat/rollupdate/rollupdate_all.input.sample
/opt/xcat/share/xcat/rollupdate/llall.tmpl
/var/log/xcat/rollupdate.log
=head1 SEE ALSO

File diff suppressed because it is too large


@@ -327,11 +327,6 @@ if ($inet6support) {
plugin_command({command=>['syncfiles'],_xcat_clienthost=>[$node]},undef,\&build_response);
print $conn "syncfiles done\n";
close($conn);
} elsif ($text =~ /^rebootnodes/) {
my @cmdargs = split(/\s+/,$text);
my $rebootcmd = shift(@cmdargs);
close($conn);
plugin_command({command=>[$rebootcmd],arg=>\@cmdargs,_xcat_clienthost=>[$node]},undef,\&build_response);
} elsif ($text =~ /^setiscsiparms/) {
$text =~ s/^setiscsiparms\s+//;
my $kname;


@@ -1,37 +0,0 @@
#!/bin/sh
# Sample postscript to be added to /install/postscripts and listed
# in the postscripts table; it will set up the node with LL on install
# Setup for LL config and execution
master=$MASTER
# set LoadLeveler paths in /etc/profile
if ! grep "xCAT Customizations" /etc/profile > /dev/null 2>&1 ; then
echo "" >> /etc/profile
echo "# xCAT Customizations " >> /etc/profile
echo "export PATH=\$PATH:/usr/lpp/LoadL/full/bin " >> /etc/profile
echo "export MANPATH=\$MANPATH:/usr/lpp/LoadL/full/man " >> /etc/profile
# set a few other things while we're at it
echo "set -o vi" >> /etc/profile
echo "export HOST=`hostname`" >> /etc/profile
echo "export ENV=/.kshrc" >> /etc/profile
echo "export SECONDS=\"\$(date '+3600*%H+60*%M+%S')\"" >> /etc/profile
echo "typeset -Z2 _h _m _s" >> /etc/profile
echo "_hh=\"(SECONDS/3600)%24\" _mm=\"(SECONDS/60)%60\" _ss=\"SECONDS%60\"" >> /etc/profile
echo "_time='\${_x[((_m=_mm)==(_h=_hh))==(_s=_ss)]}\$_h:\$_m:\$_s'" >> /etc/profile
echo "PS1=\"(\$_time) \`hostname -s\`\"' # '" >> /etc/profile
fi
if ! grep 'loadl' /etc/inittab > /dev/null 2>&1 ; then
echo "loadl:2:once:/usr/lpp/LoadL/full/bin/llctl start > /dev/console 2>&1" >> /etc/inittab
/usr/bin/errupdate -f /usr/lpp/LoadL/full/include/loadl_err.S
/usr/bin/mkdir -p /tmp/var/loadl
/usr/bin/chown -R loadl:loadl /tmp/var/loadl
/usr/bin/su - loadl "-c /usr/lpp/LoadL/full/bin/llinit -local /tmp/var/loadl -release /usr/lpp/LoadL/full -cm $MASTER"
/usr/lpp/LoadL/full/bin/llctl start
fi
logger -t xCAT "LoadL setup"
exit 0


@@ -1,31 +1,24 @@
#!/bin/sh
#
# Sample job command template file used to generate cluster rolling update
# jobs that will be submitted to LoadLeveler.
# job command files that will be used to create reservations in LoadLeveler.
# xCAT will substitute the following when creating the LL job command files:
# [[NODESET]] - nodeset name
# [[XNODELIST]] - comma-delimited list of xcat node names
# [[XCATSERVER]] - xcat server name
# [[XCATPORT]] - xcatd port on server
# [[LLMACHINES]] - the LL machine list. Quoted, space delimited list of LL
# machine names that were available at the time the xCAT
# rollupdate command was run
# [[LLCOUNT]] - number of machines in LLMACHINES list
# [[NODESET]] - the update group name for the nodes in this reservation
# [[JOBDIR]] - the directory specified in the rollupdate input stanza
# jobdir entry
# [[LLHOSTFILE]] - the file generated by the xCAT rollupdate command that
# contains the list of LL machines in this update group
# that were available at the time the command was run.
# [[MUTEXRESOURCES]] - The list of LL resources created by xCAT to handle
# mutual exclusion and maxupdates
#
# @ job_name = rollupdate_[[NODESET]]
# @ job_type = parallel
## Note: we really want node usage to be not_shared, but there
## was a timing bug in LL that sometimes caused a rollupdate job
## not to start. It should be fixed by now.
# @ node_usage = not_shared
# @ restart = no
# @ error = /home/lluser/rollupdate_jobs/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).err
# @ output = /home/lluser/rollupdate_jobs/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).out
# @ executable = /opt/xcat/share/xcat/rollupdate/send_reboot_request
# @ arguments = 10 --verbose loadleveler [[XCATSERVER]] [[XCATPORT]] [[XNODELIST]]
# @ node = [[LLCOUNT]]
# @ tasks_per_node = 1
# @ requirements = (Machine == {[[LLMACHINES]]})
# @ wall_clock_limit = 21:00,20:00
# @ error = [[JOBDIR]]/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).err
# @ output = [[JOBDIR]]/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).out
# @ host_file = [[LLHOSTFILE]]
# @ resources = [[MUTEXRESOURCES]]
# @ queue
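
For illustration, the keywords xCAT might generate from this template for a hypothetical update group ns01 with jobdir set to /u/loadl/rollupdate_jobs (the host file name and resource string below are placeholders; xCAT generates the actual values):

# @ job_name = rollupdate_ns01
# @ error = /u/loadl/rollupdate_jobs/rollupdate_ns01.$(Host).$(Cluster).$(Process).err
# @ output = /u/loadl/rollupdate_jobs/rollupdate_ns01.$(Host).$(Cluster).$(Process).out
# @ host_file = /u/loadl/rollupdate_jobs/ns01.hostlist
# @ resources = mutex_ns01(1)
# @ queue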


@@ -0,0 +1,26 @@
#!/bin/sh
#
# Sample job command template file used to generate cluster rolling update
# jobs that will be submitted to LoadLeveler.
# Use this template with the Rolling Update "updateall" feature.
# It uses only a node count, not a specific host list.
# xCAT will substitute the following when creating the LL job command files:
# [[NODESET]] - the update group name for the nodes in this reservation
# [[JOBDIR]] - the directory specified in the rollupdate input stanza
# jobdir entry
# [[LLCOUNT]] - REQUIRED - used by xCAT to set the number of machines to
# reserve
# [[UPDATEALLFEATURE]] - REQUIRED - used by xCAT to control the rolling update
# [[MUTEXRESOURCES]] - the resources xCAT created for maxupdates
#
# @ job_name = rollupdate_[[NODESET]]
# @ job_type = parallel
# @ node_usage = not_shared
# @ restart = no
# @ error = [[JOBDIR]]/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).err
# @ output = [[JOBDIR]]/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).out
# @ node = [[LLCOUNT]]
# @ requirements = (Feature == "[[UPDATEALLFEATURE]]")
# @ resources = [[MUTEXRESOURCES]]
# @ queue


@@ -1,84 +1,259 @@
#
#
# Sample stanza file used as input to the rollupdate command
# This sample uses explicit update groups and supports mutual exclusion
# between update groups, along with other advanced functions.
#
# For an example of simple updates using the updateall feature instead of
# update groups, see the sample file rollupdate_all.input.sample
#
# Unless otherwise noted in the descriptions below, if multiple stanza
# lines are specified for the same keyword, only the FIRST entry will be
# used and all others will be ignored.
#
#
#
#scheduler: The job scheduler used to submit the rolling update jobs
# Currently only LoadLeveler is supported
# scheduler: The job scheduler used to submit the rolling update jobs
# Currently only LoadLeveler is supported
scheduler=loadleveler
#
#scheduser: The userid with authority to submit scheduler jobs
# Note: LL does not allow jobs to be submitted by root
scheduser=lluser
# scheduser: The userid with authority to submit scheduler jobs
# Note: LL does not allow jobs to be submitted by root
scheduser=loadl
# Scheduler Feature values
# Node feature values that will be changed in the scheduler during the
# update process. These features can be used if users need to run jobs
# on only old nodes or only new nodes, but cannot have the job span both
# old and new nodes due to software incompatibilities.
# oldfeature: This feature value will be removed from the node definition
# in the scheduler after the node has been updated
oldfeature=oldvalue
# newfeature: A new feature value that will be set in the scheduler for each
# node after it has been updated.
newfeature=newvalue
#
#updategroup: A set of nodes to be updated as a single group
# updategroup = name(noderange)
# where "name" is the name to be assigned to the updategroup and
# "noderange" is any valid xCAT noderange syntax (see noderange man page)
#updategroup=ns01(c4lpar201-c4lpar204)
# updategroup: A set of nodes to be updated as a single group
# updategroup = name(noderange)
# where "name" is the name to be assigned to the updategroup and
# "noderange" is any valid xCAT noderange syntax (see noderange man page)
# You may list multiple updategroup stanzas in this file and all of them will
# be processed.
updategroup=ns01(c4lpar201-c4lpar204)
updategroup=ns11(c4lpar211-c4lpar214)
#
#mapgroups: Many updategroups can also be defined through a
# single statement using nodegroup mappings.
# mapgroups=nodegroup_range
# Where nodegroup_range is processed in the same way xCAT handles node name
# ranges to generate a set of nodegroup names. Each nodegroup will be
# mapped to an updategroup with the same name.
# mapgroups: Many updategroups can also be defined through a
# single statement using nodegroup mappings.
# mapgroups=nodegroup_range
# Where nodegroup_range is processed in the same way xCAT handles node name
# ranges to generate a set of nodegroup names. Each nodegroup will be
# mapped to an updategroup with the same name.
# You may list multiple mapgroups stanzas in this file and all of them will
# be processed.
# For example, the following will create 10 updategroups from the 10
# nodegroups named block01 to block10.
#mapgroups=block[01-10]
mapgroups=lpar03-lpar04
mapgroups=lpar05
# nodegroups named block01 to block10.
mapgroups=block[01-10]
#TODO -- mutex is not supported yet. This will be ignored!
#mutex: identify updategroups that are mutually exclusive and must not be
# updated at the same time in order to maintain active resources within
# the cluster. Only 1 updategroup will be updated at a time
# mutex=updategroup,updategroup,...
# For example, the update jobs for ns1 and for ns2 will not be allowed to run at
# the same time:
#mutex=ns1,ns2
#
# mutex: Identify updategroups that are mutually exclusive and must not be
# updated at the same time in order to maintain active resources within
# the cluster. Only 1 updategroup listed in the entry will be updated at
# a time.
# mutex=updategroup,updategroup,...
# For example, the update jobs for ns1 and for ns2 will not be allowed
# to run at the same time:
mutex=ns1,ns2
# You may list multiple mutex stanzas in this file to identify different
# sets of mutual exclusion.
# Multiple mutually exclusive sets can be specified using updategroup name
# ranges. For example, the following:
# ranges. For example, the following:
#mutex=block[1-3]a,block[1-3]b,block[1-3]c
# would be equivalent to:
#mutex=block1a,block1b,block1a
#mutex=block1a,block1b,block1c
#mutex=block2a,block2b,block2c
#mutex=block3a,block3b,block3c
#
#jobtemplate: Scheduler job template file. See sample LoadLeveler file for details
# on how the template will be processed:
# maxupdates: Maximum number of updategroups that can be updated at one time
# This allows you to ensure you will always have enough computing
# resources in your cluster and that not all nodes will attempt to
# be updated at once.
# A value of "all" specifies that there is no restriction
# maxupdates=16
maxupdates=all
# jobtemplate: Scheduler job template file. See this sample LoadLeveler file
# for details on how the template will be processed:
jobtemplate=/opt/xcat/share/xcat/rollupdate/ll.tmpl
#
#jobdir: Directory to write the job command files to
# For LL, this directory needs to be on a filesystem available
# to all nodes
jobdir=/u/lluser/rollupdate_jobs
#jobdir: Directory to write the job command files and other data files to.
# For LL, this directory needs to be on a filesystem available to all nodes.
jobdir=/u/loadl/rollupdate_jobs
#reservationcallback: INTERNAL KEYWORD used for development only.
# This is the reservation notify or callback command.
# For LoadLeveler, this script must reside on the LoadLeveler central
# manager and will be called when the reservation for an updategroup
# becomes active.
# The default is:
#reservationcallback=/opt/xcat/bin/runrollupdate
#
#reservationduration: Maximum time to hold a LoadLeveler reservation for
# the update process. This value in minutes should be longer than the
# expected time to shutdown, update, and reboot all the nodes in an update
# group. xCAT will release the nodes from the reservation as they come
# back up, and will cancel the reservation when the last node has completed.
reservationduration=15
#update_if_down: If set to "yes", also attempt the update for any node in an
# updategroup that is down or not active/available in the scheduler
# (useful if you have nodes that are not part of your scheduler's cluster).
# updategroup that is down or not active/available in the scheduler
# (useful if you have nodes that are not part of your scheduler's cluster).
# If set to "no", any node in an updategroup that is not active in the
# scheduler will be skipped.
# scheduler will be skipped.
# If set to "cancel", if any node in an updategroup is not active,
# the entire updategroup will be skipped.
# the entire updategroup will be skipped.
update_if_down=yes
#update_if_down=no
#update_if_down=cancel
# prescript
# prescriptnodes
# (optional) Command to be run on the xCAT management node before issuing
# the shutdown command for the nodes in the updategroup.
# If prescriptnodes is also specified, the command will only be run for
# nodes from the updategroup that are also included in that xCAT
# noderange.
# If prescriptnodes is not specified, the command will be run for all
# nodes in the updategroup.
# For prescript, you may specify the string $NODELIST if you would like the
# comma-delimited list of xCAT nodenames passed into your command.
# This can be used to run operations such as shutting down the global
# filesystem on all the nodes, or moving critical services
# to a backup server for specific nodes.
# Multiple prescript entries or prescript/prescriptnodes pairs of entries may
# be specified. Each command will be run in order.
#
#TODO - Out-of-band operations are not supported yet. This will be ignored.
#oob_op: Out-of-band command to run on the xCAT management node once the node
# has been shutdown but before it is rebooted.
#prescript=/u/admin/bin/shutdownGPFS $NODELIST
#
#prescript=/u/admin/bin/moveGPFSconfigserver $NODELIST
#prescriptnodes=gpfsconfig
#
#prescript=/u/admin/bin/moveLLscheduler $NODELIST
#prescriptnodes=llsched
#
#prescript=/u/admin/bin/moveXCATservicenode $NODELIST
#prescriptnodes=service
# NOT IMPLEMENTED YET! FUTURE SUPPORT FOR ROLLING UPDATE OF DISKFULL NODES
# shutdownrequired: Whether a shutdown command should be sent to the nodes.
# Shutdown is required for diskless nodes. For diskfull nodes, simple
# updates may be applied to the nodes through prescripts, and a node
# reboot may not be required.
# Default is "yes".
shutdownrequired=yes
# NOT IMPLEMENTED YET! FUTURE SUPPORT...
# shutdownorder: (optional) The order that nodes within an updategroup
# will be shut down. The value is any valid xCAT noderange.
# Multiple shutdownorder entries may be specified, and they will be
# processed as entered in this file. If more than one node in the
# updategroup matches a shutdownorder entry, they will be shut down at
# the same time. xCAT will wait until each node is completely shut down
# before proceeding to the next nodes. Any nodes in the updategroup not
# listed will be shut down at the end.
#shutdownorder=compute
#shutdownorder=gpfsio
#shutdownorder=service
# shutdowntimeout: (optional) The number of minutes xCAT should wait for
# an OS shutdown to complete before giving up and issuing a hard power off
# command and continuing with the rolling update process.
shutdowntimeout=5
# outofbandcmd
# outofbandnodes
# (optional) Out-of-band command to run on the xCAT management
# node once the node has been shut down but before it is rebooted.
# If outofbandnodes is also specified, the command will only be run for
# nodes from the updategroup that are also included in that xCAT noderange.
# If outofbandnodes is not specified, the command will be run for all
# nodes in the updategroup.
# You may specify the string $NODELIST if you would like the
# comma-delimited list of xCAT nodenames passed into your command.
# This is where we plan to support operations such as firmware updates.
#oob_op=/u/admin/bin/myfirmwareupdates $NODELIST
# comma-delimited list of xCAT nodenames passed into your command.
# This can be used to run operations such as firmware updates.
# Multiple outofbandcmd entries or outofbandcmd/outofbandnodes pairs of
# entries can be specified. Each command will be run in order.
#
#outofbandcmd=/u/admin/bin/myfirmwareupdates $NODELIST
#
#outofbandcmd=/u/admin/bin/IOsubsystemupdates $NODELIST
#outofbandnodes=gpfsio
# bringuporder: (optional) The order that nodes within an updategroup
# will be brought up. The value is any valid xCAT noderange.
# Multiple bringuporder entries may be specified, and they will be processed
# as entered in this file. If more than one node in the updategroup
# matches a bringuporder entry, they will be brought up at the same time.
# xCAT will wait until the node is completely up before proceeding
# to the next set of nodes. Any nodes in the updategroup not listed will be
# brought up at the end.
#bringuporder=service
#bringuporder=gpfsio
#bringuporder=compute
# bringupstatus:
# OR
# bringupappstatus:
# The xCAT database node status or appstatus value that xCAT will check and
# will wait for to determine that the node is up. Once this status is
# reached, xCAT will continue bringing up more nodes (if bringuporder is set)
# and will release this node from the scheduler reservation.
# If both attributes are set, only bringupappstatus will be used.
#bringupappstatus="gpfs=ready"
bringupstatus=booted
# bringuptimeout: (optional) The maximum number of minutes xCAT should wait
# after issuing the rpower on command for the nodes to reach bringupstatus
# before giving up. If bringuporder is used and this timeout is reached for
# one set of nodes, xCAT will not attempt to bring up any additional nodes.
# The scheduler reservation will be cancelled.
bringuptimeout=10


@@ -0,0 +1,206 @@
#
#
# Sample stanza file used as input to the rollupdate command
# This sample assumes that you have a simple set of updates that can
# be applied to all nodes in any order. Only the list of all nodes needs
# to be specified. You do not need to specify individual update groups.
#
# For more complex updates, see the sample file rollupdate.input.sample
#
# Unless otherwise noted in the descriptions below, if multiple stanza
# lines are specified for the same keyword, only the FIRST entry will be
# used and all others will be ignored.
#
#
# scheduler: The job scheduler used to submit the rolling update jobs
# Currently only LoadLeveler is supported
scheduler=loadleveler
# scheduser: The userid with authority to submit scheduler jobs
# Note: LL does not allow jobs to be submitted by root
scheduser=loadl
# updateall: Allows you to apply updates to all nodes without specifying
# specific update groups. This should be used for simple compute node
# updates that have no special dependencies on other nodes and update order
# is not important. Only those nodes that are currently active
# in the scheduler will be updated.
# For LoadLeveler reservation jobs, you can control which nodes will be
# updated by specifying different machine requirements, classes, or other
# controls that LL allows in the LL job template that you provide.
# If updateall is used, the following entries MUST be specified in this file:
# updateall_nodes
# updateall_numperupdate
# jobtemplate
# jobdir
# If updateall is used, the following entries can also be used:
# maxupdates
# oldfeature
# newfeature
# reservationduration
# update_if_down
# prescript (but NO prescriptnodes)
# shutdowntimeout
# outofbandcmd (but NO outofbandnodes)
# bringupstatus,bringupappstatus
# bringuptimeout
# All other entries in this file will be ignored
updateall=yes
# updateall_nodes: The xCAT noderange of the nodes to be updated.
updateall_nodes=compute
# updateall_numperupdate: The number of nodes that will be reserved at one time
# in the scheduler and updated together. The smaller the number, the more
# scheduler reservation jobs that will be submitted.
# NOTE: LoadLeveler performance decreases with large numbers of
# reservations. Do not set this value so low that you will exceed
# the maximum number of reservations allowed for your cluster or
# that you will degrade LL performance for your production jobs.
updateall_numperupdate=3
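# For illustration (hypothetical numbers): if updateall_nodes matched 10
# nodes, updateall_numperupdate=3 would result in four reservation jobs
# covering 3, 3, 3, and 1 nodes respectively.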
# Scheduler Feature values
# Node feature values that will be changed in the scheduler during the
# update process. These features can be used if users need to run jobs
# on only old nodes or only new nodes, but cannot have the job span both
# old and new nodes due to software incompatibilities.
# oldfeature: This feature value will be removed from the node definition
# in the scheduler after the node has been updated
oldfeature=oldvalue
# newfeature: A new feature value that will be set in the scheduler for each
# node after it has been updated.
newfeature=newvalue
# maxupdates: Maximum number of updategroups that can be updated at one time
# This allows you to ensure you will always have enough computing
# resources in your cluster and that not all nodes will attempt to
# be updated at once.
# A value of "all" specifies that there is no restriction
#maxupdates=all
maxupdates=16
#jobtemplate: Scheduler job template file. See this sample LoadLeveler file
# for details on how the template will be processed:
jobtemplate=/opt/xcat/share/xcat/rollupdate/llall.tmpl
#jobdir: Directory to write the job command files to
# For LL, this directory needs to be on a filesystem available to all nodes.
jobdir=/u/loadl/rollupdate_jobs
#reservationcallback: INTERNAL KEYWORD used for development only.
# This is the reservation notify or callback command.
# For LoadLeveler, this script must reside on the LoadLeveler central
# manager and will be called when the reservation for an updategroup
# becomes active.
# The default is:
#reservationcallback=/opt/xcat/bin/runrollupdate
#
#reservationduration: Maximum time to hold a LoadLeveler reservation for
# the update process. This value in minutes should be longer than the
# expected time to shutdown, update, and reboot all the nodes in an update
# group. xCAT will release the nodes from the reservation as they come
# back up, and will cancel the reservation when the last node has completed.
reservationduration=15
#update_if_down: If set to "yes", also attempt the update for any node in an
# updategroup that is down or not active/available in the scheduler
# (useful if you have nodes that are not part of your scheduler's cluster).
# If set to "no", any node in an updategroup that is not active in the
# scheduler will be skipped.
# If set to "cancel", if any node in an updategroup is not active,
# the entire updategroup will be skipped.
# NOTE: For updateall operation, update_if_down must be "no" or "cancel".
# If set to "yes", "cancel" will be assumed.
#update_if_down=yes
update_if_down=no
#update_if_down=cancel
# prescript
# (optional) Command to be run on the xCAT management node before issuing
# the shutdown command for the nodes in the updategroup.
# NOTE: For updateall operation, prescriptnodes are not supported.
# Prescripts will be run for all nodes.
# You may specify the string $NODELIST if you would like the
# comma-delimited list of xCAT nodenames passed into your command.
# Multiple prescript entries can be specified. Each command will be run in
# order.
#prescript=/u/admin/bin/shutdownGPFS $NODELIST
# NOT IMPLEMENTED YET! FUTURE SUPPORT FOR ROLLING UPDATE OF DISKFULL NODES
# shutdownrequired: Whether a shutdown command should be sent to the nodes.
# Shutdown is required for diskless nodes. For diskfull nodes, simple
# updates may be applied to the nodes through prescripts, and a node
# reboot may not be required.
# Default is "yes".
shutdownrequired=yes
# shutdowntimeout: (optional) The number of minutes xCAT should wait for
# an OS shutdown to complete before giving up and issuing a hard power off
# command and continuing with the rolling update process.
shutdowntimeout=5
# outofbandcmd
# (optional) Out-of-band command to run on the xCAT management
# node once the node has been shut down but before it is rebooted.
# NOTE: For updateall operation, outofbandnodes are not supported.
# Out-of-band commands will be run for all nodes.
# You may specify the string $NODELIST if you would like the
# comma-delimited list of xCAT nodenames passed into your command.
# Multiple outofbandcmd (with optional outofbandnodes) entries can be
# specified. Each command will be run in order.
#outofbandcmd=/u/admin/bin/myfirmwareupdates $NODELIST
# bringupstatus:
# OR
# bringupappstatus:
# The xCAT database node status or appstatus value that xCAT will check and
# will wait for to determine that the node is up. Once this status is
# reached, xCAT will continue bringing up more nodes (if bringuporder is set)
# and will release this node from the scheduler reservation.
# If both attributes are set, only bringupappstatus will be used.
#bringupappstatus="gpfs=ready"
bringupstatus=booted
# bringuptimeout: (optional) The maximum number of minutes xCAT should wait
# after issuing the rpower on command for the nodes to reach bringupstatus
# before giving up. If bringuporder is used and this timeout is reached for
# one set of nodes, xCAT will not attempt to bring up any additional nodes.
# The scheduler reservation will be cancelled.
bringuptimeout=10


@@ -1,88 +0,0 @@
#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
use strict;
use warnings;
use IO::Socket;
#-----------------------------------------------------------------------------
=head1 send_reboot_request
This program is run by a job command scheduler such as LoadLeveler
to send a rebootnodes request to the xcatiport so the xcatd daemon
can initiate a shutdown and reboot of the nodes for rolling updates.
The request will be ignored if the nodes are not in the correct state.
See the rollupdate man page and documentation for more information.
Syntax:
send_reboot_request <waittime> [--verbose|-V] <scheduler> <xcatserver> <xcatport> <nodelist>
where:
<waittime> is the number of minutes this command will sleep after sending
the request to xcatd. For LL, this will give xCAT time to
drain the schedd on the nodelist. Otherwise, when this job
ends, LL may submit new jobs to the nodes and those jobs will
die when the node is shutdown. Default is 10 minutes.
[--verbose|-V] is an optional flag which, if set, tells the
xcatd daemon to log status messages in the log file
/var/log/xcat/rollupdate.log on the xCAT management node.
<scheduler> is the job scheduler plugin to be invoked. 'loadleveler' is
currently the only scheduler supported by xCAT.
<xcatserver> is the xcatd service node for the node running this script.
<xcatport> is the xcatiport the xcatd is listening on.
<nodelist> is a comma-delimited list of xCAT nodenames in this update group
that will be updated at the same time.
=cut
#-----------------------------------------------------------------------------
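# Example invocation (hypothetical host and node names): a LoadLeveler job
# step generated from the ll.tmpl template might run:
#   send_reboot_request 10 --verbose loadleveler xcatmn1 3002 c4lpar201,c4lpar202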
# Main
# send a request to the xcatd daemon to request a reboot
$| = 1; # autoflush stdout
chomp( my $hostname = `/usr/bin/hostname -s` );    # strip trailing newline
print "running on $hostname \n";
system "date";
my $sleeptime = 60 * ( shift(@ARGV) || 0 );
if ( $sleeptime <= 0 ) { $sleeptime = 600; }    # default: 10 minutes
my $scheduler = shift @ARGV;
my $verbose = "";
if ( ($scheduler eq '--verbose') || ($scheduler eq '-V') ) {
$verbose = $scheduler;
$scheduler = shift @ARGV;
}
my $xcatd_server = shift @ARGV;
my $port = shift @ARGV;
my $nodelist = shift @ARGV;
print "opening port to $xcatd_server : $port \n";
my $remote = IO::Socket::INET->new(
Proto => "tcp",
PeerAddr => $xcatd_server,
PeerPort => $port,
);
unless ($remote) {
print "Cannot connect to host \'$xcatd_server \'\n";
exit 1;
}
$remote->autoflush(1);
while ( defined( my $line = <$remote> ) ) {
chomp $line;
system "date";
print "read from port: $line\n";
if ( $line eq "ready" ) {
system "date";
print "sending: rebootnodes $verbose $scheduler $nodelist\n";
print $remote "rebootnodes $verbose $scheduler $nodelist\n";
}
if ( $line eq "done" ) { last; }
}
close $remote;
print "sleeping for $sleeptime seconds \n";
sleep $sleeptime;
print "job is done \n";
system "date";
exit 0;