#
#
# Sample stanza file used as input to the rollupdate command
# This sample uses explicit update groups and allows mutual exclusion
# in updates between nodes and other functions.
#
# For an example of simple updates using the updateall feature instead of
# update groups, see the sample file rollupdate_all.input.sample
#
# Unless otherwise noted in the descriptions below, if multiple stanza
# lines are specified for the same keyword, only the FIRST entry will be
# used and all others will be ignored.
#
#

# scheduler: The job scheduler used to submit the rolling update jobs
# Currently only LoadLeveler is supported
scheduler=loadleveler

# scheduser: The userid with authority to submit scheduler jobs
# Note: LL does not allow jobs to be submitted by root
scheduser=loadl

# Scheduler Feature values
# Node feature values that will be changed in the scheduler during the
# update process. These features can be used if users need to run jobs
# on only old nodes or only new nodes, but cannot have the job span both
# old and new nodes due to software incompatibilities.
# oldfeature: This feature value will be removed from the node definition
# in the scheduler after the node has been updated
oldfeature=oldvalue

# newfeature: A new feature value that will be set in the scheduler for each
# node after it has been updated.
newfeature=newvalue
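
# For example, a user who needs a job to run only on nodes that have
# already been updated could request the new feature in the job's
# LoadLeveler job command file. A minimal sketch (the feature name must
# match your newfeature value above):
#   # @ requirements = (Feature == "newvalue")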

#
# updategroup: A set of nodes to be updated as a single group
# updategroup = name(noderange)
# where "name" is the name to be assigned to the updategroup and
# "noderange" is any valid xCAT noderange syntax (see noderange man page)
# You may list multiple updategroup stanzas in this file and all of them will
# be processed.
updategroup=ns01(c4lpar201-c4lpar204)
updategroup=ns11(c4lpar211-c4lpar214)

# mapgroups: Many updategroups can also be defined through a
# single statement using nodegroup mappings.
# mapgroups=nodegroup_range
# Where nodegroup_range is processed in the same way xCAT handles node name
# ranges to generate a set of nodegroup names. Each nodegroup will be
# mapped to an updategroup with the same name.
# You may list multiple mapgroups stanzas in this file and all of them will
# be processed.
# For example, the following will create 10 updategroups from the 10
# nodegroups named block01 to block10.
mapgroups=block[01-10]
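
# Based on the mapping rule above, that single stanza behaves as if an
# explicit updategroup stanza had been written for each nodegroup, along
# the lines of this sketch (assuming block01-block10 are defined xCAT
# nodegroups, and that a group name is itself a valid noderange):
#   updategroup=block01(block01)
#   updategroup=block02(block02)
#   ...
#   updategroup=block10(block10)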

# mutex:
# mutex_count:
# Identify updategroups that are mutually exclusive and must not be
# updated at the same time in order to maintain active resources within
# the cluster.
# If the mutex stanza is immediately followed by a mutex_count stanza,
# up to mutex_count updategroups may be updated at the same time.
# If no mutex_count stanza is specified, only 1 updategroup listed
# in the entry will be updated at a time.
# mutex=updategroup,updategroup,...
# For example, the update jobs for ns01 and for ns11 will not be allowed
# to run at the same time:
mutex=ns01,ns11

# In this example, at most any 2 of the 4 updategroups io1 to io4 may
# be updated at the same time:
mutex=io1,io2,io3,io4
mutex_count=2

# You may list multiple mutex stanzas in this file to identify different
# sets of mutual exclusion.
# Multiple mutually exclusive sets can be specified using updategroup name
# ranges. For example, the following:
#mutex=block[1-3]a,block[1-3]b,block[1-3]c
# would be equivalent to:
#mutex=block1a,block1b,block1c
#mutex=block2a,block2b,block2c
#mutex=block3a,block3b,block3c

# nodegroup_mutex=<nodegroup_name>
# mutex_count=<count>
# (This gets more complicated...) Mutual exclusion for any nodes in
# this xCAT nodegroup. For each updategroup listed above, if any nodes
# in that group are a member of this xCAT nodegroup, add it to the mutex
# entry.
# For example, you specify:
# nodegroup_mutex=IOservers
# Where your xCAT nodegroup is defined as:
# IOservers=n4,n8,n12
# And your updategroups specified above are:
# updategroup=CEC1(n1-n4)
# updategroup=CEC2(n5-n8)
# updategroup=CEC3(n9-n12)
# updategroup=CEC4(n13-n16)
# The following mutex will be created:
# mutex=CEC1,CEC2,CEC3
# With mutex_count working the same as above.
#
# And, to make it even more powerful, you can list an xCAT nodegroup range
# to create multiple nodegroup_mutex stanzas.
# For example, this:
# nodegroup_mutex=block[1-3]IO
# would be equivalent to:
# nodegroup_mutex=block1IO
# nodegroup_mutex=block2IO
# nodegroup_mutex=block3IO

# maxupdates: Maximum number of updategroups that can be updated at one time
# This allows you to ensure you will always have enough computing
# resources in your cluster and that not all nodes will attempt to
# be updated at once.
# A value of "all" specifies that there is no restriction
# maxupdates=16
maxupdates=all

# jobtemplate: Scheduler job template file. See this sample LoadLeveler file
# for details on how the template will be processed:
jobtemplate=/opt/xcat/share/xcat/rollupdate/ll.tmpl
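
# For orientation: rollupdate fills in this template to generate the job
# command files it writes to jobdir (see below). As a rough sketch of the
# format, a LoadLeveler job command file uses standard "# @" keywords
# (the values here are hypothetical and not taken from ll.tmpl):
#   # @ job_name = rollupdate_ns01
#   # @ job_type = serial
#   # @ executable = /bin/sleep
#   # @ arguments = 60
#   # @ queue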

# jobdir: Directory to write the job command files and other data files to.
# For LL, this directory needs to be on a filesystem available to all nodes.
jobdir=/u/loadl/rollupdate_jobs

# reservationcallback: INTERNAL KEYWORD used for development only.
# This is the reservation notify or callback command.
# For LoadLeveler, this script must reside on the LoadLeveler central
# manager and will be called when the reservation for an updategroup
# becomes active.
# The default is:
#reservationcallback=/opt/xcat/bin/runrollupdate
#

# reservationduration: Maximum time to hold a LoadLeveler reservation for
# the update process. This value in minutes should be longer than the
# expected time to shut down, update, and reboot all the nodes in an update
# group. xCAT will release the nodes from the reservation as they come
# back up, and will cancel the reservation when the last node has completed.
reservationduration=15

# update_if_down: If set to "yes", also attempt the update for any node in an
# updategroup that is down or not active/available in the scheduler
# (useful if you have nodes that are not part of your scheduler's cluster).
# If set to "no", any node in an updategroup that is not active in the
# scheduler will be skipped.
# If set to "cancel", the entire updategroup will be skipped if any node
# in the group is not active.
update_if_down=yes
#update_if_down=no
#update_if_down=cancel

# prescript
# prescriptnodes
# (optional) Command to be run on the xCAT management node before issuing
# the shutdown command for the nodes in the updategroup.
# If prescriptnodes is also specified, the command will only be run for
# nodes from the updategroup that are also included in that xCAT
# noderange.
# If prescriptnodes is not specified, the command will be run for all
# nodes in the updategroup.
# For prescript, you may specify the string $NODELIST if you would like the
# comma-delimited list of xCAT nodenames passed into your command.
# This can be used to run operations such as shutting down the global
# filesystem on all the nodes, or moving critical services
# to a backup server for specific nodes.
# Multiple prescript entries or prescript/prescriptnodes pairs of entries may
# be specified. Each command will be run in order.
#
#prescript=/u/admin/bin/shutdownGPFS $NODELIST
#
#prescript=/u/admin/bin/moveGPFSconfigserver $NODELIST
#prescriptnodes=gpfsconfig
#
#prescript=/u/admin/bin/moveLLscheduler $NODELIST
#prescriptnodes=llsched
#
#prescript=/u/admin/bin/moveXCATservicenode $NODELIST
#prescriptnodes=service
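
# As a minimal illustrative sketch of such a prescript (the
# /u/admin/bin/shutdownGPFS path above is only an example), the script
# receives the substituted node list as its first argument:
#   #!/bin/sh
#   # $1 is the comma-delimited node list substituted for $NODELIST
#   nodes=$1
#   # GPFS mmshutdown accepts a comma-delimited node list after -N
#   /usr/lpp/mmfs/bin/mmshutdown -N "$nodes"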

# skipshutdown: Whether a shutdown command should be sent to the nodes.
# Shutdown is required for diskless nodes. For diskfull nodes, simple
# updates may be applied to the nodes through prescripts, and a node
# reboot may not be required.
# If skipshutdown is set to "yes", outofbandcmd scripts will NOT be
# run and bringuporder will be ignored (since no bringup). However,
# bringupstatus/bringupappstatus will still be checked to verify that
# the node update has completed.
# Default is skipshutdown=no.
skipshutdown=no

# NOT IMPLEMENTED YET! FUTURE SUPPORT...
# shutdownorder: (optional) The order that nodes within an updategroup
# will be shut down. The value is any valid xCAT noderange.
# Multiple shutdownorder entries may be specified, and they will be
# processed as entered in this file. If more than one node in the
# updategroup matches a shutdownorder entry, they will be shut down at
# the same time. xCAT will wait until the node is completely shut down
# before proceeding to the next nodes. Any nodes in the updategroup not
# listed will be shut down at the end.
#shutdownorder=compute
#shutdownorder=gpfsio
#shutdownorder=service

# shutdowntimeout: (optional) The number of minutes xCAT should wait for
# an OS shutdown to complete before giving up and issuing a hard power off
# command and continuing with the rolling update process.
shutdowntimeout=5

# outofbandcmd
# outofbandnodes
# (optional) Out-of-band command to run on the xCAT management
# node once the node has been shut down but before it is rebooted.
# If outofbandnodes is also specified, the command will only be run for
# nodes from the updategroup that are also included in that xCAT noderange.
# If outofbandnodes is not specified, the command will be run for all
# nodes in the updategroup.
# You may specify the string $NODELIST if you would like the
# comma-delimited list of xCAT nodenames passed into your command.
# This can be used to run operations such as firmware updates.
# Multiple outofbandcmd entries or outofbandcmd/outofbandnodes pairs of
# entries can be specified. Each command will be run in order.
# outofbandcmd scripts will not be run if "skipshutdown" is set to "yes".
#
#outofbandcmd=/u/admin/bin/myfirmwareupdates $NODELIST
#
#outofbandcmd=/u/admin/bin/IOsubsystemupdates $NODELIST
#outofbandnodes=gpfsio
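
# As an illustration, a hypothetical firmware-update script could pass the
# node list straight to an xCAT command, since a comma-delimited node list
# is itself a valid noderange (rflash options vary by hardware, so treat
# this as a sketch only):
#   #!/bin/sh
#   # $1 is the comma-delimited node list substituted for $NODELIST
#   rflash "$1" -p /install/firmware --activate disruptive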

# bringuporder: (optional) The order that nodes within an updategroup
# will be brought up. The value is any valid xCAT noderange.
# Multiple bringuporder entries may be specified, and they will be processed
# as entered in this file. If more than one node in the updategroup
# matches a bringuporder entry, they will be brought up at the same time.
# xCAT will wait until the node is completely up before proceeding
# to the next set of nodes. Any nodes in the updategroup not listed will be
# brought up at the end.
#bringuporder=service
#bringuporder=gpfsio
#bringuporder=compute

# bringupstatus:
# OR
# bringupappstatus:
# The xCAT database node status or appstatus value that xCAT will check and
# will wait for to determine that the node is up. Once this status is
# reached, xCAT will continue bringing up more nodes (if bringuporder is set)
# and will release this node from the scheduler reservation.
# If both attributes are set, only bringupappstatus will be used.
#bringupappstatus="gpfs=ready"
bringupstatus=booted
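
# To check the values xCAT is tracking for a node, you can query the node
# definition, for example (node name taken from the updategroups above):
#   lsdef c4lpar201 -i status,appstatus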

# bringuptimeout: (optional) The maximum number of minutes xCAT should wait
# after issuing the rpower on command for the nodes to reach bringupstatus
# before giving up. If using bringuporder and this timeout is reached for
# one set of nodes, xCAT will not attempt to bring up any additional nodes.
# The scheduler reservation will be cancelled.
bringuptimeout=10