From 17b94163f0b092b9b66bcb334ab2258ab8f806b1 Mon Sep 17 00:00:00 2001 From: mellor Date: Mon, 5 Jan 2009 18:02:00 +0000 Subject: [PATCH] minor changes for demo git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2559 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- xCAT-server/lib/xcat/plugins/rollupdate.pm | 33 ++++++++++++++-------- xCAT-server/share/xcat/rollupdate/ll.tmpl | 6 +++- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/xCAT-server/lib/xcat/plugins/rollupdate.pm b/xCAT-server/lib/xcat/plugins/rollupdate.pm index cc7906f54..d11c4b897 100644 --- a/xCAT-server/lib/xcat/plugins/rollupdate.pm +++ b/xCAT-server/lib/xcat/plugins/rollupdate.pm @@ -654,6 +654,18 @@ sub ll_jobs { close($JOBFILE); chown( $uid, $gid, $lljob_file ); + # Need to change status before actually submitting LL jobs + # If LL jobs happen to run right away, the update code checking + # for the status may run before we've had a chance to actually update it + my $nltab = xCAT::Table->new('nodelist'); + my @nodes = split( /\,/, $nodelist ); + $nltab->setNodesAttribs( + \@nodes, + { + appstatus => + "ROLLUPDATE-update_job_submitted" + } + ); # Submit LL job my $cmd = qq~su - $lluser "-c llsubmit $lljob_file"~; if ($::VERBOSE) { @@ -669,16 +681,6 @@ sub ll_jobs { xCAT::MsgUtils->message( "E", $rsp, $::CALLBACK ); return 1; } - - my $nltab = xCAT::Table->new('nodelist'); - my @nodes = split( /\,/, $nodelist ); - $nltab->setNodesAttribs( - \@nodes, - { - appstatus => - "ROLLUPDATE-update_job_submitted" - } - ); } elsif ( defined($nodelist) ) { @@ -826,6 +828,9 @@ sub rebootnodes { } } + # my $nltab = xCAT::Table->new('nodelist'); + $nltab->setNodesAttribs( \@nodes, { appstatus => "ROLLUPDATE-shutting_down" } ); + # remove nodes from LL $scheduler =~ tr/[A-Z]/[a-z]/; if ( $scheduler eq 'loadleveler' ) { @@ -852,7 +857,8 @@ sub rebootnodes { if ( defined( $machines{$node} ) && ( $machines{$node}{'mstatus'} eq "1" ) ) { - my $cmd = "llctl -h $node drain"; + #my $cmd = "llctl -h 
$node drain"; + my $cmd = "llctl -h $node flush startd"; if ($::VERBOSE) { open (RULOG, ">>$::LOGDIR/$::LOGFILE"); print RULOG localtime()." Running command \'$cmd\'\n"; @@ -861,6 +867,8 @@ sub rebootnodes { xCAT::Utils->runcmd( $cmd, 0 ); } } + # give LL a chance to catch up + sleep 15; } # Shutdown the nodes @@ -951,7 +959,8 @@ sub rebootnodes { $nltab->setNodesAttribs( \@nodes, { appstatus => "ROLLUPDATE-rebooting" } ); if ( scalar(@rnetboot_nodes) > 0 ) { my $rnb_nodelist = join( ',', @rnetboot_nodes ); - my $cmd = "rnetboot $rnb_nodelist -f"; + # my $cmd = "rnetboot $rnb_nodelist -f"; + my $cmd = "rpower $rnb_nodelist on"; if ($::VERBOSE) { open (RULOG, ">>$::LOGDIR/$::LOGFILE"); print RULOG localtime()." Running command \'$cmd\' \n"; diff --git a/xCAT-server/share/xcat/rollupdate/ll.tmpl b/xCAT-server/share/xcat/rollupdate/ll.tmpl index ad5373dbf..73f24fd4c 100644 --- a/xCAT-server/share/xcat/rollupdate/ll.tmpl +++ b/xCAT-server/share/xcat/rollupdate/ll.tmpl @@ -14,14 +14,18 @@ # # @ job_name = rollupdate_[[NODESET]] # @ job_type = parallel +## Note: really want node usage to be not shared, but there +## was a timing bug in LL that sometimes caused a rollupdate job to +## not start. Should be fixed by now. # @ node_usage = not_shared # @ restart = no # @ error = /home/lluser/rollupdate_jobs/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).err # @ output = /home/lluser/rollupdate_jobs/rollupdate_[[NODESET]].$(Host).$(Cluster).$(Process).out # @ executable = /opt/xcat/share/xcat/rollupdate/send_reboot_request -# @ arguments = 5 loadleveler [[XCATSERVER]] [[XCATPORT]] [[XNODELIST]] +# @ arguments = 10 --verbose loadleveler [[XCATSERVER]] [[XCATPORT]] [[XNODELIST]] # @ node = [[LLCOUNT]] # @ tasks_per_node = 1 # @ requirements = (Machine == {[[LLMACHINES]]}) +# @ wall_clock_limit = 21:00,20:00 # @ queue