From a52c7166c25f0f3aad3a5e1a81d6dc6f57024131 Mon Sep 17 00:00:00 2001 From: chenglch Date: Wed, 11 Nov 2015 02:19:15 -0500 Subject: [PATCH 1/8] Fork a rflash process for each node Original implementation use thread to deal with the rflash procedure for multiple nodes, but thread is not suggested by perl. This patch aims to replace the thread with process. --- xCAT-server/lib/xcat/plugins/ipmi.pm | 149 ++++++++++++++++----------- 1 file changed, 87 insertions(+), 62 deletions(-) diff --git a/xCAT-server/lib/xcat/plugins/ipmi.pm b/xCAT-server/lib/xcat/plugins/ipmi.pm index 0b01a8a88..4ae15ec55 100644 --- a/xCAT-server/lib/xcat/plugins/ipmi.pm +++ b/xCAT-server/lib/xcat/plugins/ipmi.pm @@ -36,6 +36,7 @@ my $iem_support; my $vpdhash; my %allerrornodes=(); my $global_sessdata; +my %child_pids; my $IPMIXCAT = "/opt/xcat/bin/ipmitool-xcat"; my $NON_BLOCK = 1; @@ -1574,12 +1575,12 @@ sub calc_ipmitool_version { # 0 when no response from bmc #----------------------------------------------------------------# sub check_bmc_status_with_ipmitool { - my $pre_cmd = shift; - my $interval = shift; - my $retry = shift; - my $count = 0; - my $cmd = $pre_cmd." power status"; - while ($count < $retry) { + my $pre_cmd = shift; + my $interval = shift; + my $retry = shift; + my $count = 0; + my $cmd = $pre_cmd." power status"; + while ($count < $retry) { xCAT::Utils->runcmd($cmd, -1); if ($::RUNCMD_RC != 0) { sleep($interval); @@ -1640,7 +1641,6 @@ sub do_firmware_update { $callback,$sessdata->{node},%allerrornodes); return -1; } - # step 2 reset cold $cmd = $pre_cmd." 
mc reset cold"; $output = xCAT::Utils->runcmd($cmd, -1); @@ -1694,14 +1694,22 @@ sub rflash { if ($sessdata->{subcommand} eq 'check') { my %firmware_version; check_firmware_version($sessdata, \%firmware_version); - foreach my $c_id (@{$sessdata->{component_ids}}) { + my $msg=""; + my $i; + for ($i = 0; $i < scalar(@{$sessdata->{component_ids}}); $i++) { + my $c_id = ${$sessdata->{component_ids}}[$i]; my $version = $firmware_version{$c_id}; - my $format_ver = sprintf("%3d.%02x %02X%02X%02X%02X", $version->[0], $version->[1], $version->[2], + my $format_ver = sprintf("%3d.%02x %02X%02X%02X%02X", + $version->[0], $version->[1], $version->[2], $version->[3], $version->[4], $version->[5]); - xCAT::SvrUtils::sendmsg("Node firmware version for component $c_id: $format_ver", - $callback,$sessdata->{node},%allerrornodes); + $msg = $msg.$sessdata->{node}.": ". + "Node firmware version for component $c_id: $format_ver"; + if ( $i != scalar(@{$sessdata->{component_ids}}) -1 ) { + $msg = $msg."\n"; + } } + $callback->({data=>$msg}); return; } return do_firmware_update($sessdata); @@ -1718,20 +1726,74 @@ sub rflash { } } -sub start_rflash_thread { +#----------------------------------------------------------------# +# Running rflash procedure in a child process +# Note (chenglch) If the parent process abort unexpectedly, the +# child process can not be terminated by xcat. +#----------------------------------------------------------------# +sub do_rflash_process { my $node = shift; - # NOTE (chenglch): Actually if multiple client or rest api works on the same node, - # the bmc of the node may not be protected while rflash is running. As xcat may not - # support lock on node level, just require a lock for rflash command for specific node. - my $lock = xCAT::Utils->acquire_lock("rflash_$node", $NON_BLOCK); - if (! 
$lock){ - xCAT::SvrUtils::sendmsg ([1,"rflash is running on $node, please retry after a while"], + my $pid = xCAT::Utils->xfork; + if ( !defined($pid) ) { + xCAT::SvrUtils::sendmsg ([1,"Fork rflash process Error."], $callback,$node,%allerrornodes); return; } - donode($node, @_); - while (xCAT::IPMI->waitforrsp()) { yield }; - xCAT::Utils->release_lock($lock, $NON_BLOCK); + # child + elsif ( $pid == 0 ) { + $SIG{CHLD} = $SIG{INT} = $SIG{TERM} = "DEFAULT"; + # NOTE (chenglch): Actually if multiple client or rest api works on the same node, + # the bmc of the node may not be protected while rflash is running. As xcat may not + # support lock on node level, just require a lock for rflash command for specific node. + my $lock = xCAT::Utils->acquire_lock("rflash_$node", $NON_BLOCK); + if (! $lock){ + xCAT::SvrUtils::sendmsg ([1,"rflash is running on $node, please retry after a while"], + $callback,$node,%allerrornodes); + exit(1); + } + donode($node, @_); + while (xCAT::IPMI->waitforrsp()) { yield }; + xCAT::Utils->release_lock($lock, $NON_BLOCK); + exit(0); + } + # parent + else { + $child_pids{$pid} = $node; + } + return $pid; +} + +sub start_rflash_processes { + my $donargs_ptr = shift; + my @donargs = @{$donargs_ptr}; + my $ipmitimeout = shift; + my $ipmitrys = shift; + my $command = shift; + my %namedargs=@_; + my $extra=$namedargs{-args}; + my @exargs=@$extra; + + $SIG{INT} = $SIG{TERM} = sub { + foreach ( keys %child_pids ) { + kill 2, $_; + } + exit 0; + }; + $SIG{CHLD} = sub { + my $cpid; + while ( ( $cpid = waitpid( -1, WNOHANG ) ) > 0 ) { + if ( $child_pids{$cpid} ) { + delete $child_pids{$cpid}; + } + } + }; + foreach (@donargs) { + do_rflash_process( $_->[0],$_->[1],$_->[2],$_->[3],$_->[4], + $ipmitimeout,$ipmitrys,$command,-args=>\@exargs); + } + while ( ( scalar( keys %child_pids ) ) > 0 ) { + yield; + } } sub fpc_firmup_config { @@ -7544,6 +7606,8 @@ sub process_request { if ($request->{command}->[0] eq "rflash") { my %args_hash; if (!defined($extrargs)) 
{ + $callback->({error=>"No option or hpm file is provided.", + errorcode=>1}); return; } foreach my $opt (@$extrargs) { @@ -7634,21 +7698,10 @@ sub process_request { } } - my $children = 0; - my $sub_fds = new IO::Select; # NOTE (chenglch) rflash for one node need about 5-10 minutes. There is no need to rflash node - # one by one, so parallel thread is used here. + # one by one, fork a process for each node. if ($command eq 'rflash') { - my %thread_group; - # TODO (chenglch) the size of the noderange maybe very large, so many thread is created here. - # Thread pool or limit size is needed. - foreach (@donargs) { - $thread_group{$_->[0]} = threads->new(\&start_rflash_thread, $_->[0],$_->[1],$_->[2],$_->[3],$_->[4], - $ipmitimeout,$ipmitrys,$command,-args=>\@exargs); - } - foreach (@donargs) { - $thread_group{$_->[0]}->join(); - } + start_rflash_processes(\@donargs, $ipmitimeout,$ipmitrys,$command,-args=>\@exargs); } else { foreach (@donargs) { @@ -7664,34 +7717,6 @@ sub process_request { } } while (xCAT::IPMI->waitforrsp()) { yield }; - if (keys %needbladeinv) { - #ok, we have some inventory data that, for now, suggests blade plugin to getdata from blade plugin -# my @bladenodes = keys %needbladeinv; -# $request->{arg}=['mac']; -# $request->{node}=\@bladenodes; -# require xCAT_plugin::blade; -# xCAT_plugin::blade::process_request($request,$callback); - } -####return; -####while ($sub_fds->count > 0 and $children > 0) { -#### my $handlednodes={}; -#### forward_data($callback,$sub_fds,$handlednodes); -#### #update the node status to the nodelist.status table -#### if ($check) { -#### updateNodeStatus($handlednodes, \@allerrornodes); -#### } -####} -#### -#####Make sure they get drained, this probably is overkill but shouldn't hurt -####my $rc=1; -####while ( $rc>0 ) { -#### my $handlednodes={}; -#### $rc=forward_data($callback,$sub_fds,$handlednodes); -#### #update the node status to the nodelist.status table -#### if ($check) { -#### updateNodeStatus($handlednodes, 
\@allerrornodes); -#### } -####} if ($check) { #print "allerrornodes=@allerrornodes\n"; From f21ed4b5ce764d163362e094e88e1f8c0e47ad4c Mon Sep 17 00:00:00 2001 From: Yin Luo Date: Wed, 11 Nov 2015 16:56:14 +0800 Subject: [PATCH 2/8] Update discinfo.pm for support Rhelhpc6.7 Update discinfo.pm for support Rhelhpc6.7(RHEL ComputeNode version). --- perl-xCAT/xCAT/data/discinfo.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/perl-xCAT/xCAT/data/discinfo.pm b/perl-xCAT/xCAT/data/discinfo.pm index d7ecf0bb9..d3cdf5906 100755 --- a/perl-xCAT/xCAT/data/discinfo.pm +++ b/perl-xCAT/xCAT/data/discinfo.pm @@ -87,6 +87,7 @@ require Exporter; "1359576195.413831" => "rhelhpc6.4",#x86_64, RHEL ComputeNode "1384196516.465862" => "rhelhpc6.5",#x86_64, RHEL ComputeNode "1411733344.599861" => "rhelhpc6.6",#x86_64, RHEL ComputeNode + "1435823078.264564" => "rhelhpc6.7",#x86_64, RHEL ComputeNode "1399449226.140088" => "rhelhpc7.0",#x86_64, RHEL ComputeNode "1194015916.783841" => "fedora8", "1194015385.299901" => "fedora8", From 4c363d047a72ce269cf5192516cea95c2af2faba Mon Sep 17 00:00:00 2001 From: ertaozh Date: Sun, 15 Nov 2015 22:10:32 -0500 Subject: [PATCH 3/8] fix issue 352: 2.9.2rspconfig HMC sshcfg does not show disable state even the state is disabled --- perl-xCAT/xCAT/PPCcfg.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/perl-xCAT/xCAT/PPCcfg.pm b/perl-xCAT/xCAT/PPCcfg.pm index 4ee4c1f15..92af8e851 100644 --- a/perl-xCAT/xCAT/PPCcfg.pm +++ b/perl-xCAT/xCAT/PPCcfg.pm @@ -446,6 +446,7 @@ sub sshcfg { ##################################### if ( !defined( $mode )) { my ($keytype, $key_string) = split /\ /, $sshkey; + chomp($key_string); xCAT::MsgUtils->verbose_message($request, "rspconfig :check sshcfg for user:$logon on node:$server."); my $result = xCAT::PPCcli::send_cmd( $exp, "cat $auth" ); my $Rc = shift(@$result); @@ -461,6 +462,7 @@ sub sshcfg { ################################# foreach ( @$result ) { my ($tmp1, $tmp2) = split /\ /, $_; + chomp($tmp2); if ( 
"$tmp2" eq "$key_string" ) { return( [[$server,"enabled",SUCCESS]] ); } From 0590c98e2b0c86bcd255e90c905e359e59ea43f9 Mon Sep 17 00:00:00 2001 From: caomengmeng Date: Mon, 16 Nov 2015 16:09:44 +0800 Subject: [PATCH 4/8] Update post_script.rst --- .../common/deployment/prepostscripts/post_script.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst b/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst index a3163365d..56a733d59 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst @@ -224,7 +224,7 @@ Using the mypostscript template Using the mypostscript template ''''''''''''''''''''''''''''''' -xCAT provides a way for the admin to customize the information that will be provide to the postscripts/postbootscripts when they run on the node. This is done by editing the mypostscript.tmpl file. The attributes that are provided in the shipped mypostscript.tmpl file should not be removed. They are needed by the default xCAT postscripts. +xCAT provides a way for the admin to customize the information that will be provided to the postscripts/postbootscripts when they run on the node. This is done by editing the mypostscript.tmpl file. The attributes that are provided in the shipped mypostscript.tmpl file should not be removed. They are needed by the default xCAT postscripts. The mypostscript.tmpl, is shipped in the /opt/xcat/share/xcat/mypostscript directory. 
From 5c1ded329ac688a4550f92ef1033cbd4cb456a6b Mon Sep 17 00:00:00 2001 From: caomengmeng Date: Mon, 16 Nov 2015 16:12:31 +0800 Subject: [PATCH 5/8] Update post_script.rst --- .../common/deployment/prepostscripts/post_script.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst b/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst index 56a733d59..6ff5ff960 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst @@ -233,9 +233,9 @@ If the admin customizes the mypostscript.tmpl, they should copy the mypostscript site table precreatemypostscripts attribute ''''''''''''''''''''''''''''''''''''''''''' -If the site table precreatemypostscripts attribute is set to 1 or yes, it will instruct xcat at nodeset and updatenode time to query the db once for all of the nodes passed into the command and create the mypostscript file for each node and put them in a directory in $TFTPDIR(for example /tftpboot). The created mypostscript.. file in the /tftpboot/mypostscripts directory will not be regenerated unless another nodeset or updatenode command is run to that node. This should be used when the system definition has stabilized. It saves time on the updatenode or reboot by not regenerating the mypostscript file. +If the site table precreatemypostscripts attribute is set to 1 or yes, it will instruct xCAT at nodeset and updatenode time to query the db once for all of the nodes passed into the command and create the mypostscript file for each node and put them in a directory in $TFTPDIR(for example /tftpboot). The created mypostscript.. 
file in the /tftpboot/mypostscripts directory will not be regenerated unless another nodeset or updatenode command is run to that node. This should be used when the system definition has stabilized. It saves time on the updatenode or reboot by not regenerating the mypostscript file. -If the precreatemyposcripts attribute is yes, and a database change is made or xcat code is upgraded, then you should run a new nodeset or updatenode to regenerate the /tftpboot/mypostscript/mypostscript.. file to pick up the latest database setting. The default for precreatemypostscripts is no/0. +If the precreatemypostscripts attribute is yes, and a database change is made or xCAT code is upgraded, then you should run a new nodeset or updatenode to regenerate the /tftpboot/mypostscript/mypostscript.. file to pick up the latest database setting. The default for precreatemypostscripts is no/0. When you run nodeset or updatenode, it will search the **/install/postscripts/mypostscript.tmpl** first. If the **/install/postscripts/mypostscript.tmpl** exists, it will use that template to generate the mypostscript for each node. Otherwise, it will use **/opt/xcat/share/xcat/mypostscript/mypostscript.tmpl**. 
From f3ae273d4f815a55f69bc630bc70e1a911e28c96 Mon Sep 17 00:00:00 2001 From: caomengmeng Date: Mon, 16 Nov 2015 16:13:56 +0800 Subject: [PATCH 6/8] Update post_script.rst --- .../common/deployment/prepostscripts/post_script.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst b/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst index 6ff5ff960..8393c5d65 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst @@ -245,7 +245,8 @@ Content of the template for mypostscript **The attributes that are defined in the shipped mypostscript.tmpl file** should not be removed. The xCAT default postscripts rely on that information to run successfully. **The following will explain the entries in the mypostscript.tmpl file**. -The SITE_TABLE_ALL_ATTRIBS_EXPORT line in the file directs the code to export all attributes defined in the site table. Note the attributes are not always defined exactly as in the site table to avoid conflict with other table attributes of the same name. For example, the site table master attribute is named SITEMASTER in the generated mypostscript file. :: +The SITE_TABLE_ALL_ATTRIBS_EXPORT line in the file directs the code to export all attributes defined in the site table. +Note: the attributes are not always defined exactly as in the site table to avoid conflict with other table attributes of the same name. For example, the site table master attribute is named SITEMASTER in the generated mypostscript file. 
:: #SITE_TABLE_ALL_ATTRIBS_EXPORT# From ab7a20a081c263ea5990a3e1d3522b83472e7aa8 Mon Sep 17 00:00:00 2001 From: caomengmeng Date: Mon, 16 Nov 2015 16:16:36 +0800 Subject: [PATCH 7/8] Update post_script.rst --- .../common/deployment/prepostscripts/post_script.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst b/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst index 8393c5d65..795414afc 100644 --- a/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst +++ b/docs/source/guides/admin-guides/manage_clusters/common/deployment/prepostscripts/post_script.rst @@ -405,7 +405,7 @@ Kinds of variables in the template VARNAME=#TABLE:tablename:$NODE:attribute# -For example, to get the new updatstatus attribute from the nodelist table: :: +For example, to get the new updatestatus attribute from the nodelist table: :: UPDATESTATUS=#TABLE:nodelist:$NODE:updatestatus# export UPDATESTATUS From 2608c753c1ad6c5fb4c78ed7147850c9bb592a6f Mon Sep 17 00:00:00 2001 From: chenglch Date: Mon, 16 Nov 2015 03:33:25 -0500 Subject: [PATCH 8/8] Fix defect: Error to check the rflash option rflash for ipmi also supports the fpc machines, delete the check constraints for unknown options. --- xCAT-server/lib/xcat/plugins/ipmi.pm | 4 ---- 1 file changed, 4 deletions(-) diff --git a/xCAT-server/lib/xcat/plugins/ipmi.pm b/xCAT-server/lib/xcat/plugins/ipmi.pm index 4ae15ec55..b9dad817e 100644 --- a/xCAT-server/lib/xcat/plugins/ipmi.pm +++ b/xCAT-server/lib/xcat/plugins/ipmi.pm @@ -7625,10 +7625,6 @@ sub process_request { return; } $args_hash{hpm} = $opt; - } else { - $callback->({error=>"Error command: Option $opt is not supported.", - errorcode=>1}); - return; } } if (exists($args_hash{hpm})){