From 9b4d5e973951016606d78202c1c809f3a229c05c Mon Sep 17 00:00:00 2001 From: nott Date: Sat, 19 May 2012 20:38:06 +0000 Subject: [PATCH] hasn- fix statelite.table and client_data issues git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@12824 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- xCAT-server/lib/xcat/plugins/aixinstall.pm | 232 +++++++++++++++++---- xCAT-server/lib/xcat/plugins/snmove.pm | 69 ++++-- 2 files changed, 239 insertions(+), 62 deletions(-) diff --git a/xCAT-server/lib/xcat/plugins/aixinstall.pm b/xCAT-server/lib/xcat/plugins/aixinstall.pm index 9a42634fd..0c5cdf2fe 100644 --- a/xCAT-server/lib/xcat/plugins/aixinstall.pm +++ b/xCAT-server/lib/xcat/plugins/aixinstall.pm @@ -32,6 +32,8 @@ use strict; use Socket; use File::Path; +use Fcntl qw/:flock/; + # options can be bundled up like -vV Getopt::Long::Configure("bundling"); $Getopt::Long::ignorecase = 0; @@ -8288,21 +8290,34 @@ sub prenimnodeset $moveit++; } } - if (!$::SKIPSYNC) { - # do a re-sync - my $scmd = "nim -Fo sync_roots $imghash{$i}{spot}"; - my $output = xCAT::InstUtils->xcmd($callback, $subreq, "xdsh", $nimprime, $scmd, 0); - if ($::RUNCMD_RC != 0) - { - my $rsp; - push @{$rsp->{data}}, "Could not update $imghash{$i}{shared_root}.\n"; - if ($::VERBOSE) - { - push @{$rsp->{data}}, "$output"; - } - xCAT::MsgUtils->message("E", $rsp, $callback); - } - } + if (!$::SKIPSYNC) { + + # do a re-sync + # if it's allocated then don't update it + my $alloc_count = xCAT::InstUtils->get_nim_attr_val($imghash{$i}{shared_root}, "alloc_count", $callback, "", $subreq); + if (defined($alloc_count) && ($alloc_count != 0)) + { + my $rsp; + push @{$rsp->{data}}, "The resource named \'$imghash{$i}{shared_root}\' is currently allocated. It will not be re-synchronized.\n"; + xCAT::MsgUtils->message("I", $rsp, $callback); + } + else + { + + my $scmd = "nim -Fo sync_roots $imghash{$i}{spot}"; + my $output = xCAT::InstUtils->xcmd($callback, $subreq, "xdsh", $nimprime, $scmd, 0); + if ($::RUNCMD_RC != 0) + { + my $rsp; + push @{$rsp->{data}}, "Could not update $imghash{$i}{shared_root}.\n"; + if ($::VERBOSE) + { + push @{$rsp->{data}}, "$output"; + } + xCAT::MsgUtils->message("E", $rsp, $callback); + } + } + } if ($moveit) { # copy back the .client data files @@ -9295,7 +9310,7 @@ sub doSFScopy my $service = "xcat"; if (\@nlist) { - $sn = xCAT::Utils->getSNformattedhash(\@nlist, $service, "MN"); + $snode = xCAT::Utils->getSNformattedhash(\@nlist, $service, "MN"); } foreach my $sn (keys %$snode) { foreach my $img (@imagenames) { @@ -9307,7 +9322,10 @@ sub doSFScopy # don't copy dump or paging if ( ($restype eq 'dump') || ($restype eq 'paging') ) { - push(@dontcopy, $imghash{$img}{$restype}); + if (!grep(/^$imghash{$img}{$restype}$/, @dontcopy)) + { + push(@dontcopy, $imghash{$img}{$restype}); + } next; } @@ -9316,20 +9334,15 @@ sub doSFScopy if ( ($nimtype ne 'standalone') && ($restype eq 'lpp_source')) { # don't copy lpp_source for diskless/dataless nodes - push(@dontcopy, $imghash{$img}{'lpp_source'}); + if (!grep(/^$imghash{$img}{'lpp_source'}$/, @dontcopy)) + { + push(@dontcopy, $imghash{$img}{'lpp_source'}); + } next; } - if ( ($nimtype ne 'standalone') && ($restype eq '')) - { - # don't copy lpp_source for diskless/dataless nodes - push(@dontcopy, $imghash{$img}{'lpp_source'}); - next; - } - foreach my $res (split /,/, $imghash{$img}{$restype}) { - if (grep (/^$res$/, @dontcopy)) { next; } @@ -9339,7 +9352,10 @@ sub doSFScopy if (defined($alloc_count) && ($alloc_count != 0)) { # if it's allocated then don't copy it - push(@dontcopy, $res); + if (!grep(/^$res$/, @dontcopy)) + { + push(@dontcopy, $res); + } my $rsp; push @{$rsp->{data}}, "NIM resource $res is currently allocated on service node $sn and will not be re-copied to the service nodes.\n"; @@ -9381,6 +9397,7 @@ sub doSFScopy # if the resources need to be copied my %resinfo; + if (!grep(/^$res$/, @dontcopy)) { # copy appropriate files to the SN @@ -10178,6 +10195,42 @@ sub mkdsklsnode my $time = `date | cut -f5 -d' '`; chomp $time; + # + # wait for shared root lock - need to be sure we don't change it + # while another SN is in the process of def and backup + # + + # see if the shared_root is being modified + my $origloc; + my $SRlock; + my $locked=0; + my $lockfile; + if ($imagehash{$image_name}{shared_root} ) { + # get the shared_root location + $origloc = xCAT::InstUtils->get_nim_attr_val($imagehash{$image_name}{shared_root}, 'location', $callback, $Sname); + + # see if this is a shared filesystem environment + my $sitetab = xCAT::Table->new('site'); + my ($tmp) = $sitetab->getAttribs({'key' => 'sharedinstall'}, 'value'); + my $sharedinstall = $tmp->{value}; + $sitetab->close; + if (!$sharedinstall) { + $sharedinstall="no"; + } + + chomp $sharedinstall; + + # try to get a lock if this is shared_root and using + # a shared filesystem + if ($origloc && ($sharedinstall eq "sns")) { + + $lockfile = "$origloc/lockfile"; + open($SRlock, "<", $lockfile); + flock($SRlock,LOCK_EX); + $locked++; + } + } + my $rsp; push @{$rsp->{data}}, "$Sname: Initializing NIM machine \'$nim_name\'. \n"; xCAT::MsgUtils->message("I", $rsp, $callback); @@ -10197,7 +10250,13 @@ sub mkdsklsnode push(@nodesfailed, $node); next; } - } + + if ($locked) { + flock($SRlock,LOCK_UN); + close($SRlock); + } + + } # end doinit # Update /tftpboot/nodeip.info to export the variable BASECUST_REMOVAL # then during the network boot, rc.dd_boot script will check this variable @@ -10732,23 +10791,53 @@ sub mkdsklsnode my $snbk = $Sname . "_" . $i; my $bkloc = "$loc/$snbk/.client_data"; - my $fcmd; - if (-d $bkloc) + my $mkcmd; + if (! -d $bkloc) { - # if backup exist then rm contents - $fcmd = qq~/usr/bin/rm $bkloc/* ; ~; - } else { # else create dir - $fcmd=qq~/usr/bin/mkdir -m 644 -p $bkloc ; ~; + $mkcmd=qq~/usr/bin/mkdir -m 644 -p $bkloc ~; + my $output = xCAT::Utils->runcmd("$mkcmd", -1); + if ($::RUNCMD_RC != 0) + { + my $rsp; + push @{$rsp->{data}}, "Could not create $bkloc\n"; + xCAT::MsgUtils->message("E", $rsp, $callback); + } } - my $ccmd=qq~$fcmd /usr/bin/cp -p -r $cdloc/* $bkloc~; - my $output = xCAT::Utils->runcmd("$ccmd", -1); + # should only backup files for the specific nodes + + # get list of files from $cdloc dir + my $rcmd = qq~/usr/bin/ls $cdloc 2>/dev/null~; + my @rlist = xCAT::Utils->runcmd("$rcmd", -1); if ($::RUNCMD_RC != 0) { my $rsp; - push @{$rsp->{data}}, "Could not back up $cdloc on $Sname \n"; + push @{$rsp->{data}}, "Could not list contents of $cdloc.\n"; xCAT::MsgUtils->message("E", $rsp, $callback); + $error++; + } + + foreach my $nd (@nodelist) { + $nd =~ s/\..*$//; + + # for each file in $cdloc + my $filestring = ""; + foreach my $f (@rlist) { + # if file contains node name then copy it + if ($f =~ /$nd/) { + $filestring .="$cdloc/$f "; + } + } + my $ccmd=qq~/usr/bin/cp -p -r $filestring $bkloc 2>/dev/null~; + my $output = xCAT::Utils->runcmd("$ccmd", -1); + if ($::RUNCMD_RC != 0) + { + my $rsp; + push @{$rsp->{data}}, "Could not copy files to $bkloc. \n"; + xCAT::MsgUtils->message("E", $rsp, $callback); + $error++; + } } } } @@ -11670,10 +11759,24 @@ sub make_SN_resource my $moveit = 0; my $origloc; my $origlocbak; + my $lockfile; + my $SRlock; + my $locked; if ( ($::DEFONLY || ($sharedinstall eq "sns")) && ( $restype eq "shared_root")) { + + $origloc = $lochash{$imghash{$image}{$restype}}; - $origlocbak = "$origloc.bak"; - # ex. /install/nim/shared_root/71Bdskls_shared_root + $origlocbak = "$origloc.bak"; + # ex. /install/nim/shared_root/71Bdskls_shared_root + # + # need to set a lock so that some other SN doesn't + # modify anything while we're doing this + # + $lockfile = "$origloc/lockfile"; + open($SRlock, "<", $lockfile); + flock($SRlock,LOCK_EX); + $locked++; + if (-d $origloc) { my $mvcmd = qq~/usr/sbin/mvdir $origloc $origlocbak~; my $output = xCAT::Utils->runcmd("$mvcmd", -1); @@ -11695,6 +11798,10 @@ sub make_SN_resource ) != 0 ) { + if ( $locked) { + flock($SRlock,LOCK_UN); + close($SRlock); + } next; } @@ -11720,6 +11827,11 @@ sub make_SN_resource xCAT::MsgUtils->message("E", $rsp, $callback); } } + + if ( $locked) { + flock($lockfile,LOCK_UN); + close($lockfile); + } } # only make lpp_source for standalone type images if ( ($restype eq "lpp_source") @@ -12248,13 +12360,43 @@ sub rmdsklsnode $nodename = $name . "_" . $::opt_i; } + # see if the node is defined as a nim client + my $lscmd = qq~/usr/sbin/lsnim -l $nodename 2>/dev/null~; + $output = xCAT::Utils->runcmd("$lscmd", -1); + if ($::RUNCMD_RC != 0) + { + # doesn't exist + if ($::VERBOSE) + { + my $rsp; + push @{$rsp->{data}}, "Node \'$nodename\' is not defined."; + xCAT::MsgUtils->message("I", $rsp, $callback); + } + next; + } + # see if the node is running - # use nodelist.status - if ($nlhash && $nlhash->{$nodename}->[0]->{'status'} eq 'booted') + + my $badmstate; + my $badnodestat; + # check BOTH Mstate and nodestat + + # check NIM Mstate for node + my $mstate = xCAT::InstUtils->get_nim_attr_val($nodename, "Mstate", $callback, $Sname, $subreq); + if ($mstate && (!($mstate =~ /currently running/)) ) { + $badmstate++; + } + + # check xCAT nodelist.status for the node + if ($nlhash && $nlhash->{$nodename}->[0]->{'status'} ne 'booted') + { + $badnodestat++; + } + + if (!$badmstate && !$badnodestat) { if ($::FORCE) { - if ($::VERBOSE) { my $rsp; @@ -12266,15 +12408,14 @@ sub rmdsklsnode my $scmd = "shutdown -F &"; my $output; $output = - xCAT::InstUtils->xcmd($callback, $subreq, "xdsh", $nodename, - $scmd, 0); + xCAT::InstUtils->xcmd($callback, $subreq, "xdsh", $nodename, $scmd, 0); } else { # don't remove the def my $rsp; push @{$rsp->{data}}, - "The nodelist.status of \'$nodename\' is currently \'booted\', use -f flag to forcely remove it."; + "The node \'$nodename\' is currently running. Use the -f flag to force the removal."; xCAT::MsgUtils->message("E", $rsp, $callback); $error++; push(@nodesfailed, $nodename); @@ -13454,7 +13595,6 @@ sub parse_otherpkgs push @rpm_pkgs, $pname; } -# ndebug elsif (($p =~ /epkg\.Z/) || ($p =~ /^E:/)) { if ($p =~ /:/) diff --git a/xCAT-server/lib/xcat/plugins/snmove.pm b/xCAT-server/lib/xcat/plugins/snmove.pm index 2fd398a51..ce6c434d7 100644 --- a/xCAT-server/lib/xcat/plugins/snmove.pm +++ b/xCAT-server/lib/xcat/plugins/snmove.pm @@ -562,16 +562,6 @@ sub process_request $sharedinstall="no"; } - # - # handle the statelite update for the sharedinstall=sns case - # - using a shared file system across all service nodes - # - if ( ($::isaix) && ($sharedinstall eq "sns") ){ - my $s = &sfsSLconfig(\@nodes, \%nhash, \%sn_hash, $old_node_hash, $nimprime, $callback, $sub_req); - } - - # TBD - handle sharedinstall =all case ???? - # handle the statelite update for sharedinstall=no # - not using a shared files system my %SLmodhash; @@ -836,6 +826,19 @@ sub process_request } } + # + # handle the statelite update for the sharedinstall=sns case + # - using a shared file system across all service nodes + # - must be done AFTER node def is updated! + # + if ( ($::isaix) && ($sharedinstall eq "sns") ){ + my $s = &sfsSLconfig(\@nodes, \%nhash, \%sn_hash, $old_node_hash, $nimprime, $callback, $sub_req); + } + + # TBD - handle sharedinstall =all case ???? + + + # run makeconservercf my @nodes_con = keys(%sn_hash1); if (@nodes_con > 0) @@ -884,6 +887,7 @@ sub process_request $nimtab->close(); # now try to restore any backup client data + # for each service node foreach my $s (keys %SRloc) { @@ -898,15 +902,50 @@ sub process_request my $snbk = "$s" . "_" . "$osi"; my $bkloc = "$sloc/$snbk/.client_data"; - my $ccmd=qq~/usr/bin/cp -r -p $bkloc/* $cdloc 2>/dev/null~; + # get a list of files from the backup dir + my $rcmd = qq~/usr/bin/ls $bkloc 2>/dev/null~; + + my $rlist = xCAT::InstUtils->xcmd($callback, $sub_req, "xdsh", $s, $rcmd, 0); - my $output = xCAT::InstUtils->xcmd($callback, $sub_req, "xdsh", $s, $ccmd, 0); if ($::RUNCMD_RC != 0) { - if ($::VERBOSE) { + my $rsp; + push @{$rsp->{data}}, "Could not list contents of $bkloc.\n"; + xCAT::MsgUtils->message("E", $rsp, $callback); + $error++; + } + + # restore file on node by node basis + # we don't want all the files! + # - just the ones we are moving + foreach my $nd (@nodes) { + + $nd =~ s/\..*$//; + + # for each file in $bkloc + my $filestring = ""; + foreach my $f ( split(/\n/, $rlist) ){ + my $junk; + my $file; + if ($f =~ /:/) { + ($junk, $file) = split(/:/, $f); + } + $file =~ s/\s*//g; # remove blanks + + # if file contains node name then copy it + if ($file =~ /$nd/) { + $filestring .= "$bkloc/$file "; + } + } + my $ccmd=qq~/usr/bin/cp -p -r $filestring $cdloc 2>/dev/null~; + + my $output = xCAT::InstUtils->xcmd($callback, $sub_req, "xdsh", $s, $rcmd, 0); + if ($::RUNCMD_RC != 0) + { my $rsp; - push @{$rsp->{data}}, "Could not copy $bkloc on $s.\n"; + push @{$rsp->{data}}, "Could not copy files to $cdloc.\n"; xCAT::MsgUtils->message("E", $rsp, $callback); + $error++; } } } @@ -2047,7 +2086,6 @@ sub sfsSLconfig } } - # get hash of statelite table entries my $statetab = xCAT::Table->new('statelite', -create => 1); my $recs = $statetab->getAllEntries; @@ -2081,7 +2119,6 @@ sub sfsSLconfig # if the $server value was the old SN hostname # then we need to # update the statelite table with the new SN name - if ( $server eq $old_node_hash->{$n}->{'oldmaster'} ) { my $stmnt = "$sn_hash{$n}{'xcatmaster'}:$dir"; $SLmodhash{$item}{'statemnt'} = $stmnt;