From 71a904afc75bb4451f432dfa19e8dc35d9ba25af Mon Sep 17 00:00:00 2001
From: nott <nott@8638fb3e-16cb-4fca-ae20-7b5d299a9bcd>
Date: Fri, 18 May 2012 20:58:39 +0000
Subject: [PATCH] fixes for multiple HASN issues

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/branches/2.7@12809 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
---
 xCAT-server/lib/xcat/plugins/aixinstall.pm | 184 ++++++++++++++++-----
 1 file changed, 147 insertions(+), 37 deletions(-)

diff --git a/xCAT-server/lib/xcat/plugins/aixinstall.pm b/xCAT-server/lib/xcat/plugins/aixinstall.pm
index 9a42634fd..3a83a2e66 100644
--- a/xCAT-server/lib/xcat/plugins/aixinstall.pm
+++ b/xCAT-server/lib/xcat/plugins/aixinstall.pm
@@ -32,6 +32,8 @@ use strict;
 use Socket;
 use File::Path;
 
+use Fcntl qw/:flock/;
+
 # options can be bundled up like -vV
 Getopt::Long::Configure("bundling");
 $Getopt::Long::ignorecase = 0;
@@ -8288,21 +8290,34 @@ sub prenimnodeset
 						$moveit++;
 					}
 				}
-			if (!$::SKIPSYNC) {
-                # do a re-sync
-                my $scmd = "nim -Fo sync_roots $imghash{$i}{spot}";
-                my $output = xCAT::InstUtils->xcmd($callback, $subreq, "xdsh", $nimprime, $scmd, 0);
-                if ($::RUNCMD_RC != 0)
-                {
-                    my $rsp;
-                    push @{$rsp->{data}}, "Could not update $imghash{$i}{shared_root}.\n";
-                    if ($::VERBOSE)
-                    {
-                        push @{$rsp->{data}}, "$output";
-                    }
-                    xCAT::MsgUtils->message("E", $rsp, $callback);
-                }
-			}
+				if (!$::SKIPSYNC) {
+
+                	# do a re-sync
+					# if it's allocated then don't update it
+                	my $alloc_count = xCAT::InstUtils->get_nim_attr_val($imghash{$i}{shared_root}, "alloc_count", $callback, "", $subreq);
+                	if (defined($alloc_count) && ($alloc_count != 0))
+                	{
+                    	my $rsp;
+                    	push @{$rsp->{data}}, "The resource named \'$imghash{$i}{shared_root}\' is currently allocated. It will not be re-synchronized.\n";
+                    	xCAT::MsgUtils->message("I", $rsp, $callback);
+                	}
+					else
+					{
+
+                		my $scmd = "nim -Fo sync_roots $imghash{$i}{spot}";
+                		my $output = xCAT::InstUtils->xcmd($callback, $subreq, "xdsh", $nimprime, $scmd, 0);
+                		if ($::RUNCMD_RC != 0)
+                		{
+                    		my $rsp;
+                    		push @{$rsp->{data}}, "Could not update $imghash{$i}{shared_root}.\n";
+                    		if ($::VERBOSE)
+                    		{
+                        		push @{$rsp->{data}}, "$output";
+                    		}
+                    		xCAT::MsgUtils->message("E", $rsp, $callback);
+                		}
+					}
+				}
 
 				if ($moveit) {
 					# copy back the .client data files
@@ -9295,7 +9310,7 @@ sub doSFScopy
     my $service = "xcat";
     if (\@nlist)
     {
-        $sn = xCAT::Utils->getSNformattedhash(\@nlist, $service, "MN");
+		$snode = xCAT::Utils->getSNformattedhash(\@nlist, $service, "MN");
     }
     foreach my $sn (keys %$snode) {
 		foreach my $img (@imagenames) {
@@ -9307,7 +9322,10 @@ sub doSFScopy
 
 				#  don't copy dump or paging
 				if ( ($restype eq 'dump') || ($restype eq 'paging') ) {
-					push(@dontcopy, $imghash{$img}{$restype});
+					if (!grep(/^$imghash{$img}{$restype}$/, @dontcopy))
+					{
+						push(@dontcopy, $imghash{$img}{$restype});
+					}
 					next;
 				}
 
@@ -9316,20 +9334,15 @@ sub doSFScopy
 				if ( ($nimtype ne 'standalone') && ($restype eq 'lpp_source'))
 				{
 					# don't copy lpp_source for diskless/dataless nodes
-					push(@dontcopy, $imghash{$img}{'lpp_source'});
+					if (!grep(/^$imghash{$img}{'lpp_source'}$/, @dontcopy))
+                    {
+						push(@dontcopy, $imghash{$img}{'lpp_source'});
+					}
 					next;
 				}
 
-				if ( ($nimtype ne 'standalone') && ($restype eq ''))
-               	{
-                   	# don't copy lpp_source for diskless/dataless nodes
-                   	push(@dontcopy, $imghash{$img}{'lpp_source'});
-                   	next;
-               	}
-
 				foreach my $res (split /,/, $imghash{$img}{$restype})
 				{
-
 					if (grep (/^$res$/, @dontcopy)) {
 						next;
 					}
@@ -9339,7 +9352,10 @@ sub doSFScopy
 
 					if (defined($alloc_count) && ($alloc_count != 0)) {
 						# if it's allocated then don't copy it
-						push(@dontcopy, $res);
+						if (!grep(/^$res$/, @dontcopy))
+                   		{
+							push(@dontcopy, $res);
+						}
 
 						my $rsp;
 						push @{$rsp->{data}}, "NIM resource $res is currently allocated on service node $sn and will not be re-copied to the service nodes.\n";
@@ -9381,6 +9397,7 @@ sub doSFScopy
 
 					# if the resources need to be copied
 					my %resinfo;
+
 					if (!grep(/^$res$/, @dontcopy))
 					{
 						# copy appropriate files to the SN
@@ -10178,6 +10195,42 @@ sub mkdsklsnode
             my $time = `date | cut -f5 -d' '`;
             chomp $time;
 
+			#
+			#  wait for shared root lock - need to be sure we don't change it
+			# 		while another SN is in the process of def and backup
+			#
+
+			# see if the shared_root is being modified
+			my $origloc;
+			my $SRlock;
+			my $locked=0;
+			my $lockfile;
+			if ($imagehash{$image_name}{shared_root} ) {
+				# get the shared_root location
+				$origloc = xCAT::InstUtils->get_nim_attr_val($imagehash{$image_name}{shared_root}, 'location', $callback, $Sname);
+
+				# see if this is a shared filesystem environment
+				my $sitetab = xCAT::Table->new('site');
+				my ($tmp) = $sitetab->getAttribs({'key' => 'sharedinstall'}, 'value');
+				my $sharedinstall = $tmp->{value};
+				$sitetab->close;
+				if (!$sharedinstall) {
+					$sharedinstall="no";
+				}
+
+				chomp $sharedinstall;
+
+				# try to get a lock if this is shared_root and using
+				#		a shared filesystem
+				if ($origloc && ($sharedinstall eq "sns")) {
+
+					$lockfile = "$origloc/lockfile";
+					open($SRlock, "<", $lockfile);
+					flock($SRlock,LOCK_EX);
+					$locked++;
+				}
+			}
+
             my $rsp;
             push @{$rsp->{data}}, "$Sname: Initializing NIM machine \'$nim_name\'. \n";
 			xCAT::MsgUtils->message("I", $rsp, $callback);
@@ -10197,7 +10250,13 @@ sub mkdsklsnode
                 push(@nodesfailed, $node);
                 next;
             }
-        }
+
+			if ($locked) {
+				flock($SRlock,LOCK_UN);
+				close($SRlock);
+			}
+
+        } # end doinit
 
         # Update /tftpboot/nodeip.info to export the variable BASECUST_REMOVAL
         # then during the network boot, rc.dd_boot script will check this variable
@@ -11670,10 +11729,24 @@ sub make_SN_resource
 					my $moveit = 0;
 					my $origloc;
 					my $origlocbak;
+					my $lockfile;
+					my $SRlock;
+					my $locked;
 					if ( ($::DEFONLY || ($sharedinstall eq "sns")) && ( $restype eq "shared_root")) {
+
+
 						$origloc =  $lochash{$imghash{$image}{$restype}};
-						$origlocbak = "$origloc.bak";
-						# ex. /install/nim/shared_root/71Bdskls_shared_root
+                        $origlocbak = "$origloc.bak";
+                        # ex. /install/nim/shared_root/71Bdskls_shared_root
+						#
+						# need to set a lock so that some other SN doesn't
+						#		modify anything while we're doing this
+						#
+						$lockfile = "$origloc/lockfile";
+						open($SRlock, "<", $lockfile);
+						flock($SRlock,LOCK_EX);
+						$locked++;
+
 						if (-d $origloc) {
 							my $mvcmd = qq~/usr/sbin/mvdir $origloc $origlocbak~;
 							my $output = xCAT::Utils->runcmd("$mvcmd", -1);
@@ -11695,6 +11768,10 @@ sub make_SN_resource
                         ) != 0
                       )
                     {
+						if ( $locked) {
+                        	flock($SRlock,LOCK_UN);
+                        	close($SRlock);
+                    	}
                         next;
                     }
 
@@ -11720,6 +11797,11 @@ sub make_SN_resource
                             xCAT::MsgUtils->message("E", $rsp, $callback);
                         }
 					}
+
+					if ( $locked) {
+						flock($lockfile,LOCK_UN);
+						close($lockfile);
+					}
 				}
                 # only make lpp_source for standalone type images
                 if (   ($restype eq "lpp_source")
@@ -12248,13 +12330,43 @@ sub rmdsklsnode
             $nodename = $name . "_" . $::opt_i;
         }
 
+		# see if the node is defined as a nim client
+		my $lscmd = qq~/usr/sbin/lsnim -l $nodename 2>/dev/null~;
+		$output = xCAT::Utils->runcmd("$lscmd", -1);
+        if ($::RUNCMD_RC != 0)
+        {
+            # doesn't exist 
+			if ($::VERBOSE)
+			{
+				my $rsp;
+				push @{$rsp->{data}}, "Node \'$nodename\' is not defined.";
+				xCAT::MsgUtils->message("I", $rsp, $callback);
+			}
+			next;
+        }
+
         # see if the node is running
-        # use nodelist.status
-        if ($nlhash && $nlhash->{$nodename}->[0]->{'status'} eq 'booted')
+
+		my $badmstate;
+        my $badnodestat;
+		# check BOTH Mstate and nodestat
+
+		# check NIM Mstate for node
+		my $mstate = xCAT::InstUtils->get_nim_attr_val($nodename, "Mstate", $callback, $Sname, $subreq);
+        if ($mstate && (!($mstate =~ /currently running/)) ) {
+			$badmstate++;
+		}
+
+        # check xCAT nodelist.status for the node
+        if ($nlhash && $nlhash->{$nodename}->[0]->{'status'} ne 'booted')
+		{
+			$badnodestat++;
+		}
+
+		if (!$badmstate && !$badnodestat) 
         {
             if ($::FORCE)
             {
-
                 if ($::VERBOSE)
                 {
                     my $rsp;
@@ -12266,15 +12378,14 @@ sub rmdsklsnode
                 my $scmd = "shutdown -F &";
                 my $output;
                 $output =
-                  xCAT::InstUtils->xcmd($callback, $subreq, "xdsh", $nodename,
-                                        $scmd, 0);
+                  xCAT::InstUtils->xcmd($callback, $subreq, "xdsh", $nodename, $scmd, 0);
             }
             else
             {
                 # don't remove the def
                 my $rsp;
                 push @{$rsp->{data}},
-                  "The nodelist.status of \'$nodename\' is currently \'booted\', use -f flag to forcely remove it.";
+                  "The node \'$nodename\' is currently running. Use the -f flag to force the removal.";
                 xCAT::MsgUtils->message("E", $rsp, $callback);
                 $error++;
                 push(@nodesfailed, $nodename);
@@ -13454,7 +13565,6 @@ sub parse_otherpkgs
             push @rpm_pkgs, $pname;    
         }
 
-# ndebug
 		elsif (($p =~ /epkg\.Z/) || ($p =~ /^E:/))
         {
 			if ($p =~ /:/)