support invoking prescripts once for each node, with the per-node instances running in parallel

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@5690 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
2010-04-06 19:47:13 +00:00 · 2010-04-06 19:47:13 +00:00 · 09c63c2383
commit 09c63c2383
parent 97e04e1302
3 changed files with 147 additions and 36 deletions
--- a/perl-xCAT/xCAT/Schema.pm
+++ b/perl-xCAT/xCAT/Schema.pm
@ -846,18 +846,25 @@ prescripts => {
  # Do not put description text past column 88, so it displays well in a 100 char wide window.
  # ----------------------------------------------------------------------------------|
 	begin => 
-   "The scripts to be run at the beginning of the nodeset (Linux) command.\n\n". 
+   "The scripts to be run at the beginning of the nodeset (Linux) command.\n". 
   " The format is:\n".
   "   [action1:]s1,s2...[|action2:s3,s4,s5...]\n".
   " where:\n".
   "  - action1 and action2 are the nodeset/nimnodeset actions specified in the command\n".
   "  - s1 and s2 are the scripts to run for action1 in order\n".
-   "  - s3, s4, and s5 are the scripts to run for actions2\n\n".
+   "  - s3, s4, and s5 are the scripts to run for actions2\n".
   " If actions are omitted, the scripts apply to all actions.\n".
-   " All the scripts should be copied to /install/prescripts directory.\n\n".
   " Examples:\n".
   "   myscript1,myscript2\n".
-   "   install:myscript1,myscript2|netboot:myscript3",
+   "   install:myscript1,myscript2|netboot:myscript3\n\n".
+   " All the scripts should be copied to /install/prescripts directory.\n".
+   " The following two environment variables will be passed to each script: \n".
+   "   NODES a coma separated list of node names that need to run the script for\n".
+   "   ACTION current nodeset action.\n\n".
+   " If '#xCAT setting:MAX_INSTANCE=number' is specified in the script, the script\n".
+   " will get invoked for each node in parallel, but no more than number of instances\n".
+   " will be invoked at at a time. If it is not sepcified, the script will be invoked\n".
+   " once for all the nodes.\n",
    end => "The scripts to be run at the end of the nodeset (Linux) command. The format is the same as the 'begin' column.",
 	comments => 'Any user-written notes.',
 	disable => "Set to 'yes' or '1' to comment out this row.",
--- a/xCAT-client/pods/man8/nodeset.8.pod
+++ b/xCAT-client/pods/man8/nodeset.8.pod
@ -30,7 +30,8 @@ B<nodeset> only sets the next boot state, but does not reboot.
 B<nodeset>  is  called  by rinstall and winstall and is also called by the
 installation process remotely to set the boot state back to "boot".

-User can supply their own scripts to be run on the mn or on the service node (if their is hierarchy) for a node during the nodeset command. Such scripts are called B<prescripts>. They should be copied to /install/prescripts dirctory. A table called I<prescripts> is used to specify the scripts and their associated actions. The scripts to be run at the beginning of the nodeset command are stored in the 'begin' column of I<prescripts> table. The scripts to be run at the end of the noodeset command are stored in the 'end' column of I<prescripts> table. Please run 'tabdump prescripts -d' command for details. The following two environment variables will be passed to each script: NODES contains all the names of the nodes that need to run the script for and ACTION contains the current nodeset action.
+Users can supply their own scripts to be run on the mn or on the service node (if there is a hierarchy) for a node during the nodeset command. Such scripts are called B<prescripts>. They should be copied to the /install/prescripts directory. A table called I<prescripts> is used to specify the scripts and their associated actions. The scripts to be run at the beginning of the nodeset command are stored in the 'begin' column of the I<prescripts> table. The scripts to be run at the end of the nodeset command are stored in the 'end' column of the I<prescripts> table. Please run the 'tabdump prescripts -d' command for details. The following two environment variables will be passed to each script: NODES contains a comma-separated list of the names of the nodes the script needs to be run for, and ACTION contains the current nodeset action. If I<#xCAT setting:MAX_INSTANCE=number> is specified in the script, the script will be invoked once for each node in parallel, but no more than I<number> instances will be running at a time. If it is not specified, the script will be invoked once for all the nodes.
+

 =head1 B<Options>

--- a/xCAT-server/lib/xcat/plugins/prescripts.pm
+++ b/xCAT-server/lib/xcat/plugins/prescripts.pm
@ -11,6 +11,10 @@ require xCAT::Utils;
 require xCAT::MsgUtils;
 use Getopt::Long;
 use Sys::Hostname;
+use Time::HiRes qw(gettimeofday sleep);
+use POSIX "WNOHANG";
+
+
 1;

 #-------------------------------------------------------
@ -158,26 +162,43 @@ sub runbeginpre
 	my $runnodes=$script_hash{$scripts};
        if ($runnodes && (@$runnodes>0)) {
 	    my $runnodes_s=join(',', @$runnodes);
-	    my $rsp = {};
-	    $rsp->{data}->[0]="$localhostname: Running begin scripts $scripts for nodes $runnodes_s.";
-	    $callback->($rsp);

 	    #now run the scripts 
-	    undef $SIG{CHLD};
 	    my @script_array=split(',', $scripts);
            foreach my $s (@script_array) {
-		my $ret=`NODES=$runnodes_s ACTION=$action $installdir/prescripts/$s 2>&1`;
-		my $err_code=$?;
-		if ($ret) {
-		    my $rsp = {};
-		    $rsp->{data}->[0]="$localhostname: $s: $ret";
-		    $callback->($rsp);
+		my $rsp = {};
+		$rsp->{data}->[0]="$localhostname: Running begin script $s for nodes $runnodes_s.";
+		$callback->($rsp);
+
+                #check if the script need to be invoked for each node in parallel. 
+                #script must contian a line like this in order to be run this way: #xCAT setting: MAX_INSTANCE=4
+                #where 4 is the maximum instance at a time
+                my $max_instance=0; 
+                my $ret=`grep -E '#+xCAT setting: *MAX_INSTANCE=' $installdir/prescripts/$s`;
+                if ($? == 0) {
+		   $max_instance=`echo "$ret" | cut -d= -f2`; 
+                   chomp($max_instance);
 		}
-		if ($err_code != 0) {
-		    $rsp = {};
-		    $rsp->{error}->[0]="$localhostname: $s: return code=$err_code. Error message=$ret";
-		    $callback->($rsp);
-		    #last;
+                
+                if ($max_instance > 0) {
+		    #run the script for each node in paralell, no more than max_instance at a time
+		    run_script_single_node($installdir, $s,$action,$max_instance,$runnodes,$callback);
+		} else { 
+		    undef $SIG{CHLD};
+                    #pass all the nodes to the script, only invoke the script once
+		    my $ret=`NODES=$runnodes_s ACTION=$action $installdir/prescripts/$s 2>&1`;
+		    my $err_code=$?;
+		    if ($err_code != 0) {
+			my $rsp = {};
+			$rsp->{error}->[0]="$localhostname: $s: return code=$err_code. Error message=$ret";
+			$callback->($rsp);
+		    } else {
+			if ($ret) {
+			    my $rsp = {};
+			    $rsp->{data}->[0]="$localhostname: $s: $ret";
+			    $callback->($rsp);
+			}
+		    }
 		}
 	    }
 	}
@ -206,26 +227,41 @@ sub runendpre
 	    my $runnodes_s=join(',', @$runnodes);
            my %runnodes_hash=();

-	    my $rsp = {};
-	    $rsp->{data}->[0]="$localhostname: Running end scripts $scripts for nodes $runnodes_s.";
-	    $callback->($rsp);
-
 	    #now run the scripts 
-	    undef $SIG{CHLD};
 	    my @script_array=split(',', $scripts);
            foreach my $s (@script_array) {
-		my $ret=`NODES=$runnodes_s ACTION=$action $installdir/prescripts/$s 2>&1`;
-		my $err_code=$?;
-		if ($ret) {
-		    my $rsp = {};
-		    $rsp->{data}->[0]="$localhostname: $s: $ret";
-		    $callback->($rsp);
+		my $rsp = {};
+		$rsp->{data}->[0]="$localhostname: Running end script $s for nodes $runnodes_s.";
+		$callback->($rsp);
+
+                #check if the script need to be invoked for each node in parallel. 
+                #script must contian a line like this in order to be run this way: #xCAT setting: MAX_INSTANCE=4
+                #where 4 is the maximum instance at a time
+                my $max_instance=0; 
+                my $ret=`grep -E '#+xCAT setting: *MAX_INSTANCE=' $installdir/prescripts/$s`;
+                if ($? == 0) {
+		   $max_instance=`echo "$ret" | cut -d= -f2`; 
+                   chomp($max_instance);
 		}
-		if ($err_code != 0) {
-		    $rsp = {};
-		    $rsp->{error}->[0]="$localhostname: $s: return code=$err_code. Error message=$ret";
-		    $callback->($rsp);
-		    #last;
+                
+                if ($max_instance > 0) {
+		    #run the script for each node in paralell, no more than max_instance at a time
+		    run_script_single_node($installdir, $s,$action,$max_instance,$runnodes,$callback);
+		} else { 
+		    undef $SIG{CHLD};
+		    my $ret=`NODES=$runnodes_s ACTION=$action $installdir/prescripts/$s 2>&1`;
+		    my $err_code=$?;
+		    if ($err_code != 0) {
+			my $rsp = {};
+			$rsp->{error}->[0]="$localhostname: $s: return code=$err_code. Error message=$ret";
+			$callback->($rsp);
+		    } else {
+			if ($ret) {
+			    my $rsp = {};
+			    $rsp->{data}->[0]="$localhostname: $s: $ret";
+			    $callback->($rsp);
+			}
+		    }
 		}
 	    }
 	}
@ -324,3 +360,70 @@ sub  parseprescripts
    }
    return $ret;
 }
+
+
+#-------------------------------------------------------
+=head3  run_script_single_node
+
+    Runs one prescript once per node, each run in its own forked child,
+    keeping no more than a given number of children alive at a time.
+
+    Arguments:
+       installdir - install root; the script is run from <installdir>/prescripts
+       s          - name of the script
+       action     - current nodeset action, passed to the script as ACTION
+       max        - maximum number of instances to be run at a time
+       nodes      - reference to the array of node names; each child gets
+                    a single node name in NODES
+       callback   - code reference used to report script output and errors
+    Returns:
+       0 - all the nodes were processed and all the children have finished
+       1 - a fork failed; the children started so far were still waited for
+
+=cut
+#-------------------------------------------------------
+sub  run_script_single_node
+{
+    my $installdir=shift; #/install
+    my $s=shift;  #script name
+    my $action=shift;
+    my $max=shift;  #max number of instances to be run at a time
+    my $nodes=shift; #nodes to be run
+    my $callback=shift; #callback
+
+    my $children=0;
+    my $rc=0;
+    my $localhostname=hostname();
+
+    # A missing, zero or negative limit would make the throttle loop below
+    # spin forever, so fall back to one instance at a time.
+    unless ($max && $max > 0) { $max=1; }
+
+    # Install the reaper once, and scope it with 'local' so that the handler
+    # the caller had is restored when this sub returns. Leaving the reaper in
+    # place would let it collect the child of a later backtick call and
+    # corrupt that call's $?.
+    # NOTE(review): waitpid(-1) also collects children this sub did not start;
+    # confirm that nothing else forks in this process while it is running.
+    local $SIG{CHLD} = sub {
+	local ($!, $?);   # do not clobber the interrupted code's status variables
+	while (waitpid(-1, WNOHANG) > 0) { $children--; }
+    };
+
+    foreach my $node ( @$nodes ) {
+	# throttle: wait until a slot is free
+	while ( $children >= $max ) {
+	    Time::HiRes::sleep(0.5);
+	}
+
+	my $pid = xCAT::Utils->xfork;
+	if ( !defined($pid) ) {
+	    # Fork error
+	    my $rsp = {};
+	    $rsp->{data}->[0]="$localhostname: Fork error before running script $s for node $node";
+	    $callback->($rsp);
+	    # stop starting new children, but still wait for the running ones below
+	    $rc=1;
+	    last;
+	}
+	elsif ( $pid == 0 ) {
+	    # Child process
+	    # the backticks below must collect the script's status themselves
+	    undef $SIG{CHLD};
+
+	    # Hand the values over through the environment of this child rather
+	    # than pasting them into the shell command line, so that unusual
+	    # characters in a node name or action are not interpreted by the shell.
+	    $ENV{NODES}=$node;
+	    $ENV{ACTION}=defined($action) ? $action : '';
+	    my $ret=`$installdir/prescripts/$s 2>&1`;
+	    my $err_code=$?;
+	    my $rsp = {};
+	    if ($err_code != 0) {
+		$rsp->{error}->[0]="$localhostname: $s: node=$node. return code=$err_code. Error message=$ret";
+		$callback->($rsp);
+	    } else {
+		if ($ret) {
+		    $rsp->{data}->[0]="$localhostname: $s: node=$node. $ret";
+		    $callback->($rsp);
+		}
+	    }
+	    # $err_code is a raw wait status: the exit value is in the high byte.
+	    # Passing it to exit() unchanged would turn e.g. 256 into 0. A script
+	    # killed by a signal has a zero high byte, so report that as 1.
+	    my $exit_value=$err_code >> 8;
+	    if ($err_code != 0 && $exit_value == 0) { $exit_value=1; }
+	    exit $exit_value;
+	}
+	else {
+	    # Parent process
+	    $children++;
+	}
+    }
+
+    # wait for the children that are still running
+    while ($children > 0) {
+	Time::HiRes::sleep(0.5);
+    }
+    return $rc;
+}