fix RMC monitoring for scaling
git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2522 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
		| @@ -860,8 +860,7 @@ sub reportError | ||||
|   my $callback=shift; | ||||
|   if ($callback) { | ||||
|     my $rsp={}; | ||||
|     my $localhostname=hostname(); | ||||
|     $rsp->{data}->[0]="$localhostname: $error"; | ||||
|     $rsp->{data}->[0]=$error; | ||||
|     $callback->($rsp); | ||||
|   } else { xCAT::MsgUtils->message('S', "[mon]: $error\n"); } | ||||
|   return; | ||||
| @@ -989,11 +988,14 @@ sub addNodes { | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     if ($scope==0) { next; } | ||||
|     if ($inactiveHash{$node}) { next;} | ||||
|  | ||||
|     push(@normal_nodes, $node); | ||||
|     if ($scope==0) { next; } | ||||
|  | ||||
|     #copy the configuration script and run it locally | ||||
|     if($iphash{$node}) { | ||||
|       pop(@normal_nodes); | ||||
|       $result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$master"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id`;       | ||||
|       if ($?) { | ||||
|         reportError($result, $callback); | ||||
| @@ -1001,6 +1003,7 @@ sub addNodes { | ||||
|       } | ||||
|     } else { | ||||
|       if ($flag) { #define MCP on HMC | ||||
|         pop(@normal_nodes); | ||||
| 	if (!$hmc_ssh_enabled) { | ||||
|           my $result=`XCATBYPASS=Y $::XCATROOT/bin/rspconfig $node sshcfg=enable 2>&1`; | ||||
|           if ($?) { | ||||
| @@ -1023,8 +1026,6 @@ sub addNodes { | ||||
|             if ($?) { reportError($result, $callback); } | ||||
| 	  } | ||||
|         } | ||||
|       } else { #normal nodes | ||||
| 	push(@normal_nodes, $node); | ||||
|       } | ||||
|     } | ||||
|   }  | ||||
| @@ -1032,21 +1033,39 @@ sub addNodes { | ||||
|   #let the updatenode command handle the normal nodes in bulk | ||||
|   if (@normal_nodes>0) { | ||||
|     my $nr=join(',',@normal_nodes);  | ||||
|     reportError("Configuring the following nodes. It may takes a while.\n$nr", $callback);  | ||||
|  | ||||
|     #use 2 here to tell xcataixpost that there is only one postscript, download only it. It applies to AIX only  | ||||
|     #get the fanout value | ||||
|     my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon"); | ||||
|  | ||||
|     my $fanout_string=""; | ||||
|     my $fanout_value=$settings{'rfanout'}; | ||||
|     if ($fanout_value) { $fanout_string="DSH_FANOUT=$fanout_value";} | ||||
|  | ||||
|     #for local mode, need to refresh the IBM.MCP class to initialize the hb | ||||
|     if ($scope==0) { | ||||
|       #$result=`XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr /usr/bin/refrsrc-api -c IBM.MCP 2>&1"`; | ||||
|       if ($?) { reportError($result, $callback); } | ||||
|       return (0, "ok");  | ||||
|     } | ||||
|  | ||||
|     #this is the remote case | ||||
|     reportError("Configuring the following nodes. It may take a while.\n$nr", $callback); | ||||
|     my $cmd; | ||||
|     #use 2 here to tell xcataixpost that there is only one postscript, download only it. It applies to AIX only      | ||||
|     if (xCAT::Utils->isLinux()) { | ||||
|       $cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcatdsklspost 2 configrmcnode 2>&1"; | ||||
|       $cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcatdsklspost 2 configrmcnode 2>&1"; | ||||
|     } | ||||
|     else { | ||||
|       $cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcataixpost 2 configrmcnode 2>&1"; | ||||
|       $cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcataixpost 2 configrmcnode 2>&1"; | ||||
|     } | ||||
|     if (! open (CMD, "$cmd |")) { | ||||
|       reportError("Cannot run command $cmd", $callback); | ||||
|     } else { | ||||
|       while (<CMD>) { | ||||
|         reportError("$_", $callback); | ||||
| 	chomp; | ||||
|         my $rsp={}; | ||||
|         $rsp->{data}->[0]="$_"; | ||||
|         $callback->($rsp); | ||||
|       } | ||||
|       close(CMD); | ||||
|     } | ||||
| @@ -1175,20 +1194,29 @@ sub removeNodes { | ||||
|   #let the updatenode command handle the normal nodes in bulk | ||||
|   if (@normal_nodes>0) { | ||||
|     my $nr=join(',',@normal_nodes);  | ||||
|  | ||||
|     my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon"); | ||||
|  | ||||
|     my $fanout_string=""; | ||||
|     my $fanout_value=$settings{'rfanout'}; | ||||
|     if ($fanout_value) { $fanout_string="DSH_FANOUT=$fanout_value";} | ||||
|  | ||||
|     #copy the configuration script and run it locally | ||||
|     $result=`XCATBYPASS=Y $::XCATROOT/bin/xdcp $nr $::XCATROOT/sbin/rmcmon/configrmcnode /tmp 2>&1 `; | ||||
|     $result=`XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdcp $nr $::XCATROOT/sbin/rmcmon/configrmcnode /tmp 2>&1 `; | ||||
|     if ($?) { | ||||
|       reportError("rmcmon:removeNodes: cannot copy the file configrmcnode to nodes $nr:\$result", $callback); | ||||
|       next; | ||||
|       reportError("$result", $callback); | ||||
|     } | ||||
|  | ||||
|     reportError("De-configuring the following nodes. It may takes a while.\n$nr", $callback);  | ||||
|     my $cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s MS_NODEID=$ms_node_id /tmp/configrmcnode -1 2>&1"; | ||||
|     reportError("De-configuring the following nodes. It may take a while.\n$nr", $callback);  | ||||
|     my $cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s MS_NODEID=$ms_node_id /tmp/configrmcnode -1 2>&1"; | ||||
|     if (! open (CMD1, "$cmd |")) { | ||||
|       reportError("Cannot run command $cmd", $callback); | ||||
|     } else { | ||||
|       while (<CMD1>) { | ||||
|         reportError("$_", $callback); | ||||
| 	chomp; | ||||
|         my $rsp={}; | ||||
|         $rsp->{data}->[0]="$_"; | ||||
|         $callback->($rsp); | ||||
|       } | ||||
|       close(CMD1); | ||||
|     } | ||||
| @@ -1236,7 +1264,8 @@ sub getDescription { | ||||
|     good for threadhold monitoring. xCAT automatically sets up the  | ||||
|     monitoring domain for RMC during node deployment time.  | ||||
|   Settings: | ||||
|     none."; | ||||
|     rfanout -- indicating the fanout number for configuring or deconfiguring  | ||||
|         remote nodes."; | ||||
| } | ||||
|  | ||||
| #-------------------------------------------------------------------------------- | ||||
|   | ||||
| @@ -3,10 +3,8 @@ | ||||
| $RES::Condition{'NodeReachability'} = { | ||||
| 	Name => q(NodeReachability), | ||||
| 	ResourceClass => q(IBM.MngNode), | ||||
| 	EventExpression => q(Status@P==1 && Status!=1), | ||||
| 	EventDescription => q(An event will be generated when a node becomes network unreachable from the management server.), | ||||
|         RearmExpression => q(Status=1), | ||||
|         RearmDescription => q(A rearm event will be generated when the node is reachable again.), | ||||
| 	EventExpression => q(Status!=Status@P), | ||||
| 	EventDescription => q(An event will be generated when a status changes), | ||||
| 	ManagementScope => q(1), | ||||
| 	Severity => q(2), | ||||
| }; | ||||
|   | ||||
| @@ -22,7 +22,7 @@ if [ -z "$NODE" ]; then | ||||
|    NODE=`hostname` | ||||
| fi | ||||
|  | ||||
| logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONMASTER=$MONMASTER,MS_NODEID=$MS_NODEID" | ||||
| #logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONMASTER=$MONMASTER,MS_NODEID=$MS_NODEID" | ||||
|  | ||||
| #check if rsct is installed and running | ||||
| if [ ! -e /usr/bin/lsrsrc ]; then   | ||||
| @@ -139,6 +139,32 @@ if [ -z "$PID" ]; then | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| #wait for the RMC daemon to become fully functional | ||||
| RETRY_LIMIT=20          # number of 2 second retry intervals (40 second total) | ||||
| let i=$RETRY_LIMIT | ||||
| while : | ||||
| do | ||||
|   lsrsrc > /dev/null 2>&1 | ||||
|   rc=$? | ||||
|   if [[ $rc -eq 0 ]] | ||||
|   then | ||||
|     break | ||||
|   fi | ||||
|  | ||||
|   i=$((i - 1)) | ||||
|   if (( i > 0 )) | ||||
|   then | ||||
|       sleep 2 | ||||
|   else | ||||
|       break | ||||
|   fi | ||||
| done | ||||
| if [[ $rc -ne 0 ]] | ||||
| then | ||||
|   logger xCAT "RMC subsystem has not started" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
|  | ||||
| if [ $ADD -eq 1 ]; then | ||||
|   #enable remote client connection | ||||
| @@ -178,6 +204,7 @@ else | ||||
|     exit 1 | ||||
|   fi | ||||
| fi | ||||
| echo "done" | ||||
| exit 0; | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -244,7 +244,7 @@ if (!-e $runfile){ | ||||
|     print ODM ' | ||||
| errnotify: | ||||
|       en_pid = 0 | ||||
|       en_name = "xcat_rmc_errlog_sensor" | ||||
|       en_name = "xcat_rmc_errlog" | ||||
|       en_persistenceflg = 1 | ||||
|       en_method = "' . "$::XCATROOT/sbin/rmcmon/errmsgque" . ' sequence = $1 error_id = $2 class = $3 type = $4 alert_flags = $5 res_name = $6 res_type = $7 res_class = $8 label = $9" | ||||
| '; | ||||
| @@ -296,7 +296,7 @@ elsif ($^O =~ /^aix/i) { | ||||
|     # set $ENV{'SENSOR_MonitorStatus'} to 2 | ||||
|     # should not do clean up when IBM.SensorRM is stopped | ||||
|     if (&isRMrunning("IBM.SensorRM")) { | ||||
|       runcmd("/bin/odmdelete -o errnotify -q \" en_name=xcat_rmc_errlog_sens\"", -1); | ||||
|       runcmd("/bin/odmdelete -o errnotify -q \" en_name=xcat_rmc_errlog\"", -1); | ||||
|       if (-e $runfile) { | ||||
|         unlink($runfile); | ||||
|       } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user