From b47e2d3585d7ef804a946c3f0b0f49fb71130657 Mon Sep 17 00:00:00 2001 From: linggao Date: Sat, 22 Nov 2008 04:01:31 +0000 Subject: [PATCH] fix RMC monitoring for scaling git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2522 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- xCAT-rmc/plugin/rmcmon.pm | 63 ++++++++++++++----- .../sn/IBM.Condition/NodeReachability.pm | 6 +- xCAT-rmc/scripts/configrmcnode | 29 ++++++++- xCAT-rmc/scripts/monerrorlog | 4 +- 4 files changed, 78 insertions(+), 24 deletions(-) diff --git a/xCAT-rmc/plugin/rmcmon.pm b/xCAT-rmc/plugin/rmcmon.pm index 10e37063e..0839ec604 100644 --- a/xCAT-rmc/plugin/rmcmon.pm +++ b/xCAT-rmc/plugin/rmcmon.pm @@ -860,8 +860,7 @@ sub reportError my $callback=shift; if ($callback) { my $rsp={}; - my $localhostname=hostname(); - $rsp->{data}->[0]="$localhostname: $error"; + $rsp->{data}->[0]=$error; $callback->($rsp); } else { xCAT::MsgUtils->message('S', "[mon]: $error\n"); } return; @@ -989,11 +988,14 @@ sub addNodes { } } - if ($scope==0) { next; } if ($inactiveHash{$node}) { next;} + push(@normal_nodes, $node); + if ($scope==0) { next; } + #copy the configuration script and run it locally if($iphash{$node}) { + pop(@normal_nodes); $result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$master"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id`; if ($?) { reportError($result, $callback); @@ -1001,6 +1003,7 @@ sub addNodes { } } else { if ($flag) { #define MCP on HMC + pop(@normal_nodes); if (!$hmc_ssh_enabled) { my $result=`XCATBYPASS=Y $::XCATROOT/bin/rspconfig $node sshcfg=enable 2>&1`; if ($?) { @@ -1023,8 +1026,6 @@ sub addNodes { if ($?) { reportError($result, $callback); } } } - } else { #normal nodes - push(@normal_nodes, $node); } } } @@ -1032,21 +1033,39 @@ sub addNodes { #let updatenode command to handle the normal nodes as a bulk if (@normal_nodes>0) { my $nr=join(',',@normal_nodes); - reportError("Configuring the following nodes. 
It may takes a while.\n$nr", $callback); - #use 2 here to tell xcataixpost that there is only one postscript, download only it. It applies to AIX only + #get the fanout value + my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon"); + + my $fanout_string=""; + my $fanout_value=$settings{'rfanout'}; + if ($fanout_value) { $fanout_string="DSH_FANOUT=$fanout_value";} + + #for local mode, need to refresh the IBM.MCP class to initialize the hb + if ($scope==0) { + #$result=`XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr /usr/bin/refrsrc-api -c IBM.MCP 2>&1"`; + if ($?) { reportError($result, $callback); } + return (0, "ok"); + } + + #this is remote case + reportError("Configuring the following nodes. It may take a while.\n$nr", $callback); my $cmd; + #use 2 here to tell xcataixpost that there is only one postscript, download only it. It applies to AIX only if (xCAT::Utils->isLinux()) { - $cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcatdsklspost 2 configrmcnode 2>&1"; + $cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcatdsklspost 2 configrmcnode 2>&1"; } else { - $cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcataixpost 2 configrmcnode 2>&1"; + $cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcataixpost 2 configrmcnode 2>&1"; } if (! 
open (CMD, "$cmd |")) { reportError("Cannot run command $cmd", $callback); } else { while (<CMD>) { - reportError("$_", $callback); + chomp; + my $rsp={}; + $rsp->{data}->[0]="$_"; + $callback->($rsp); } close(CMD); } @@ -1175,20 +1194,29 @@ sub removeNodes { #let updatenode command to handle the normal nodes as a bulk if (@normal_nodes>0) { my $nr=join(',',@normal_nodes); + + my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon"); + + my $fanout_string=""; + my $fanout_value=$settings{'rfanout'}; + if ($fanout_value) { $fanout_string="DSH_FANOUT=$fanout_value";} + #copy the configuration script and run it locally - $result=`XCATBYPASS=Y $::XCATROOT/bin/xdcp $nr $::XCATROOT/sbin/rmcmon/configrmcnode /tmp 2>&1 `; + $result=`XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdcp $nr $::XCATROOT/sbin/rmcmon/configrmcnode /tmp 2>&1 `; if ($?) { - reportError("rmcmon:removeNodes: cannot copy the file configrmcnode to nodes $nr:\$result", $callback); - next; + reportError("$result", $callback); } - reportError("De-configuring the following nodes. It may takes a while.\n$nr", $callback); - my $cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s MS_NODEID=$ms_node_id /tmp/configrmcnode -1 2>&1"; + reportError("De-configuring the following nodes. It may take a while.\n$nr", $callback); + my $cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s MS_NODEID=$ms_node_id /tmp/configrmcnode -1 2>&1"; if (! open (CMD1, "$cmd |")) { reportError("Cannot run command $cmd", $callback); } else { while (<CMD1>) { - reportError("$_", $callback); + chomp; + my $rsp={}; + $rsp->{data}->[0]="$_"; + $callback->($rsp); } close(CMD1); } @@ -1236,7 +1264,8 @@ sub getDescription { good for threadhold monitoring. xCAT automatically sets up the monitoring domain for RMC during node deployment time. 
Settings: - none."; + rfanout -- indicating the fanout number for configuring or deconfiguring + remote nodes."; } #-------------------------------------------------------------------------------- diff --git a/xCAT-rmc/resources/sn/IBM.Condition/NodeReachability.pm b/xCAT-rmc/resources/sn/IBM.Condition/NodeReachability.pm index bb63cea05..973eda74b 100644 --- a/xCAT-rmc/resources/sn/IBM.Condition/NodeReachability.pm +++ b/xCAT-rmc/resources/sn/IBM.Condition/NodeReachability.pm @@ -3,10 +3,8 @@ $RES::Condition{'NodeReachability'} = { Name => q(NodeReachability), ResourceClass => q(IBM.MngNode), - EventExpression => q(Status@P==1 && Status!=1), - EventDescription => q(An event will be generated when a node becomes network unreachable from the management server.), - RearmExpression => q(Status=1), - RearmDescription => q(A rearm event will be generated when the node is reachable again.), + EventExpression => q(Status!=Status@P), + EventDescription => q(An event will be generated when a status changes), ManagementScope => q(1), Severity => q(2), }; diff --git a/xCAT-rmc/scripts/configrmcnode b/xCAT-rmc/scripts/configrmcnode index 82f31867d..499b4c72c 100755 --- a/xCAT-rmc/scripts/configrmcnode +++ b/xCAT-rmc/scripts/configrmcnode @@ -22,7 +22,7 @@ if [ -z "$NODE" ]; then NODE=`hostname` fi -logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONMASTER=$MONMASTER,MS_NODEID=$MS_NODEID" +#logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONMASTER=$MONMASTER,MS_NODEID=$MS_NODEID" #check if rsct is installed and running if [ ! -e /usr/bin/lsrsrc ]; then @@ -139,6 +139,32 @@ if [ -z "$PID" ]; then fi fi +#wait for RMC daemon to fully functional +RETRY_LIMIT=20 # number of 2 second retry intervals (40 second total) +let i=$RETRY_LIMIT +while : +do + lsrsrc > /dev/null 2>&1 + rc=$? 
+ if [[ $rc -eq 0 ]] + then + break + fi + + i=$((i - 1)) + if (( i > 0 )) + then + sleep 2 + else + break + fi +done +if [[ $rc -ne 0 ]] +then + logger xCAT "RMC subsystem has not started" + exit 1 +fi + if [ $ADD -eq 1 ]; then #enable remote client connection @@ -178,6 +204,7 @@ else exit 1 fi fi +echo "done" exit 0; diff --git a/xCAT-rmc/scripts/monerrorlog b/xCAT-rmc/scripts/monerrorlog index 1572b7437..f31176908 100755 --- a/xCAT-rmc/scripts/monerrorlog +++ b/xCAT-rmc/scripts/monerrorlog @@ -244,7 +244,7 @@ if (!-e $runfile){ print ODM ' errnotify: en_pid = 0 - en_name = "xcat_rmc_errlog_sensor" + en_name = "xcat_rmc_errlog" en_persistenceflg = 1 en_method = "' . "$::XCATROOT/sbin/rmcmon/errmsgque" . ' sequence = $1 error_id = $2 class = $3 type = $4 alert_flags = $5 res_name = $6 res_type = $7 res_class = $8 label = $9" '; @@ -296,7 +296,7 @@ elsif ($^O =~ /^aix/i) { # set $ENV{'SENSOR_MonitorStatus'} to 2 # should not do clean up when IBM.SensorRM is stopped if (&isRMrunning("IBM.SensorRM")) { - runcmd("/bin/odmdelete -o errnotify -q \" en_name=xcat_rmc_errlog_sens\"", -1); + runcmd("/bin/odmdelete -o errnotify -q \" en_name=xcat_rmc_errlog\"", -1); if (-e $runfile) { unlink($runfile); }