fix RMC monitoring for scaling

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2522 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
linggao 2008-11-22 04:01:31 +00:00
parent f89c50077d
commit b47e2d3585
4 changed files with 78 additions and 24 deletions

View File

@ -860,8 +860,7 @@ sub reportError
my $callback=shift;
if ($callback) {
my $rsp={};
my $localhostname=hostname();
$rsp->{data}->[0]="$localhostname: $error";
$rsp->{data}->[0]=$error;
$callback->($rsp);
} else { xCAT::MsgUtils->message('S', "[mon]: $error\n"); }
return;
@ -989,11 +988,14 @@ sub addNodes {
}
}
if ($scope==0) { next; }
if ($inactiveHash{$node}) { next;}
push(@normal_nodes, $node);
if ($scope==0) { next; }
#copy the configuration script and run it locally
if($iphash{$node}) {
pop(@normal_nodes);
$result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$master"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id`;
if ($?) {
reportError($result, $callback);
@ -1001,6 +1003,7 @@ sub addNodes {
}
} else {
if ($flag) { #define MCP on HMC
pop(@normal_nodes);
if (!$hmc_ssh_enabled) {
my $result=`XCATBYPASS=Y $::XCATROOT/bin/rspconfig $node sshcfg=enable 2>&1`;
if ($?) {
@ -1023,8 +1026,6 @@ sub addNodes {
if ($?) { reportError($result, $callback); }
}
}
} else { #normal nodes
push(@normal_nodes, $node);
}
}
}
@ -1032,21 +1033,39 @@ sub addNodes {
#let the updatenode command handle the normal nodes in bulk
if (@normal_nodes>0) {
my $nr=join(',',@normal_nodes);
reportError("Configuring the following nodes. It may takes a while.\n$nr", $callback);
#use 2 here to tell xcataixpost that there is only one postscript, download only it. It applies to AIX only
#get the fanout value
my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon");
my $fanout_string="";
my $fanout_value=$settings{'rfanout'};
if ($fanout_value) { $fanout_string="DSH_FANOUT=$fanout_value";}
#for local mode, need to refresh the IBM.MCP class to initialize the hb
if ($scope==0) {
#$result=`XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr /usr/bin/refrsrc-api -c IBM.MCP 2>&1"`;
if ($?) { reportError($result, $callback); }
return (0, "ok");
}
#this is the remote case
reportError("Configuring the following nodes. It may take a while.\n$nr", $callback);
my $cmd;
#use 2 here to tell xcataixpost that there is only one postscript, download only it. It applies to AIX only
if (xCAT::Utils->isLinux()) {
$cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcatdsklspost 2 configrmcnode 2>&1";
$cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcatdsklspost 2 configrmcnode 2>&1";
}
else {
$cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcataixpost 2 configrmcnode 2>&1";
$cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcataixpost 2 configrmcnode 2>&1";
}
if (! open (CMD, "$cmd |")) {
reportError("Cannot run command $cmd", $callback);
} else {
while (<CMD>) {
reportError("$_", $callback);
chomp;
my $rsp={};
$rsp->{data}->[0]="$_";
$callback->($rsp);
}
close(CMD);
}
@ -1175,20 +1194,29 @@ sub removeNodes {
#let the updatenode command handle the normal nodes in bulk
if (@normal_nodes>0) {
my $nr=join(',',@normal_nodes);
my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon");
my $fanout_string="";
my $fanout_value=$settings{'rfanout'};
if ($fanout_value) { $fanout_string="DSH_FANOUT=$fanout_value";}
#copy the configuration script and run it locally
$result=`XCATBYPASS=Y $::XCATROOT/bin/xdcp $nr $::XCATROOT/sbin/rmcmon/configrmcnode /tmp 2>&1 `;
$result=`XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdcp $nr $::XCATROOT/sbin/rmcmon/configrmcnode /tmp 2>&1 `;
if ($?) {
reportError("rmcmon:removeNodes: cannot copy the file configrmcnode to nodes $nr:\$result", $callback);
next;
reportError("$result", $callback);
}
reportError("De-configuring the following nodes. It may takes a while.\n$nr", $callback);
my $cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s MS_NODEID=$ms_node_id /tmp/configrmcnode -1 2>&1";
reportError("De-configuring the following nodes. It may take a while.\n$nr", $callback);
my $cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s MS_NODEID=$ms_node_id /tmp/configrmcnode -1 2>&1";
if (! open (CMD1, "$cmd |")) {
reportError("Cannot run command $cmd", $callback);
} else {
while (<CMD1>) {
reportError("$_", $callback);
chomp;
my $rsp={};
$rsp->{data}->[0]="$_";
$callback->($rsp);
}
close(CMD1);
}
@ -1236,7 +1264,8 @@ sub getDescription {
good for threadhold monitoring. xCAT automatically sets up the
monitoring domain for RMC during node deployment time.
Settings:
none.";
rfanout -- indicating the fanout number for configuring or deconfiguring
remote nodes.";
}
#--------------------------------------------------------------------------------

View File

@ -3,10 +3,8 @@
$RES::Condition{'NodeReachability'} = {
Name => q(NodeReachability),
ResourceClass => q(IBM.MngNode),
EventExpression => q(Status@P==1 && Status!=1),
EventDescription => q(An event will be generated when a node becomes network unreachable from the management server.),
RearmExpression => q(Status=1),
RearmDescription => q(A rearm event will be generated when the node is reachable again.),
EventExpression => q(Status!=Status@P),
EventDescription => q(An event will be generated when a status changes),
ManagementScope => q(1),
Severity => q(2),
};

View File

@ -22,7 +22,7 @@ if [ -z "$NODE" ]; then
NODE=`hostname`
fi
logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONMASTER=$MONMASTER,MS_NODEID=$MS_NODEID"
#logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONMASTER=$MONMASTER,MS_NODEID=$MS_NODEID"
#check if rsct is installed and running
if [ ! -e /usr/bin/lsrsrc ]; then
@ -139,6 +139,32 @@ if [ -z "$PID" ]; then
fi
fi
#wait for the RMC daemon to become fully functional
RETRY_LIMIT=20 # number of 2 second retry intervals (40 second total)
let i=$RETRY_LIMIT
while :
do
lsrsrc > /dev/null 2>&1
rc=$?
if [[ $rc -eq 0 ]]
then
break
fi
i=$((i - 1))
if (( i > 0 ))
then
sleep 2
else
break
fi
done
if [[ $rc -ne 0 ]]
then
logger xCAT "RMC subsystem has not started"
exit 1
fi
if [ $ADD -eq 1 ]; then
#enable remote client connection
@ -178,6 +204,7 @@ else
exit 1
fi
fi
echo "done"
exit 0;

View File

@ -244,7 +244,7 @@ if (!-e $runfile){
print ODM '
errnotify:
en_pid = 0
en_name = "xcat_rmc_errlog_sensor"
en_name = "xcat_rmc_errlog"
en_persistenceflg = 1
en_method = "' . "$::XCATROOT/sbin/rmcmon/errmsgque" . ' sequence = $1 error_id = $2 class = $3 type = $4 alert_flags = $5 res_name = $6 res_type = $7 res_class = $8 label = $9"
';
@ -296,7 +296,7 @@ elsif ($^O =~ /^aix/i) {
# set $ENV{'SENSOR_MonitorStatus'} to 2
# should not do clean up when IBM.SensorRM is stopped
if (&isRMrunning("IBM.SensorRM")) {
runcmd("/bin/odmdelete -o errnotify -q \" en_name=xcat_rmc_errlog_sens\"", -1);
runcmd("/bin/odmdelete -o errnotify -q \" en_name=xcat_rmc_errlog\"", -1);
if (-e $runfile) {
unlink($runfile);
}