fix RMC monitoring for scaling
git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2522 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
parent
f89c50077d
commit
b47e2d3585
@ -860,8 +860,7 @@ sub reportError
|
||||
my $callback=shift;
|
||||
if ($callback) {
|
||||
my $rsp={};
|
||||
my $localhostname=hostname();
|
||||
$rsp->{data}->[0]="$localhostname: $error";
|
||||
$rsp->{data}->[0]=$error;
|
||||
$callback->($rsp);
|
||||
} else { xCAT::MsgUtils->message('S', "[mon]: $error\n"); }
|
||||
return;
|
||||
@ -989,11 +988,14 @@ sub addNodes {
|
||||
}
|
||||
}
|
||||
|
||||
if ($scope==0) { next; }
|
||||
if ($inactiveHash{$node}) { next;}
|
||||
|
||||
push(@normal_nodes, $node);
|
||||
if ($scope==0) { next; }
|
||||
|
||||
#copy the configuration script and run it locally
|
||||
if($iphash{$node}) {
|
||||
pop(@normal_nodes);
|
||||
$result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$master"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id`;
|
||||
if ($?) {
|
||||
reportError($result, $callback);
|
||||
@ -1001,6 +1003,7 @@ sub addNodes {
|
||||
}
|
||||
} else {
|
||||
if ($flag) { #define MCP on HMC
|
||||
pop(@normal_nodes);
|
||||
if (!$hmc_ssh_enabled) {
|
||||
my $result=`XCATBYPASS=Y $::XCATROOT/bin/rspconfig $node sshcfg=enable 2>&1`;
|
||||
if ($?) {
|
||||
@ -1023,8 +1026,6 @@ sub addNodes {
|
||||
if ($?) { reportError($result, $callback); }
|
||||
}
|
||||
}
|
||||
} else { #normal nodes
|
||||
push(@normal_nodes, $node);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1032,21 +1033,39 @@ sub addNodes {
|
||||
#let the updatenode command handle the normal nodes in bulk
|
||||
if (@normal_nodes>0) {
|
||||
my $nr=join(',',@normal_nodes);
|
||||
reportError("Configuring the following nodes. It may takes a while.\n$nr", $callback);
|
||||
|
||||
#use 2 here to tell xcataixpost that there is only one postscript, download only it. It applies to AIX only
|
||||
#get the fanout value
|
||||
my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon");
|
||||
|
||||
my $fanout_string="";
|
||||
my $fanout_value=$settings{'rfanout'};
|
||||
if ($fanout_value) { $fanout_string="DSH_FANOUT=$fanout_value";}
|
||||
|
||||
#for local mode, need to refresh the IBM.MCP class to initialize the hb
|
||||
if ($scope==0) {
|
||||
#$result=`XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr /usr/bin/refrsrc-api -c IBM.MCP 2>&1"`;
|
||||
if ($?) { reportError($result, $callback); }
|
||||
return (0, "ok");
|
||||
}
|
||||
|
||||
#this is the remote case
|
||||
reportError("Configuring the following nodes. It may take a while.\n$nr", $callback);
|
||||
my $cmd;
|
||||
#use 2 here to tell xcataixpost that there is only one postscript, download only it. It applies to AIX only
|
||||
if (xCAT::Utils->isLinux()) {
|
||||
$cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcatdsklspost 2 configrmcnode 2>&1";
|
||||
$cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcatdsklspost 2 configrmcnode 2>&1";
|
||||
}
|
||||
else {
|
||||
$cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcataixpost 2 configrmcnode 2>&1";
|
||||
$cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s -e /install/postscripts/xcataixpost 2 configrmcnode 2>&1";
|
||||
}
|
||||
if (! open (CMD, "$cmd |")) {
|
||||
reportError("Cannot run command $cmd", $callback);
|
||||
} else {
|
||||
while (<CMD>) {
|
||||
reportError("$_", $callback);
|
||||
chomp;
|
||||
my $rsp={};
|
||||
$rsp->{data}->[0]="$_";
|
||||
$callback->($rsp);
|
||||
}
|
||||
close(CMD);
|
||||
}
|
||||
@ -1175,20 +1194,29 @@ sub removeNodes {
|
||||
#let the updatenode command handle the normal nodes in bulk
|
||||
if (@normal_nodes>0) {
|
||||
my $nr=join(',',@normal_nodes);
|
||||
|
||||
my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("rmcmon");
|
||||
|
||||
my $fanout_string="";
|
||||
my $fanout_value=$settings{'rfanout'};
|
||||
if ($fanout_value) { $fanout_string="DSH_FANOUT=$fanout_value";}
|
||||
|
||||
#copy the configuration script and run it locally
|
||||
$result=`XCATBYPASS=Y $::XCATROOT/bin/xdcp $nr $::XCATROOT/sbin/rmcmon/configrmcnode /tmp 2>&1 `;
|
||||
$result=`XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdcp $nr $::XCATROOT/sbin/rmcmon/configrmcnode /tmp 2>&1 `;
|
||||
if ($?) {
|
||||
reportError("rmcmon:removeNodes: cannot copy the file configrmcnode to nodes $nr:\$result", $callback);
|
||||
next;
|
||||
reportError("$result", $callback);
|
||||
}
|
||||
|
||||
reportError("De-configuring the following nodes. It may takes a while.\n$nr", $callback);
|
||||
my $cmd="XCATBYPASS=Y $::XCATROOT/bin/xdsh $nr -s MS_NODEID=$ms_node_id /tmp/configrmcnode -1 2>&1";
|
||||
reportError("De-configuring the following nodes. It may take a while.\n$nr", $callback);
|
||||
my $cmd="XCATBYPASS=Y $fanout_string $::XCATROOT/bin/xdsh $nr -s MS_NODEID=$ms_node_id /tmp/configrmcnode -1 2>&1";
|
||||
if (! open (CMD1, "$cmd |")) {
|
||||
reportError("Cannot run command $cmd", $callback);
|
||||
} else {
|
||||
while (<CMD1>) {
|
||||
reportError("$_", $callback);
|
||||
chomp;
|
||||
my $rsp={};
|
||||
$rsp->{data}->[0]="$_";
|
||||
$callback->($rsp);
|
||||
}
|
||||
close(CMD1);
|
||||
}
|
||||
@ -1236,7 +1264,8 @@ sub getDescription {
|
||||
good for threadhold monitoring. xCAT automatically sets up the
|
||||
monitoring domain for RMC during node deployment time.
|
||||
Settings:
|
||||
none.";
|
||||
rfanout -- indicating the fanout number for configuring or deconfiguring
|
||||
remote nodes.";
|
||||
}
|
||||
|
||||
#--------------------------------------------------------------------------------
|
||||
|
@ -3,10 +3,8 @@
|
||||
$RES::Condition{'NodeReachability'} = {
|
||||
Name => q(NodeReachability),
|
||||
ResourceClass => q(IBM.MngNode),
|
||||
EventExpression => q(Status@P==1 && Status!=1),
|
||||
EventDescription => q(An event will be generated when a node becomes network unreachable from the management server.),
|
||||
RearmExpression => q(Status=1),
|
||||
RearmDescription => q(A rearm event will be generated when the node is reachable again.),
|
||||
EventExpression => q(Status!=Status@P),
|
||||
EventDescription => q(An event will be generated when a status changes),
|
||||
ManagementScope => q(1),
|
||||
Severity => q(2),
|
||||
};
|
||||
|
@ -22,7 +22,7 @@ if [ -z "$NODE" ]; then
|
||||
NODE=`hostname`
|
||||
fi
|
||||
|
||||
logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONMASTER=$MONMASTER,MS_NODEID=$MS_NODEID"
|
||||
#logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONMASTER=$MONMASTER,MS_NODEID=$MS_NODEID"
|
||||
|
||||
#check if rsct is installed and running
|
||||
if [ ! -e /usr/bin/lsrsrc ]; then
|
||||
@ -139,6 +139,32 @@ if [ -z "$PID" ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
#wait for the RMC daemon to become fully functional
|
||||
RETRY_LIMIT=20 # number of 2 second retry intervals (40 second total)
|
||||
let i=$RETRY_LIMIT
|
||||
while :
|
||||
do
|
||||
lsrsrc > /dev/null 2>&1
|
||||
rc=$?
|
||||
if [[ $rc -eq 0 ]]
|
||||
then
|
||||
break
|
||||
fi
|
||||
|
||||
i=$((i - 1))
|
||||
if (( i > 0 ))
|
||||
then
|
||||
sleep 2
|
||||
else
|
||||
break
|
||||
fi
|
||||
done
|
||||
if [[ $rc -ne 0 ]]
|
||||
then
|
||||
logger xCAT "RMC subsystem has not started"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
if [ $ADD -eq 1 ]; then
|
||||
#enable remote client connection
|
||||
@ -178,6 +204,7 @@ else
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
echo "done"
|
||||
exit 0;
|
||||
|
||||
|
||||
|
@ -244,7 +244,7 @@ if (!-e $runfile){
|
||||
print ODM '
|
||||
errnotify:
|
||||
en_pid = 0
|
||||
en_name = "xcat_rmc_errlog_sensor"
|
||||
en_name = "xcat_rmc_errlog"
|
||||
en_persistenceflg = 1
|
||||
en_method = "' . "$::XCATROOT/sbin/rmcmon/errmsgque" . ' sequence = $1 error_id = $2 class = $3 type = $4 alert_flags = $5 res_name = $6 res_type = $7 res_class = $8 label = $9"
|
||||
';
|
||||
@ -296,7 +296,7 @@ elsif ($^O =~ /^aix/i) {
|
||||
# set $ENV{'SENSOR_MonitorStatus'} to 2
|
||||
# should not do clean up when IBM.SensorRM is stopped
|
||||
if (&isRMrunning("IBM.SensorRM")) {
|
||||
runcmd("/bin/odmdelete -o errnotify -q \" en_name=xcat_rmc_errlog_sens\"", -1);
|
||||
runcmd("/bin/odmdelete -o errnotify -q \" en_name=xcat_rmc_errlog\"", -1);
|
||||
if (-e $runfile) {
|
||||
unlink($runfile);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user