more work on RMC monitoring

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@1755 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
linggao 2008-06-24 20:45:27 +00:00
parent 787c5eb8ff
commit daee2e2437
2 changed files with 114 additions and 123 deletions

View File

@ -18,14 +18,12 @@ use xCAT::MsgUtils;
#print "xCAT_monitoring::rmcmon loaded\n";
1;
#now the RMC domain can automatically setup when xCAT starts. predefined conditions and sensor are defined on ms.
#TODO: predefined responses
#TODO: node status monitoring for xCAT.
#TODO: script to define sensors on the nodes.
#TODO: how to push the sensor scripts to nodes?
#TODO: what to do when stop is called? stop all the associations or just the ones that were predefined? or leve them there?
#TODO: do we need to stop all the RMC daemons when stop is called?
#I will come back to work on these once I have SNMP stuff done.
#TODO: monitoring HMC with old RSCT and new RSCT
#-------------------------------------------------------------------------------
=head1 xCAT_monitoring:rmcmon
@ -52,7 +50,7 @@ use xCAT::MsgUtils;
=cut
#--------------------------------------------------------------------------------
sub start {
print "rmcmon::start called\n";
#print "rmcmon::start called\n";
my $noderef=xCAT_monitoring::monitorctrl->getMonHierarchy();
@ -85,7 +83,7 @@ sub start {
}
chomp($result);
my @rmc_nodes=split(/\n/, $result);
print "RMC defined nodes=@rmc_nodes\n";
#print "RMC defined nodes=@rmc_nodes\n";
#the identification of this node
@ -99,6 +97,7 @@ sub start {
my @key_a=split(',', $key);
if (! $iphash{$key_a[0]}) { next;}
my $mon_nodes=$noderef->{$key};
my $master=$key_a[1];
#check what has changed
my %summary;
@ -126,19 +125,19 @@ sub start {
my %nodes_status=xCAT_monitoring::rmcmon->pingNodeStatus(@nodes_to_add);
my $active_nodes=$nodes_status{$::STATUS_ACTIVE};
my $inactive_nodes=$nodes_status{$::STATUS_INACTIVE};
print "active nodes to add:@$active_nodes\ninactive nodes to add: @$inactive_nodes\n";
#print "active nodes to add:@$active_nodes\ninactive nodes to add: @$inactive_nodes\n";
if (@$inactive_nodes>0) {
xCAT::MsgUtils->message('SI', "[mon]: The following nodes cannot be added to the RMC cluster because they are inactive:\n @$inactive_nodes\n");
}
if (@$active_nodes>0) {
addNodes_noChecking(@$active_nodes);
addNodes_noChecking($active_nodes, $master);
}
}
#remove unwanted nodes to the RMC cluster
if (@nodes_to_remove>0) {
print "nodes to remove @nodes_to_remove\n";
removeNodes_noChecking(@nodes_to_remove);
#print "nodes to remove @nodes_to_remove\n";
removeNodes_noChecking(\@nodes_to_remove, $master);
}
#create conditions/responses/sensors on the service node or mn
@ -424,6 +423,34 @@ sub stopNodeStatusMon {
}
#--------------------------------------------------------------------------------
=head3 getNodeID
This function gets the nodeif for the given node.
Arguments:
node
Returns:
node id for the given node
=cut
#--------------------------------------------------------------------------------
sub getNodeID {
my $node=shift;
if ($node =~ /xCAT_monitoring::rmcmon/) {
$node=shift;
}
my $tab=xCAT::Table->new("mac", -create =>0);
my $tmp=$tab->getNodeAttribs($node, ['mac']);
if (defined($tmp) && ($tmp)) {
my $mac=$tmp->{mac};
$mac =~ s/://g;
$mac .= "0000";
$tab->close();
return $mac;
}
$tab->close();
return undef;
}
#--------------------------------------------------------------------------------
=head3 addNodes
This function adds the nodes into the RMC cluster.
@ -440,68 +467,9 @@ sub stopNodeStatusMon {
sub addNodes {
return (0, "ok"); #not handle it now, wait when nodelist.status work is done
my $noderef=shift;
if ($noderef =~ /xCAT_monitoring::rmcmon/) {
$noderef=shift;
}
my $bWithInfo=shift;
#print "rmcmon::addNodes get called\n";
my $mon_nodes=$noderef;
my @hostinfo=xCAT::Utils->determinehostname();
%iphash=();
foreach(@hostinfo) {$iphash{$_}=1;}
my @nodes_to_add=();
my $table3=xCAT::Table->new("nodetype", -create =>0);
foreach(@$mon_nodes) {
my $node_pair=$_;
my $node=$node_pair->[0];
my $status=$node_pair->[1];
#get node type
my $tmp3=$table3->getNodeAttribs($node, ['nodetype']);
my $nodetype="osi"; #default
if (defined($tmp3) && ($tmp3)) {
if ($tmp3->{nodetype}) { $nodetype=$tmp3->{nodetype}; }
}
if ($nodetype =~ /$::NODETYPE_OSI/) {
#RMC deals only with osi type. empty type is treated as osi type
#check if the node has already defined
$result=`lsrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`;
if (($?) && ($result !~ /2612-023/)) { #2612-023 no resources found error
xCAT::MsgUtils->message('SI', "[mon]: $result\n");
next;
}
#check if the node is active
my $active=0;
if (exists($iphash{$node})) { $active=1;}
elsif ($status && ($status eq $::STATUS_ACTIVE)) { $active=1; }
else {
`fping -a $node 2> /dev/null`;
if ($?==0) {$active=1; }
}
if (!$active) {
xCAT::MsgUtils->message('SI', "[mon]: Cannot add the node $node into the RMC domian. The node is inactive.\n");
next;
}
push(@nodes_to_add, $node);
}
}
$table3->close();
if (@nodes_to_add>0) {
return addNodes_noChecking(@nodes_to_add);
}
return (0, "ok");
}
#--------------------------------------------------------------------------------
=head3 addNodes_noChecking
This function gdds the nodes into the RMC cluster, it does not check the OSI type and
@ -513,9 +481,20 @@ sub addNodes {
=cut
#--------------------------------------------------------------------------------
sub addNodes_noChecking {
@mon_nodes = @_;
my $pmon_nodes=shift;
if ($pmon_nodes =~ /xCAT_monitoring::rmcmon/) {
$pmon_nodes=shift;
}
my @mon_nodes = @$pmon_nodes;
my $master=shift;
#print "rmcmon::addNodes_noChecking get called with @mon_nodes\n";
my @hostinfo=xCAT::Utils->determinehostname();
%iphash=();
foreach(@hostinfo) {$iphash{$_}=1;}
my $ms_host_name=hostname();
my $ms_node_id;
@ -545,7 +524,7 @@ sub addNodes_noChecking {
}
#get info for the node
if($ms_host_name eq $node) {
if($iphash{$node}) {
$mn_node_id=$ms_node_id;
} else {
$mn_node_id=`$::XCATROOT/bin/psh --nonodecheck $node /usr/sbin/rsct/bin/lsnodeid 2>&1`;
@ -575,8 +554,8 @@ sub addNodes_noChecking {
}
#copy the configuration script and run it locally
if($ms_host_name eq $node) {
$result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$ms_host_name"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id`;
if($iphash{$node}) {
$result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$master"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id`;
if ($?) {
xCAT::MsgUtils->message('SI', "[mon]: $result\n");
next;
@ -588,14 +567,14 @@ sub addNodes_noChecking {
next;
}
$result=`$::XCATROOT/bin/psh --nonodecheck $node NODE=$node MASTER_NAME=$ms_host_name MASTER_IPS=$ms_ipaddresses MASTER_NODEID=0x$ms_node_id /tmp/configrmcnode 1 2>&1`;
$result=`$::XCATROOT/bin/psh --nonodecheck $node NODE=$node MONSERVER=$master MS_NODEID=$ms_node_id /tmp/configrmcnode 1 2>&1`;
if ($?) {
xCAT::MsgUtils->message('SI', "[mon]: $result\n");
}
}
}
return (0, "ok");
return (0, "ok");
}
#--------------------------------------------------------------------------------
@ -614,42 +593,6 @@ sub addNodes_noChecking {
sub removeNodes {
return (0, "ok"); #not handle it now, wait when nodelist.status work is done
my $noderef=shift;
if ($noderef =~ /xCAT_monitoring::rmcmon/) {
$noderef=shift;
}
my $bWithInfo=shift;
my $mon_nodes=$noderef;
#print "rmcmon::removeNodes called\n";
my @nodes_to_remove=();
my $table3=xCAT::Table->new("nodetype", -create =>0);
foreach(@$mon_nodes) {
my $node_pair=$_;
my $node=$node_pair->[0];
my $status=$node_pair->[1];
#get node type
my $tmp3=$table3->getNodeAttribs($node, ['nodetype']);
my $nodetype="osi"; #default
if (defined($tmp3) && ($tmp3)) {
if ($tmp3->{nodetype}) { $nodetype=$tmp3->{nodetype}; }
}
if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) {
#RMC deals only with osi type. empty type is treated as osi type
push(@nodes_to_remove, $node);
}
}
$table3->close();
if (@nodes_to_remove>0) {
return removeNodes_noChecking(@nodes_to_remove);
}
return (0, "ok");
}
@ -657,13 +600,19 @@ sub removeNodes {
=head3 removeNodes_noChecking
This function removes the nodes from the RMC cluster.
Arguments:
nodes --an array of node names to be removed.
nodes --a pointer to a array of node names to be removed.
Returns:
(error code, error message)
=cut
#--------------------------------------------------------------------------------
sub removeNodes_noChecking {
my @mon_nodes = @_;
my $pmon_nodes=shift;
if ($pmon_nodes =~ /xCAT_monitoring::rmcmon/) {
$pmon_nodes=shift;
}
my @mon_nodes = @$pmon_nodes;
my $ms_host_name=hostname();
@ -740,7 +689,18 @@ sub processSettingChanges {
sub getDescription {
return
" Description:
rmcmon .....
rmcmon uses IBM's Resource Monitoring and Control (RMC) component
of Reliable Scalable Cluster Technology (RSCT) to monitor the
xCAT cluster. RMC has built-in resources such as CPU, memory,
process, network, file system etc for monitoring. RMC can also be
used to provide node liveness status monitoring for xCAT. RMC is
good for threadhold monitoring. xCAT automatically sets up the
monitoring domain for RMC during node deployment time. To start
RMC monitoring, use
monstart rmcmon
or
monstart rmcmon -n (to include node status monitoring).
Settings:
key: value.\n";
none.\n";
}

View File

@ -6,21 +6,40 @@
# This script is used for RMC node configuration
# usage:
# To add node to the cluster:
# NODE=nodename MASTER_NAME=msname \
# MASTER_IPS={"9.114.46.26","..."} MASTER_NODEID=0xfbb5ec1f64dd299c \
# NODE=nodename NODEID=fbb5ec1f64dd299c \
# MONSERVER=msname_or_ip MS_NODEID=fbb5ec1f64dd299c \
# configrmcnode 1
# To remove node to the cluster
# NODE=nodename configrmcnode -1
##########################################################################################
ADD=$1;
logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, MASTER_NAME=$MASTER_NAME, MASTER_IPS=$MASTER_IPS, MASTER_NODEID=$MASTER_NODEID"
logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONSERVER=$MONSERVER,MS_NODEID=$MS_NODEID"
#check if rsct is installed and running
if [ ! -e /usr/bin/lsrsrc ]; then
logger xCAT "RMC setup on node $NODE: RSCT is not is not installed."
exit 1;
fi
#ask RMC take the new nodeid
if [[ $NODEID != "" ]]; then
if [ -e /var/ct/cfg/ct_node_id ]; then
sed -i s/^[^\#].*$/$NODEID/ /var/ct/cfg/ct_node_id
else
echo $NODEID > /var/ct/cfg/ct_node_id
fi
if [ -e /etc/ct_node_id ]; then
sed -i s/^[^\#].*$/$NODEID/ /etc/ct_node_id
else
echo $NODEID > /etc/ct_node_id
fi
result=`/usr/sbin/rsct/install/bin/recfgct -s 2>&1`
if [ $? -ne 0 ]; then
logger xCAT "RMC setup on node $NODE: Cannot reconfig RSCT with new node id"
fi
fi
PID=`/bin/ps -ef | /bin/grep rmcd | /bin/grep -v grep | /bin/awk '{print $2}'`
if [ !$PID ]; then
@ -32,12 +51,24 @@ if [ !$PID ]; then
fi
fi
#enable remote client connection
/usr/bin/rmcctrl -p; /usr/bin/refrsrc IBM.MCP
if [ $ADD -eq 1 ]; then
#enable remote client connection
/usr/bin/rmcctrl -p; /usr/bin/refrsrc IBM.MCP
#get IP address of MS
ms_ip=$MONSERVER
result=`ping -c1 $$MONSERVER 2>&1`
if [ $? -eq 0 ]; then
index1=`expr index "$result" "\("`
index2=`expr index "$result" "\)"`
ms_ip=${result:$index1+1:$index2-$index1-2}
else
logger xCAT "RMC setup on node $NODE:$result"
fi
#define resource in IBM.MCP class on node
result1=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$NODE"::KeyToken::"$MASTER_NAME"::IPAddresses::"$MASTER_IPS"::NodeID::$MASTER_NODEID 2>&1`
result1=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$NODE"::KeyToken::"$MONSERVER"::IPAddresses::"{\"$ms_ip\"}"::NodeID::0x$MS_NODEID 2>&1`
if [ $? -gt 0 ]; then
logger xCAT "Define resource in IBM.MCP class on node $NODE. result=$result1"
exit 1