more work on RMC monitoring

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@530 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
linggao 2008-02-20 02:02:11 +00:00
parent 502900fee2
commit b5869e05a4
3 changed files with 323 additions and 176 deletions

View File

@ -13,8 +13,18 @@ use xCAT::Utils;
use xCAT::GlobalDef;
#print "xCAT_monitoring::rmcmon loaded\n";
1;
#now the RMC domain can be set up automatically when xCAT starts. predefined conditions and sensors are defined on ms.
#TODO: convert all print statements to logging
#TODO: predefined responses
#TODO: node status monitoring for xCAT.
#TODO: script to define sensors on the nodes.
#TODO: how to push the sensor scripts to nodes?
#TODO: what to do when stop is called? stop all the associations or just the ones that were predefined? or leave them there?
#TODO: do we need to stop all the RMC daemons when stop is called?
#I will come back to work on these once I have SNMP stuff done.
#-------------------------------------------------------------------------------
=head1 xCAT_monitoring:rmcmon
=head2 Package Description
@ -66,29 +76,115 @@ sub start {
return (1, "rmc deamon cannot be started\n");
}
}
#enable remote client connection
`/usr/bin/rmcctrl -p`;
#get a list of managed nodes
$result=`/usr/bin/lsrsrc-api -s IBM.MngNode::::Name`;
$result=`/usr/bin/lsrsrc-api -s IBM.MngNode::::Name 2>&1`;
if ($?) {
if ($result !~ /2612-023/) {#2612-023 no resources found error
print "$result\n";
return (1,$result);
}
$result='';
}
chomp($result);
my @rmc_nodes=split(/\n/, $result);
#print "all defined nodes=@rmc_nodes\n";
foreach (keys(%$noderef)) {
my $server=$_;
my $localhostname=hostname();
my $mon_nodes=$noderef->{$localhostname};
my $mon_nodes=$noderef->{$_};
#check what has changed
my %summary;
foreach (@rmc_nodes) { $summary{$_}=-1;}
if ($mon_nodes) {
foreach(@$mon_nodes) {
my $node_pair=$_;
my $node=$node_pair->[0];
my $nodetype=$node_pair->[1];
my $node=$_->[0];
my $nodetype=$_->[1];
if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) {
$summary{$node}++;
}
}
}
my @nodes_to_add=();
my @nodes_to_remove=();
foreach(keys(%summary)) {
#print "node=$_ summary=$summary{$_}\n";
if ($summary{$_}==1) {push(@nodes_to_add, $_);}
elsif ($summary{$_}==-1) {push(@nodes_to_remove, $_);}
}
#add new nodes to the RMC cluster
if (@nodes_to_add>0) {
my %nodes_status=xCAT_monitoring::rmcmon::pingNodeStatus(@nodes_to_add);
my $active_nodes=$nodes_status{$::STATUS_ACTIVE};
my $inactive_nodes=$nodes_status{$::STATUS_INACTIVE};
if (@$inactive_nodes>0) {
print "The following nodes cannot be added to the RMC cluster because they are inactive:\n @$inactive_nodes\n"
}
if (@$active_nodes>0) {
print "active nodes to add:\n @$active_nodes\n";
addNodes_noChecking(@$active_nodes);
}
}
#remove unwanted nodes to the RMC cluster
if (@nodes_to_remove>0) {
print "nodes to remove @nodes_to_remove\n";
removeNodes_noChecking(@nodes_to_remove);
}
#start condition-response assosciations
my $result=`$::XCATROOT/sbin/rmcmon/mkrmcresources $::XCATROOT/lib/perl/xCAT_monitoring/rmc/resources/ms 2>&1`;
if ($?) {
print "Error when creating predefined resources on $localhostname:\n$result\n";
}
return (0, "started");
}
#--------------------------------------------------------------------------------
=head3    pingNodeStatus

      This function takes an array of nodes and returns their status using fping.

    Arguments:
       nodes -- an array of node names. The function may be called either as a
                plain function or with the package name as the first argument;
                a leading package name is recognised and discarded.
    Returns:
       a hash that has the node status, keyed by the values of
       $::STATUS_ACTIVE and $::STATUS_INACTIVE. The format is:
          ($::STATUS_ACTIVE=>[node1, node3,...], $::STATUS_INACTIVE=>[node4, node2...])
       Both keys are always present; the lists are empty when no nodes are given.

=cut

#--------------------------------------------------------------------------------
sub pingNodeStatus {
    my @mon_nodes = @_;

    # Callers in this file invoke the function without a class argument, so the
    # first element is only dropped when it really is the package name.
    # Unconditionally shifting it would silently lose the first node.
    if (@mon_nodes && ($mon_nodes[0] =~ /xCAT_monitoring::rmcmon/)) {
        shift @mon_nodes;
    }

    my %status         = ();
    my @active_nodes   = ();
    my @inactive_nodes = ();

    if (@mon_nodes > 0) {
        # get all the active nodes: every line fping writes to stdout is
        # treated as the name of a node that answered
        my $nodes = join(' ', @mon_nodes);
        my $temp  = `fping -a $nodes 2> /dev/null`;
        $temp = '' unless defined $temp;
        chomp($temp);
        @active_nodes = split(/\n/, $temp);

        # get all the inactive nodes by subtracting the active nodes from all
        my %is_active = map { $_ => 1 } @active_nodes;
        @inactive_nodes = grep { !$is_active{$_} } @mon_nodes;
    }

    $status{$::STATUS_ACTIVE}   = \@active_nodes;
    $status{$::STATUS_INACTIVE} = \@inactive_nodes;
    return %status;
}
@ -176,7 +272,7 @@ sub stopNodeStatusMon {
#--------------------------------------------------------------------------------
=head3 addNodes
This function gdds the nodes into the RMC cluster.
This function adds the nodes into the RMC cluster.
Arguments:
nodes --nodes to be added. It is a hash reference keyed by the monitoring server
nodes and each value is a ref to an array of [nodes, nodetype, status] arrays monitored
@ -188,130 +284,143 @@ sub stopNodeStatusMon {
=cut
#--------------------------------------------------------------------------------
sub addNodes {
$noderef=shift;
my $noderef=shift;
if ($noderef =~ /xCAT_monitoring::rmcmon/) {
$noderef=shift;
}
my $VERBOSE=shift;
#if ($VERBOSE) { print "rmcmon::addNodes called $noderef=$noderef\n"};
#print "rmcmon::addNodes get called\n";
my $ms_host_name=hostname();
chomp($ms_host_name);
my $mon_nodes=$noderef->{$ms_host_name};
foreach (keys(%$noderef)) {
my $server=$_;
#if ($VERBOSE) { print " monitoring server: $server\n";}
my @nodes_to_add=();
foreach(@$mon_nodes) {
my $node_pair=$_;
my $node=$node_pair->[0];
my $nodetype=$node_pair->[1];
if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) {
#RMC deals only with osi type. empty type is treated as osi type
#check if the node has already defined
$result=`lsrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`;
if (($?) && ($result !~ /2612-023/)) { #2612-023 no resources found error
print "$result\n";
next;
}
if ($server ne $ms_host_name) {
next; #only handle the nodes which has this node as the monitor server
}
#check if rsct is installed and running
if (! -e "/usr/bin/lsrsrc") {
print "RSCT is not installed.\n";
return 1;
}
my $result=`/usr/bin/lssrc -s ctrmc`;
if ($result !~ /active/) {
#restart rmc daemon
$result=`startsrc -s ctrmc`;
if ($?) {
print "rmc deamon cannot be started\n";
return 1;
}
}
#enable remote client connection
`/usr/bin/rmcctrl -p`;
#ms node id, hostname, ip defs
my $ms_node_id;
my $ms_name,$ms_aliases,$ms_addrtype,$ms_length,@ms_addrs;
my $ms_ipaddresses;
#if ($VERBOSE) {
# print " ms_host_name=$ms_host_name, ms_nam=$ms_name, ms_aliases=$ms_aliases, ms_ip_addr=$ms_ipaddresses, ms_node_id=$ms_node_id\n";
#}
my $mon_nodes=$noderef->{$_};
my $first_time=1;
foreach(@$mon_nodes) {
my $node_pair=$_;
my $node=$node_pair->[0];
my $nodetype=$node_pair->[1];
if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) {
#RMC deals only with osi type. empty type is treated as osi type
#check if the node has already defined
$result=`lsrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`;
if ($? == 0) { #resource has already defined
next;
}
#TODO: check all nodes at the same time or use the 'status' value in the node
#TODO: check all nodes at the same time or use the 'status' value in the node
if ($ms_host_name ne $node) {
`fping -a $node 2> /dev/null`;
if ($?) {
print "Cannot add the node $node into the RMC domian. The node is inactive.\n";
next;
}
if ($first_time) {
$first_time=0;
#enable remote client connection
`/usr/bin/rmcctrl -p`;
#get ms node id, hostname, ip etc
$ms_node_id=`head -n 1 /var/ct/cfg/ct_node_id`;
chomp($ms_node_id);
($ms_name,$ms_aliases,$ms_addrtype,$ms_length,@ms_addrs) = gethostbyname($ms_host_name);
chomp($ms_name);
$ms_ipaddresses="{";
foreach (@ms_addrs) {
$ms_ipaddresses .= '"' .inet_ntoa($_) . '",';
}
chop($ms_ipaddresses);
$ms_ipaddresses .= "}";
}
#get info for the node
$mn_node_id=`psh $node "head -n 1 /var/ct/cfg/ct_node_id" 2>&1`;
$mn_node_id =~ s/.*([0-9 a-g]{16}).*/$1/s;
my ($mn_name,$mn_aliases,$mn_addrtype,$mn_length,@mn_addrs) = gethostbyname($node);
chomp($mn_name);
my $mn_ipaddresses="{";
foreach (@mn_addrs) {
$mn_ipaddresses .= '"'.inet_ntoa($_) . '",';
}
chop($mn_ipaddresses);
$mn_ipaddresses .= "}";
#if ($VERBOSE) {
# print " mn_name=$mn_name, mn_aliases=$mn_aliases, mn_ipaddr=$mn_ipaddresses, mn_node_id=$mn_node_id\n";
#}
# define resource in IBM.MngNode class on server
$result=`mkrsrc-api IBM.MngNode::Name::"$node"::KeyToken::"$node"::IPAddresses::"$mn_ipaddresses"::NodeID::0x$mn_node_id 2>&1`;
if ($?) {
print "define resource in IBM.MngNode class result=$result\n";
}
#copy the configuration script and run it locally
$result=`scp $::XCATROOT/sbin/rmcmon/configrmcnode $node:/tmp 2>&1`;
if ($?) {
print "rmcmon:addNodes: cannot copy the file configrmcnode to node $node\n";
print "Cannot add the node $node into the RMC domian. The node is inactive.\n";
next;
}
}
$result=`psh $node /tmp/configrmcnode -a $node $ms_host_name $ms_ipaddresses 0x$ms_node_id 2>&1`;
if ($?) {
print "$result\n";
}
}
}
push(@nodes_to_add, $node);
}
}
if (@nodes_to_add>0) {
return addNodes_noChecking(@nodes_to_add);
}
return 0;
}
#--------------------------------------------------------------------------------
=head3    addNodes_noChecking

      This function adds the nodes into the RMC cluster. It does not check the
      OSI type, nor whether the node has already been defined.

    Arguments:
       nodes -- an array of node names to be added.
    Returns:
       0 in all cases. A failure for one node is printed and that node is
       skipped; the remaining nodes are still processed.

=cut

#--------------------------------------------------------------------------------
sub addNodes_noChecking {
    my @mon_nodes = @_;
    #print "rmcmon::addNodes_noChecking get called with @mon_nodes\n";

    # nothing to add, so do not touch the local RMC configuration at all
    return 0 unless @mon_nodes;

    my $ms_host_name = hostname();

    # get ms node id and ip addresses once; they are the same for every node
    my $ms_node_id = `head -n 1 /var/ct/cfg/ct_node_id`;
    chomp($ms_node_id);
    my (undef, undef, undef, undef, @ms_addrs) = gethostbyname($ms_host_name);
    # join() keeps the braces balanced even when the name does not resolve
    my $ms_ipaddresses = '{' . join(',', map { '"' . inet_ntoa($_) . '"' } @ms_addrs) . '}';

    foreach my $node (@mon_nodes) {
        #get info for the node
        my $mn_node_id;
        if ($ms_host_name eq $node) {
            $mn_node_id = $ms_node_id;
        }
        else {
            $mn_node_id = `$::XCATROOT/bin/psh --nonodecheck $node "head -n 1 /var/ct/cfg/ct_node_id" 2>&1`;
            if ($?) {
                print "Cannot get NodeID for $node. $mn_node_id\n";
                next;
            }
            # keep only the 16 hex digit id out of the psh output
            unless ($mn_node_id =~ s/.*([0-9a-fA-F]{16}).*/$1/s) {
                print "No node id found for $node:\n$mn_node_id\n";
                next;
            }
        }

        my (undef, undef, undef, undef, @mn_addrs) = gethostbyname($node);
        my $mn_ipaddresses = '{' . join(',', map { '"' . inet_ntoa($_) . '"' } @mn_addrs) . '}';
        # print "    mn_ipaddr=$mn_ipaddresses,  mn_node_id=$mn_node_id\n";

        # define resource in IBM.MngNode class on server
        my $result = `mkrsrc-api IBM.MngNode::Name::"$node"::KeyToken::"$node"::IPAddresses::"$mn_ipaddresses"::NodeID::0x$mn_node_id 2>&1`;
        if ($?) {
            print "define resource in IBM.MngNode class result=$result\n";
            next;
        }

        if ($ms_host_name eq $node) {
            # the node is this server itself: define IBM.MCP locally.
            # stderr is captured so the message printed on failure is not empty.
            $result = `/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$ms_host_name"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id 2>&1`;
            if ($?) {
                print "$result\n";
            }
        }
        else {
            #copy the configuration script and run it locally
            $result = `scp $::XCATROOT/sbin/rmcmon/configrmcnode $node:/tmp 2>&1`;
            if ($?) {
                print "rmcmon:addNodes: cannot copy the file configrmcnode to node $node\n";
                next;
            }

            $result = `$::XCATROOT/bin/psh --nonodecheck $node /tmp/configrmcnode -a $node $ms_host_name $ms_ipaddresses 0x$ms_node_id 2>&1`;
            if ($?) {
                print "$result\n";
            }
        }
    }

    return 0;
}
@ -330,59 +439,91 @@ sub addNodes {
=cut
#--------------------------------------------------------------------------------
sub removeNodes {
$noderef=shift;
my $noderef=shift;
if ($noderef =~ /xCAT_monitoring::rmcmon/) {
$noderef=shift;
}
my $VERBOSE=shift;
#if ($VERBOSE) { print "rmcmon::removeNodes called\n"};
#print "rmcmon::removeNodes called\n";
my $ms_host_name=hostname();
my $mon_nodes=$noderef->{$ms_host_name};
my $local_host_name=hostname();
chomp($local_host_name);
foreach (keys(%$noderef)) {
$server=$_;
#print " monitoring server: $server local_host_name=$local_host_name\n";
#only handle the nodes which has this node as the monitor server
if ($server ne $local_host_name) {next; }
my @nodes_to_remove=();
my $mon_nodes=$noderef->{$_};
foreach(@$mon_nodes) {
my $node_pair=$_;
my $node=$node_pair->[0];
my $nodetype=$node_pair->[1];
if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) {
#RMC deals only with osi type. empty type is treated as osi type
#remove resource in IBM.MngNode class on server
my $result=`rmrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`;
if ($?) { print "remove resource in IBM.MngNode class result=$result\n"; }
if ($result =~ m/2612-023/) { #resource not found
next;
}
# TODO: check all the nodes together or use the 'status' value
#if the node is inactive, forget it
`fping -a $node 2> /dev/null`;
if ($?) {
next;
}
#copy the configuration script and run it locally
$result=`scp $::XCATROOT/sbin/rmcmon/configrmcnode $node:/tmp 2>&1 `;
if ($?) {
print "rmcmon:removeNodes: cannot copy the file configrmcnode to node $node\n";
next;
}
$result=`psh --nonodecheck $node /tmp/configrmcnode -d $node 2>&1`;
if ($?) {
print "$result\n";
}
}
foreach(@$mon_nodes) {
my $node_pair=$_;
my $node=$node_pair->[0];
my $nodetype=$node_pair->[1];
if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) {
#RMC deals only with osi type. empty type is treated as osi type
push(@nodes_to_remove, $node);
}
}
if (@nodes_to_remove>0) {
return removeNodes_noChecking(@nodes_to_remove);
}
return 0;
}
#--------------------------------------------------------------------------------
=head3    removeNodes_noChecking

      This function removes the nodes from the RMC cluster. No node type
      filtering is done here; every name passed in is processed.

    Arguments:
       nodes -- an array of node names to be removed.
    Returns:
       0 in all cases. Problems with a single node are printed and do not
       stop the processing of the remaining nodes.

=cut

#--------------------------------------------------------------------------------
sub removeNodes_noChecking {
    my @targets    = @_;
    my $local_host = hostname();
    #print "rmcmon::removeNodes_noChecking get called with @targets\n";

    NODE:
    for my $node (@targets) {
        # remove resource in IBM.MngNode class on server
        my $result = `rmrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`;
        if ($?) {
            # 2612-023 means the resource was not found: nothing more to do
            next NODE if $result =~ m/2612-023/;
            print "Remove resource in IBM.MngNode class result=$result\n";
        }

        # the node is this host: remove the IBM.MCP resource directly
        if ($local_host eq $node) {
            $result = `/usr/bin/rmrsrc-api -s IBM.MCP::"MNName=\\\"\"$node\\\"\"" 2>&1`;
            print "$result\n" if $?;
            next NODE;
        }

        # TODO: check all the nodes together or use the 'status' value
        # a remote node that does not answer fping is left alone
        `fping -a $node 2> /dev/null`;
        next NODE if $?;

        # copy the configuration script to the node and run it there
        $result = `scp $::XCATROOT/sbin/rmcmon/configrmcnode $node:/tmp 2>&1 `;
        if ($?) {
            print "rmcmon:removeNodes: cannot copy the file configrmcnode to node $node\n";
            next NODE;
        }

        $result = `$::XCATROOT/bin/psh --nonodecheck $node /tmp/configrmcnode -d $node 2>&1`;
        print "$result\n" if $?;
    }

    return 0;
}

View File

@ -37,9 +37,9 @@ if ($::ADD) {
my $result=`/usr/bin/lssrc -s ctrmc`;
if ($result !~ /active/) {
#restart rmc daemon
$result=`startsrc -s ctrmc`;
$result=`startsrc -s ctrmc 2>&1`;
if ($?) {
print "rmc deamon cannot be started\n";
print "rmc deamon cannot be started:$result\n";
exit 1;
}
}
@ -48,8 +48,10 @@ if ($::ADD) {
`/usr/bin/rmcctrl -p; /usr/bin/refrsrc IBM.MCP`;
#define resource in IBM.MCP class on node
$result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$ARGV[0]"::KeyToken::"$ARGV[1]"::IPAddresses::"$ARGV[2]"::NodeID::$ARGV[3]`;
print "define resource in IBM.MCP class result=$result\n";
$result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$ARGV[0]"::KeyToken::"$ARGV[1]"::IPAddresses::"$ARGV[2]"::NodeID::$ARGV[3] 2>&1`;
if ($?) { print "define resource in IBM.MCP class result=$result\n"; }
#TODO: create predefined sensors. How does the scipts get on the node? mount? come with image?
exit 0
}
@ -61,8 +63,10 @@ if ($::DELETE) {
}
#remove resource in IBM.MCP class on the node
my $result= `/usr/bin/rmrsrc-api -s IBM.MCP::"MNName=\\\"\"$ARGV[0]\\\"\""`;
print "remove resource in IBM.MCP class result=$result\n";
my $result= `/usr/bin/rmrsrc-api -s IBM.MCP::"MNName=\\\"\"$ARGV[0]\\\"\"" 2>&1`;
if ($?) { print "remove resource in IBM.MCP class result=$result\n"; }
#TODO: remove predefined sensors,
exit 0;
}

View File

@ -23,7 +23,7 @@ use xCAT_monitoring::montbhandler;
#stores the module name and the method that is used for the node status monitoring
#for xCAT.
$NODESTAT_MON_NAME;
$NODESTAT_MON_NAME;
$masterpid;
1;
@ -344,6 +344,7 @@ sub startMonitoring {
if ($aRef) {
my $module_name=$aRef->[1];
undef $SIG{CHLD};
#initialize and start monitoring
my @ret1 = ${$module_name."::"}{start}->($monservers);
$ret{$_}=\@ret1;
@ -382,6 +383,7 @@ sub startNodeStatusMonitoring {
my $aRef=$PRODUCT_LIST{$pname};
if ($aRef) {
my $module_name=$aRef->[1];
undef $SIG{CHLD};
my $method = ${$module_name."::"}{supportNodeStatusMon}->();
# return value 0 means not support. 1 means yes.
if ($method > 0) {