From b5869e05a42595238afcc78ea2fdba822de0a85f Mon Sep 17 00:00:00 2001 From: linggao Date: Wed, 20 Feb 2008 02:02:11 +0000 Subject: [PATCH] more work on RMC monitoring git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@530 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- xCAT-rmc/plugin/rmcmon.pm | 479 ++++++++++++------ xCAT-rmc/scripts/perl/configrmcnode | 16 +- .../lib/xcat/monitoring/monitorctrl.pm | 4 +- 3 files changed, 323 insertions(+), 176 deletions(-) diff --git a/xCAT-rmc/plugin/rmcmon.pm b/xCAT-rmc/plugin/rmcmon.pm index e8cfeb459..b128a8787 100644 --- a/xCAT-rmc/plugin/rmcmon.pm +++ b/xCAT-rmc/plugin/rmcmon.pm @@ -13,8 +13,18 @@ use xCAT::Utils; use xCAT::GlobalDef; #print "xCAT_monitoring::rmcmon loaded\n"; - 1; + +#now the RMC domain can automatically setup when xCAT starts. predefined conditions and sensor are defined on ms. +#TODO: conveting all print statement to logging +#TODO: predefined responses +#TODO: node status monitoring for xCAT. +#TODO: script to define sensors on the nodes. +#TODO: how to push the sensor scripts to nodes? +#TODO: what to do when stop is called? stop all the associations or just the ones that were predefined? or leve them there? +#TODO: do we need to stop all the RMC daemons when stop is called? +#I will come back to work on these once I have SNMP stuff done. + #------------------------------------------------------------------------------- =head1 xCAT_monitoring:rmcmon =head2 Package Description @@ -66,29 +76,115 @@ sub start { return (1, "rmc deamon cannot be started\n"); } } + + #enable remote client connection + `/usr/bin/rmcctrl -p`; #get a list of managed nodes - $result=`/usr/bin/lsrsrc-api -s IBM.MngNode::::Name`; + $result=`/usr/bin/lsrsrc-api -s IBM.MngNode::::Name 2>&1`; + if ($?) { + if ($result !~ /2612-023/) {#2612-023 no resources found error + print "$result\n"; + return (1,$result); + } + $result=''; + } chomp($result); my @rmc_nodes=split(/\n/, $result); + #print "all defined nodes=@rmc_nodes\n"; - foreach (keys(%$noderef)) { - my $server=$_; + my $localhostname=hostname(); + my $mon_nodes=$noderef->{$localhostname}; - my $mon_nodes=$noderef->{$_}; + #check what has changed + my %summary; + foreach (@rmc_nodes) { $summary{$_}=-1;} + if ($mon_nodes) { foreach(@$mon_nodes) { - my $node_pair=$_; - my $node=$node_pair->[0]; - my $nodetype=$node_pair->[1]; - + my $node=$_->[0]; + my $nodetype=$_->[1]; + if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) { + $summary{$node}++; + } + } + } + + my @nodes_to_add=(); + my @nodes_to_remove=(); + foreach(keys(%summary)) { + #print "node=$_ summary=$summary{$_}\n"; + if ($summary{$_}==1) {push(@nodes_to_add, $_);} + elsif ($summary{$_}==-1) {push(@nodes_to_remove, $_);} + } + + #add new nodes to the RMC cluster + if (@nodes_to_add>0) { + my %nodes_status=xCAT_monitoring::rmcmon::pingNodeStatus(@nodes_to_add); + my $active_nodes=$nodes_status{$::STATUS_ACTIVE}; + my $inactive_nodes=$nodes_status{$::STATUS_INACTIVE}; + if (@$inactive_nodes>0) { + print "The following nodes cannot be added to the RMC cluster because they are inactive:\n @$inactive_nodes\n" } + if (@$active_nodes>0) { + print "active nodes to add:\n @$active_nodes\n"; + addNodes_noChecking(@$active_nodes); + } + } + + #remove unwanted nodes to the RMC cluster + if (@nodes_to_remove>0) { + print "nodes to remove @nodes_to_remove\n"; + removeNodes_noChecking(@nodes_to_remove); + } + + #start condition-response assosciations + my $result=`$::XCATROOT/sbin/rmcmon/mkrmcresources $::XCATROOT/lib/perl/xCAT_monitoring/rmc/resources/ms 2>&1`; + if ($?) { + print "Error when creating predefined resources on $localhostname:\n$result\n"; + } + + return (0, "started"); +} + + +#-------------------------------------------------------------------------------- +=head3 pingNodeStatus + This function takes an array of nodes and returns their status using fping. + Arguments: + nodes-- an array of nodes. + Returns: + a hash that has the node status. The format is: + {active=>[node1, node3,...], unreachable=>[node4, node2...]} +=cut +#-------------------------------------------------------------------------------- +sub pingNodeStatus { + my ($class, @mon_nodes)=@_; + %status=(); + my @active_nodes=(); + my @inactive_nodes=(); + if ((@mon_nodes)&& (@mon_nodes > 0)) { + #get all the active nodes + my $nodes= join(' ', @mon_nodes); + my $temp=`fping -a $nodes 2> /dev/null`; + chomp($temp); + @active_nodes=split(/\n/, $temp); + + #get all the inactive nodes by substracting the active nodes from all. + my %temp2; + if ((@active_nodes) && ( @active_nodes > 0)) { + foreach(@active_nodes) { $temp2{$_}=1}; + foreach(@mon_nodes) { + if (!$temp2{$_}) { push(@inactive_nodes, $_);} + } + } + else {@inactive_nodes=@mon_nodes;} } - - #TODO: start condition-response assosciations - - return (0, "started"); + $status{$::STATUS_ACTIVE}=\@active_nodes; + $status{$::STATUS_INACTIVE}=\@inactive_nodes; + + return %status; } @@ -176,7 +272,7 @@ sub stopNodeStatusMon { #-------------------------------------------------------------------------------- =head3 addNodes - This function gdds the nodes into the RMC cluster. + This function adds the nodes into the RMC cluster. Arguments: nodes --nodes to be added. It is a hash reference keyed by the monitoring server nodes and each value is a ref to an array of [nodes, nodetype, status] arrays monitored @@ -188,130 +284,143 @@ sub stopNodeStatusMon { =cut #-------------------------------------------------------------------------------- sub addNodes { - $noderef=shift; + my $noderef=shift; if ($noderef =~ /xCAT_monitoring::rmcmon/) { $noderef=shift; } - my $VERBOSE=shift; - - #if ($VERBOSE) { print "rmcmon::addNodes called $noderef=$noderef\n"}; + + #print "rmcmon::addNodes get called\n"; + my $ms_host_name=hostname(); - chomp($ms_host_name); + my $mon_nodes=$noderef->{$ms_host_name}; - foreach (keys(%$noderef)) { - my $server=$_; - #if ($VERBOSE) { print " monitoring server: $server\n";} + my @nodes_to_add=(); + foreach(@$mon_nodes) { + my $node_pair=$_; + my $node=$node_pair->[0]; + my $nodetype=$node_pair->[1]; + if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) { + #RMC deals only with osi type. empty type is treated as osi type + #check if the node has already defined + $result=`lsrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`; + if (($?) && ($result !~ /2612-023/)) { #2612-023 no resources found error + print "$result\n"; + next; + } - if ($server ne $ms_host_name) { - next; #only handle the nodes which has this node as the monitor server - } - - #check if rsct is installed and running - if (! -e "/usr/bin/lsrsrc") { - print "RSCT is not installed.\n"; - return 1; - } - my $result=`/usr/bin/lssrc -s ctrmc`; - if ($result !~ /active/) { - #restart rmc daemon - $result=`startsrc -s ctrmc`; - if ($?) { - print "rmc deamon cannot be started\n"; - return 1; - } - } - - #enable remote client connection - `/usr/bin/rmcctrl -p`; - - #ms node id, hostname, ip defs - my $ms_node_id; - my $ms_name,$ms_aliases,$ms_addrtype,$ms_length,@ms_addrs; - my $ms_ipaddresses; - - #if ($VERBOSE) { - # print " ms_host_name=$ms_host_name, ms_nam=$ms_name, ms_aliases=$ms_aliases, ms_ip_addr=$ms_ipaddresses, ms_node_id=$ms_node_id\n"; - #} - - my $mon_nodes=$noderef->{$_}; - my $first_time=1; - foreach(@$mon_nodes) { - my $node_pair=$_; - my $node=$node_pair->[0]; - my $nodetype=$node_pair->[1]; - if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) { - #RMC deals only with osi type. empty type is treated as osi type - #check if the node has already defined - $result=`lsrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`; - if ($? == 0) { #resource has already defined - next; - } - - #TODO: check all nodes at the same time or use the 'status' value in the node + #TODO: check all nodes at the same time or use the 'status' value in the node + if ($ms_host_name ne $node) { `fping -a $node 2> /dev/null`; if ($?) { - print "Cannot add the node $node into the RMC domian. The node is inactive.\n"; - next; - } - - if ($first_time) { - $first_time=0; - - #enable remote client connection - `/usr/bin/rmcctrl -p`; - - #get ms node id, hostname, ip etc - $ms_node_id=`head -n 1 /var/ct/cfg/ct_node_id`; - chomp($ms_node_id); - ($ms_name,$ms_aliases,$ms_addrtype,$ms_length,@ms_addrs) = gethostbyname($ms_host_name); - chomp($ms_name); - - $ms_ipaddresses="{"; - foreach (@ms_addrs) { - $ms_ipaddresses .= '"' .inet_ntoa($_) . '",'; - } - chop($ms_ipaddresses); - $ms_ipaddresses .= "}"; - } - - #get info for the node - $mn_node_id=`psh $node "head -n 1 /var/ct/cfg/ct_node_id" 2>&1`; - $mn_node_id =~ s/.*([0-9 a-g]{16}).*/$1/s; - - my ($mn_name,$mn_aliases,$mn_addrtype,$mn_length,@mn_addrs) = gethostbyname($node); - chomp($mn_name); - my $mn_ipaddresses="{"; - foreach (@mn_addrs) { - $mn_ipaddresses .= '"'.inet_ntoa($_) . '",'; - } - chop($mn_ipaddresses); - $mn_ipaddresses .= "}"; - #if ($VERBOSE) { - # print " mn_name=$mn_name, mn_aliases=$mn_aliases, mn_ipaddr=$mn_ipaddresses, mn_node_id=$mn_node_id\n"; - #} - - # define resource in IBM.MngNode class on server - $result=`mkrsrc-api IBM.MngNode::Name::"$node"::KeyToken::"$node"::IPAddresses::"$mn_ipaddresses"::NodeID::0x$mn_node_id 2>&1`; - if ($?) { - print "define resource in IBM.MngNode class result=$result\n"; - } - - #copy the configuration script and run it locally - $result=`scp $::XCATROOT/sbin/rmcmon/configrmcnode $node:/tmp 2>&1`; - if ($?) { - print "rmcmon:addNodes: cannot copy the file configrmcnode to node $node\n"; + print "Cannot add the node $node into the RMC domian. The node is inactive.\n"; next; } + } - $result=`psh $node /tmp/configrmcnode -a $node $ms_host_name $ms_ipaddresses 0x$ms_node_id 2>&1`; - if ($?) { - print "$result\n"; - } - } - } + push(@nodes_to_add, $node); + } } + if (@nodes_to_add>0) { + return addNodes_noChecking(@nodes_to_add); + } + + return 0; +} + +#-------------------------------------------------------------------------------- +=head3 addNodes_noChecking + This function gdds the nodes into the RMC cluster, it does not check the OSI type and + if the node has already defined. + Arguments: + nodes --an array of nodes to be added. + Returns: + none +=cut +#-------------------------------------------------------------------------------- +sub addNodes_noChecking { + + @mon_nodes = @_; + #print "rmcmon::addNodes_noChecking get called with @mon_nodes\n"; + my $ms_host_name=hostname(); + + my $ms_node_id; + my $mn_node_id; + my $ms_name,$ms_aliases,$ms_addrtype,$ms_length,@ms_addrs; + my $ms_ipaddresses; + + my $first_time=1; + foreach(@mon_nodes) { + + my $node=$_; + + if ($first_time) { + $first_time=0; + #get ms node id, hostname, ip etc + $ms_node_id=`head -n 1 /var/ct/cfg/ct_node_id`; + chomp($ms_node_id); + ($ms_name,$ms_aliases,$ms_addrtype,$ms_length,@ms_addrs) = gethostbyname($ms_host_name); + chomp($ms_name); + + $ms_ipaddresses="{"; + foreach (@ms_addrs) { + $ms_ipaddresses .= '"' .inet_ntoa($_) . '",'; + } + chop($ms_ipaddresses); + $ms_ipaddresses .= "}"; + } + + #get info for the node + if($ms_host_name eq $node) { + $mn_node_id=$ms_node_id; + } else { + $mn_node_id=`$::XCATROOT/bin/psh --nonodecheck $node "head -n 1 /var/ct/cfg/ct_node_id" 2>&1`; + if ($?) { + print "Cannot get NodeID for $node. $mn_node_id\n"; + next; + } + if ($mn_node_id =~ s/.*([0-9 a-g]{16}).*/$1/s) {;} + else { print "No node id found for $node:\n$mn_node_id\n"; next;} + } + + my ($mn_name,$mn_aliases,$mn_addrtype,$mn_length,@mn_addrs) = gethostbyname($node); + chomp($mn_name); + my $mn_ipaddresses="{"; + foreach (@mn_addrs) { + $mn_ipaddresses .= '"'.inet_ntoa($_) . '",'; + } + chop($mn_ipaddresses); + $mn_ipaddresses .= "}"; + # print " mn_name=$mn_name, mn_aliases=$mn_aliases, mn_ipaddr=$mn_ipaddresses, mn_node_id=$mn_node_id\n"; + + # define resource in IBM.MngNode class on server + $result=`mkrsrc-api IBM.MngNode::Name::"$node"::KeyToken::"$node"::IPAddresses::"$mn_ipaddresses"::NodeID::0x$mn_node_id 2>&1`; + if ($?) { + print "define resource in IBM.MngNode class result=$result\n"; + next; + } + + #copy the configuration script and run it locally + if($ms_host_name eq $node) { + $result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$ms_host_name"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id`; + if ($?) { + print "$result\n"; + next; + } + } else { + $result=`scp $::XCATROOT/sbin/rmcmon/configrmcnode $node:/tmp 2>&1`; + if ($?) { + print "rmcmon:addNodes: cannot copy the file configrmcnode to node $node\n"; + next; + } + + $result=`$::XCATROOT/bin/psh --nonodecheck $node /tmp/configrmcnode -a $node $ms_host_name $ms_ipaddresses 0x$ms_node_id 2>&1`; + if ($?) { + print "$result\n"; + } + } + } return 0; } @@ -330,59 +439,91 @@ sub addNodes { =cut #-------------------------------------------------------------------------------- sub removeNodes { - $noderef=shift; + my $noderef=shift; if ($noderef =~ /xCAT_monitoring::rmcmon/) { $noderef=shift; } - my $VERBOSE=shift; - #if ($VERBOSE) { print "rmcmon::removeNodes called\n"}; + #print "rmcmon::removeNodes called\n"; + my $ms_host_name=hostname(); + my $mon_nodes=$noderef->{$ms_host_name}; - my $local_host_name=hostname(); - chomp($local_host_name); - foreach (keys(%$noderef)) { - $server=$_; - #print " monitoring server: $server local_host_name=$local_host_name\n"; - #only handle the nodes which has this node as the monitor server - if ($server ne $local_host_name) {next; } + my @nodes_to_remove=(); - my $mon_nodes=$noderef->{$_}; - foreach(@$mon_nodes) { - my $node_pair=$_; - my $node=$node_pair->[0]; - my $nodetype=$node_pair->[1]; - if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) { - #RMC deals only with osi type. empty type is treated as osi type - - - #remove resource in IBM.MngNode class on server - my $result=`rmrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`; - if ($?) { print "remove resource in IBM.MngNode class result=$result\n"; } - if ($result =~ m/2612-023/) { #resource not found - next; - } - - # TODO: check all the nodes together or use the 'status' value - #if the node is inactive, forget it - `fping -a $node 2> /dev/null`; - if ($?) { - next; - } - - #copy the configuration script and run it locally - $result=`scp $::XCATROOT/sbin/rmcmon/configrmcnode $node:/tmp 2>&1 `; - if ($?) { - print "rmcmon:removeNodes: cannot copy the file configrmcnode to node $node\n"; - next; - } - - $result=`psh --nonodecheck $node /tmp/configrmcnode -d $node 2>&1`; - if ($?) { - print "$result\n"; - } - } + foreach(@$mon_nodes) { + my $node_pair=$_; + my $node=$node_pair->[0]; + my $nodetype=$node_pair->[1]; + if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) { + #RMC deals only with osi type. empty type is treated as osi type + push(@nodes_to_remove, $node); } } + if (@nodes_to_remove>0) { + return removeNodes_noChecking(@nodes_to_remove); + } + + return 0; +} + + +#-------------------------------------------------------------------------------- +=head3 removeNodes_noChecking + This function removes the nodes from the RMC cluster. + Arguments: + nodes --an array of node names to be removed. + Returns: + none +=cut +#-------------------------------------------------------------------------------- +sub removeNodes_noChecking { + my @mon_nodes = @_; + my $ms_host_name=hostname(); + + + #print "rmcmon::removeNodes_noChecking get called with @mon_nodes\n"; + + foreach(@mon_nodes) { + my $node=$_; + + #remove resource in IBM.MngNode class on server + my $result=`rmrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`; + if ($?) { + if ($result =~ m/2612-023/) { #resource not found + next; + } + print "Remove resource in IBM.MngNode class result=$result\n"; + } + + # TODO: check all the nodes together or use the 'status' value + #if the node is inactive, forget it + if ($ms_host_name ne $node) { + `fping -a $node 2> /dev/null`; + if ($?) { + next; + } + } + + if ($ms_host_name eq $node) { + $result= `/usr/bin/rmrsrc-api -s IBM.MCP::"MNName=\\\"\"$node\\\"\"" 2>&1`; + if ($?) { + print "$result\n"; + } + } else { + #copy the configuration script and run it locally + $result=`scp $::XCATROOT/sbin/rmcmon/configrmcnode $node:/tmp 2>&1 `; + if ($?) { + print "rmcmon:removeNodes: cannot copy the file configrmcnode to node $node\n"; + next; + } + + $result=`$::XCATROOT/bin/psh --nonodecheck $node /tmp/configrmcnode -d $node 2>&1`; + if ($?) { + print "$result\n"; + } + } + } + return 0; } diff --git a/xCAT-rmc/scripts/perl/configrmcnode b/xCAT-rmc/scripts/perl/configrmcnode index c29b17e86..0e586bbf6 100755 --- a/xCAT-rmc/scripts/perl/configrmcnode +++ b/xCAT-rmc/scripts/perl/configrmcnode @@ -37,9 +37,9 @@ if ($::ADD) { my $result=`/usr/bin/lssrc -s ctrmc`; if ($result !~ /active/) { #restart rmc daemon - $result=`startsrc -s ctrmc`; + $result=`startsrc -s ctrmc 2>&1`; if ($?) { - print "rmc deamon cannot be started\n"; + print "rmc deamon cannot be started:$result\n"; exit 1; } } @@ -48,8 +48,10 @@ if ($::ADD) { `/usr/bin/rmcctrl -p; /usr/bin/refrsrc IBM.MCP`; #define resource in IBM.MCP class on node - $result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$ARGV[0]"::KeyToken::"$ARGV[1]"::IPAddresses::"$ARGV[2]"::NodeID::$ARGV[3]`; - print "define resource in IBM.MCP class result=$result\n"; + $result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$ARGV[0]"::KeyToken::"$ARGV[1]"::IPAddresses::"$ARGV[2]"::NodeID::$ARGV[3] 2>&1`; + if ($?) { print "define resource in IBM.MCP class result=$result\n"; } + + #TODO: create predefined sensors. How does the scipts get on the node? mount? come with image? exit 0 } @@ -61,8 +63,10 @@ if ($::DELETE) { } #remove resource in IBM.MCP class on the node - my $result= `/usr/bin/rmrsrc-api -s IBM.MCP::"MNName=\\\"\"$ARGV[0]\\\"\""`; - print "remove resource in IBM.MCP class result=$result\n"; + my $result= `/usr/bin/rmrsrc-api -s IBM.MCP::"MNName=\\\"\"$ARGV[0]\\\"\"" 2>&1`; + if ($?) { print "remove resource in IBM.MCP class result=$result\n"; } + + #TODO: remove predefined sensors, exit 0; } diff --git a/xCAT-server-2.0/lib/xcat/monitoring/monitorctrl.pm b/xCAT-server-2.0/lib/xcat/monitoring/monitorctrl.pm index b5d688008..0f4459898 100644 --- a/xCAT-server-2.0/lib/xcat/monitoring/monitorctrl.pm +++ b/xCAT-server-2.0/lib/xcat/monitoring/monitorctrl.pm @@ -23,7 +23,7 @@ use xCAT_monitoring::montbhandler; #stores the module name and the method that is used for the node status monitoring #for xCAT. -$NODESTAT_MON_NAME; +$NODESTAT_MON_NAME; $masterpid; 1; @@ -344,6 +344,7 @@ sub startMonitoring { if ($aRef) { my $module_name=$aRef->[1]; + undef $SIG{CHLD}; #initialize and start monitoring my @ret1 = ${$module_name."::"}{start}->($monservers); $ret{$_}=\@ret1; @@ -382,6 +383,7 @@ sub startNodeStatusMonitoring { my $aRef=$PRODUCT_LIST{$pname}; if ($aRef) { my $module_name=$aRef->[1]; + undef $SIG{CHLD}; my $method = ${$module_name."::"}{supportNodeStatusMon}->(); # return value 0 means not support. 1 means yes. if ($method > 0) {