From daee2e24374336f2b317eab220426b8a254f80c9 Mon Sep 17 00:00:00 2001 From: linggao Date: Tue, 24 Jun 2008 20:45:27 +0000 Subject: [PATCH] more work on RMC monitoring git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@1755 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd --- xCAT-rmc/plugin/rmcmon.pm | 194 +++++++++++++-------------------- xCAT-rmc/scripts/configrmcnode | 43 +++++++- 2 files changed, 114 insertions(+), 123 deletions(-) diff --git a/xCAT-rmc/plugin/rmcmon.pm b/xCAT-rmc/plugin/rmcmon.pm index 8dbe2a012..1ebd3322a 100644 --- a/xCAT-rmc/plugin/rmcmon.pm +++ b/xCAT-rmc/plugin/rmcmon.pm @@ -18,14 +18,12 @@ use xCAT::MsgUtils; #print "xCAT_monitoring::rmcmon loaded\n"; 1; -#now the RMC domain can automatically setup when xCAT starts. predefined conditions and sensor are defined on ms. -#TODO: predefined responses -#TODO: node status monitoring for xCAT. + #TODO: script to define sensors on the nodes. #TODO: how to push the sensor scripts to nodes? #TODO: what to do when stop is called? stop all the associations or just the ones that were predefined? or leve them there? #TODO: do we need to stop all the RMC daemons when stop is called? -#I will come back to work on these once I have SNMP stuff done. +#TODO: monitoring HMC with old RSCT and new RSCT #------------------------------------------------------------------------------- =head1 xCAT_monitoring:rmcmon @@ -52,7 +50,7 @@ use xCAT::MsgUtils; =cut #-------------------------------------------------------------------------------- sub start { - print "rmcmon::start called\n"; + #print "rmcmon::start called\n"; my $noderef=xCAT_monitoring::monitorctrl->getMonHierarchy(); @@ -85,7 +83,7 @@ sub start { } chomp($result); my @rmc_nodes=split(/\n/, $result); - print "RMC defined nodes=@rmc_nodes\n"; + #print "RMC defined nodes=@rmc_nodes\n"; #the identification of this node @@ -99,6 +97,7 @@ sub start { my @key_a=split(',', $key); if (! 
$iphash{$key_a[0]}) { next;} my $mon_nodes=$noderef->{$key}; + my $master=$key_a[1]; #check what has changed my %summary; @@ -126,19 +125,19 @@ sub start { my %nodes_status=xCAT_monitoring::rmcmon->pingNodeStatus(@nodes_to_add); my $active_nodes=$nodes_status{$::STATUS_ACTIVE}; my $inactive_nodes=$nodes_status{$::STATUS_INACTIVE}; - print "active nodes to add:@$active_nodes\ninactive nodes to add: @$inactive_nodes\n"; + #print "active nodes to add:@$active_nodes\ninactive nodes to add: @$inactive_nodes\n"; if (@$inactive_nodes>0) { xCAT::MsgUtils->message('SI', "[mon]: The following nodes cannot be added to the RMC cluster because they are inactive:\n @$inactive_nodes\n"); } if (@$active_nodes>0) { - addNodes_noChecking(@$active_nodes); + addNodes_noChecking($active_nodes, $master); } } #remove unwanted nodes to the RMC cluster if (@nodes_to_remove>0) { - print "nodes to remove @nodes_to_remove\n"; - removeNodes_noChecking(@nodes_to_remove); + #print "nodes to remove @nodes_to_remove\n"; + removeNodes_noChecking(\@nodes_to_remove, $master); } #create conditions/responses/sensors on the service node or mn @@ -424,6 +423,34 @@ sub stopNodeStatusMon { } +#-------------------------------------------------------------------------------- +=head3 getNodeID + This function gets the node id for the given node. + + Arguments: + node + Returns: + node id for the given node (the mac attribute from the mac table with the colons removed and "0000" appended), or undef if the node has no entry +=cut +#-------------------------------------------------------------------------------- +sub getNodeID { + my $node=shift; + if ($node =~ /xCAT_monitoring::rmcmon/) { + $node=shift; + } + my $tab=xCAT::Table->new("mac", -create =>0); + my $tmp=$tab->getNodeAttribs($node, ['mac']); + if (defined($tmp) && ($tmp)) { + my $mac=$tmp->{mac}; + $mac =~ s/://g; + $mac .= "0000"; + $tab->close(); + return $mac; + } + $tab->close(); + return undef; +} + #-------------------------------------------------------------------------------- =head3 addNodes This function adds the nodes into the RMC cluster.
@@ -440,68 +467,9 @@ sub stopNodeStatusMon { sub addNodes { return (0, "ok"); #not handle it now, wait when nodelist.status work is done - - my $noderef=shift; - if ($noderef =~ /xCAT_monitoring::rmcmon/) { - $noderef=shift; - } - my $bWithInfo=shift; - #print "rmcmon::addNodes get called\n"; - my $mon_nodes=$noderef; - - - my @hostinfo=xCAT::Utils->determinehostname(); - %iphash=(); - foreach(@hostinfo) {$iphash{$_}=1;} - - my @nodes_to_add=(); - my $table3=xCAT::Table->new("nodetype", -create =>0); - foreach(@$mon_nodes) { - my $node_pair=$_; - my $node=$node_pair->[0]; - my $status=$node_pair->[1]; - - #get node type - my $tmp3=$table3->getNodeAttribs($node, ['nodetype']); - my $nodetype="osi"; #default - if (defined($tmp3) && ($tmp3)) { - if ($tmp3->{nodetype}) { $nodetype=$tmp3->{nodetype}; } - } - - if ($nodetype =~ /$::NODETYPE_OSI/) { - #RMC deals only with osi type. empty type is treated as osi type - #check if the node has already defined - $result=`lsrsrc-api -s IBM.MngNode::"Name=\\\"\"$node\\\"\"" 2>&1`; - if (($?) && ($result !~ /2612-023/)) { #2612-023 no resources found error - xCAT::MsgUtils->message('SI', "[mon]: $result\n"); - next; - } - - #check if the node is active - my $active=0; - if (exists($iphash{$node})) { $active=1;} - elsif ($status && ($status eq $::STATUS_ACTIVE)) { $active=1; } - else { - `fping -a $node 2> /dev/null`; - if ($?==0) {$active=1; } - } - if (!$active) { - xCAT::MsgUtils->message('SI', "[mon]: Cannot add the node $node into the RMC domian. 
The node is inactive.\n"); - next; - } - - push(@nodes_to_add, $node); - } - } - $table3->close(); - - if (@nodes_to_add>0) { - return addNodes_noChecking(@nodes_to_add); - } - - return (0, "ok"); } + #-------------------------------------------------------------------------------- =head3 addNodes_noChecking This function gdds the nodes into the RMC cluster, it does not check the OSI type and @@ -513,9 +481,20 @@ sub addNodes { =cut #-------------------------------------------------------------------------------- sub addNodes_noChecking { - - @mon_nodes = @_; + + my $pmon_nodes=shift; + if ($pmon_nodes =~ /xCAT_monitoring::rmcmon/) { + $pmon_nodes=shift; + } + + my @mon_nodes = @$pmon_nodes; + my $master=shift; + #print "rmcmon::addNodes_noChecking get called with @mon_nodes\n"; + my @hostinfo=xCAT::Utils->determinehostname(); + %iphash=(); + foreach(@hostinfo) {$iphash{$_}=1;} + my $ms_host_name=hostname(); my $ms_node_id; @@ -545,7 +524,7 @@ sub addNodes_noChecking { } #get info for the node - if($ms_host_name eq $node) { + if($iphash{$node}) { $mn_node_id=$ms_node_id; } else { $mn_node_id=`$::XCATROOT/bin/psh --nonodecheck $node /usr/sbin/rsct/bin/lsnodeid 2>&1`; @@ -575,8 +554,8 @@ sub addNodes_noChecking { } #copy the configuration script and run it locally - if($ms_host_name eq $node) { - $result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$ms_host_name"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id`; + if($iphash{$node}) { + $result=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$node"::KeyToken::"$master"::IPAddresses::"$ms_ipaddresses"::NodeID::0x$ms_node_id`; if ($?) 
{ xCAT::MsgUtils->message('SI', "[mon]: $result\n"); next; @@ -588,14 +567,14 @@ sub addNodes_noChecking { next; } - $result=`$::XCATROOT/bin/psh --nonodecheck $node NODE=$node MASTER_NAME=$ms_host_name MASTER_IPS=$ms_ipaddresses MASTER_NODEID=0x$ms_node_id /tmp/configrmcnode 1 2>&1`; + $result=`$::XCATROOT/bin/psh --nonodecheck $node NODE=$node MONSERVER=$master MS_NODEID=$ms_node_id /tmp/configrmcnode 1 2>&1`; if ($?) { xCAT::MsgUtils->message('SI', "[mon]: $result\n"); } } } - return (0, "ok"); + return (0, "ok"); } #-------------------------------------------------------------------------------- @@ -614,42 +593,6 @@ sub addNodes_noChecking { sub removeNodes { return (0, "ok"); #not handle it now, wait when nodelist.status work is done - my $noderef=shift; - if ($noderef =~ /xCAT_monitoring::rmcmon/) { - $noderef=shift; - } - my $bWithInfo=shift; - my $mon_nodes=$noderef; - - #print "rmcmon::removeNodes called\n"; - - my @nodes_to_remove=(); - - my $table3=xCAT::Table->new("nodetype", -create =>0); - foreach(@$mon_nodes) { - my $node_pair=$_; - my $node=$node_pair->[0]; - my $status=$node_pair->[1]; - - #get node type - my $tmp3=$table3->getNodeAttribs($node, ['nodetype']); - my $nodetype="osi"; #default - if (defined($tmp3) && ($tmp3)) { - if ($tmp3->{nodetype}) { $nodetype=$tmp3->{nodetype}; } - } - - if ((!$nodetype) || ($nodetype =~ /$::NODETYPE_OSI/)) { - #RMC deals only with osi type. empty type is treated as osi type - push(@nodes_to_remove, $node); - } - } - $table3->close(); - - if (@nodes_to_remove>0) { - return removeNodes_noChecking(@nodes_to_remove); - } - - return (0, "ok"); } @@ -657,13 +600,19 @@ sub removeNodes { =head3 removeNodes_noChecking This function removes the nodes from the RMC cluster. Arguments: - nodes --an array of node names to be removed. + nodes --a reference to an array of node names to be removed.
+ Returns: (error code, error message) =cut #-------------------------------------------------------------------------------- sub removeNodes_noChecking { - my @mon_nodes = @_; + my $pmon_nodes=shift; + if ($pmon_nodes =~ /xCAT_monitoring::rmcmon/) { + $pmon_nodes=shift; + } + my @mon_nodes = @$pmon_nodes; + my $ms_host_name=hostname(); @@ -740,7 +689,18 @@ sub processSettingChanges { sub getDescription { return " Description: - rmcmon ..... + rmcmon uses IBM's Resource Monitoring and Control (RMC) component + of Reliable Scalable Cluster Technology (RSCT) to monitor the + xCAT cluster. RMC has built-in resources such as CPU, memory, + process, network, file system etc for monitoring. RMC can also be + used to provide node liveness status monitoring for xCAT. RMC is + good for threadhold monitoring. xCAT automatically sets up the + monitoring domain for RMC during node deployment time. To start + RMC monitoring, use + monstart rmcmon + or + monstart rmcmon -n (to include node status monitoring). 
Settings: - key: value.\n"; + none.\n"; } + diff --git a/xCAT-rmc/scripts/configrmcnode b/xCAT-rmc/scripts/configrmcnode index 544810215..709a7d4a7 100755 --- a/xCAT-rmc/scripts/configrmcnode +++ b/xCAT-rmc/scripts/configrmcnode @@ -6,21 +6,40 @@ # This script is used for RMC node configuration # usage: # To add node to the cluster: -# NODE=nodename MASTER_NAME=msname \ -# MASTER_IPS={"9.114.46.26","..."} MASTER_NODEID=0xfbb5ec1f64dd299c \ +# NODE=nodename NODEID=fbb5ec1f64dd299c \ +# MONSERVER=msname_or_ip MS_NODEID=fbb5ec1f64dd299c \ # configrmcnode 1 # To remove node to the cluster # NODE=nodename configrmcnode -1 ########################################################################################## ADD=$1; -logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, MASTER_NAME=$MASTER_NAME, MASTER_IPS=$MASTER_IPS, MASTER_NODEID=$MASTER_NODEID" +logger xCAT "configrmcnode: ADD=$ADD, NODE=$NODE, NODEID=$NODEID MONSERVER=$MONSERVER,MS_NODEID=$MS_NODEID" #check if rsct is installed and running if [ ! -e /usr/bin/lsrsrc ]; then logger xCAT "RMC setup on node $NODE: RSCT is not is not installed." exit 1; fi + +#ask RMC take the new nodeid +if [[ $NODEID != "" ]]; then + if [ -e /var/ct/cfg/ct_node_id ]; then + sed -i s/^[^\#].*$/$NODEID/ /var/ct/cfg/ct_node_id + else + echo $NODEID > /var/ct/cfg/ct_node_id + fi + if [ -e /etc/ct_node_id ]; then + sed -i s/^[^\#].*$/$NODEID/ /etc/ct_node_id + else + echo $NODEID > /etc/ct_node_id + fi + result=`/usr/sbin/rsct/install/bin/recfgct -s 2>&1` + if [ $? 
-ne 0 ]; then + logger xCAT "RMC setup on node $NODE: Cannot reconfig RSCT with new node id" + fi +fi + PID=`/bin/ps -ef | /bin/grep rmcd | /bin/grep -v grep | /bin/awk '{print $2}'` if [ !$PID ]; then @@ -32,12 +51,24 @@ if [ !$PID ]; then fi fi -#enable remote client connection -/usr/bin/rmcctrl -p; /usr/bin/refrsrc IBM.MCP if [ $ADD -eq 1 ]; then + #enable remote client connection + /usr/bin/rmcctrl -p; /usr/bin/refrsrc IBM.MCP + + #get IP address of MS: take the address ping prints between the first "(" and ")"; if ping fails keep $MONSERVER as given (expr index is 1-based, ${var:offset:length} is 0-based) + ms_ip=$MONSERVER + result=`ping -c1 $MONSERVER 2>&1` + if [ $? -eq 0 ]; then + index1=`expr index "$result" "\("` + index2=`expr index "$result" "\)"` + ms_ip=${result:$index1:$index2-$index1-1} + else + logger xCAT "RMC setup on node $NODE:$result" + fi + #define resource in IBM.MCP class on node - result1=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$NODE"::KeyToken::"$MASTER_NAME"::IPAddresses::"$MASTER_IPS"::NodeID::$MASTER_NODEID 2>&1` + result1=`/usr/bin/mkrsrc-api IBM.MCP::MNName::"$NODE"::KeyToken::"$MONSERVER"::IPAddresses::"{\"$ms_ip\"}"::NodeID::0x$MS_NODEID 2>&1` if [ $? -gt 0 ]; then logger xCAT "Define resource in IBM.MCP class on node $NODE. result=$result1" exit 1