From ef73e9399a264ec7b88931dbea35a0bf2854f614 Mon Sep 17 00:00:00 2001
From: linggao
Date: Thu, 29 Nov 2007 19:55:16 +0000
Subject: [PATCH] Added support for node status monitoring

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@99 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
---
 .../usr/lib/xcat/monitoring/xcatmon.pm        | 169 ++++++++++++++++++
 xCAT-server-2.0/usr/sbin/xcatnodemon          | 103 +++++++++++
 2 files changed, 272 insertions(+)
 create mode 100644 xCAT-server-2.0/usr/lib/xcat/monitoring/xcatmon.pm
 create mode 100755 xCAT-server-2.0/usr/sbin/xcatnodemon

diff --git a/xCAT-server-2.0/usr/lib/xcat/monitoring/xcatmon.pm b/xCAT-server-2.0/usr/lib/xcat/monitoring/xcatmon.pm
new file mode 100644
index 000000000..8bba5555f
--- /dev/null
+++ b/xCAT-server-2.0/usr/lib/xcat/monitoring/xcatmon.pm
@@ -0,0 +1,169 @@
+#!/usr/bin/env perl
+# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
+package xCAT_monitoring::xcatmon;
+
+use strict;
+use warnings;
+#use xCAT::NodeRange;
+#use Socket;
+use xCAT::GlobalDef;
+use xCAT::Utils;
+
+#caches the current node status. (server1=>{active=>[node1,node3...], unreachable=>[node2, node4..]},....)
+our $flag;
+
+1;
+#-------------------------------------------------------------------------------
+=head1  xCAT_monitoring::xcatmon
+=head2    Package Description
+  This is an xCAT monitoring plugin. The only thing that this plug-in does is
+  the node monitoring.
+=cut
+#-------------------------------------------------------------------------------
+
+#--------------------------------------------------------------------------------
+=head3    start
+      This function gets called by the monitorctrl module
+      when xcatd starts.
+    Arguments:
+      monservers --A hash reference keyed by the monitoring server nodes
+         and each value is a ref to an array of [nodes, nodetype] arrays
+         monitored by the server. So the format is:
+           {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
+    Returns:
+      (return code, message)
+=cut
+#--------------------------------------------------------------------------------
+sub start {
+  #print "xcatmon.start\n";
+
+  return (0, "started");
+}
+
+
+
+#--------------------------------------------------------------------------------
+=head3    stop
+      This function gets called by the monitorctrl module when
+      xcatd stops.
+    Arguments:
+       none
+    Returns:
+       (return code, message)
+=cut
+#--------------------------------------------------------------------------------
+sub stop {
+
+  return (0, "stopped");
+}
+
+
+
+
+#--------------------------------------------------------------------------------
+=head3    supportNodeStatusMon
+    This function is called by the monitorctrl module to check
+    if this product can help monitoring and returning the node status.
+
+    Arguments:
+        none
+    Returns:
+         1
+=cut
+#--------------------------------------------------------------------------------
+sub supportNodeStatusMon {
+
+  return 1;
+}
+
+
+
+#--------------------------------------------------------------------------------
+=head3   startNodeStatusMon
+    This function is called by the monitorctrl module to tell
+    the product to start monitoring the node status and feed them back
+    to xCAT.
+    Arguments:
+      monservers --A hash reference keyed by the monitoring server nodes
+         and each value is a ref to an array of [nodes, nodetype] arrays
+         monitored by the server. So the format is:
+           {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
+    Returns:
+        (return code, message)
+
+=cut
+#--------------------------------------------------------------------------------
+sub startNodeStatusMon {
+  #print "xcatmon.startNodeStatusMon\n";
+  my $newentry="*/1 * * * * /usr/sbin/xcatnodemon >> /var/log/xcatmon.log";
+  my ($code, $msg)=xCAT::Utils::add_cron_job($newentry);
+  if ($code==0) { return (0, "started"); }
+  else { return ($code, $msg); }
+}
+
+
+#--------------------------------------------------------------------------------
+=head3   stopNodeStatusMon
+    This function is called by the monitorctrl module to tell
+    the product to stop feeding the node status info back to xCAT.
+
+    Arguments:
+        none
+    Returns:
+        (return code, message)
+=cut
+#--------------------------------------------------------------------------------
+sub stopNodeStatusMon {
+  #ask xCAT::Utils to remove the cron job that runs /usr/sbin/xcatnodemon.
+
+  my $job="/usr/sbin/xcatnodemon";
+  my ($code, $msg)=xCAT::Utils::remove_cron_job($job);
+  if ($code==0) { return (0, "stopped"); }
+  else { return ($code, $msg); }
+
+}
+
+
+#--------------------------------------------------------------------------------
+=head3    addNodes
+      This function is called by the monitorctrl module when new nodes are added
+      to the xCAT cluster. It should add the nodes into the product for monitoring.
+    Arguments:
+      nodes --nodes to be added. It is a hash reference keyed by the monitoring server
+        nodes and each value is a ref to an array of [nodes, nodetype] arrays monitored
+        by the server. So the format is:
+          {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
+    Returns:
+       none
+=cut
+#--------------------------------------------------------------------------------
+sub addNodes {
+
+  #print "xcatmon:addNodes called\n";
+
+  #TODO: include the nodes into the product for monitoring.
+  return;
+}
+
+#--------------------------------------------------------------------------------
+=head3    removeNodes
+      This function is called by the monitorctrl module when nodes are removed
+      from the xCAT cluster. It should remove the nodes from the product for monitoring.
+    Arguments:
+      nodes --nodes to be removed. It is a hash reference keyed by the monitoring server
+        nodes and each value is a ref to an array of [nodes, nodetype] arrays monitored
+        by the server. So the format is:
+          {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
+    Returns:
+       none
+=cut
+#--------------------------------------------------------------------------------
+sub removeNodes {
+
+  #print "xcatmon:removeNodes called\n";
+
+  #TODO: remove the nodes from the product for monitoring.
+  return;
+}
+
+
diff --git a/xCAT-server-2.0/usr/sbin/xcatnodemon b/xCAT-server-2.0/usr/sbin/xcatnodemon
new file mode 100755
index 000000000..ae4c442c2
--- /dev/null
+++ b/xCAT-server-2.0/usr/sbin/xcatnodemon
@@ -0,0 +1,103 @@
+#!/usr/bin/env perl
+# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
+
+use strict;
+use warnings;
+use xCAT::Table;
+use xCAT::GlobalDef;
+
+require("/usr/lib/xcat/monitoring/monitorctrl.pm");
+#################################################################
+# This script is used as a cron job by the xCAT monitoring plug-in
+# to monitor the node status
+##################################################################
+
+my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time);
+printf "%2d-%02d-%04d %02d:%02d:%02d: xcatnodemon started.\n", $mon+1,$mday,$year+1900,$hour,$min,$sec;
+
+#get saved node status from the nodelist table
+my %nodes_status_old=xCAT_monitoring::monitorctrl::getNodeStatus();
+
+#get a list of nodes
+my $tmp_node_active=$nodes_status_old{$::STATUS_ACTIVE};
+my $tmp_node_inactive=$nodes_status_old{$::STATUS_INACTIVE};
+my $tmp_node_unknown=$nodes_status_old{unknown};
+#print "active nodes: @$tmp_node_active\n";
+#print "inactive nodes: @$tmp_node_inactive\n";
+#print "unknown nodes: @$tmp_node_unknown\n";
+
+#get current node status
+my %nodes_status_new1=();
+if ($tmp_node_active) { %nodes_status_new1=pingNodeStatus(@$tmp_node_active);}
+my %nodes_status_new2=();
+if ($tmp_node_inactive) {%nodes_status_new2=pingNodeStatus(@$tmp_node_inactive);}
+my %nodes_status_new3=();
+if ($tmp_node_unknown) { %nodes_status_new3=pingNodeStatus(@$tmp_node_unknown);}
+
+#a group that was not pinged has no entry here, so fall back to an empty list
+my $changed1=$nodes_status_new1{$::STATUS_INACTIVE} || [];
+my $changed2=$nodes_status_new2{$::STATUS_ACTIVE} || [];
+my $changed3=$nodes_status_new3{$::STATUS_INACTIVE} || [];
+my $changed4=$nodes_status_new3{$::STATUS_ACTIVE} || [];
+my @changed_active=(@$changed2, @$changed4);
+my @changed_inactive=(@$changed1, @$changed3);
+
+print " switch to active: @changed_active\n";
+print " switch to inactive: @changed_inactive\n";
+
+my %node_status=();
+if (@changed_active>0) {
+  $node_status{$::STATUS_ACTIVE}=\@changed_active;
+}
+if (@changed_inactive>0) {
+  $node_status{$::STATUS_INACTIVE}=\@changed_inactive;
+}
+
+#only set the node status for the changed ones
+if (keys(%node_status) > 0) {
+  xCAT_monitoring::monitorctrl::processNodeStatusChanges(\%node_status);
+}
+
+($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time);
+printf "%2d-%02d-%04d %02d:%02d:%02d: xcatnodemon finished.\n\n", $mon+1,$mday,$year+1900,$hour,$min,$sec;
+
+#--------------------------------------------------------------------------------
+=head3    pingNodeStatus
+      This function takes an array of nodes and returns their status using fping.
+    Arguments:
+       nodes-- an array of nodes.
+    Returns:
+       a hash that has the node status. The format is:
+          {$::STATUS_ACTIVE=>[node1, node3,...], $::STATUS_INACTIVE=>[node4, node2...]}
+       Both lists are empty when no nodes are given or fping is not executable.
+=cut
+#--------------------------------------------------------------------------------
+sub pingNodeStatus {
+  my @mon_nodes=@_;
+  #TODO how to decide the path of fping. how about AIX, does it support fping?
+  my $fping='/usr/sbin/fping';
+  my @active_nodes=();
+  my @inactive_nodes=();
+  if (@mon_nodes && ! -x $fping) {
+    #without fping nothing is known; do not report every node as inactive
+    print " $fping is not executable, node status is not checked.\n";
+  }
+  elsif (@mon_nodes) {
+    #quote each node name so that the shell passes it to fping literally
+    my $nodes=join(' ', map { my $n=$_; $n =~ s/'/'\\''/g; "'$n'" } @mon_nodes);
+    my $temp=`$fping -a $nodes 2> /dev/null`;
+    $temp='' unless defined($temp);
+    chomp($temp);
+    @active_nodes=split(/\n/, $temp);
+
+    #get all the inactive nodes by subtracting the active nodes from all.
+    my %is_active=map { $_ => 1 } @active_nodes;
+    @inactive_nodes=grep { !$is_active{$_} } @mon_nodes;
+  }
+
+  my %status=();
+  $status{$::STATUS_ACTIVE}=\@active_nodes;
+  $status{$::STATUS_INACTIVE}=\@inactive_nodes;
+  return %status;
+}
+