Added support for node status monitoring

git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@99 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
linggao 2007-11-29 19:55:16 +00:00
parent 54331caa62
commit ef73e9399a
2 changed files with 272 additions and 0 deletions

View File

@ -0,0 +1,169 @@
#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
package xCAT_monitoring::xcatmon;
#use xCAT::NodeRange;
#use Socket;
#use xCAT::Utils;
use xCAT::GlobalDef;
use xCAT::Utils;
# NOTE(review): the original comment here described a cache of the current node
# status, shaped like
#   (server1=>{active=>[node1,node3...], unreachable=>[node2, node4..]},....)
# but the only statement is a bare mention of $flag, and nothing in the visible
# code assigns or reads it -- TODO confirm whether it can be removed.
$flag;
# True value evaluated when the file is loaded, so require/use of it succeeds.
1;
#-------------------------------------------------------------------------------
=head1 xCAT_monitoring::xcatmon
=head2 Package Description
This is an xCAT monitoring plug-in. The only thing this plug-in does is
monitor node status.
=cut
#-------------------------------------------------------------------------------
#--------------------------------------------------------------------------------

=head3    start

    Called by the monitorctrl module when xcatd starts. This plug-in has
    nothing to set up at that point, so it only reports success.

    Arguments:
        monservers -- a hash reference keyed by the monitoring server nodes.
                      Each value is a ref to an array of [node, nodetype]
                      arrays monitored by that server:
                      {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
                      (The argument is not read by this implementation.)
    Returns:
        (return code, message) -- always (0, "started")

=cut

#--------------------------------------------------------------------------------
sub start {
    # Debug aid: print "xcatmon.start\n";
    my ($rc, $msg) = (0, "started");
    return ($rc, $msg);
}
#--------------------------------------------------------------------------------

=head3    stop

    Called by the monitorctrl module when xcatd stops. This plug-in has
    nothing to tear down, so it only reports success.

    Arguments:
        none
    Returns:
        (return code, message) -- always (0, "stopped")

=cut

#--------------------------------------------------------------------------------
sub stop {
    my ($rc, $msg) = (0, "stopped");
    return ($rc, $msg);
}
#--------------------------------------------------------------------------------

=head3    supportNodeStatusMon

    Called by the monitorctrl module to ask whether this product can help
    monitor the node status and return it to xCAT.

    Arguments:
        none
    Returns:
        1 -- this plug-in always reports that it supports node status monitoring

=cut

#--------------------------------------------------------------------------------
sub supportNodeStatusMon {
    my $supported = 1;
    return $supported;
}
#--------------------------------------------------------------------------------

=head3    startNodeStatusMon

    Called by the monitorctrl module to tell the product to start monitoring
    the node status and feed it back to xCAT. This is done by registering a
    cron entry that runs /usr/sbin/xcatnodemon every minute and appends its
    standard output to /var/log/xcatmon.log.

    Arguments:
        monservers -- a hash reference keyed by the monitoring server nodes.
                      Each value is a ref to an array of [node, nodetype]
                      arrays monitored by that server:
                      {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
                      (The argument is not read by this implementation.)
    Returns:
        (return code, message) -- (0, "started") when add_cron_job returns a
        zero code, otherwise the code and message from add_cron_job unchanged.

=cut

#--------------------------------------------------------------------------------
sub startNodeStatusMon {
    # Debug aid: print "xcatmon.startNodeStatusMon\n";
    my $cron_entry = "*/1 * * * * /usr/sbin/xcatnodemon >> /var/log/xcatmon.log";
    my ($rc, $errmsg) = xCAT::Utils::add_cron_job($cron_entry);

    # Pass a failure from add_cron_job straight back to the caller.
    return ($rc, $errmsg) if $rc != 0;

    return (0, "started");
}
#--------------------------------------------------------------------------------

=head3    stopNodeStatusMon

    Called by the monitorctrl module to tell the product to stop feeding the
    node status info back to xCAT. This asks xCAT::Utils::remove_cron_job to
    remove the /usr/sbin/xcatnodemon job.

    Arguments:
        none
    Returns:
        (return code, message) -- (0, "stopped") when remove_cron_job returns
        a zero code, otherwise the code and message from remove_cron_job
        unchanged.

=cut

#--------------------------------------------------------------------------------
sub stopNodeStatusMon {
    # The job is identified by the command that startNodeStatusMon scheduled.
    my $cron_cmd = "/usr/sbin/xcatnodemon";
    my ($rc, $errmsg) = xCAT::Utils::remove_cron_job($cron_cmd);

    # Pass a failure from remove_cron_job straight back to the caller.
    return ($rc, $errmsg) if $rc != 0;

    return (0, "stopped");
}
#--------------------------------------------------------------------------------

=head3    addNodes

    Called by the monitorctrl module when new nodes are added to the xCAT
    cluster. It is meant to add the nodes into the product for monitoring;
    the current implementation is a placeholder that does nothing.

    Arguments:
        nodes -- nodes to be added. A hash reference keyed by the monitoring
                 server nodes; each value is a ref to an array of
                 [node, nodetype] arrays monitored by that server:
                 {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
    Returns:
        none

=cut

#--------------------------------------------------------------------------------
sub addNodes {
    # Debug aid: print "xcatmon:addNodes called\n";

    #TODO: include the nodes into the product for monitoring.
    return;
}
#--------------------------------------------------------------------------------

=head3    removeNodes

    Called by the monitorctrl module when nodes are removed from the xCAT
    cluster. It is meant to remove the nodes from the product for monitoring;
    the current implementation is a placeholder that does nothing.

    Arguments:
        nodes -- nodes to be removed. A hash reference keyed by the monitoring
                 server nodes; each value is a ref to an array of
                 [node, nodetype] arrays monitored by that server:
                 {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
    Returns:
        none

=cut

#--------------------------------------------------------------------------------
sub removeNodes {
    # Debug aid: print "xcatmon:removeNodes called\n";

    #TODO: remove the nodes from the product for monitoring.
    return;
}

View File

@ -0,0 +1,103 @@
#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
use xCAT::Table;
use xCAT::GlobalDef;
require("/usr/lib/xcat/monitoring/monitorctrl.pm");
#################################################################
# This script is used as a cron job by the xCAT monitoring plug-in
# to monitor the node status
##################################################################
# Main flow of the cron job:
#   1. read the node status that xCAT has saved,
#   2. ping the nodes of each saved status bucket (active, inactive, unknown),
#   3. report only the nodes whose status differs from the saved one.
# Progress lines go to standard output.

# Formats the current local time as M-DD-YYYY HH:MM:SS for the log lines.
# The localtime fields are kept lexical so they do not leak into package main.
my $timestamp = sub {
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    return sprintf("%2d-%02d-%04d %02d:%02d:%02d",
        $mon + 1, $mday, $year + 1900, $hour, $min, $sec);
};

printf "%s: xcatnodemon started.\n", $timestamp->();

#get saved node status from the nodelist table
my %nodes_status_old = xCAT_monitoring::monitorctrl::getNodeStatus();

# Node lists (array refs) per previously saved status; a bucket may be absent.
my $was_active   = $nodes_status_old{$::STATUS_ACTIVE};
my $was_inactive = $nodes_status_old{$::STATUS_INACTIVE};
my $was_unknown  = $nodes_status_old{unknown};

#print "active nodes: @$was_active\n";
#print "inactive nodes: @$was_inactive\n";
#print "unknown nodes: @$was_unknown\n";

#get current node status, one ping run per saved bucket that exists
my %now_of_active   = $was_active   ? pingNodeStatus(@$was_active)   : ();
my %now_of_inactive = $was_inactive ? pingNodeStatus(@$was_inactive) : ();
my %now_of_unknown  = $was_unknown  ? pingNodeStatus(@$was_unknown)  : ();

# A bucket that was never pinged has no entries at all, so every lookup falls
# back to an empty list instead of dereferencing an undefined value (which is
# fatal under strict refs).
my @changed_active = (
    @{ $now_of_inactive{$::STATUS_ACTIVE} || [] },    # inactive -> active
    @{ $now_of_unknown{$::STATUS_ACTIVE}  || [] },    # unknown  -> active
);
my @changed_inactive = (
    @{ $now_of_active{$::STATUS_INACTIVE}  || [] },   # active  -> inactive
    @{ $now_of_unknown{$::STATUS_INACTIVE} || [] },   # unknown -> inactive
);

print " switch to active: @changed_active\n";
print " switch to inactive: @changed_inactive\n";

#only set the node status for the changed ones
my %node_status = ();
$node_status{$::STATUS_ACTIVE}   = \@changed_active   if @changed_active;
$node_status{$::STATUS_INACTIVE} = \@changed_inactive if @changed_inactive;

if (%node_status) {
    xCAT_monitoring::monitorctrl::processNodeStatusChanges(\%node_status);
}

printf "%s: xcatnodemon finished.\n\n", $timestamp->();
#--------------------------------------------------------------------------------

=head3    pingNodeStatus

    Takes an array of nodes and returns their status using fping.

    Arguments:
        nodes -- an array of node names.
    Returns:
        a hash (flat key/value list, not a reference) with exactly two keys,
        $::STATUS_ACTIVE and $::STATUS_INACTIVE, each mapped to an array ref:
            ($::STATUS_ACTIVE   => [node1, node3, ...],
             $::STATUS_INACTIVE => [node4, node2, ...])
        Nodes printed by "fping -a" are active; every other requested node is
        inactive. Both lists are empty when no nodes are passed in.

=cut

#--------------------------------------------------------------------------------
sub pingNodeStatus {
    my @mon_nodes = @_;

    # Declared lexically: the result used to be built in the package global
    # %status, which every call overwrote as a side effect.
    my %status         = ();
    my @active_nodes   = ();
    my @inactive_nodes = ();

    if (@mon_nodes) {
        #get all the active nodes
        #TODO how to decide the path of fping. how about AIX, does it support fping?
        # NOTE(review): the node names are interpolated into a shell command
        # line; this assumes they contain no shell metacharacters -- confirm
        # against the callers.
        my $nodes = join(' ', @mon_nodes);
        my $temp  = `/usr/sbin/fping -a $nodes 2> /dev/null`;
        $temp = '' unless defined $temp;    # the command could not be run at all
        chomp($temp);
        @active_nodes = split(/\n/, $temp);

        #get all the inactive nodes by subtracting the active nodes from all.
        my %is_active = map { $_ => 1 } @active_nodes;
        @inactive_nodes = grep { !$is_active{$_} } @mon_nodes;
    }

    $status{$::STATUS_ACTIVE}   = \@active_nodes;
    $status{$::STATUS_INACTIVE} = \@inactive_nodes;
    return %status;
}