Added support for node status monitoring
git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@99 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
parent
54331caa62
commit
ef73e9399a
169
xCAT-server-2.0/usr/lib/xcat/monitoring/xcatmon.pm
Normal file
169
xCAT-server-2.0/usr/lib/xcat/monitoring/xcatmon.pm
Normal file
@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env perl
|
||||
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
|
||||
package xCAT_monitoring::xcatmon;
|
||||
|
||||
#use xCAT::NodeRange;
|
||||
#use Socket;
|
||||
#use xCAT::Utils;
|
||||
use xCAT::GlobalDef;
|
||||
use xCAT::Utils;
|
||||
|
||||
#caches the current node status. (server1=>{active=>[node1,node3...], unreachable=>[node2, node4..]},....)
# NOTE(review): nothing visible in this file reads or writes $flag, and it is
# never given the structure described above -- confirm whether the cache is
# still planned or whether this declaration can go.
our $flag;

# True value handed back to whoever loads this module.
1;
|
||||
#-------------------------------------------------------------------------------

=head1    xCAT_monitoring::xcatmon

=head2    Package Description

  This is an xCAT monitoring plug-in. The only thing this plug-in does is
  node status monitoring.

=cut

#-------------------------------------------------------------------------------
|
||||
|
||||
#--------------------------------------------------------------------------------

=head3    start

      This function gets called by the monitorctrl module
      when xcatd starts. The current implementation performs no work
      and only reports success.
    Arguments:
      monservers --A hash reference keyed by the monitoring server nodes
         and each value is a ref to an array of [nodes, nodetype] arrays
         monitored by the server. So the format is:
           {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
         The argument is not read by the current implementation.
    Returns:
      (return code, message) -- always (0, "started")

=cut

#--------------------------------------------------------------------------------
sub start {
    #print "xcatmon.start\n";

    return (0, "started");
}
|
||||
|
||||
|
||||
|
||||
#--------------------------------------------------------------------------------

=head3    stop

      This function gets called by the monitorctrl module when
      xcatd stops. The current implementation performs no work
      and only reports success.
    Arguments:
      none
    Returns:
      (return code, message) -- always (0, "stopped")

=cut

#--------------------------------------------------------------------------------
sub stop {
    return (0, "stopped");
}
|
||||
|
||||
|
||||
|
||||
|
||||
#--------------------------------------------------------------------------------

=head3    supportNodeStatusMon

      This function is called by the monitorctrl module to check
      if this product can help monitoring and returning the node status.

    Arguments:
      none
    Returns:
      1 -- this plug-in always reports that it supports node status monitoring

=cut

#--------------------------------------------------------------------------------
sub supportNodeStatusMon {
    return 1;
}
|
||||
|
||||
|
||||
|
||||
#--------------------------------------------------------------------------------

=head3    startNodeStatusMon

      This function is called by the monitorctrl module to tell
      the product to start monitoring the node status and feed them back
      to xCAT. It does so by registering /usr/sbin/xcatnodemon as a cron
      job through xCAT::Utils::add_cron_job.
    Arguments:
      monservers --A hash reference keyed by the monitoring server nodes
         and each value is a ref to an array of [nodes, nodetype] arrays
         monitored by the server. So the format is:
           {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
         The argument is not read by the current implementation.
    Returns:
      (return code, message) -- (0, "started") when add_cron_job returns
      code 0; otherwise the code and message from add_cron_job are passed
      through unchanged.

=cut

#--------------------------------------------------------------------------------
sub startNodeStatusMon {
    #print "xcatmon.startNodeStatusMon\n";

    # crontab line: run the status checker every minute and append its
    # standard output to the monitoring log.
    my $newentry = "*/1 * * * * /usr/sbin/xcatnodemon >> /var/log/xcatmon.log";

    my ($code, $msg) = xCAT::Utils::add_cron_job($newentry);
    if ($code == 0) {
        return (0, "started");
    }

    # Registration failed: hand the helper's own diagnosis back to the caller.
    return ($code, $msg);
}
|
||||
|
||||
|
||||
#--------------------------------------------------------------------------------

=head3    stopNodeStatusMon

      This function is called by the monitorctrl module to tell
      the product to stop feeding the node status info back to xCAT.
      It does so by asking xCAT::Utils::remove_cron_job to remove the
      /usr/sbin/xcatnodemon cron job.

    Arguments:
      none
    Returns:
      (return code, message) -- (0, "stopped") when remove_cron_job returns
      code 0; otherwise the code and message from remove_cron_job are passed
      through unchanged.

=cut

#--------------------------------------------------------------------------------
sub stopNodeStatusMon {
    # The cron job is identified by the command it runs.
    my $job = "/usr/sbin/xcatnodemon";

    my ($code, $msg) = xCAT::Utils::remove_cron_job($job);
    if ($code == 0) {
        return (0, "stopped");
    }

    # Removal failed: hand the helper's own diagnosis back to the caller.
    return ($code, $msg);
}
|
||||
|
||||
|
||||
#--------------------------------------------------------------------------------

=head3    addNodes

      This function is called by the monitorctrl module when new nodes are added
      to the xCAT cluster. It should add the nodes into the product for monitoring.
      The current implementation is a placeholder that does nothing.
    Arguments:
      nodes --nodes to be added. It is a hash reference keyed by the monitoring server
        nodes and each value is a ref to an array of [nodes, nodetype] arrays monitored
        by the server. So the format is:
          {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
        The argument is not read by the current implementation.
    Returns:
      none

=cut

#--------------------------------------------------------------------------------
sub addNodes {
    #print "xcatmon:addNodes called\n";

    #TODO: include the nodes into the product for monitoring.
    return;
}
|
||||
|
||||
#--------------------------------------------------------------------------------

=head3    removeNodes

      This function is called by the monitorctrl module when nodes are removed
      from the xCAT cluster. It should remove the nodes from the product for monitoring.
      The current implementation is a placeholder that does nothing.
    Arguments:
      nodes --nodes to be removed. It is a hash reference keyed by the monitoring server
        nodes and each value is a ref to an array of [nodes, nodetype] arrays monitored
        by the server. So the format is:
          {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
        The argument is not read by the current implementation.
    Returns:
      none

=cut

#--------------------------------------------------------------------------------
sub removeNodes {
    #print "xcatmon:removeNodes called\n";

    #TODO: remove the nodes from the product for monitoring.
    return;
}
|
||||
|
||||
|
103
xCAT-server-2.0/usr/sbin/xcatnodemon
Executable file
103
xCAT-server-2.0/usr/sbin/xcatnodemon
Executable file
@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env perl
|
||||
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
|
||||
|
||||
use xCAT::Table;
|
||||
use xCAT::GlobalDef;
|
||||
|
||||
require("/usr/lib/xcat/monitoring/monitorctrl.pm");
|
||||
#################################################################
|
||||
# This script is used as a cron job by the xCAT monitoring plug-in
|
||||
# to monitor the node status
|
||||
##################################################################
|
||||
|
||||
# Main flow of the cron job: re-ping every node whose status is on record and
# report only the nodes whose status has changed since the last run.
_log_event("xcatnodemon started.\n");

#get saved node status from the nodelist table
my %nodes_status_old = xCAT_monitoring::monitorctrl::getNodeStatus();

#get a list of nodes
my $tmp_node_active   = $nodes_status_old{$::STATUS_ACTIVE};
my $tmp_node_inactive = $nodes_status_old{$::STATUS_INACTIVE};
my $tmp_node_unknown  = $nodes_status_old{unknown};
#print "active nodes: @$tmp_node_active\n";
#print "inactive nodes: @$tmp_node_inactive\n";
#print "unknown nodes: @$tmp_node_unknown\n";

#get current node status
# Each previously recorded group is pinged separately so that the result can
# be compared against the status that group used to have.
my %nodes_status_new1 = ();
if ($tmp_node_active) { %nodes_status_new1 = pingNodeStatus(@$tmp_node_active); }
my %nodes_status_new2 = ();
if ($tmp_node_inactive) { %nodes_status_new2 = pingNodeStatus(@$tmp_node_inactive); }
my %nodes_status_new3 = ();
if ($tmp_node_unknown) { %nodes_status_new3 = pingNodeStatus(@$tmp_node_unknown); }

# A group that was not pinged leaves its result hash empty, so every lookup
# falls back to an empty list instead of dereferencing undef.
my $changed1 = $nodes_status_new1{$::STATUS_INACTIVE} || [];    # were active, now inactive
my $changed2 = $nodes_status_new2{$::STATUS_ACTIVE}   || [];    # were inactive, now active
my $changed3 = $nodes_status_new3{$::STATUS_INACTIVE} || [];    # were unknown, now inactive
my $changed4 = $nodes_status_new3{$::STATUS_ACTIVE}   || [];    # were unknown, now active
my @changed_active   = (@$changed2, @$changed4);
my @changed_inactive = (@$changed1, @$changed3);

print "  switch to active: @changed_active\n";
print "  switch to inactive: @changed_inactive\n";

my %node_status = ();
if (@changed_active > 0) {
    $node_status{$::STATUS_ACTIVE} = \@changed_active;
}
if (@changed_inactive > 0) {
    $node_status{$::STATUS_INACTIVE} = \@changed_inactive;
}

#only set the node status for the changed ones
if (keys(%node_status) > 0) {
    xCAT_monitoring::monitorctrl::processNodeStatusChanges(\%node_status);
}

_log_event("xcatnodemon finished.\n\n");


#--------------------------------------------------------------------------------

=head3    _log_event

      Prints a message to standard output prefixed with the current local
      time in "month-day-year hour:minute:second: " form.
    Arguments:
      text -- the message, including any trailing newline(s).
    Returns:
      the return value of printf.

=cut

#--------------------------------------------------------------------------------
sub _log_event {
    my ($text) = @_;

    # Lexical time fields; the original kept these in undeclared package globals.
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    return printf("%2d-%02d-%04d %02d:%02d:%02d: %s",
                  $mon + 1, $mday, $year + 1900, $hour, $min, $sec, $text);
}
|
||||
|
||||
|
||||
|
||||
#--------------------------------------------------------------------------------

=head3    pingNodeStatus

      This function takes an array of nodes and returns their status using fping.
      A node is active when fping lists it as alive; every other node that was
      passed in is inactive. If fping cannot be run at all, no node is listed
      as alive and therefore all nodes are returned as inactive.
    Arguments:
      nodes-- an array of nodes.
    Returns:
      a hash that has the node status, keyed by $::STATUS_ACTIVE and
      $::STATUS_INACTIVE. The format is:
        {$::STATUS_ACTIVE=>[node1, node3,...], $::STATUS_INACTIVE=>[node4, node2...]}
      Both keys are always present; inactive nodes keep the order in which
      they were passed in.

=cut

#--------------------------------------------------------------------------------
sub pingNodeStatus {
    my @mon_nodes = @_;

    # Lexical result hash; the original assigned to an undeclared package global.
    my %status         = ();
    my @active_nodes   = ();
    my @inactive_nodes = ();

    if (@mon_nodes) {
        #get all the active nodes
        @active_nodes = _fping_alive(@mon_nodes);

        #get all the inactive nodes by subtracting the active nodes from all.
        my %is_active = map { $_ => 1 } @active_nodes;
        @inactive_nodes = grep { !$is_active{$_} } @mon_nodes;
    }

    $status{$::STATUS_ACTIVE}   = \@active_nodes;
    $status{$::STATUS_INACTIVE} = \@inactive_nodes;

    return %status;
}

#--------------------------------------------------------------------------------

=head3    _fping_alive

      Runs "/usr/sbin/fping -a" on the given targets and returns the lines it
      prints on standard output. The command is started without a shell, so
      node names are passed to fping verbatim and are never interpreted as
      shell syntax.
    Arguments:
      targets-- an array of nodes.
    Returns:
      an array with one element per non-empty output line; empty when fping
      printed nothing or could not be started.

=cut

#--------------------------------------------------------------------------------
sub _fping_alive {
    my @targets = @_;
    my @alive   = ();

    #TODO how to decide the path of fping. how about AIX, does it support fping?
    my $fping = '/usr/sbin/fping';

    # The original command line discarded stderr with "2> /dev/null". Without
    # a shell, the same effect is achieved by pointing our own STDERR at
    # /dev/null while the child is started (it inherits the handle) and
    # restoring it right afterwards.
    my $saved_ok = open(my $saved_err, '>&', \*STDERR);
    if ($saved_ok) { open(STDERR, '>', '/dev/null'); }

    # List-form pipe open: fping is executed directly, with no shell involved.
    my $pid = open(my $out, '-|', $fping, '-a', @targets);

    if ($saved_ok) {
        open(STDERR, '>&', $saved_err);
        close($saved_err);
    }

    if ($pid) {
        while (my $line = <$out>) {
            chomp($line);
            if (length($line)) { push(@alive, $line); }
        }
        # The exit status is deliberately not checked: only the printed list
        # of targets is used, exactly as with the original backtick call.
        close($out);
    }

    return @alive;
}
|
||||
|
Loading…
Reference in New Issue
Block a user