xcat-core/xCAT-server/lib/xcat/monitoring/xcatmon.pm

410 lines
14 KiB
Perl
Raw Normal View History

#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
package xCAT_monitoring::xcatmon;
# Resolve the xCAT installation root at compile time so that the "use lib"
# statement that follows can build its path from it. The XCATROOT environment
# variable wins when it holds a true value; otherwise /opt/xcat is used.
BEGIN
{
$::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl";
use strict;
use xCAT::Utils;
use xCAT::GlobalDef;
use xCAT_monitoring::monitorctrl;
use Sys::Hostname;
1;
#-------------------------------------------------------------------------------
=head1 xCAT_monitoring::xcatmon
=head2 Package Description
This is an xCAT monitoring plug-in. The only thing that this plug-in does is
node status monitoring. To activate it, simply run the following command:
chtab pname=xCAT monitoring.nodestatmon=Y
=cut
#-------------------------------------------------------------------------------
#--------------------------------------------------------------------------------
=head3 start

    Entry point invoked by the monitorctrl module when the monstart command
    is issued and when xcatd starts. This plug-in has nothing to launch at
    this stage, so the routine only prints a trace line and reports success.

    Arguments (accepted for interface compatibility, not used here):
        p_nodes  -- a pointer to an array of nodes to be monitored.
                    null means all.
        scope    -- the action scope; it indicates the node type the action
                    will take place on.
                      0 means localhost only,
                      2 means both monservers and nodes.
        callback -- the callback pointer for error and status displaying.
                    It can be null.
    Returns:
        (return code, message) -- always (0, "started").

=cut

#--------------------------------------------------------------------------------
sub start {
    # Trace line written to the currently selected output handle.
    print("xcatmon.start\n");

    # Nothing can fail here: code 0 together with the status text.
    return (0, 'started');
}
#--------------------------------------------------------------------------------
=head3 stop

    Entry point invoked by the monitorctrl module when the monstop command
    is issued. This plug-in has nothing to shut down at this stage, so the
    routine only prints a trace line and reports success.

    Arguments (accepted for interface compatibility, not used here):
        p_nodes  -- a pointer to an array of nodes to be stopped for
                    monitoring. null means all.
        scope    -- the action scope; it indicates the node type the action
                    will take place on.
                      0 means localhost only,
                      2 means both monservers and nodes.
        callback -- the callback pointer for error and status displaying.
                    It can be null.
    Returns:
        (return code, message) -- always (0, "stopped").

=cut

#--------------------------------------------------------------------------------
sub stop {
    # Trace line written to the currently selected output handle.
    print("xcatmon.stop\n");

    # Nothing can fail here: code 0 together with the status text.
    return (0, 'stopped');
}
#--------------------------------------------------------------------------------
=head3 supportNodeStatusMon

    Called by the monitorctrl module to find out whether this plug-in can
    help with monitoring and returning the node status.

    Arguments:
        none
    Returns:
        1 -- this plug-in always reports that it supports node status
             monitoring.

=cut

#--------------------------------------------------------------------------------
sub supportNodeStatusMon {
    # Node status monitoring is the one thing this plug-in provides.
    my $supported = 1;
    return $supported;
}
#--------------------------------------------------------------------------------
=head3 startNodeStatusMon

    Called by the monitorctrl module when monstart is issued and when xcatd
    starts. It registers a cron job that runs $XCATROOT/sbin/xcatnodemon
    periodically so that the node status is fed back to xCAT.

    The period (in minutes) comes from the 'ping-interval' setting of the
    xcatmon plug-in. Only the leading positive integer of the setting is
    used; a missing, zero, negative or non-numeric setting falls back to the
    default of 3 minutes.

    Arguments:
        p_nodes  -- a pointer to an array of nodes to be monitored.
                    null means all.
        scope    -- the action scope; it indicates the node type the action
                    will take place on.
                      0 means localhost only,
                      2 means both monservers and nodes.
        callback -- the callback pointer for error and status displaying.
                    It can be null.
        note: p_nodes and scope are ignored by this plug-in.
    Returns:
        (return code, message)
        (0, "started") on success, otherwise the code and message returned
        by xCAT::Utils::add_cron_job.
        If the callback is set, it is also used to display the status or
        the error.

=cut

#--------------------------------------------------------------------------------
sub startNodeStatusMon
{
    print "xcatmon.startNodeStatusMon\n";

    # When invoked as a class method the first argument is the package
    # name; drop it so the remaining arguments line up in both call styles.
    my @args = @_;
    if (defined($args[0]) && $args[0] =~ /xCAT_monitoring::xcatmon/) {
        shift(@args);
    }
    my (undef, undef, $callback) = @args;    # p_nodes and scope are ignored

    my $cmd = "$::XCATROOT/sbin/xcatnodemon";

    # Figure out the ping-interval setting. Only a leading positive integer
    # is accepted: a fractional value below 1 would truncate to 0 in the
    # modulus used for AIX below, and any non-integer text would produce an
    # invalid cron minute field.
    my $value    = 3;    #default
    my %settings = xCAT_monitoring::monitorctrl->getPluginSettings("xcatmon");
    my $reading  = $settings{'ping-interval'};
    if (defined($reading) && $reading =~ /^\s*(\d+)/ && $1 > 0) {
        $value = $1 + 0;
    }

    # Build the minute field of the cron entry: run every $value minutes.
    my $minute_field;
    if (xCAT::Utils->isAIX()) {
        # AIX does not support the */value format, have to list them all.
        if ($value == 1) {
            $minute_field = '*';
        }
        elsif ($value <= 30) {
            $minute_field = join(',', grep { ($_ % $value) == 0 } (0 .. 59));
        }
        else {
            # More than 30 minutes: run once per hour, on the hour.
            $minute_field = "0";
        }
    }
    else {
        $minute_field = "*/$value";
    }

    # The cron job gets the same XCATROOT, PATH and XCATCFG as this process.
    my $newentry = "$minute_field * * * * XCATROOT=$::XCATROOT PATH=$ENV{'PATH'} XCATCFG='$ENV{'XCATCFG'}' $cmd";

    my ($code, $msg) = xCAT::Utils::add_cron_job($newentry);
    my $localhostname = hostname();

    if ($code == 0) {
        if ($callback) {
            my $rsp = {};
            $rsp->{data}->[0] = "$localhostname: started. Refresh interval is $value minute(s).";
            $callback->($rsp);
        }
        return (0, "started");
    }

    if ($callback) {
        my $rsp = {};
        $rsp->{data}->[0] = "$localhostname: $code $msg";
        $callback->($rsp);
    }
    return ($code, $msg);
}
#--------------------------------------------------------------------------------
=head3 stopNodeStatusMon

    Called by the monitorctrl module when the monstop command is issued.
    It removes the xcatnodemon cron job so that node status information is
    no longer fed back to xCAT.

    Arguments:
        p_nodes  -- a pointer to an array of nodes to be stopped for
                    monitoring. null means all.
        scope    -- the action scope; it indicates the node type the action
                    will take place on.
                      0 means localhost only,
                      2 means both monservers and nodes.
        callback -- the callback pointer for error and status displaying.
                    It can be null.
        note: p_nodes and scope are ignored by this plug-in.
    Returns:
        (return code, message)
        (0, "stopped") on success, otherwise the code and message returned
        by xCAT::Utils::remove_cron_job.
        If the callback is set, it is also used to display the status or
        the error.

=cut

#--------------------------------------------------------------------------------
sub stopNodeStatusMon {
    print "xcatmon.stopNodeStatusMon\n";

    # A class-method style call puts the package name first; skip over it.
    my @params = @_;
    shift(@params) if ($params[0] =~ /xCAT_monitoring::xcatmon/);
    my (undef, undef, $callback) = @params;    # p_nodes and scope are unused

    # The cron entry is identified by the command it runs.
    my ($code, $msg) = xCAT::Utils::remove_cron_job("$::XCATROOT/sbin/xcatnodemon");
    my $localhostname = hostname();
    my $succeeded     = ($code == 0);

    # Report the outcome to the caller-supplied callback, if there is one.
    if ($callback) {
        my $text = $succeeded
          ? "$localhostname: stopped."
          : "$localhostname: $code $msg";
        $callback->({ data => [$text] });
    }

    return (0, "stopped") if $succeeded;
    return ($code, $msg);
}
#--------------------------------------------------------------------------------
=head3 config

    Configures the cluster for the given nodes. It is called when the
    monconfig command is issued or when xcatd starts on the service node,
    and is meant to include the given nodes in the monitoring domain.
    This plug-in needs no configuration, so the routine only prints a trace
    line and reports success.

    Arguments (accepted for interface compatibility, not used here):
        p_nodes  -- a pointer to an array of nodes to be added for
                    monitoring. none means all.
        scope    -- the action scope; it indicates the node type the action
                    will take place on.
                      0 means localhost only,
                      2 means both monservers and nodes.
        callback -- the callback pointer for error and status displaying.
                    It can be null.
    Returns:
        (error code, error message) -- always (0, "ok").

=cut

#--------------------------------------------------------------------------------
sub config {
    # Trace line written to the currently selected output handle.
    print("xcatmon:config called\n");

    # Nothing to configure, so this cannot fail.
    return (0, 'ok');
}
#--------------------------------------------------------------------------------
=head3 deconfig

    De-configures the cluster for the given nodes. It is called by the
    monitorctrl module when nodes are removed from the xCAT cluster, and is
    meant to remove the nodes from the monitoring product.
    This plug-in keeps no per-node configuration, so the routine only prints
    a trace line and reports success.

    Arguments (accepted for interface compatibility, not used here):
        p_nodes  -- a pointer to an array of nodes to be removed from
                    monitoring. none means all.
        scope    -- the action scope; it indicates the node type the action
                    will take place on.
                      0 means localhost only,
                      2 means both monservers and nodes.
        callback -- the callback pointer for error and status displaying.
                    It can be null.
    Returns:
        (error code, error message) -- always (0, "ok").

=cut

#--------------------------------------------------------------------------------
sub deconfig {
    # Trace line written to the currently selected output handle.
    print("xcatmon:deconfig called\n");

    # Nothing to undo, so this cannot fail.
    return (0, 'ok');
}
#--------------------------------------------------------------------------------
=head3 getMonNodesStatus

    Retrieves the saved node status, as reported by the monitoring
    hierarchy, for all the nodes that are monitored by the local host, and
    sorts the nodes into three lists that tell the monitoring code what to
    check next.

    A node whose status is already active or inactive goes to the matching
    list. For any other status the %::NEXT_NODESTAT_VAL table decides:
      * only "active" is a valid next status   -> inactive list (so that
        the monitoring code can switch it to active),
      * only "inactive" is a valid next status -> active list (so that the
        monitoring code can switch it to inactive),
      * both are valid                         -> unknown list,
      * neither is valid                       -> unknown list, but only
        for nodes whose type does not contain "osi".

    Arguments:
        none.
    Returns:
        a hash (not a reference) that has the node status. The keys are
        $::STATUS_ACTIVE, $::STATUS_INACTIVE and the literal 'unknown'
        (the first two are presumably 'alive' and 'unreachable'; see
        xCAT::GlobalDef). The format is:
        {alive=>[node1, node3,...], unreachable=>[node4, node2...], unknown=>[node8, node101...]}
        An empty hash is returned when the hierarchy cannot be obtained.

=cut

#--------------------------------------------------------------------------------
sub getMonNodesStatus {
    my %status         = ();
    my @inactive_nodes = ();
    my @active_nodes   = ();
    my @unknown_nodes  = ();

    my $hierachy = xCAT_monitoring::monitorctrl->getMonHierarchy();

    # An array reference is treated as an error report whose second
    # element is the message text.
    if (ref($hierachy) eq 'ARRAY') {
        xCAT::MsgUtils->message('S', "[mon]: " . $hierachy->[1]);
        return %status;
    }

    # Nothing usable came back; report no nodes rather than dying.
    return %status if (!ref($hierachy));

    # On a service node or on the management node, get the nodes that have
    # the local host as their monitoring server.
    my $isSV   = xCAT::Utils->isServiceNode();
    my %iphash = map { $_ => 1 } xCAT::Utils->determinehostname();

    # If this is the management node, include the nodes that have no
    # service node.
    if (!$isSV) { $iphash{'noservicenode'} = 1; }

    foreach my $server_key (keys(%$hierachy)) {
        # Service nodes come in pairs: the first one is the monserver
        # adapter facing the mn, the second one is facing the cn. The first
        # one is used here.
        my ($sv) = split(',', $server_key);

        # Only servers that are this host contribute nodes. Skipping here
        # (instead of reusing the node list of a previously matched server)
        # keeps every node from being counted more than once.
        next unless (defined($sv) && $iphash{$sv});

        my $monnodes = $hierachy->{$server_key};
        next unless (ref($monnodes) eq 'ARRAY');

        foreach my $entry (@$monnodes) {
            # Each entry is [node name, node type, status].
            my $node        = $entry->[0];
            my $type        = $entry->[1];
            my $node_status = $entry->[2];

            if (!$node_status) { $node_status = $::STATUS_DEFINED; }    #default

            if ($node_status eq $::STATUS_ACTIVE) {
                push(@active_nodes, $node);
                next;
            }
            if ($node_status eq $::STATUS_INACTIVE) {
                push(@inactive_nodes, $node);
                next;
            }

            # Look up which statuses may legally follow the current one,
            # without creating new entries in the global table.
            my $next_ok = $::NEXT_NODESTAT_VAL{$node_status};
            $next_ok = {} if (ref($next_ok) ne 'HASH');
            my $need_active   = (($next_ok->{$::STATUS_ACTIVE}   || 0) == 1) ? 1 : 0;
            my $need_inactive = (($next_ok->{$::STATUS_INACTIVE} || 0) == 1) ? 1 : 0;

            if ($need_active && !$need_inactive) {
                # Put it into the inactive list so that the monitoring code
                # can switch it to active.
                push(@inactive_nodes, $node);
            }
            elsif (!$need_active && $need_inactive) {
                # Put it into the active list so that the monitoring code
                # can change it to inactive.
                push(@active_nodes, $node);
            }
            elsif ($need_active && $need_inactive) {
                # Unknown list, so that the monitoring code can change it to
                # either active or inactive.
                push(@unknown_nodes, $node);
            }
            elsif (!defined($type) || $type !~ /osi/) {
                # If it is a non-osi node, check it anyway.
                push(@unknown_nodes, $node);
            }
        }
    }

    $status{$::STATUS_ACTIVE}   = \@active_nodes;
    $status{$::STATUS_INACTIVE} = \@inactive_nodes;
    $status{unknown}            = \@unknown_nodes;
    return %status;
}
#--------------------------------------------------------------------------------
=head3 setNodeStatusAttributes

    Updates the status column of the nodelist table with the new node
    status. The work is delegated to
    xCAT_monitoring::monitorctrl->setNodeStatusAttributes.

    Arguments:
        status -- a hash pointer of the node status. A key is a status
                  string. The value is an array pointer of nodes that have
                  the same status.
                  for example: {alive=>["node1", "node2"], unreachable=>["node5","node100"]}
        force  -- 1 force the input values to be set.
               -- 0 make sure the input value is the next valid value.
    Returns:
        whatever the monitorctrl routine returns; documented as
        0 for successful, non-0 for not successful.

=cut

#--------------------------------------------------------------------------------
sub setNodeStatusAttributes {
    # A class-method style call puts the package name first; skip over it.
    my @params = @_;
    shift(@params) if ($params[0] =~ /xCAT_monitoring::xcatmon/);

    my ($status_ref, $force) = @params;
    return xCAT_monitoring::monitorctrl->setNodeStatusAttributes($status_ref, $force);
}
#--------------------------------------------------------------------------------
=head3 processSettingChanges

    Called when the settings for this monitoring plug-in have been changed
    in the monsetting table. The cron job is removed and created again so
    that the new settings take effect.

    Arguments:
        none.
    Returns:
        the value returned by startNodeStatusMon for the restart, i.e.
        (return code, message) with return code 0 for successful and non-0
        for not successful. The result of the stop step is not reported.

=cut

#--------------------------------------------------------------------------------
sub processSettingChanges {
    my $plugin = 'xCAT_monitoring::xcatmon';

    # Restart the cron job: empty node list, scope 0, no callback.
    $plugin->stopNodeStatusMon([], 0);
    return $plugin->startNodeStatusMon([], 0);
}
#--------------------------------------------------------------------------------
=head3 getDescription

    Returns the detailed description of the plug-in, including the valid
    values for its settings in the monsetting table.

    Arguments:
        none
    Returns:
        The description, as a single multi-line string.

=cut

#--------------------------------------------------------------------------------
sub getDescription {
    # One element per output line; the lines are joined with newlines and
    # the text carries no trailing newline.
    my @text = (
        " Description:",
        "xcatmon uses fping to report the node liveness status and update the",
        "nodelist.status column. Use command 'monstart xcatmon -n' to start",
        "monitoring.",
        "Settings:",
        "ping-interval: the number of minutes between each fping operation.",
        "The default value is 3.",
    );
    return join("\n", @text);
}