438 lines
14 KiB
Perl
438 lines
14 KiB
Perl
|
#!/usr/bin/env perl
|
||
|
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
|
||
|
package xCAT_monitoring::pcpmon;
|
||
|
BEGIN
{
    # Resolve the xCAT installation root at compile time so that the
    # "use lib" line that follows can find the xCAT Perl libraries.
    # An unset or empty XCATROOT environment variable falls back to /opt/xcat.
    $::XCATROOT = $ENV{'XCATROOT'} || '/opt/xcat';
}
|
||
|
use lib "$::XCATROOT/lib/perl";
|
||
|
use xCAT::NodeRange;
|
||
|
use Sys::Hostname;
|
||
|
use Socket;
|
||
|
use xCAT::Utils;
|
||
|
use xCAT::GlobalDef;
|
||
|
use xCAT_monitoring::monitorctrl;
|
||
|
use xCAT::MsgUtils;
|
||
|
use strict;
|
||
|
use warnings;
|
||
|
1;
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
=head1 xCAT_monitoring::pcpmon
|
||
|
=head2 Package Description
|
||
|
xCAT monitoring plugin package to handle PCP monitoring.
|
||
|
=cut
|
||
|
#-------------------------------------------------------------------------------
|
||
|
|
||
|
#--------------------------------------------------------------------------------
|
||
|
=head3 start
|
||
|
This function gets called by the monitorctrl module when xcatd starts and
|
||
|
when monstart command is issued by the user. It starts the daemons and
|
||
|
does necessary startup process for the PCP monitoring.
|
||
|
p_nodes -- a reference to an array of nodes to be monitored. null means all.
|
||
|
scope -- the action scope, it indicates the node type the action will take place.
|
||
|
0 means localhost only.
|
||
|
2 means both localhost and nodes,
|
||
|
callback -- the callback pointer for error and status displaying. It can be null.
|
||
|
Returns:
|
||
|
(return code, message)
|
||
|
if the callback is set, use callback to display the status and error.
|
||
|
=cut
|
||
|
|
||
|
|
||
|
#--------------------------------------------------------------------------------
|
||
|
# start(p_nodes, scope, callback)
#
# Installs the pcp_collect cron job, restarts the PCP service on the local
# host and, when scope is true, restarts it on the monitored OSI nodes
# through xdsh.
#
#   p_nodes  -- reference to an array of node names; may be undef.
#   scope    -- false: local host only; true: local host and its nodes.
#   callback -- optional code reference; it is called with a hash reference
#               whose {data}->[0] holds a status or error line.
#
# Returns (0, "started") on success, or (1, message) when the local PCP
# restart exits non-zero.  A failure to add the cron job is reported through
# the callback but does not abort the start (this matches the original flow).
sub start
{
    print "pcp::start called\n";

    # The sub may be invoked as a plain function or as a class method; in the
    # latter case the first argument is the package name and must be dropped.
    my $noderef = shift;
    if (defined($noderef) && $noderef =~ /xCAT_monitoring::pcpmon/)
    {
        $noderef = shift;
    }
    my $scope = shift;
    print "scope is: " . (defined($scope) ? $scope : '') . " \n";
    my $callback = shift;

    my $cmd = "$::XCATROOT/sbin/pcp_collect";

    # Figure out the ping-interval setting (minutes between collections).
    my $value    = 5;    # default
    my %settings = xCAT_monitoring::monitorctrl->getPluginSettings("pcpmon");
    my $reading  = $settings{'ping-interval'};
    $reading = '' unless defined $reading;
    print "reading is $reading \n";

    # Only accept a positive whole number; anything else (unset, text,
    # fractions) would produce an invalid crontab field, so keep the default.
    (my $trimmed = $reading) =~ s/^\s+|\s+$//g;
    if ($trimmed =~ /^\d+$/ && $trimmed > 0) { $value = $trimmed + 0; }

    # Create the cron job; it runs the collector every $value minutes.
    my $minutes;
    if (xCAT::Utils->isAIX())
    {
        # AIX does not support the */value format, have to list them all.
        if ($value == 1)
        {
            $minutes = '*';
        }
        elsif ($value <= 30)
        {
            $minutes = join(',', grep { ($_ % $value) == 0 } (0 .. 59));
        }
        else
        {
            $minutes = "0";
        }
    }
    else
    {
        $minutes = "*/$value";
    }

    # Environment values are copied into the crontab line; treat unset ones
    # as empty strings instead of interpolating undef.
    my $env_path    = defined($ENV{'PATH'})    ? $ENV{'PATH'}    : '';
    my $env_xcatcfg = defined($ENV{'XCATCFG'}) ? $ENV{'XCATCFG'} : '';
    my $newentry =
      "$minutes * * * * XCATROOT=$::XCATROOT PATH=$env_path XCATCFG='$env_xcatcfg' $cmd";

    my ($code, $msg) = xCAT::Utils::add_cron_job($newentry);
    my $localhost = hostname();
    if ($callback)
    {
        my $rsp = {};
        if ($code == 0)
        {
            $rsp->{data}->[0] = "$localhost: started. Refresh interval is $value minute(s)";
        }
        else
        {
            $msg = '' unless defined $msg;
            $rsp->{data}->[0] = "$localhost: $code $msg";
        }
        $callback->($rsp);
    }

    print "local host is $localhost \n";
    print "starting pcp locally \n";
    my $res_pcp = `/etc/init.d/pcp restart 2>&1`;

    # Capture the exit status immediately so no later call can overwrite it.
    my $rc = $?;
    $res_pcp = '' unless defined $res_pcp;
    print "res_pcp=$res_pcp\n";
    print "the result cp before is $rc \n";
    if ($rc)
    {
        print "pcp result after is $rc \n";
        if ($callback)
        {
            my $resp = {};
            $resp->{data}->[0] = "$localhost: PCP not started successfully: $res_pcp \n";
            $callback->($resp);
        }
        else
        {
            xCAT::MsgUtils->message('S', "[mon]: $res_pcp \n");
        }

        return (1, "PCP not started successfully. \n");
    }

    if ($scope)
    {
        print "opening scope \n";
        print "inside scope is:$scope";

        # p_nodes may legitimately be undef; do not dereference it blindly.
        my @requested = (ref($noderef) eq 'ARRAY') ? @$noderef : ();
        print "noderef is: @requested \n";

        my $pPairHash = xCAT_monitoring::monitorctrl->getMonServer($noderef);
        print "pairhash: " . (defined($pPairHash) ? $pPairHash : '') . "\n";

        # Identification of this node: every name reported for the local host.
        my @hostinfo = xCAT::Utils->determinehostname();
        print "host:@hostinfo\n";
        my $isSV = xCAT::Utils->isServiceNode();
        print "is sv is:" . (defined($isSV) ? $isSV : '') . " \n";
        my %iphash = map { $_ => 1 } @hostinfo;
        if (!$isSV) { $iphash{'noservicenode'} = 1; }

        # Keys are comma-separated; only entries whose first field names this
        # host are handled here (presumably the monitoring server -- confirm
        # against monitorctrl::getMonServer).
        my @children;
        foreach my $key (keys(%$pPairHash))
        {
            print "opening foreach1 \n";
            print "key is: $key \n";
            my @key_a = split(',', $key);
            print "a[0] is: " . (defined($key_a[0]) ? $key_a[0] : '') . " \n";
            print "a[1] is: " . (defined($key_a[1]) ? $key_a[1] : '') . " \n";
            next unless (defined($key_a[0]) && $iphash{$key_a[0]});

            foreach my $entry (@{ $pPairHash->{$key} })
            {
                my $node     = $entry->[0];
                my $nodetype = $entry->[1];
                print "node=$node, nodetype=" . (defined($nodetype) ? $nodetype : '') . "\n";
                if (($nodetype) && ($nodetype =~ /$::NODETYPE_OSI/))
                {
                    push(@children, $node);
                }
            }
        }
        print "children:@children\n";
        my $rec = join(',', @children);
        print "the string is $rec";

        # With no children the noderange would be empty and xdsh would take
        # the command path as its noderange, so only call xdsh when needed.
        if (@children)
        {
            my $xdsh_cmd = "XCATBYPASS=Y $::XCATROOT/bin/xdsh $rec /etc/init.d/pcp restart 2>&1";
            print "$xdsh_cmd \n";
            my $result = `$xdsh_cmd`;
            if ($result)
            {
                if ($callback)
                {
                    my $resp = {};
                    $resp->{data}->[0] = "$localhost: $result\n";
                    $callback->($resp);
                }
                else
                {
                    xCAT::MsgUtils->message('S', "[mon]: $result\n");
                }
            }
        }
    }

    if ($callback)
    {
        my $resp = {};
        $resp->{data}->[0] = "$localhost: started. \n";
        $callback->($resp);
    }

    return (0, "started");
}
|
||
|
#--------------------------------------------------------------
|
||
|
=head3 config
|
||
|
This function configures the cluster for the given nodes. This function is called
|
||
|
when moncfg command is issued or when xcatd starts on the service node.
|
||
|
Returns: 1
|
||
|
=cut
|
||
|
#--------------------------------------------------------------
|
||
|
sub config
{
    # Nothing is configured by this plugin; the arguments are ignored and a
    # constant true value is returned.
    my $status = 1;
    return $status;
}
|
||
|
|
||
|
|
||
|
#--------------------------------------------------------------
|
||
|
=head3 deconfig
|
||
|
This function de-configures the cluster for the given nodes. This function is called
|
||
|
when mondecfg command is issued by the user.
|
||
|
Returns: 1
|
||
|
=cut
|
||
|
#--------------------------------------------------------------
|
||
|
sub deconfig
{
    # Nothing is de-configured by this plugin; the arguments are ignored and
    # a constant true value is returned.
    my $status = 1;
    return $status;
}
|
||
|
|
||
|
|
||
|
#--------------------------------------------------------------------------------
|
||
|
=head3 stop
|
||
|
This function gets called by the monitorctrl module when
|
||
|
xcatd stops or when monstop command is issued by the user.
|
||
|
It stops the monitoring on all nodes, stops
|
||
|
the daemons and does necessary cleanup process for the
|
||
|
PCP monitoring.
|
||
|
Arguments:
|
||
|
p_nodes -- a reference to an array of nodes for which monitoring is to be stopped. null means all.
|
||
|
scope -- the action scope, it indicates the node type the action will take place.
|
||
|
0 means localhost only.
|
||
|
2 means both monservers and nodes,
|
||
|
callback -- the callback pointer for error and status displaying. It can be null.
|
||
|
Returns:
|
||
|
(return code, message)
|
||
|
if the callback is set, use callback to display the status and error.
|
||
|
=cut
|
||
|
|
||
|
|
||
|
#--------------------------------------------------------------------------------
|
||
|
# stop(p_nodes, scope, callback)
#
# Removes the pcp_collect cron job, stops the PCP service on the local host
# and, when scope is true, stops it on the monitored OSI nodes through xdsh.
#
#   p_nodes  -- reference to an array of node names; may be undef.
#   scope    -- false: local host only; true: local host and its nodes.
#   callback -- optional code reference; it is called with a hash reference
#               whose {data}->[0] holds a status or error line.
#
# Returns (0, "stopped") on success, or (1, message) when the local PCP stop
# exits non-zero.  A failure to remove the cron job is reported through the
# callback but does not abort the stop (this matches the original flow).
sub stop
{
    print "pcpmon::stop called\n";

    # The sub may be invoked as a plain function or as a class method; in the
    # latter case the first argument is the package name and must be dropped.
    my $noderef = shift;
    if (defined($noderef) && $noderef =~ /xCAT_monitoring::pcpmon/)
    {
        $noderef = shift;
    }
    my $scope    = shift;
    my $callback = shift;

    my $job = "$::XCATROOT/sbin/pcp_collect";
    my ($code, $msg) = xCAT::Utils::remove_cron_job($job);
    my $localhost = hostname();
    if ($callback)
    {
        my $rsp = {};
        if ($code == 0)
        {
            $rsp->{data}->[0] = "$localhost: stopped.";
        }
        else
        {
            $msg = '' unless defined $msg;
            $rsp->{data}->[0] = "$localhost: $code $msg";
        }
        $callback->($rsp);
    }

    print "local host is $localhost \n";
    print "stopping pcp locally \n";
    my $res_pcp = `/etc/init.d/pcp stop 2>&1`;

    # Capture the exit status immediately so no later call can overwrite it.
    my $rc = $?;
    $res_pcp = '' unless defined $res_pcp;
    print "res_pcp=$res_pcp\n";
    print "the result pcp before is $rc \n";
    if ($rc)
    {
        print "pcp result after is $rc \n";
        if ($callback)
        {
            my $resp = {};
            $resp->{data}->[0] = "$localhost: PCP not stopped successfully: $res_pcp \n";
            $callback->($resp);
        }
        else
        {
            xCAT::MsgUtils->message('S', "[mon]: $res_pcp \n");
        }

        return (1, "PCP not stopped successfully. \n");
    }

    if ($scope)
    {
        print "opening scope \n";

        # p_nodes may legitimately be undef; do not dereference it blindly.
        my @requested = (ref($noderef) eq 'ARRAY') ? @$noderef : ();
        print "noderef is: @requested \n";

        my $pPairHash = xCAT_monitoring::monitorctrl->getMonServer($noderef);
        print "pairhash: " . (defined($pPairHash) ? $pPairHash : '') . "\n";

        # Identification of this node: every name reported for the local host.
        my @hostinfo = xCAT::Utils->determinehostname();
        print "host:@hostinfo\n";
        my $isSV = xCAT::Utils->isServiceNode();
        print "is sv is:" . (defined($isSV) ? $isSV : '') . " \n";
        my %iphash = map { $_ => 1 } @hostinfo;
        if (!$isSV) { $iphash{'noservicenode'} = 1; }

        # Keys are comma-separated; only entries whose first field names this
        # host are handled here (presumably the monitoring server -- confirm
        # against monitorctrl::getMonServer).
        my @children;
        foreach my $key (keys(%$pPairHash))
        {
            print "opening foreach1 \n";
            print "key is: $key \n";
            my @key_a = split(',', $key);
            print "a[1] is: " . (defined($key_a[1]) ? $key_a[1] : '') . " \n";
            next unless (defined($key_a[0]) && $iphash{$key_a[0]});

            foreach my $entry (@{ $pPairHash->{$key} })
            {
                my $node     = $entry->[0];
                my $nodetype = $entry->[1];
                print "node=$node, nodetype=" . (defined($nodetype) ? $nodetype : '') . "\n";
                if (($nodetype) && ($nodetype =~ /$::NODETYPE_OSI/))
                {
                    push(@children, $node);
                }
            }
        }
        print "children:@children\n";
        my $rec = join(',', @children);
        print "the string is $rec";

        # With no children the noderange would be empty and xdsh would take
        # the command path as its noderange, so only call xdsh when needed.
        if (@children)
        {
            my $xdsh_cmd = "XCATBYPASS=Y $::XCATROOT/bin/xdsh $rec /etc/init.d/pcp stop 2>&1";
            print "$xdsh_cmd \n";
            my $result = `$xdsh_cmd`;
            if ($result)
            {
                if ($callback)
                {
                    my $resp = {};
                    $resp->{data}->[0] = "$localhost: $result\n";
                    $callback->($resp);
                }
                else
                {
                    xCAT::MsgUtils->message('S', "[mon]: $result\n");
                }
            }
        }
    }

    if ($callback)
    {
        my $resp = {};
        $resp->{data}->[0] = "$localhost: stopped. \n";
        $callback->($resp);
    }

    return (0, "stopped");
}
|
||
|
|
||
|
|
||
|
#--------------------------------------------------------------------------------
|
||
|
=head3 supportNodeStatusMon
|
||
|
This function is called by the monitorctrl module to check
|
||
|
if PCP can help monitoring and returning the node status.
|
||
|
|
||
|
Arguments:
|
||
|
none
|
||
|
Returns:
|
||
|
1
|
||
|
=cut
|
||
|
|
||
|
#--------------------------------------------------------------------------------
|
||
|
sub supportNodeStatusMon
{
    # Takes no arguments and always answers with the constant 1.
    #print "pcpmon::supportNodeStatusMon called\n";
    my $supported = 1;
    return $supported;
}
|
||
|
|
||
|
|
||
|
|
||
|
#--------------------------------------------------------------------------------
|
||
|
=head3 startNodeStatusMon
|
||
|
This function is called by the monitorctrl module to tell
|
||
|
PCP to start monitoring the node status and feed them back
|
||
|
to xCAT. PCP will start setting up the condition/response
|
||
|
to monitor the node status changes.
|
||
|
|
||
|
Arguments:
|
||
|
None.
|
||
|
Returns:
|
||
|
(return code, message)
|
||
|
|
||
|
=cut
|
||
|
#--------------------------------------------------------------------------------
|
||
|
sub startNodeStatusMon
{
    # No work is done here; a fixed (return code, message) pair is returned.
    # The list literal is returned directly so that scalar-context callers
    # keep receiving the message string.
    #print "pcpmon::startNodeStatusMon called\n";
    return (0, 'started');
}
|
||
|
|
||
|
|
||
|
#--------------------------------------------------------------------------------
|
||
|
=head3 stopNodeStatusMon
|
||
|
This function is called by the monitorctrl module to tell
|
||
|
PCP to stop feeding the node status info back to xCAT. It will
|
||
|
stop the condition/response that is monitoring the node status.
|
||
|
|
||
|
Arguments:
|
||
|
none
|
||
|
Returns:
|
||
|
(return code, message)
|
||
|
=cut
|
||
|
#--------------------------------------------------------------------------------
|
||
|
sub stopNodeStatusMon
{
    # No work is done here; a fixed (return code, message) pair is returned.
    # The list literal is returned directly so that scalar-context callers
    # keep receiving the message string.
    #print "pcpmon::stopNodeStatusMon called\n";
    return (0, 'stopped');
}
|
||
|
|
||
|
#--------------------------------------------------------------------------------
|
||
|
=head3 getDescription
|
||
|
This function returns the detailed description of the plugin including the
|
||
|
valid values for its settings in the monsetting table.
|
||
|
Arguments:
|
||
|
none
|
||
|
Returns:
|
||
|
The description.
|
||
|
=cut
|
||
|
#--------------------------------------------------------------------------------
|
||
|
# getDescription()
#
# Returns the human-readable description of this plugin, including the
# ping-interval setting and its default.  Takes no arguments.
#
# The text is assembled from one literal per line so that nothing other than
# the intended wording and explicit newlines can end up in the output.
sub getDescription
{
    return "Description: This plugin will help interface the xCAT cluster with PCP monitoring software\n"
      . "    ping-interval: the number of minutes between the metric collection operation.\n"
      . "        The default value is 5 \n ";
}
|