2007-11-29 19:55:16 +00:00
#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
package xCAT_monitoring::xcatmon ;
2007-12-11 19:14:43 +00:00
BEGIN
{
2008-01-08 21:18:39 +00:00
$ ::XCATROOT = $ ENV { 'XCATROOT' } ? $ ENV { 'XCATROOT' } : '/opt/xcat' ;
2007-12-11 19:14:43 +00:00
}
use lib "$::XCATROOT/lib/perl" ;
2008-08-01 17:35:58 +00:00
use strict ;
2007-11-29 19:55:16 +00:00
use xCAT::Utils ;
2008-02-16 04:51:42 +00:00
use xCAT::GlobalDef ;
use xCAT_monitoring::monitorctrl ;
2008-08-01 17:35:58 +00:00
use Sys::Hostname ;
2007-11-29 19:55:16 +00:00
1 ;
#-------------------------------------------------------------------------------
=head1 xCAT_monitoring::xcatmon
= head2 Package Description
This is a xCAT monitoring plugin . The only thing that this plug - in does is
2007-11-29 20:01:00 +00:00
the node monitoring . To activate it simply do the following command:
chtab pname = xCAT monitoring . nodestatmon = Y
2007-11-29 19:55:16 +00:00
= cut
#-------------------------------------------------------------------------------
#--------------------------------------------------------------------------------
= head3 start
2008-08-01 17:35:58 +00:00
This function gets called by the monitorctrl module when monstart command
gets called and when xcatd starts .
2007-11-29 19:55:16 +00:00
Arguments:
2008-08-01 17:35:58 +00:00
p_nodes - - a pointer to an arrays of nodes to be monitored . null means all .
scope - - the action scope , it indicates the node type the action will take place .
0 means localhost only .
2 means both monservers and nodes ,
callback - - the callback pointer for error and status displaying . It can be null .
2007-11-29 19:55:16 +00:00
Returns:
2008-08-01 17:35:58 +00:00
( return code , message )
if the callback is set , use callback to display the status and error .
2007-11-29 19:55:16 +00:00
= cut
#--------------------------------------------------------------------------------
sub start {
    # Generic "start" hook of the plug-in. It only logs that it was called
    # and reports success; any arguments passed in are ignored.
    #
    # Returns: (return code, message) -- always (0, "started").
    print "xcatmon.start\n";

    my ($rc, $message) = (0, "started");
    return ($rc, $message);
}
#--------------------------------------------------------------------------------
= head3 stop
2008-08-01 17:35:58 +00:00
This function gets called by the monitorctrl module when monstop command gets called .
2007-11-29 19:55:16 +00:00
Arguments:
2008-08-01 17:35:58 +00:00
p_nodes -- a pointer to an array of nodes to be stopped for monitoring. null means all.
scope -- the action scope, it indicates the node type the action will take place.
0 means localhost only .
2 means both monservers and nodes ,
callback - - the callback pointer for error and status displaying . It can be null .
2007-11-29 19:55:16 +00:00
Returns:
2008-08-01 17:35:58 +00:00
( return code , message )
if the callback is set , use callback to display the status and error .
2007-11-29 19:55:16 +00:00
= cut
#--------------------------------------------------------------------------------
sub stop {
    # Generic "stop" hook of the plug-in. It only logs that it was called
    # and reports success; any arguments passed in are ignored.
    #
    # Returns: (return code, message) -- always (0, "stopped").
    print "xcatmon.stop\n";

    my ($rc, $message) = (0, "stopped");
    return ($rc, $message);
}
#--------------------------------------------------------------------------------
= head3 supportNodeStatusMon
This function is called by the monitorctrl module to check
if this product can help monitoring and returning the node status .
Arguments:
none
Returns:
1
= cut
#--------------------------------------------------------------------------------
sub supportNodeStatusMon {
    # Capability query: this plug-in always reports that it can monitor
    # node status.
    my $supported = 1;
    return $supported;
}
#--------------------------------------------------------------------------------
= head3 startNodeStatusMon
2008-08-01 17:35:58 +00:00
This function is called by the monitorctrl module when monstart gets called and
when xcatd starts . It starts monitoring the node status and feed them back
2007-11-29 19:55:16 +00:00
to xCAT .
Arguments:
2008-08-01 17:35:58 +00:00
p_nodes - - a pointer to an arrays of nodes to be monitored . null means all .
scope - - the action scope , it indicates the node type the action will take place .
0 means localhost only .
2 means both monservers and nodes ,
callback - - the callback pointer for error and status displaying . It can be null .
note: p_nodes and scope are ignored by this plugin .
2007-11-29 19:55:16 +00:00
Returns:
2008-08-01 17:35:58 +00:00
( return code , message )
if the callback is set , use callback to display the status and error .
2007-11-29 19:55:16 +00:00
= cut
#--------------------------------------------------------------------------------
2008-08-01 17:35:58 +00:00
sub startNodeStatusMon
{
    # Installs the cron job that periodically runs 'nodestat' to refresh the
    # node status, using the 'ping-interval' plug-in setting (in minutes) as
    # the period.
    #
    # Arguments (optionally preceded by the class name when called as a
    # class method):
    #   p_nodes  -- ignored by this plug-in
    #   scope    -- ignored by this plug-in
    #   callback -- optional code ref; receives { data => [ message ] }
    # Returns:
    #   (0, "")        when /etc/xCATMN does not exist (nothing to do),
    #   (0, "started") when the cron entry was added,
    #   (code, msg)    as returned by xCAT::Utils::add_cron_job on failure.
    print "xcatmon.startNodeStatusMon\n";

    if (!-e "/etc/xCATMN") { return (0, ""); }    #only run the cron job on mn

    my $noderef = shift;
    if (defined($noderef) && $noderef =~ /xCAT_monitoring::xcatmon/) {
        # called as a class method: the first argument was the class name
        $noderef = shift;
    }
    my $scope    = shift;
    my $callback = shift;

    my $cmd = "$::XCATROOT/bin/nodestat all -m -u -q";

    # Figure out the ping-interval setting. Only a positive whole number of
    # minutes is usable in a cron minute field: a fractional value such as
    # 0.5 would truncate to a zero modulus in the AIX branch below (fatal
    # "Illegal modulus zero") and would produce an invalid "*/0.5" field on
    # Linux. Anything else falls back to the default.
    my $value    = 3;    #default
    my %settings = xCAT_monitoring::monitorctrl->getPluginSettings("xcatmon");
    if (exists($settings{'ping-interval'})) {
        my $reading = $settings{'ping-interval'};
        if (defined($reading) && $reading =~ /\A\s*(\d+)(?:\.\d*)?\s*\z/) {
            my $whole_minutes = $1 + 0;
            if ($whole_minutes > 0) { $value = $whole_minutes; }
        }
    }

    # Create the cron job; it runs the command every $value minutes.
    my $minutes;
    if (xCAT::Utils->isAIX()) {
        # AIX does not support the */value format, have to list them all.
        if ($value == 1) {
            $minutes = '*';
        }
        elsif ($value <= 30) {
            $minutes = join(',', grep { ($_ % $value) == 0 } (0 .. 59));
        }
        else {
            # intervals above 30 minutes are scheduled once an hour
            $minutes = "0";
        }
    }
    else {
        $minutes = "*/$value";
    }
    my $newentry = "$minutes * * * * XCATROOT=$::XCATROOT PATH=$ENV{'PATH'} XCATCFG='$ENV{'XCATCFG'}' $cmd";

    my ($code, $msg) = xCAT::Utils::add_cron_job($newentry);
    my $localhostname = hostname();
    if ($code == 0) {
        if ($callback) {
            my $rsp = {};
            $rsp->{data}->[0] = "$localhostname: started. Refresh interval is $value minute(s).";
            $callback->($rsp);
        }
        return (0, "started");
    }
    else {
        if ($callback) {
            my $rsp = {};
            $rsp->{data}->[0] = "$localhostname: $code $msg";
            $callback->($rsp);
        }
        return ($code, $msg);
    }
}
#--------------------------------------------------------------------------------
= head3 stopNodeStatusMon
2008-08-01 17:35:58 +00:00
This function is called by the monitorctrl module when monstop command is issued .
It stops feeding the node status info back to xCAT .
2007-11-29 19:55:16 +00:00
Arguments:
2008-08-01 17:35:58 +00:00
p_nodes -- a pointer to an array of nodes to be stopped for monitoring. null means all.
scope - - the action scope , it indicates the node type the action will take place .
0 means localhost only .
2 means both monservers and nodes ,
callback - - the callback pointer for error and status displaying . It can be null .
note: p_nodes and scope are ignored by this plugin .
2007-11-29 19:55:16 +00:00
Returns:
2008-08-01 17:35:58 +00:00
( return code , message )
if the callback is set , use callback to display the status and error .
2007-11-29 19:55:16 +00:00
= cut
#--------------------------------------------------------------------------------
sub stopNodeStatusMon {
    # Removes the 'nodestat' cron job so that node status is no longer
    # refreshed.
    #
    # Arguments (optionally preceded by the class name when called as a
    # class method):
    #   p_nodes  -- ignored by this plug-in
    #   scope    -- ignored by this plug-in
    #   callback -- optional code ref; receives { data => [ message ] }
    # Returns:
    #   (0, "")        when /etc/xCATMN does not exist (nothing to do),
    #   (0, "stopped") when the cron entry was removed,
    #   (code, msg)    as returned by xCAT::Utils::remove_cron_job on failure.
    print "xcatmon.stopNodeStatusMon\n";

    #only run the cron job on mn
    unless (-e "/etc/xCATMN") { return (0, ""); }

    # Drop the leading class name when invoked as a class method.
    my @args = @_;
    if ($args[0] =~ /xCAT_monitoring::xcatmon/) { shift(@args); }
    my ($noderef, $scope, $callback) = @args;

    my $job = "$::XCATROOT/bin/nodestat all -m -u -q";
    my ($code, $msg) = xCAT::Utils::remove_cron_job($job);
    my $localhostname = hostname();

    my $failed = ($code != 0);
    if ($callback) {
        my $rsp = {};
        $rsp->{data}->[0] =
          $failed
          ? "$localhostname: $code $msg"
          : "$localhostname: stopped.";
        $callback->($rsp);
    }

    return $failed ? ($code, $msg) : (0, "stopped");
}
#--------------------------------------------------------------------------------
2008-08-01 17:35:58 +00:00
= head3 config
This function configures the cluster for the given nodes .
This function is called when the monconfig command is issued or when xcatd starts
on the service node. It will configure the cluster to include the given nodes within
the monitoring domain.
2007-11-29 19:55:16 +00:00
Arguments:
2008-08-01 17:35:58 +00:00
p_nodes - - a pointer to an arrays of nodes to be added for monitoring . none means all .
scope - - the action scope , it indicates the node type the action will take place .
0 means localhost only .
2 means both monservers and nodes ,
callback - - the callback pointer for error and status displaying . It can be null .
2007-11-29 19:55:16 +00:00
Returns:
2008-03-21 20:32:41 +00:00
( error code , error message )
2007-11-29 19:55:16 +00:00
= cut
#--------------------------------------------------------------------------------
2008-08-01 17:35:58 +00:00
sub config {
    # Configuration hook of the plug-in. It only logs that it was called
    # and reports success; any arguments passed in are ignored.
    #
    # Returns: (error code, error message) -- always (0, "ok").
    print "xcatmon:config called\n";

    my ($rc, $message) = (0, "ok");
    return ($rc, $message);
}
#--------------------------------------------------------------------------------
2008-08-01 17:35:58 +00:00
= head3 deconfig
This function de - configures the cluster for the given nodes .
2007-11-29 19:55:16 +00:00
This function is called by the monitorctrl module when nodes are removed
from the xCAT cluster . It should remove the nodes from the product for monitoring .
Arguments:
2008-08-01 17:35:58 +00:00
p_nodes - - a pointer to an arrays of nodes to be removed for monitoring . none means all .
scope - - the action scope , it indicates the node type the action will take place .
0 means localhost only .
2 means both monservers and nodes ,
callback - - the callback pointer for error and status displaying . It can be null .
2007-11-29 19:55:16 +00:00
Returns:
2008-03-21 20:32:41 +00:00
( error code , error message )
2007-11-29 19:55:16 +00:00
= cut
#--------------------------------------------------------------------------------
2008-08-01 17:35:58 +00:00
sub deconfig {
    # De-configuration hook of the plug-in. It only logs that it was called
    # and reports success; any arguments passed in are ignored.
    #
    # Returns: (error code, error message) -- always (0, "ok").
    print "xcatmon:deconfig called\n";

    my ($rc, $message) = (0, "ok");
    return ($rc, $message);
}
2008-02-16 04:51:42 +00:00
#--------------------------------------------------------------------------------
= head3 getMonNodesStatus
This function goes to the xCAT nodelist table to retrieve the saved node status
for all the nodes that are monitored by the local host.
Arguments:
none .
Returns:
a hash that has the node status . The format is:
2008-09-17 19:06:45 +00:00
{ alive = > [ node1 , node3 , ... ] , unreachable = > [ node4 , node2 ... ] , unknown = > [ node8 , node101 ... ] }
2008-02-16 04:51:42 +00:00
= cut
#--------------------------------------------------------------------------------
sub getMonNodesStatus {
    # Sorts the nodes monitored by the local host into three lists according
    # to their saved status, as reported by
    # xCAT_monitoring::monitorctrl->getMonHierarchy().
    #
    # Arguments: none.
    # Returns:
    #   a hash keyed by $::STATUS_ACTIVE, $::STATUS_INACTIVE and 'unknown',
    #   each value being a reference to an array of node names. An empty
    #   hash is returned when getMonHierarchy() reports an error (an array
    #   reference whose second element is the message).
    my %status         = ();
    my @inactive_nodes = ();
    my @active_nodes   = ();
    my @unknown_nodes  = ();

    my $hierachy = xCAT_monitoring::monitorctrl->getMonHierarchy();
    if (ref($hierachy) eq 'ARRAY') {
        xCAT::MsgUtils->message('S', "[mon]: " . $hierachy->[1]);
        return %status;
    }
    my @mon_servers = keys(%$hierachy);

    my $isSV = xCAT::Utils->isServiceNode();

    #on a service node or on ms, get the nodes that has local host as the server node
    my @hostinfo = xCAT::Utils->determinehostname();
    my %iphash   = map { $_ => 1 } @hostinfo;

    #if this is mn, include the ones that has no service nodes
    if (!$isSV) { $iphash{'noservicenode'} = 1; }

    my %processed = ();
    foreach my $server_key (@mon_servers) {
        #service node come in pairs, the first one is the monserver adapter that facing the mn,
        # the second one is facing the cn. we use the first one here
        my @server_pair = split(':', $server_key);
        my $sv          = $server_pair[0];

        # handle each monitoring server only once
        if ($processed{$sv}) { next; }
        $processed{$sv} = 1;

        # only servers that are this host are of interest
        if (!$iphash{$sv}) { next; }

        my $monnodes = $hierachy->{$server_key};
        foreach my $entry (@$monnodes) {
            # each entry is used as [ node name, node type, status ]
            my $node        = $entry->[0];
            my $type        = $entry->[1];
            my $node_status = $entry->[2];
            if (!$node_status) { $node_status = $::STATUS_DEFINED; }    #default

            if ($node_status eq $::STATUS_ACTIVE) {
                push(@active_nodes, $node);
            }
            elsif ($node_status eq $::STATUS_INACTIVE) {
                push(@inactive_nodes, $node);
            }
            else {
                # Look up the valid next states without creating an entry in
                # the global table for a status it does not know about.
                my $next = $::NEXT_NODESTAT_VAL{$node_status} || {};
                my $need_active   = (($next->{$::STATUS_ACTIVE}   || 0) == 1) ? 1 : 0;
                my $need_inactive = (($next->{$::STATUS_INACTIVE} || 0) == 1) ? 1 : 0;

                if ($need_active && !$need_inactive) {
                    #put it into the inactive list so that the monitoring code can switch it to active.
                    push(@inactive_nodes, $node);
                }
                elsif (!$need_active && $need_inactive) {
                    #put it into the active list so that the monitoring code can change it to inactive.
                    push(@active_nodes, $node);
                }
                elsif ($need_active && $need_inactive) {
                    #unknown list so that the monitoring code can change it to active or inactive
                    push(@unknown_nodes, $node);
                }
                else {
                    #if it is non-osi node, check it anyway
                    if ($type !~ /osi/) { push(@unknown_nodes, $node); }
                }
            }
        }
    }

    $status{$::STATUS_ACTIVE}   = \@active_nodes;
    $status{$::STATUS_INACTIVE} = \@inactive_nodes;
    $status{unknown}            = \@unknown_nodes;
    return %status;
}
2010-02-01 16:18:49 +00:00
2008-02-16 04:51:42 +00:00
#--------------------------------------------------------------------------------
2008-08-01 17:35:58 +00:00
= head3 setNodeStatusAttributes
This function will update the status column of the nodelist table with the new node status .
2008-02-16 04:51:42 +00:00
Arguments:
status - - a hash pointer of the node status . A key is a status string . The value is
an array pointer of nodes that have the same status .
2008-09-17 19:06:45 +00:00
for example: { alive = > [ "node1" , "node1" ] , unreachable = > [ "node5" , "node100" ] }
2008-09-25 03:04:56 +00:00
force - - 1 force the input values to be set .
- - 0 make sure if the input value is the next valid value .
2008-02-16 04:51:42 +00:00
Returns:
0 for successful .
non - 0 for not successful .
= cut
#--------------------------------------------------------------------------------
2008-08-01 17:35:58 +00:00
sub setNodeStatusAttributes {
    # Thin wrapper that forwards the node status update to
    # xCAT_monitoring::monitorctrl->setNodeStatusAttributes().
    #
    # Arguments (optionally preceded by the class name when called as a
    # class method):
    #   status -- hash reference, status string => array ref of node names
    #   force  -- passed through unchanged
    # Returns: whatever the monitorctrl method returns.
    my @args = @_;

    # Drop the leading class name when invoked as a class method.
    if ($args[0] =~ /xCAT_monitoring::xcatmon/) { shift(@args); }

    my ($status_ref, $force) = @args;
    return xCAT_monitoring::monitorctrl->setNodeStatusAttributes($status_ref, $force);
}
2008-03-04 21:45:26 +00:00
#--------------------------------------------------------------------------------
= head3 processSettingChanges
This function gets called when the setting for this monitoring plugin
has been changed in the monsetting table .
Arguments:
none .
Returns:
0 for successful .
non - 0 for not successful .
= cut
#--------------------------------------------------------------------------------
sub processSettingChanges {
    # Called when the settings of this plug-in have changed: restarts the
    # cron job so that it is re-created from the current settings.
    #
    # Arguments: none.
    # Returns:
    #   scalar context -- 0 for successful, non-0 for not successful;
    #   list context   -- (return code, message).
    # A failure of the start step is reported first; otherwise a failure of
    # the stop step is reported instead of being dropped.
    my ($stop_code,  $stop_msg)  = xCAT_monitoring::xcatmon->stopNodeStatusMon([], 0);
    my ($start_code, $start_msg) = xCAT_monitoring::xcatmon->startNodeStatusMon([], 0);

    my ($code, $msg) = ($start_code, $start_msg);
    if (!$start_code && $stop_code) {
        ($code, $msg) = ($stop_code, $stop_msg);
    }

    # wantarray keeps the (code, message) pair for list-context callers
    # while giving scalar-context callers the documented return code.
    return wantarray ? ($code, $msg) : ($code || 0);
}
#--------------------------------------------------------------------------------
=head3 getDescription
This function returns the detailed description of the plugin including the
valid values for its settings in the monsetting table.
Arguments:
none
Returns:
The description .
= cut
#--------------------------------------------------------------------------------
sub getDescription {
    # Returns the human-readable description of the plug-in, including the
    # setting names it understands. The setting names listed here must match
    # the keys the code reads (for example 'ping-interval').
    #
    # Arguments: none.
    # Returns: the description as a single multi-line string.
    return
"  Description:
    xcatmon provides node status monitoring using fping on AIX and nmap on Linux.
    It also provides application status monitoring. The status and the appstatus
    columns of the nodelist table will be updated periodically with the latest
    status values for the nodes. Use command 'monadd xcatmon -n' and then
    'monstart xcatmon' to start monitoring.
  Settings:
    ping-interval: the number of minutes between each fping operation.
        The default value is 3.
    apps: a list of comma separated application names whose status will be queried.
        For how to get the status of each app, look for app name in the key field
        in a different row.
    port: the application daemon port number, if not specified, use internal list,
        then /etc/services.
    group: the name of a node group that needs to get the application status from.
        If not specified, assume all the nodes in the nodelist table.
        To specify more than one group, use group=a,group=b format.
    cmd: the command that will be run locally on mn or sn.
    lcmd: the command that will be run locally on the mn only.
    dcmd: the command that will be run distributed on the nodes using xdsh.
    For commands specified by 'cmd' and 'lcmd', the input is a list of comma
    separated node names, the output must be in the following format:
       node1:string1
       node2:string2
       ...
    For the command specified by 'dcmd', no input is needed, the output can be a
    string.";
}