#!/usr/bin/env perl
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
package xCAT_monitoring::xcatmon;

BEGIN
{
    $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat';
}
use lib "$::XCATROOT/lib/perl";
use strict;
use xCAT::Utils;
use xCAT::GlobalDef;
use xCAT_monitoring::monitorctrl;
use Sys::Hostname;

1;

#-------------------------------------------------------------------------------

=head1  xCAT_monitoring::xcatmon

=head2    Package Description

  This is an xCAT monitoring plug-in. The only thing that this plug-in does
  is the node status monitoring. To activate it simply run the following
  command:
    chtab pname=xCAT monitoring.nodestatmon=Y

=cut

#-------------------------------------------------------------------------------

#--------------------------------------------------------------------------------

=head3    _send_status

      Private helper. Wraps a text message into the response structure
      ({data => [text]}) and hands it to the callback.

    Arguments:
      callback -- the callback pointer. Nothing is done when it is not set.
      text     -- the message to be displayed.
    Returns:
      nothing

=cut

#--------------------------------------------------------------------------------
sub _send_status {
    my ($callback, $text) = @_;

    return unless ($callback);

    my $rsp = {};
    $rsp->{data}->[0] = $text;
    $callback->($rsp);
    return;
}

#--------------------------------------------------------------------------------

=head3    start

      This function gets called by the monitorctrl module when the monstart
      command gets called and when xcatd starts.

    Arguments:
      p_nodes -- a pointer to an array of nodes to be monitored. null means all.
      scope -- the action scope, it indicates the node type the action will
               take place on.
                0 means localhost only.
                2 means both monservers and nodes,
      callback -- the callback pointer for error and status displaying.
                  It can be null.
      note: none of the arguments are used by this plug-in.
    Returns:
      (return code, message)

=cut

#--------------------------------------------------------------------------------
sub start {
    print "xcatmon.start\n";

    return (0, "started");
}

#--------------------------------------------------------------------------------

=head3    stop

      This function gets called by the monitorctrl module when the monstop
      command gets called.

    Arguments:
      p_nodes -- a pointer to an array of nodes to be stopped for monitoring.
                 null means all.
      scope -- the action scope, it indicates the node type the action will
               take place on.
                0 means localhost only.
                2 means both monservers and nodes,
      callback -- the callback pointer for error and status displaying.
                  It can be null.
      note: none of the arguments are used by this plug-in.
    Returns:
      (return code, message)

=cut

#--------------------------------------------------------------------------------
sub stop {
    print "xcatmon.stop\n";

    return (0, "stopped");
}

#--------------------------------------------------------------------------------

=head3    supportNodeStatusMon

      This function is called by the monitorctrl module to check
      if this product can help monitoring and returning the node status.

    Arguments:
      none
    Returns:
      1

=cut

#--------------------------------------------------------------------------------
sub supportNodeStatusMon {
    return 1;
}

#--------------------------------------------------------------------------------

=head3   startNodeStatusMon

      This function is called by the monitorctrl module when monstart gets
      called and when xcatd starts. It registers a cron job that runs
      $XCATROOT/sbin/xcatnodemon periodically. The period is taken from the
      'ping-interval' setting of the plug-in (in minutes). The default of
      3 minutes is used when the setting is missing or is not a positive
      integer.

    Arguments:
      p_nodes -- a pointer to an array of nodes to be monitored. null means all.
      scope -- the action scope, it indicates the node type the action will
               take place on.
                0 means localhost only.
                2 means both monservers and nodes,
      callback -- the callback pointer for error and status displaying.
                  It can be null.
      note: p_nodes and scope are ignored by this plugin.
    Returns:
      (return code, message)
      if the callback is set, use callback to display the status and error.

=cut

#--------------------------------------------------------------------------------
sub startNodeStatusMon {
    print "xcatmon.startNodeStatusMon\n";

    # The function can be invoked as a class method or as a plain function;
    # drop the leading class name when it is there.
    my $noderef = shift;
    if ($noderef =~ /xCAT_monitoring::xcatmon/) {
        $noderef = shift;
    }
    my $scope    = shift;
    my $callback = shift;

    # The command that refreshes the node status. It is only scheduled here,
    # it is not run immediately.
    my $cmd = "$::XCATROOT/sbin/xcatnodemon";

    # Figure out the ping-interval setting. Only positive integers are
    # accepted: a fractional value would make an invalid crontab entry and,
    # on AIX, a value below 1 would be truncated to 0 by the integer modulus
    # below and make it die.
    my $value    = 3;    #default
    my %settings = xCAT_monitoring::monitorctrl->getPluginSettings("xcatmon");
    my $reading  = $settings{'ping-interval'};
    if (defined($reading) && ($reading =~ /^\s*(\d+)\s*$/) && ($1 > 0)) {
        $value = $1 + 0;
    }

    # Create the cron job, it will run the command every $value minutes.
    my $job = "XCATROOT=$::XCATROOT PATH=$ENV{'PATH'} XCATCFG='$ENV{'XCATCFG'}' $cmd";
    my $newentry;
    if (xCAT::Utils->isAIX()) {

        # AIX does not support the */value format, have to list them all.
        my $minutes;
        if ($value == 1) {
            $minutes = '*';
        }
        elsif ($value <= 30) {
            $minutes = join(',', grep { ($_ % $value) == 0 } (0 .. 59));
        }
        else {
            $minutes = "0";
        }
        $newentry = "$minutes * * * * $job";
    }
    else {
        $newentry = "*/$value * * * * $job";
    }

    my ($code, $msg) = xCAT::Utils::add_cron_job($newentry);
    my $localhostname = hostname();

    if ($code == 0) {
        _send_status($callback,
            "$localhostname: started. Refresh interval is $value minute(s).");
        return (0, "started");
    }

    _send_status($callback, "$localhostname: $code  $msg");
    return ($code, $msg);
}

#--------------------------------------------------------------------------------

=head3   stopNodeStatusMon

      This function is called by the monitorctrl module when the monstop
      command is issued. It removes the cron job for
      $XCATROOT/sbin/xcatnodemon so that the node status is no longer fed
      back to xCAT.

    Arguments:
      p_nodes -- a pointer to an array of nodes to be stopped for monitoring.
                 null means all.
      scope -- the action scope, it indicates the node type the action will
               take place on.
                0 means localhost only.
                2 means both monservers and nodes,
      callback -- the callback pointer for error and status displaying.
                  It can be null.
      note: p_nodes and scope are ignored by this plugin.
    Returns:
      (return code, message)
      if the callback is set, use callback to display the status and error.

=cut

#--------------------------------------------------------------------------------
sub stopNodeStatusMon {
    print "xcatmon.stopNodeStatusMon\n";

    # The function can be invoked as a class method or as a plain function;
    # drop the leading class name when it is there.
    my $noderef = shift;
    if ($noderef =~ /xCAT_monitoring::xcatmon/) {
        $noderef = shift;
    }
    my $scope    = shift;
    my $callback = shift;

    my $job = "$::XCATROOT/sbin/xcatnodemon";
    my ($code, $msg) = xCAT::Utils::remove_cron_job($job);
    my $localhostname = hostname();

    if ($code == 0) {
        _send_status($callback, "$localhostname: stopped.");
        return (0, "stopped");
    }

    _send_status($callback, "$localhostname: $code  $msg");
    return ($code, $msg);
}

#--------------------------------------------------------------------------------

=head3    config

      This function configures the cluster for the given nodes.
      This function is called when the monconfig command is issued or when
      xcatd starts on the service node. It will configure the cluster to
      include the given nodes within the monitoring domain.
      This plug-in has nothing to configure.

    Arguments:
      p_nodes -- a pointer to an array of nodes to be added for monitoring.
                 none means all.
      scope -- the action scope, it indicates the node type the action will
               take place on.
                0 means localhost only.
                2 means both monservers and nodes,
      callback -- the callback pointer for error and status displaying.
                  It can be null.
    Returns:
      (error code, error message)

=cut

#--------------------------------------------------------------------------------
sub config {
    print "xcatmon:config called\n";

    return (0, "ok");
}

#--------------------------------------------------------------------------------

=head3    deconfig

      This function de-configures the cluster for the given nodes.
      This function is called by the monitorctrl module when nodes are
      removed from the xCAT cluster. It should remove the nodes from the
      product for monitoring.
      This plug-in has nothing to de-configure.

    Arguments:
      p_nodes -- a pointer to an array of nodes to be removed for monitoring.
                 none means all.
      scope -- the action scope, it indicates the node type the action will
               take place on.
                0 means localhost only.
                2 means both monservers and nodes,
      callback -- the callback pointer for error and status displaying.
                  It can be null.
    Returns:
      (error code, error message)

=cut

#--------------------------------------------------------------------------------
sub deconfig {
    print "xcatmon:deconfig called\n";

    return (0, "ok");
}

#--------------------------------------------------------------------------------

=head3    getMonNodesStatus

      This function uses the monitoring hierarchy returned by the
      monitorctrl module to collect the saved status of all the nodes
      that are managed by the local host.

    Arguments:
       none.
    Returns:
       a hash that has the node status. The format is:
          {alive=>[node1, node3,...], unreachable=>[node4, node2...], unknown=>[node8, node101...]}

=cut

#--------------------------------------------------------------------------------
sub getMonNodesStatus {
    my %status         = ();
    my @inactive_nodes = ();
    my @active_nodes   = ();
    my @unknown_nodes  = ();

    my $hierachy    = xCAT_monitoring::monitorctrl->getMonHierarchy();
    my @mon_servers = keys(%$hierachy);
    my $isSV        = xCAT::Utils->isServiceNode();

    # On a service node or on the mn, get the nodes that have the local host
    # as the server node.
    my @hostinfo = xCAT::Utils->determinehostname();
    my %iphash   = map { $_ => 1 } @hostinfo;

    # If this is the mn, include the ones that have no service nodes.
    if (!$isSV) {
        $iphash{'noservicenode'} = 1;
    }

    foreach my $server_key (@mon_servers) {

        # Service nodes come in pairs, the first one is the monserver adapter
        # that is facing the mn, the second one is facing the cn. We use the
        # first one here.
        my @server_pair = split(',', $server_key);
        my $sv          = $server_pair[0];

        # Only the nodes of a server that is the local host are of interest.
        # Skipping here also makes sure that the nodes of a previously
        # matched server are not added a second time.
        next unless (defined($sv) && $iphash{$sv});

        foreach my $entry (@{ $hierachy->{$server_key} }) {
            my $node        = $entry->[0];
            my $node_status = $entry->[2];

            if ($node_status eq $::STATUS_ACTIVE) {
                push(@active_nodes, $node);
                next;
            }
            if ($node_status eq $::STATUS_INACTIVE) {
                push(@inactive_nodes, $node);
                next;
            }

            # Any other status: look at which of active/inactive is a valid
            # next status. The table is only read, so that no entry is
            # created in it for a status it does not know.
            my $next_vals =
              defined($node_status) ? $::NEXT_NODESTAT_VAL{$node_status} : undef;
            my $need_active   = 0;
            my $need_inactive = 0;
            if ($next_vals) {
                if (($next_vals->{$::STATUS_ACTIVE} || 0) == 1) {
                    $need_active = 1;
                }
                if (($next_vals->{$::STATUS_INACTIVE} || 0) == 1) {
                    $need_inactive = 1;
                }
            }

            if ($need_active && !$need_inactive) {

                # Put it into the inactive list so that the monitoring code
                # can switch it to active.
                push(@inactive_nodes, $node);
            }
            elsif (!$need_active && $need_inactive) {

                # Put it into the active list so that the monitoring code
                # can change it to inactive.
                push(@active_nodes, $node);
            }
            elsif ($need_active && $need_inactive) {

                # Unknown list so that the monitoring code can change it to
                # active or inactive.
                push(@unknown_nodes, $node);
            }

            # Neither is a valid next status: the node is not reported.
        }
    }

    $status{$::STATUS_ACTIVE}   = \@active_nodes;
    $status{$::STATUS_INACTIVE} = \@inactive_nodes;
    $status{unknown}            = \@unknown_nodes;

    return %status;
}

#--------------------------------------------------------------------------------

=head3    setNodeStatusAttributes

      This function passes the new node status on to the
      setNodeStatusAttributes function of the monitorctrl module.

    Arguments:
       status -- a hash pointer of the node status. A key is a status string.
                 The value is an array pointer of nodes that have the same
                 status. for example:
                   {alive=>["node1", "node1"], unreachable=>["node5","node100"]}
       force -- 1 force the input values to be set.
             -- 0 make sure if the input value is the next valid value.
    Returns:
        0 for successful.
        non-0 for not successful.

=cut

#--------------------------------------------------------------------------------
sub setNodeStatusAttributes {

    # The function can be invoked as a class method or as a plain function;
    # drop the leading class name when it is there.
    my $temp = shift;
    if ($temp =~ /xCAT_monitoring::xcatmon/) {
        $temp = shift;
    }
    my $force = shift;

    return xCAT_monitoring::monitorctrl->setNodeStatusAttributes($temp, $force);
}

#--------------------------------------------------------------------------------

=head3    processSettingChanges

      This function gets called when the setting for this monitoring plugin
      has been changed in the monsetting table. It restarts the cron job
      so that the new ping-interval takes effect.

    Arguments:
       none.
    Returns:
        0 for successful.
        non-0 for not successful.
        In list context (return code, message) of the restart is returned.

=cut

#--------------------------------------------------------------------------------
sub processSettingChanges {

    # Restart the cron job. The result of the stop is not checked, the job
    # gets started in any case.
    xCAT_monitoring::xcatmon->stopNodeStatusMon([], 0);
    my ($code, $msg) = xCAT_monitoring::xcatmon->startNodeStatusMon([], 0);

    # A scalar caller gets the numeric code as documented; a list caller
    # still gets the (code, message) pair it got before.
    return wantarray ? ($code, $msg) : $code;
}

#--------------------------------------------------------------------------------

=head3    getDescription

      This function returns the detailed description of the plugin including
      the valid values for its settings in the monsetting table.

    Arguments:
        none
    Returns:
        The description.

=cut

#--------------------------------------------------------------------------------
sub getDescription {

    # NOTE(review): the text is kept exactly as it is found in this file.
    # It presumably had line breaks and indentation at some point -- confirm
    # against the original layout before changing the spacing.
    return " Description: xcatmon uses fping to report the node liveness status"
      . " and update the nodelist.status column."
      . " Use command 'monstart xcatmon -n' to start monitoring."
      . " Settings: ping-interval: the number of minutes between each fping operation."
      . " The default value is 3.";
}