mirror of
				https://github.com/xcat2/xcat-core.git
				synced 2025-10-31 19:32:31 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			472 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
			
		
		
	
	
			472 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
| #!/usr/bin/env perl
 | |
| # IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
 | |
| package xCAT_monitoring::xcatmon;
 | |
| 
 | |
| BEGIN
 | |
| {
 | |
|     $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : '/opt/xcat';
 | |
| }
 | |
| use lib "$::XCATROOT/lib/perl";
 | |
| use strict;
 | |
| use xCAT::Utils;
 | |
| use xCAT::NetworkUtils;
 | |
| use xCAT::GlobalDef;
 | |
| use xCAT_monitoring::monitorctrl;
 | |
| use Sys::Hostname;
 | |
| 
 | |
| 
 | |
| 1;
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| 
 | |
| =head1  xCAT_monitoring::xcatmon
 | |
| =head2    Package Description
 | |
|    This is a xCAT monitoring plugin. The only thing that this plug-in does is
 | |
|    the node monitoring. To activate it simply do the following command:
 | |
|       chtab pname=xCAT monitoring.nodestatmon=Y
 | |
| =cut
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3    start
 | |
|       This function gets called by the monitorctrl module when monstart command
 | |
|      gets called and when xcatd starts.
 | |
| 
 | |
|     Arguments:
 | |
|        p_nodes -- a pointer to an arrays of nodes to be monitored. null means all.
 | |
|        scope -- the action scope, it indicates the node type the action will take place.
 | |
|                 0 means localhost only.
 | |
|                 2 means both monservers and nodes,
 | |
|        callback -- the callback pointer for error and status displaying. It can be null.
 | |
|     Returns:
 | |
|       (return code, message)
 | |
|       if the callback is set, use callback to display the status and error.
 | |
| 
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub start
{
    # Nothing is launched here: the hook only traces the call on the
    # currently selected output handle and reports success.
    # Returns the list (0, "started"); any arguments are ignored.
    print "xcatmon.start\n";
    return (0, "started");
}
 | |
| 
 | |
| 
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3    stop
 | |
|       This function gets called by the monitorctrl module when monstop command gets called.
 | |
|     Arguments:
 | |
|        p_nodes -- a pointer to an arrays of nodes to be stopped for monitoring. null means all.
 | |
|        scope -- the action scope, it indicates the node type the action will take place.
 | |
|                 0 means localhost only.
 | |
|                 2 means both monservers and nodes,
 | |
|        callback -- the callback pointer for error and status displaying. It can be null.
 | |
|     Returns:
 | |
|       (return code, message)
 | |
|       if the callback is set, use callback to display the status and error.
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub stop
{
    # Nothing is shut down here: the hook only traces the call on the
    # currently selected output handle and reports success.
    # Returns the list (0, "stopped"); any arguments are ignored.
    print "xcatmon.stop\n";
    return (0, "stopped");
}
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3    supportNodeStatusMon
 | |
|     This function is called by the monitorctrl module to check
 | |
|     if this product can help monitoring and returning the node status.
 | |
| 
 | |
|     Arguments:
 | |
|         none
 | |
|     Returns:
 | |
|         1
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub supportNodeStatusMon
{
    # Unconditionally answers 1: this plug-in always offers node status
    # monitoring. Arguments, if any, are not looked at.
    return 1;
}
 | |
| 
 | |
| 
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3   startNodeStatusMon
 | |
|     This function is called by the monitorctrl module when monstart gets called and
 | |
|     when xcatd starts. It starts monitoring the node status and feed them back
 | |
|     to xCAT.
 | |
|     Arguments:
 | |
|        p_nodes -- a pointer to an arrays of nodes to be monitored. null means all.
 | |
|        scope -- the action scope, it indicates the node type the action will take place.
 | |
|                 0 means localhost only.
 | |
|                 2 means both monservers and nodes,
 | |
|        callback -- the callback pointer for error and status displaying. It can be null.
 | |
|     note: p_nodes and scope are ignored by this plugin.
 | |
|     Returns:
 | |
|       (return code, message)
 | |
|       if the callback is set, use callback to display the status and error.
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub startNodeStatusMon
{
    # Installs the cron job that periodically runs
    # "nodestat all -m -u -q" to refresh the node status.
    #
    # Arguments (after an optional leading class name):
    #   p_nodes  -- ignored by this plug-in
    #   scope    -- ignored by this plug-in
    #   callback -- optional code ref; receives { data => [ message ] }
    # Returns:
    #   (0, "")          when /etc/xCATMN does not exist (nothing is done)
    #   (0, "started")   when the cron entry was added
    #   ($code, $msg)    as returned by xCAT::Utils::add_cron_job otherwise
    print "xcatmon.startNodeStatusMon\n";
    if (!-e "/etc/xCATMN") { return (0, ""); }    #only run the cron job on mn

    # Accept both xCAT_monitoring::xcatmon->startNodeStatusMon(...) and a
    # plain function call: drop the class name when it is the first argument.
    my $noderef = shift;
    if (defined($noderef) && $noderef =~ /xCAT_monitoring::xcatmon/) {
        $noderef = shift;
    }
    my $scope    = shift;    # ignored, kept to document the argument position
    my $callback = shift;

    my $cmd = "$::XCATROOT/bin/nodestat all -m -u -q";

    # Figure out the ping-interval setting (minutes, default 3).
    # Only a positive whole number is accepted. Anything else (empty, zero,
    # negative, fractional such as "0.5", or trailing junk such as "5abc")
    # keeps the default: those values would otherwise be written verbatim
    # into the crontab minute field, and on AIX a value below 1 would make
    # the integer modulus below die with "Illegal modulus zero".
    my $value    = 3;
    my %settings = xCAT_monitoring::monitorctrl->getPluginSettings("xcatmon");
    my $reading  = $settings{'ping-interval'};
    if (defined($reading) && $reading =~ /^\s*(\d+)\s*$/ && $1 > 0) {
        $value = $1 + 0;     # numeric form, e.g. "05" becomes 5
    }

    # Build the minute field of the cron entry.
    my $minutes;
    if (xCAT::Utils->isAIX()) {

        #AIX does not support */value format, have to list them all.
        if ($value == 1) {
            $minutes = '*';
        }
        elsif ($value <= 30) {
            $minutes = join(',', grep { ($_ % $value) == 0 } (0 .. 59));
        }
        else {
            # NOTE(review): intervals above 30 collapse to "minute 0", i.e.
            # once per hour, while the status message below still reports
            # the configured value.
            $minutes = "0";
        }
    }
    else {
        $minutes = "*/$value";
    }

    # The job runs every $value minutes with the current XCATROOT, PATH and
    # XCATCFG carried over into the cron environment.
    my $newentry = "$minutes * * * * XCATROOT=$::XCATROOT PATH=$ENV{'PATH'} XCATCFG='$ENV{'XCATCFG'}' $cmd";

    my ($code, $msg) = xCAT::Utils::add_cron_job($newentry);
    my $localhostname = hostname();
    if ($code == 0) {
        if ($callback) {
            my $rsp = {};
            $rsp->{data}->[0] = "$localhostname: started. Refresh interval is $value minute(s).";
            $callback->($rsp);
        }
        return (0, "started");
    }
    else {
        if ($callback) {
            my $rsp = {};
            $rsp->{data}->[0] = "$localhostname: $code  $msg";
            $callback->($rsp);
        }
        return ($code, $msg);
    }
}
 | |
| 
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3   stopNodeStatusMon
 | |
|     This function is called by the monitorctrl module when monstop command is issued.
 | |
|     It stops feeding the node status info back to xCAT.
 | |
|     Arguments:
 | |
|        p_nodes -- a pointer to an arrays of nodes to stopped for monitoring. null means all.
 | |
|        scope -- the action scope, it indicates the node type the action will take place.
 | |
|                 0 means localhost only.
 | |
|                 2 means both monservers and nodes,
 | |
|        callback -- the callback pointer for error and status displaying. It can be null.
 | |
|     note: p_nodes and scope are ignored by this plugin.
 | |
|     Returns:
 | |
|       (return code, message)
 | |
|       if the callback is set, use callback to display the status and error.
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub stopNodeStatusMon
{
    # Removes the nodestat cron job again.
    #
    # Arguments (after an optional leading class name):
    #   p_nodes  -- ignored by this plug-in
    #   scope    -- ignored by this plug-in
    #   callback -- optional code ref; receives { data => [ message ] }
    # Returns:
    #   (0, "")          when /etc/xCATMN does not exist (nothing is done)
    #   (0, "stopped")   when the cron entry was removed
    #   ($code, $msg)    as returned by xCAT::Utils::remove_cron_job otherwise
    print "xcatmon.stopNodeStatusMon\n";

    #only run the cron job on mn
    return (0, "") unless -e "/etc/xCATMN";

    # A leading class name means the sub was invoked as a class method;
    # the real first argument is then the next one.
    my $first    = shift;
    my $noderef  = ($first =~ /xCAT_monitoring::xcatmon/) ? shift(@_) : $first;
    my $scope    = shift;
    my $callback = shift;

    # The cron entry is identified by the command it runs.
    my $cron_cmd = "$::XCATROOT/bin/nodestat all -m -u -q";
    my ($rc, $text) = xCAT::Utils::remove_cron_job($cron_cmd);
    my $host      = hostname();
    my $succeeded = ($rc == 0);

    if ($callback) {
        my $line = $succeeded ? "$host: stopped." : "$host: $rc  $text";
        $callback->({ data => [$line] });
    }

    return (0, "stopped") if $succeeded;
    return ($rc, $text);
}
 | |
| 
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3    config
 | |
|       This function configures the cluster for the given nodes.
 | |
|       This function is called by when monconfig command is issued or when xcatd starts
 | |
|      on the service node. It will configure the cluster to include the given nodes within
 | |
|      the monitoring domain.
 | |
|     Arguments:
 | |
|        p_nodes -- a pointer to an arrays of nodes to be added for monitoring. none means all.
 | |
|        scope -- the action scope, it indicates the node type the action will take place.
 | |
|                 0 means localhost only.
 | |
|                 2 means both monservers and nodes,
 | |
|        callback -- the callback pointer for error and status displaying. It can be null.
 | |
|     Returns:
 | |
|        (error code, error message)
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub config
{
    # No configuration work is done: the hook only traces the call on the
    # currently selected output handle and reports success.
    # Returns the list (0, "ok"); any arguments are ignored.
    print "xcatmon:config called\n";
    return (0, "ok");
}
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3    deconfig
 | |
|       This function de-configures the cluster for the given nodes.
 | |
|       This function is called by the monitorctrl module when nodes are removed
 | |
|       from the xCAT cluster. It should remove the nodes from the product for monitoring.
 | |
|     Arguments:
 | |
|        p_nodes -- a pointer to an arrays of nodes to be removed for monitoring. none means all.
 | |
|        scope -- the action scope, it indicates the node type the action will take place.
 | |
|                 0 means localhost only.
 | |
|                 2 means both monservers and nodes,
 | |
|        callback -- the callback pointer for error and status displaying. It can be null.
 | |
|     Returns:
 | |
|        (error code, error message)
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub deconfig
{
    # No de-configuration work is done: the hook only traces the call on the
    # currently selected output handle and reports success.
    # Returns the list (0, "ok"); any arguments are ignored.
    print "xcatmon:deconfig called\n";
    return (0, "ok");
}
 | |
| 
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3    getMonNodesStatus
 | |
|       This function goes to the xCAT nodelist table to retrieve the saved node status
 | |
|       for all the node that are managed by local nodes.
 | |
|     Arguments:
 | |
|        none.
 | |
|     Returns:
 | |
|        a hash that has the node status. The format is:
 | |
|           {alive=>[node1, node3,...], unreachable=>[node4, node2...], unknown=>[node8, node101...]}
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub getMonNodesStatus {
    # Sorts the nodes served by the local host into three lists according to
    # their saved status and returns them as a hash:
    #   $::STATUS_ACTIVE   => [nodes to be checked as currently active]
    #   $::STATUS_INACTIVE => [nodes to be checked as currently inactive]
    #   unknown            => [nodes that may go either way]
    # An empty hash is returned when the monitoring hierarchy is unavailable.
    my %status         = ();
    my @inactive_nodes = ();
    my @active_nodes   = ();
    my @unknown_nodes  = ();

    # An ARRAY result is treated as an error report whose second element is
    # the message text; a usable hierarchy is a hash ref keyed by server.
    my $hierachy = xCAT_monitoring::monitorctrl->getMonHierarchy();
    if (ref($hierachy) eq 'ARRAY') {
        xCAT::MsgUtils->message('S', "[mon]: " . $hierachy->[1]);
        return %status;
    }

    my @mon_servers = keys(%$hierachy);
    my $isSV        = xCAT::Utils->isServiceNode();

    #on a service node or on ms, get the nodes that has local host as the server node
    my %iphash = map { $_ => 1 } xCAT::NetworkUtils->determinehostname();

    #if this is mn, include the ones that has no service nodes
    if (!$isSV) { $iphash{'noservicenode'} = 1; }

    my %processed = ();
    foreach my $server_key (@mon_servers) {

        #service node come in pairs, the first one is the monserver adapter that facing the mn,
        # the second one is facing the cn. we use the first one here
        my @server_pair = split(':', $server_key);
        my $sv          = $server_pair[0];
        next if $processed{$sv};
        $processed{$sv} = 1;

        # Only servers that are one of the local host's names are of interest.
        next unless $iphash{$sv};

        foreach my $entry (@{ $hierachy->{$server_key} }) {

            # Each record is indexed as [0] node name, [1] node type,
            # [2] saved status. The type used to be read from $_[1], which is
            # the sub's own argument list rather than the record, so the
            # "osi" test below never saw the real node type.
            my $node   = $entry->[0];
            my $type   = $entry->[1];
            my $status = $entry->[2];
            if (!$status) { $status = $::STATUS_DEFINED; }    #default

            if ($status eq $::STATUS_ACTIVE) {
                push(@active_nodes, $node);
                next;
            }
            if ($status eq $::STATUS_INACTIVE) {
                push(@inactive_nodes, $node);
                next;
            }

            # Look up which states may follow the current one. The lookup goes
            # through a local reference so that an unlisted status does not
            # create an empty entry in the global table.
            my $next_states   = $::NEXT_NODESTAT_VAL{$status} || {};
            my $need_active   = (($next_states->{$::STATUS_ACTIVE}   || 0) == 1) ? 1 : 0;
            my $need_inactive = (($next_states->{$::STATUS_INACTIVE} || 0) == 1) ? 1 : 0;

            if ($need_active && !$need_inactive) {
                #put it into the inactive list so that the monitoring code can switch it to active.
                push(@inactive_nodes, $node);
            }
            elsif (!$need_active && $need_inactive) {
                #put it into the active list so that the monitoring code can change it to inactive.
                push(@active_nodes, $node);
            }
            elsif ($need_active && $need_inactive) {
                #unknown list so that the monitoring code can change it to active or inactive
                push(@unknown_nodes, $node);
            }
            else {
                #if it is non-osi node, check it anyway
                my $type_text = defined($type) ? $type : '';
                if ($type_text !~ /osi/) { push(@unknown_nodes, $node); }
            }
        }
    }

    $status{$::STATUS_ACTIVE}   = \@active_nodes;
    $status{$::STATUS_INACTIVE} = \@inactive_nodes;
    $status{unknown}            = \@unknown_nodes;

    return %status;
}
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3    setNodeStatusAttributes
 | |
|       This function will update the status column of the nodelist table with the new node status.
 | |
|     Arguments:
 | |
|        status -- a hash pointer of the node status. A key is a status string. The value is
 | |
|                 an array pointer of nodes that have the same status.
 | |
|                 for example: {alive=>["node1", "node1"], unreachable=>["node5","node100"]}
 | |
|        force -- 1 force the input values to be set.
 | |
|              -- 0 make sure if the input value is the next valid value.
 | |
|     Returns:
 | |
|         0 for successful.
 | |
|         non-0 for not successful.
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub setNodeStatusAttributes
{
    # Thin pass-through to monitorctrl's setNodeStatusAttributes.
    #
    # Arguments (after an optional leading class name):
    #   status -- forwarded unchanged as the first argument
    #   force  -- forwarded unchanged as the second argument
    # Returns whatever the monitorctrl method returns.
    my $status_ref = shift;

    # Invoked as a class method: the class name comes first, skip it.
    $status_ref = shift if $status_ref =~ /xCAT_monitoring::xcatmon/;

    my $force = shift;

    return xCAT_monitoring::monitorctrl->setNodeStatusAttributes($status_ref, $force);
}
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3    processSettingChanges
 | |
|       This function gets called when the setting for this monitoring plugin
 | |
|       has been changed in the monsetting table.
 | |
|     Arguments:
 | |
|        none.
 | |
|     Returns:
 | |
|         0 for successful.
 | |
|         non-0 for not successful.
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub processSettingChanges
{
    # Restart the cron job: remove it, then add it again so that it is
    # re-created from the current plug-in settings. Both calls pass an empty
    # node list and scope 0, and no callback.
    #
    # The result of the stop step is discarded; the value handed back is
    # whatever startNodeStatusMon returns.
    my $plugin = 'xCAT_monitoring::xcatmon';

    $plugin->stopNodeStatusMon([], 0);
    return $plugin->startNodeStatusMon([], 0);
}
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
| 
 | |
| =head3    getDescription
 | |
|       This function returns the detailed description of the plugin including the
 | |
|      valid values for its settings in the monsetting table.
 | |
|      Arguments:
 | |
|         none
 | |
|     Returns:
 | |
|         The description.
 | |
| =cut
 | |
| 
 | |
| #--------------------------------------------------------------------------------
 | |
sub getDescription
{
    # Hands back the fixed, multi-line help text of the plug-in: a short
    # description followed by the list of recognised settings. The text has
    # no trailing newline.
    my $text = "  Description:
    xcatmon provides node status monitoring using fping on AIX and nmap on Linux.
    It also provides application status monitoring. The status and the appstatus
    columns of the nodelist table will be  updated periodically  with the latest
    status values for the nodes.   Use  command 'monadd xcatmon -n' and then
    'monstart xcatmon'  to start monitoring.
  Settings:
    ping-interval:  the number of minutes between each nmap/fping operation.
        The default value is 3.
    apps: a list of comma separated application names whose status will be queried.
        For how to get the status of each app, look for app name in the key field
        in a different row.
    port: the application daemon port number, if not specified, use internal list,
        then /etc/services.
    group:  the name of a node group that needs to get the application status from.
         If not specified, assume all the nodes in the nodelist table.
         To specify more than one groups, use group=a,group=b format.
    cmd: the command that will be run locally on mn or sn.
    lcmd: the command that will be run locally on the mn only.
    dcmd: the command that will be run distributed on the nodes using xdsh.

       For commands specified by 'cmd' and 'lcmd', the input of is a list of comma
       separated node names, the output must be in the following format:
         node1:string1
         node2:string2
         ...
       For the command specified by 'dcmd', no input is needed, the output can be a
       string.";

    return $text;
}
 |