git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@2990 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
		
			
				
	
	
		
			410 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
			
		
		
	
	
			410 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
#!/usr/bin/env perl
 | 
						|
# IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
 | 
						|
package xCAT_monitoring::xcatmon;
 | 
						|
BEGIN {
  # Resolve the xCAT install root at compile time so that the "use lib"
  # line that follows can interpolate it. An unset or empty XCATROOT
  # environment variable falls back to /opt/xcat.
  $::XCATROOT = $ENV{'XCATROOT'} || '/opt/xcat';
}
 | 
						|
use lib "$::XCATROOT/lib/perl";
 | 
						|
use strict;
 | 
						|
use xCAT::Utils;
 | 
						|
use xCAT::GlobalDef;
 | 
						|
use xCAT_monitoring::monitorctrl;
 | 
						|
use Sys::Hostname;
 | 
						|
 | 
						|
 | 
						|
1;
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
=head1  xCAT_monitoring::xcatmon
 | 
						|
=head2    Package Description
 | 
						|
   This is an xCAT monitoring plugin. The only thing that this plug-in does is
 | 
						|
   the node monitoring. To activate it simply do the following command:
 | 
						|
      chtab pname=xCAT monitoring.nodestatmon=Y
 | 
						|
=cut
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3    start
 | 
						|
      This function gets called by the monitorctrl module when monstart command 
 | 
						|
     gets called and when xcatd starts.  
 | 
						|
 | 
						|
    Arguments:
 | 
						|
       p_nodes -- a pointer to an array of nodes to be monitored. null means all.
 | 
						|
       scope -- the action scope, it indicates the node type the action will take place.
 | 
						|
                0 means localhost only. 
 | 
						|
                2 means both monservers and nodes, 
 | 
						|
       callback -- the callback pointer for error and status displaying. It can be null.
 | 
						|
    Returns:
 | 
						|
      (return code, message) 
 | 
						|
      if the callback is set, use callback to display the status and error. 
 | 
						|
     
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub start {
  # Hook invoked by monitorctrl for monstart and at xcatd startup. This
  # plug-in has nothing to bring up here: it traces the call and reports
  # success as a (return code, message) pair.
  print("xcatmon.start\n");

  my ($rc, $text) = (0, "started");
  return ($rc, $text);
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3    stop
 | 
						|
      This function gets called by the monitorctrl module when monstop command gets called. 
 | 
						|
    Arguments:
 | 
						|
       p_nodes -- a pointer to an array of nodes whose monitoring is to be stopped. null means all.
 | 
						|
       scope -- the action scope, it indicates the node type the action will take place.
 | 
						|
                0 means localhost only. 
 | 
						|
                2 means both monservers and nodes, 
 | 
						|
       callback -- the callback pointer for error and status displaying. It can be null.
 | 
						|
    Returns:
 | 
						|
      (return code, message) 
 | 
						|
      if the callback is set, use callback to display the status and error. 
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub stop {
  # Hook invoked by monitorctrl for monstop. Nothing needs to be torn
  # down here: the call is traced and success is reported as a
  # (return code, message) pair.
  print("xcatmon.stop\n");

  my ($rc, $text) = (0, "stopped");
  return ($rc, $text);
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3    supportNodeStatusMon
 | 
						|
    This function is called by the monitorctrl module to check
 | 
						|
    if this product can help monitoring and returning the node status.
 | 
						|
    
 | 
						|
    Arguments:
 | 
						|
        none
 | 
						|
    Returns:
 | 
						|
        1
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub supportNodeStatusMon {
  # Capability probe used by monitorctrl: this plug-in always offers
  # node status monitoring, so the answer is the constant 1.
  my $supported = 1;
  return $supported;
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3   startNodeStatusMon
 | 
						|
    This function is called by the monitorctrl module when monstart gets called and
 | 
						|
    when xcatd starts. It starts monitoring the node status and feeds it back
 | 
						|
    to xCAT.  
 | 
						|
    Arguments:
 | 
						|
       p_nodes -- a pointer to an array of nodes to be monitored. null means all.
 | 
						|
       scope -- the action scope, it indicates the node type the action will take place.
 | 
						|
                0 means localhost only.  
 | 
						|
                2 means both monservers and nodes, 
 | 
						|
       callback -- the callback pointer for error and status displaying. It can be null.
 | 
						|
    note: p_nodes and scope are ignored by this plugin.
 | 
						|
    Returns:
 | 
						|
      (return code, message) 
 | 
						|
      if the callback is set, use callback to display the status and error. 
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub startNodeStatusMon
{
  # Schedules $::XCATROOT/sbin/xcatnodemon as a cron job so that the node
  # status is refreshed every "ping-interval" minutes (default 3).
  #
  # Arguments:
  #   p_nodes  -- array reference of nodes (ignored by this plug-in)
  #   scope    -- action scope (ignored by this plug-in)
  #   callback -- optional code reference; it receives a hash reference
  #               of the form {data=>[message]}
  # Returns:
  #   (0, "started") on success, otherwise the (code, message) pair
  #   handed back by xCAT::Utils::add_cron_job.
  print "xcatmon.startNodeStatusMon\n";
  my $noderef=shift;
  if ($noderef =~ /xCAT_monitoring::xcatmon/) {
    # called as a class method: the first argument was the package name
    $noderef=shift;
  }
  my $scope=shift;
  my $callback=shift;

  my $cmd="$::XCATROOT/sbin/xcatnodemon";

  #figure out the ping-interval setting
  my $value=3; #default
  my %settings=xCAT_monitoring::monitorctrl->getPluginSettings("xcatmon");
  if (exists($settings{'ping-interval'})) {
    # Only a whole number of minutes >= 1 can be used. A fractional value
    # such as 0.5 would otherwise reach the "%" operator below as 0 (a fatal
    # "Illegal modulus zero") or end up as an invalid "*/0.5" cron field.
    # Anything that does not start with a usable integer keeps the default.
    my $reading=$settings{'ping-interval'};
    if (defined($reading) && ($reading =~ /^\s*(\d+)/) && ($1 >= 1)) {
      $value=$1 + 0;
    }
  }

  #build the minute field of the cron entry: run the command every $value minutes.
  my $minutes;
  if (xCAT::Utils->isAIX()) {
    #AIX does not support */value format, have to list them all.
    if ($value==1) { $minutes='*'; }
    elsif ($value<=30) {
      $minutes=join(',', grep { ($_ % $value) == 0 } (0..59));
    } else {
      $minutes="0";
    }
  } else {
    $minutes="*/$value";
  }
  my $newentry="$minutes * * * * XCATROOT=$::XCATROOT PATH=$ENV{'PATH'} XCATCFG='$ENV{'XCATCFG'}' $cmd";

  my ($code, $msg)=xCAT::Utils::add_cron_job($newentry);
  my $started=($code==0) ? 1 : 0;

  if ($callback) {
    my $localhostname=hostname();
    my $rsp={};
    if ($started) {
      $rsp->{data}->[0]="$localhostname: started. Refresh interval is $value minute(s).";
    } else {
      $rsp->{data}->[0]="$localhostname: $code  $msg";
    }
    $callback->($rsp);
  }

  if ($started) { return (0, "started"); }
  return ($code, $msg);
}
 | 
						|
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3   stopNodeStatusMon
 | 
						|
    This function is called by the monitorctrl module when monstop command is issued.
 | 
						|
    It stops feeding the node status info back to xCAT. 
 | 
						|
    Arguments:
 | 
						|
       p_nodes -- a pointer to an array of nodes whose monitoring is to be stopped. null means all.
 | 
						|
       scope -- the action scope, it indicates the node type the action will take place.
 | 
						|
                0 means localhost only. 
 | 
						|
                2 means both monservers and nodes, 
 | 
						|
       callback -- the callback pointer for error and status displaying. It can be null.
 | 
						|
    note: p_nodes and scope are ignored by this plugin.
 | 
						|
    Returns:
 | 
						|
      (return code, message) 
 | 
						|
      if the callback is set, use callback to display the status and error. 
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub stopNodeStatusMon {
  # Removes the xcatnodemon cron job so node status is no longer fed back
  # to xCAT. p_nodes and scope are accepted but not used; the optional
  # callback receives {data=>[message]}. Returns (0, "stopped") on success,
  # otherwise the (code, message) pair from xCAT::Utils::remove_cron_job.
  print "xcatmon.stopNodeStatusMon\n";

  my @params=@_;
  # when called as a class method the package name comes first; drop it
  shift(@params) if ($params[0] =~ /xCAT_monitoring::xcatmon/);
  my ($noderef, $scope, $callback)=@params;

  my ($code, $msg)=xCAT::Utils::remove_cron_job("$::XCATROOT/sbin/xcatnodemon");
  my $host=hostname();
  my $removed=($code==0);

  if ($callback) {
    my $text=$removed ? "$host: stopped." : "$host: $code  $msg";
    $callback->({data=>[$text]});
  }

  return (0, "stopped") if ($removed);
  return ($code, $msg);
}
 | 
						|
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3    config
 | 
						|
      This function configures the cluster for the given nodes.  
 | 
						|
      This function is called when the monconfig command is issued or when xcatd starts
 | 
						|
     on the service node. It will configure the cluster to include the given nodes within
 | 
						|
     the monitoring domain.
 | 
						|
    Arguments:
 | 
						|
       p_nodes -- a pointer to an array of nodes to be added for monitoring. none means all.
 | 
						|
       scope -- the action scope, it indicates the node type the action will take place.
 | 
						|
                0 means localhost only. 
 | 
						|
                2 means both monservers and nodes, 
 | 
						|
       callback -- the callback pointer for error and status displaying. It can be null.
 | 
						|
    Returns:
 | 
						|
       (error code, error message)
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub config {
  # Configuration hook (monconfig, or xcatd start on a service node). This
  # plug-in has nothing to configure: it traces the call and returns
  # an (error code, error message) pair that signals success.
  print("xcatmon:config called\n");

  my ($rc, $text) = (0, "ok");
  return ($rc, $text);
}
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3    deconfig
 | 
						|
      This function de-configures the cluster for the given nodes.  
 | 
						|
      This function is called by the monitorctrl module when nodes are removed 
 | 
						|
      from the xCAT cluster. It should remove the nodes from the product for monitoring.
 | 
						|
    Arguments:
 | 
						|
       p_nodes -- a pointer to an array of nodes to be removed from monitoring. none means all.
 | 
						|
       scope -- the action scope, it indicates the node type the action will take place.
 | 
						|
                0 means localhost only. 
 | 
						|
                2 means both monservers and nodes, 
 | 
						|
       callback -- the callback pointer for error and status displaying. It can be null.
 | 
						|
    Returns:
 | 
						|
       (error code, error message)
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub deconfig {
  # De-configuration hook, called when nodes are removed from the xCAT
  # cluster. Nothing has to be undone for this plug-in: it traces the call
  # and returns an (error code, error message) pair that signals success.
  print("xcatmon:deconfig called\n");

  my ($rc, $text) = (0, "ok");
  return ($rc, $text);
}
 | 
						|
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3    getMonNodesStatus
 | 
						|
      This function goes to the xCAT nodelist table to retrieve the saved node status
 | 
						|
      for all the nodes that are managed by the local host.
 | 
						|
    Arguments:
 | 
						|
       none.
 | 
						|
    Returns:
 | 
						|
       a hash that has the node status. The format is: 
 | 
						|
          {alive=>[node1, node3,...], unreachable=>[node4, node2...], unknown=>[node8, node101...]}
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub getMonNodesStatus {
  # Sorts the nodes monitored from this host into status buckets, based on
  # the status saved in the monitoring hierarchy.
  #
  # Arguments:
  #   none.
  # Returns:
  #   a hash with the keys $::STATUS_ACTIVE, $::STATUS_INACTIVE and
  #   'unknown'; each value is an array reference of node names.
  #   An empty hash is returned when getMonHierarchy reports an error.
  my %status=();
  my @inactive_nodes=();
  my @active_nodes=();
  my @unknown_nodes=();

  my $hierachy=xCAT_monitoring::monitorctrl->getMonHierarchy();
  if (ref($hierachy) eq 'ARRAY') {
      # an array reference is the error form; element 1 holds the message
      xCAT::MsgUtils->message('S', "[mon]: " . $hierachy->[1]);
      return %status;
  }

  my $isSV=xCAT::Utils->isServiceNode();

  #on a service node or on ms, get the nodes that have the local host as the server node
  my %iphash=();
  foreach my $name (xCAT::Utils->determinehostname()) { $iphash{$name}=1; }
  #if this is mn, include the ones that have no service nodes
  if (!$isSV) { $iphash{'noservicenode'}=1; }

  foreach my $server (keys(%$hierachy)) {
    #service nodes come in pairs, the first one is the monserver adapter that faces the mn,
    # the second one faces the cn. we use the first one here
    my ($sv)=split(',', $server);

    # Only servers that are this host are of interest. Skipping the others
    # here also keeps a previously matched node list from being counted
    # again for an unrelated server.
    next unless (defined($sv) && $iphash{$sv});

    foreach my $entry (@{$hierachy->{$server}}) {
      # NOTE(review): entries are read as [node, type, status]; index 1 is
      # taken to be the node type (matched against /osi/ below) -- confirm
      # against getMonHierarchy.
      my $node=$entry->[0];
      my $type=defined($entry->[1]) ? $entry->[1] : '';
      my $status=$entry->[2];
      if (!$status) { $status=$::STATUS_DEFINED; } #default

      if ($status eq $::STATUS_ACTIVE) { push(@active_nodes, $node); next; }
      if ($status eq $::STATUS_INACTIVE) { push(@inactive_nodes, $node); next; }

      # Look the transition table up without autovivifying an entry in the
      # shared %::NEXT_NODESTAT_VAL for a status it does not know about.
      my $next_vals=$::NEXT_NODESTAT_VAL{$status};
      my $need_active=0;
      my $need_inactive=0;
      if (ref($next_vals) eq 'HASH') {
        if (($next_vals->{$::STATUS_ACTIVE} || 0)==1) { $need_active=1; }
        if (($next_vals->{$::STATUS_INACTIVE} || 0)==1) { $need_inactive=1; }
      }

      if ($need_active && !$need_inactive) {
        #put it into the inactive list so that the monitoring code can switch it to active.
        push(@inactive_nodes, $node);
      } elsif (!$need_active && $need_inactive) {
        #put it into the active list so that the monitoring code can change it to inactive.
        push(@active_nodes, $node);
      } elsif ($need_active && $need_inactive) {
        #unknown list so that the monitoring code can change it to active or inactive
        push(@unknown_nodes, $node);
      } elsif ($type !~ /osi/) {
        #if it is a non-osi node, check it anyway
        push(@unknown_nodes, $node);
      }
    }
  }

  $status{$::STATUS_ACTIVE}=\@active_nodes;
  $status{$::STATUS_INACTIVE}=\@inactive_nodes;
  $status{unknown}=\@unknown_nodes;

  return %status;
}
 | 
						|
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3    setNodeStatusAttributes
 | 
						|
      This function will update the status column of the nodelist table with the new node status.
 | 
						|
    Arguments:
 | 
						|
       status -- a hash pointer of the node status. A key is a status string. The value is 
 | 
						|
                an array pointer of nodes that have the same status.
 | 
						|
                for example: {alive=>["node1", "node1"], unreachable=>["node5","node100"]}
 | 
						|
       force -- 1 force the input values to be set.
 | 
						|
             -- 0 make sure if the input value is the next valid value.
 | 
						|
    Returns:
 | 
						|
        0 for successful.
 | 
						|
        non-0 for not successful.
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub setNodeStatusAttributes {
  # Thin wrapper: forwards the status hash reference and the force flag to
  # xCAT_monitoring::monitorctrl->setNodeStatusAttributes and returns
  # whatever that call returns.
  my @params=@_;

  # when called as a class method the package name comes first; drop it
  shift(@params) if ($params[0] =~ /xCAT_monitoring::xcatmon/);

  my ($status_ref, $force)=@params;
  return xCAT_monitoring::monitorctrl->setNodeStatusAttributes($status_ref, $force);
}
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3    processSettingChanges
 | 
						|
      This function gets called when the setting for this monitoring plugin 
 | 
						|
      has been changed in the monsetting table.
 | 
						|
    Arguments:
 | 
						|
       none.
 | 
						|
    Returns:
 | 
						|
        0 for successful.
 | 
						|
        non-0 for not successful.
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub processSettingChanges {
  # Called when the settings of this plug-in change in the monsetting
  # table: the cron job is removed and re-created so that the new
  # settings are picked up.
  #
  # Returns:
  #   0 for successful.
  #   non-0 (the return code of startNodeStatusMon) for not successful.

  #restart the cron job
  # The result of the stop step is not checked; only the outcome of
  # re-creating the job decides the return code.
  xCAT_monitoring::xcatmon->stopNodeStatusMon([], 0);
  my ($code, $msg)=xCAT_monitoring::xcatmon->startNodeStatusMon([], 0);

  # Return the numeric code explicitly. Falling off the end would hand the
  # caller the (code, message) list, which in scalar context is the
  # message string -- a true value even on success.
  return $code;
}
 | 
						|
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
=head3    getDescription
 | 
						|
      This function returns the detailed description of the plugin including the
 | 
						|
     valid values for its settings in the monsetting table.
 | 
						|
     Arguments:
 | 
						|
        none
 | 
						|
    Returns:
 | 
						|
        The description.
 | 
						|
=cut
 | 
						|
#--------------------------------------------------------------------------------
 | 
						|
sub getDescription {
  # Returns the help text of this plug-in: what it does and which settings
  # it understands. The text is assembled line by line; note that several
  # lines intentionally end in a blank and the last line has no newline.
  my @lines=(
    "  Description:",
    "    xcatmon uses fping to report the node liveness status and update the ",
    "    nodelist.status column. Use command 'monstart xcatmon -n' to start ",
    "    monitoring. ",
    "  Settings:",
    "    ping-interval:  the number of minutes between each fping operation. ",
    "        The default value is 3.",
  );
  return join("\n", @lines);
}
 |