mirror of
				https://github.com/xcat2/xcat-core.git
				synced 2025-10-31 11:22:27 +00:00 
			
		
		
		
	Added support for node status monitoring
git-svn-id: https://svn.code.sf.net/p/xcat/code/xcat-core/trunk@99 8638fb3e-16cb-4fca-ae20-7b5d299a9bcd
This commit is contained in:
		
							
								
								
									
										169
									
								
								xCAT-server-2.0/usr/lib/xcat/monitoring/xcatmon.pm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										169
									
								
								xCAT-server-2.0/usr/lib/xcat/monitoring/xcatmon.pm
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,169 @@ | ||||
| #!/usr/bin/env perl | ||||
| # IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html | ||||
| package xCAT_monitoring::xcatmon; | ||||
|  | ||||
| #use xCAT::NodeRange; | ||||
| #use Socket; | ||||
| #use xCAT::Utils; | ||||
| use xCAT::GlobalDef; | ||||
| use xCAT::Utils; | ||||
|  | ||||
# Package-level placeholder variable. Nothing in this module reads or writes
# it; it is declared with 'our' so that it is a real package variable instead
# of a bare expression statement evaluated in void context.
# NOTE(review): the original comment here described a cache of the current
# node status, shaped like
#   (server1=>{active=>[node1,node3...], unreachable=>[node2, node4..]},....)
# No such cache is implemented in this file -- confirm whether it is still planned.
our $flag;

# True value so that loading this module with require/use succeeds.
1;
| #------------------------------------------------------------------------------- | ||||
| =head1  xCAT_monitoring::xcatmon   | ||||
| =head2    Package Description | ||||
|    This is an xCAT monitoring plug-in. The only thing this plug-in does is  | ||||
|    monitor the node status.  | ||||
| =cut | ||||
| #------------------------------------------------------------------------------- | ||||
|  | ||||
#--------------------------------------------------------------------------------

=head3    start

      Hook invoked by the monitorctrl module while xcatd is starting.
      This plug-in has nothing to set up at that point, so the hook only
      reports success.
    Arguments:
      monservers --A hash reference keyed by the monitoring server nodes.
         Each value is a ref to an array of [node, nodetype] arrays
         monitored by that server. So the format is:
           {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
         The argument is accepted but not examined.
    Returns:
      (return code, message) -- always (0, "started")

=cut

#--------------------------------------------------------------------------------
sub start {
  my ($rc, $text) = (0, "started");

  return ($rc, $text);
}
|  | ||||
|  | ||||
|  | ||||
#--------------------------------------------------------------------------------

=head3    stop

      Hook invoked by the monitorctrl module while xcatd is stopping.
      This plug-in has nothing to tear down at that point, so the hook
      only reports success.
    Arguments:
       none
    Returns:
       (return code, message) -- always (0, "stopped")

=cut

#--------------------------------------------------------------------------------
sub stop {
  my ($rc, $text) = (0, "stopped");

  return ($rc, $text);
}
|  | ||||
|  | ||||
|  | ||||
|  | ||||
#--------------------------------------------------------------------------------

=head3    supportNodeStatusMon

    Capability query used by the monitorctrl module: it asks whether this
    plug-in can monitor the node status and report it back.

    Arguments:
        none
    Returns:
        1 (this plug-in always answers yes)

=cut

#--------------------------------------------------------------------------------
sub supportNodeStatusMon {
  my $supported = 1;

  return $supported;
}
|  | ||||
|  | ||||
|  | ||||
#--------------------------------------------------------------------------------

=head3   startNodeStatusMon

    Called by the monitorctrl module to make this plug-in start monitoring
    the node status and feeding it back to xCAT. It does so by handing
    xCAT::Utils::add_cron_job a crontab entry that runs
    /usr/sbin/xcatnodemon every minute and appends its standard output to
    /var/log/xcatmon.log.
    Arguments:
      monservers --A hash reference keyed by the monitoring server nodes.
         Each value is a ref to an array of [node, nodetype] arrays
         monitored by that server. So the format is:
           {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
         The argument is accepted but not examined.
    Returns:
        (0, "started") when add_cron_job returns code 0; otherwise the
        (return code, message) pair from add_cron_job, unchanged.

=cut

#--------------------------------------------------------------------------------
sub startNodeStatusMon {
  # crontab line: every minute, append the monitor's output to the log file
  my $cron_entry="*/1 * * * * /usr/sbin/xcatnodemon >> /var/log/xcatmon.log";

  my ($rc, $text)=xCAT::Utils::add_cron_job($cron_entry);

  # pass failures from the cron helper straight back to the caller
  return ($rc, $text) unless ($rc==0);

  return (0, "started");
}
|  | ||||
|  | ||||
#--------------------------------------------------------------------------------

=head3   stopNodeStatusMon

    Called by the monitorctrl module to make this plug-in stop feeding the
    node status info back to xCAT. It does so by asking
    xCAT::Utils::remove_cron_job to remove the job identified by
    /usr/sbin/xcatnodemon.

    Arguments:
        none
    Returns:
        (0, "stopped") when remove_cron_job returns code 0; otherwise the
        (return code, message) pair from remove_cron_job, unchanged.

=cut

#--------------------------------------------------------------------------------
sub stopNodeStatusMon {
  # the program whose cron job was registered by startNodeStatusMon
  my $monitor_cmd="/usr/sbin/xcatnodemon";

  my ($rc, $text)=xCAT::Utils::remove_cron_job($monitor_cmd);

  # pass failures from the cron helper straight back to the caller
  return ($rc, $text) unless ($rc==0);

  return (0, "stopped");
}
|  | ||||
|  | ||||
#--------------------------------------------------------------------------------

=head3    addNodes

      Called by the monitorctrl module when new nodes are added to the
      xCAT cluster, so that the plug-in can start monitoring them.
      This implementation is a stub: it ignores its argument and does
      nothing.
    Arguments:
      nodes --nodes to be added. It is a hash reference keyed by the
        monitoring server nodes. Each value is a ref to an array of
        [node, nodetype] arrays monitored by that server. So the format is:
          {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
    Returns:
       none

=cut

#--------------------------------------------------------------------------------
sub addNodes {
  # TODO: include the nodes into the product for monitoring.
  # Until that exists there is nothing to do and nothing to hand back.
  return;
}
|  | ||||
#--------------------------------------------------------------------------------

=head3    removeNodes

      Called by the monitorctrl module when nodes are removed from the
      xCAT cluster, so that the plug-in can stop monitoring them.
      This implementation is a stub: it ignores its argument and does
      nothing.
    Arguments:
      nodes --nodes to be removed. It is a hash reference keyed by the
        monitoring server nodes. Each value is a ref to an array of
        [node, nodetype] arrays monitored by that server. So the format is:
          {monserver1=>[['node1', 'osi'], ['node2', 'switch']...], ...}
    Returns:
       none

=cut

#--------------------------------------------------------------------------------
sub removeNodes {
  # TODO: remove the nodes from the product for monitoring.
  # Until that exists there is nothing to do and nothing to hand back.
  return;
}
|  | ||||
|  | ||||
							
								
								
									
										103
									
								
								xCAT-server-2.0/usr/sbin/xcatnodemon
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										103
									
								
								xCAT-server-2.0/usr/sbin/xcatnodemon
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| #!/usr/bin/env perl | ||||
| # IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html | ||||
|  | ||||
| use xCAT::Table; | ||||
| use xCAT::GlobalDef; | ||||
|  | ||||
| require("/usr/lib/xcat/monitoring/monitorctrl.pm"); | ||||
| ################################################################# | ||||
| # This script is used as a cron job by the xCAT monitoring plug-in | ||||
| # to monitor the node status | ||||
| ##################################################################  | ||||
|  | ||||
# ---- main body of the cron job ------------------------------------------------
# Flow: read the node status xCAT last saved, ping every node again, and
# report only the nodes whose status differs from the saved one.

# log a start marker; the timestamp variables are lexical and reused below
my ($sec,$min,$hour,$mday,$mon,$year)=localtime(time);
printf "%2d-%02d-%04d %02d:%02d:%02d: xcatnodemon started.\n", $mon+1,$mday,$year+1900,$hour,$min,$sec;

# get saved node status (per the original comment: from the nodelist table).
# The result is used as a hash of status => ref to an array of node names.
my %nodes_status_old=xCAT_monitoring::monitorctrl::getNodeStatus();

# node lists per previously saved status; any of these may be undef when no
# node was saved with that status
my $was_active=$nodes_status_old{$::STATUS_ACTIVE};
my $was_inactive=$nodes_status_old{$::STATUS_INACTIVE};
my $was_unknown=$nodes_status_old{unknown};
#print "active nodes: @$was_active\n";
#print "inactive nodes: @$was_inactive\n";
#print "unknown nodes: @$was_unknown\n";

# get the current status of each group; a group that was absent is not pinged
# and is represented by an empty hash
my %now_of_active   = $was_active   ? pingNodeStatus(@$was_active)   : ();
my %now_of_inactive = $was_inactive ? pingNodeStatus(@$was_inactive) : ();
my %now_of_unknown  = $was_unknown  ? pingNodeStatus(@$was_unknown)  : ();

# A node changed if its new status differs from the group it came from.
# Nodes with unknown saved status count as changed whatever the result.
# The "|| []" guards keep the dereference valid when a group was skipped
# above, so this also works with 'use strict'.
my @changed_active=(
  @{ $now_of_inactive{$::STATUS_ACTIVE} || [] },
  @{ $now_of_unknown{$::STATUS_ACTIVE}  || [] },
);
my @changed_inactive=(
  @{ $now_of_active{$::STATUS_INACTIVE}  || [] },
  @{ $now_of_unknown{$::STATUS_INACTIVE} || [] },
);

print "  switch to active:   @changed_active\n";
print "  switch to inactive: @changed_inactive\n";

# collect only the non-empty groups
my %node_status=();
if (@changed_active>0) {
  $node_status{$::STATUS_ACTIVE}=\@changed_active;
}
if (@changed_inactive>0) {
  $node_status{$::STATUS_INACTIVE}=\@changed_inactive;
}

#only set the node status for the changed ones
if (keys(%node_status) > 0) {
  xCAT_monitoring::monitorctrl::processNodeStatusChanges(\%node_status);
}

# log an end marker (followed by a blank line to separate runs in the log)
($sec,$min,$hour,$mday,$mon,$year)=localtime(time);
printf "%2d-%02d-%04d %02d:%02d:%02d: xcatnodemon finished.\n\n", $mon+1,$mday,$year+1900,$hour,$min,$sec;
|  | ||||
|  | ||||
|  | ||||
#--------------------------------------------------------------------------------

=head3    pingNodeStatus

      This function takes an array of nodes and returns their status using
      fping. Nodes that "fping -a" prints are treated as active; every other
      node passed in is treated as inactive. If fping cannot be run or
      prints nothing, all nodes are therefore reported as inactive.
    Arguments:
       nodes-- an array of nodes.
    Returns:
       a hash that has the node status. The keys are the values of
       $::STATUS_ACTIVE and $::STATUS_INACTIVE, and each value is a ref to
       an array of node names (possibly empty). The format is:
          ($::STATUS_ACTIVE=>[node1, node3,...], $::STATUS_INACTIVE=>[node4, node2...])

=cut

#--------------------------------------------------------------------------------
sub pingNodeStatus {
  my @mon_nodes=@_;
  my %status=();           # lexical, so results never leak between calls
  my @active_nodes=();
  my @inactive_nodes=();

  if (@mon_nodes) {
    # Locate fping. /usr/sbin/fping stays the first choice and the fallback
    # (so behavior is unchanged where it exists); the other entries cover
    # systems that install it elsewhere.
    # TODO: how about AIX, does it support fping?
    my $fping='/usr/sbin/fping';
    foreach my $candidate ('/usr/sbin/fping', '/usr/bin/fping',
                           '/usr/local/sbin/fping', '/usr/local/bin/fping') {
      if (-x $candidate) { $fping=$candidate; last; }
    }

    #get all the active nodes
    # NOTE(review): the node names are interpolated into a shell command
    # line; this assumes they contain no shell metacharacters -- confirm
    # against whatever validates node names upstream.
    my $nodes=join(' ', @mon_nodes);
    my $output=`$fping -a $nodes 2> /dev/null`;
    $output='' unless defined($output);   # backticks yield undef if the command cannot be run
    chomp($output);
    @active_nodes=split(/\n/, $output);

    #get all the inactive nodes by subtracting the active nodes from all.
    my %is_active=map { $_ => 1 } @active_nodes;
    @inactive_nodes=grep { !$is_active{$_} } @mon_nodes;
  }

  $status{$::STATUS_ACTIVE}=\@active_nodes;
  $status{$::STATUS_INACTIVE}=\@inactive_nodes;

  return %status;
}
|  | ||||
		Reference in New Issue
	
	Block a user