mirror of
				https://github.com/xcat2/xcat-core.git
				synced 2025-10-30 19:02:27 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1366 lines
		
	
	
		
			55 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			1366 lines
		
	
	
		
			55 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
| #! /usr/bin/perl
 | |
| # IBM(c) 2016 EPL license http://www.eclipse.org/legal/epl-v10.html
 | |
| 
 | |
| BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; }
 | |
| 
 | |
| use lib "$::XCATROOT/probe/lib/perl";
 | |
| use probe_utils;
 | |
| use LogParse;
 | |
| use probe_global_constant;
 | |
| use xCAT::NetworkUtils;
 | |
| use File::Basename;
 | |
| use IO::Select;
 | |
| use Time::Local;
 | |
| use Data::Dumper;
 | |
| use Getopt::Long qw(:config no_ignore_case);
 | |
| 
 | |
| #---------------------------------------------
 | |
| #             Global attributes
 | |
| #---------------------------------------------
 | |
| 
 | |
| #-------MAC to IP/node map list-------
 | |
| # $macmap{mac_addr}{"ip"}="x.x.x.x"
 | |
| # $macmap{mac_addr}{"node"}="nodename"
 | |
| #-------------------------------------
 | |
| my %macmap;
 | |
| 
 | |
| #------IP to node map list-------
 | |
| # $ipnodemap{ip_addr}="nodename"
 | |
| #--------------------------------
 | |
| my %ipnodemap;
 | |
| 
 | |
| #The NIC of provision network
 | |
| my $installnic;
 | |
| 
 | |
| #Used by customer or developer, to obtain more output information
 | |
| my $verbose = 0;
 | |
| 
 | |
| #if no specific instruction, do monitor by default
 | |
| my $monitor = 1;
 | |
| 
 | |
| #used by developer, to debug the detail information about function running
 | |
| my $debug = 0;
 | |
| 
 | |
| my $osdeploy_start_time;
 | |
| 
 | |
| #---------------------------------------------
 | |
| #            Command Usage
 | |
| #---------------------------------------------
 | |
| my $program_name = basename("$0");
 | |
| $::USAGE = "Usage:
 | |
|     $program_name -h
 | |
|     $program_name -n <node_range> [-t <max_waiting_time>] [-p <level>] [-V]
 | |
|     $program_name -n <node_range> -r <roll_back_duration> [-p <level>] [-V]
 | |
| 
 | |
| Description:
 | |
|     Probe operating system provision process. Supports two modes - 'Realtime monitor' and 'Replay history'. 
 | |
|     Realtime monitor: This is a default. This tool with monitor provision state of the node. Trigger 'Realtime monitor' before rebooting target node to do provisioning.
 | |
|     Replay history: Used after provisioning is finished to probe the previously completed provisioning.
 | |
| 
 | |
|     [NOTE] Currently, hierarchial structure is not supported.
 | |
| 
 | |
| Options:
 | |
|     -h : Get usage information of $program_name
 | |
|     -V : Output more information 
 | |
|     -n : The range of nodes to be monitored or replayed.
 | |
|     -t : The maximum time to wait when doing monitor, unit is minutes. default is 60.
 | |
|     -r : Trigger 'Replay history' mode. Follow the duration of rolling back. Units are 'h' (hour) or 'm' (minute)
 | |
|          Supported format examples: 3h30m (3 hours and 30 minutes ago), 2h (2 hours ago), 40m (40 minutes ago) and 3 (3 hours ago).
 | |
|          If unit is not specified, hour will be used by default.
 | |
|     -p Show elapsed time of each stage during provision for each node
 | |
|          Support 3 output format:
 | |
|          'compact': Elapsed time of provision for each node.
 | |
|          'phase'  : Elapsed time for DHCP, INSTALL, POSTSCRIPTS and POSTBOOTSCRIPTS stages, and time for whole provision.
 | |
|          'origin' : Show origin start time of each stage.
 | |
| ";
 | |
| 
 | |
| 
 | |
| #----------------------------------------------
 | |
| #               Main process
 | |
| #----------------------------------------------
 | |
| 
 | |
| # parse command line arguments
 | |
| my $help        = 0;
 | |
| my $test        = 0;
 | |
| my $maxwaittime = 60;    #unit is minute, the max wait time of monitor
 | |
| my $rollforward_time_of_replay;    #used by feature replay provision log
 | |
| my $noderange;
 | |
| if (
 | |
|     !GetOptions("--help|h|?" => \$help,
 | |
|         "T"   => \$test,
 | |
|         "V"   => \$verbose,
 | |
|         "p=s" => \$performance,
 | |
|         "t=s" => \$maxwaittime,
 | |
|         "r=s" => \$rollforward_time_of_replay,
 | |
|         "n=s" => \$noderange))
 | |
| {
 | |
|     probe_utils->send_msg("stdout", "f", "Invalid parameter for $program_name");
 | |
|     probe_utils->send_msg("stdout", "",  "$::USAGE");
 | |
|     exit 1;
 | |
| }
 | |
| 
 | |
| if ($help) {
 | |
|     probe_utils->send_msg("stdout", "", "$::USAGE");
 | |
|     exit 0;
 | |
| }
 | |
| 
 | |
| if ($test) {
 | |
|     probe_utils->send_msg("stdout", "o", "Probe operating system provision process. Supports two modes - 'Realtime monitor' and 'Replay history'.");
 | |
|     exit 0;
 | |
| }
 | |
| 
 | |
| unless ($noderange) {
 | |
|     probe_utils->send_msg("stdout", "f", "A noderange is required");
 | |
|     probe_utils->send_msg("stdout", "",  "$::USAGE");
 | |
|     exit 1;
 | |
| }
 | |
| 
 | |
| my @nodes = split (",", $noderange);
 | |
| my @error_nodes;
 | |
| foreach my $node (@nodes) {
 | |
|     if ($node =~ /^-/) {
 | |
|         push @error_nodes, $node;
 | |
|     }
 | |
| }
 | |
| if (@error_nodes) {
 | |
|     my $error = join (",", @error_nodes);
 | |
|     probe_utils->send_msg("stdout", "f", "[$error]: Wrong node definition.");
 | |
|     exit 1;
 | |
| }
 | |
| 
 | |
| if ($rollforward_time_of_replay) {
 | |
|     if (($rollforward_time_of_replay !~ /(\d+)h(\d+)m/i) && ($rollforward_time_of_replay !~ /^(\d+)h*$/i) && ($rollforward_time_of_replay !~ /^(\d+)m$/i)) {
 | |
|         probe_utils->send_msg("stdout", "f", "Unsupported time format for option '-r'");
 | |
|         probe_utils->send_msg("stdout", "", "$::USAGE");
 | |
|         exit 1;
 | |
|     }
 | |
| }
 | |
| 
 | |
| if ($performance) {
 | |
|     if ($performance ne "compact" and $performance ne "phase" and $performance ne "origin") {
 | |
|         probe_utils->send_msg("stdout", "f", "Unsupported parameter for option '-p'");
 | |
|         probe_utils->send_msg("stdout", "",  "$::USAGE");
 | |
|         exit 1;
 | |
|     }
 | |
| }
 | |
| 
 | |
| my $rst = do_pre_check($noderange);
 | |
| if ($debug) {
 | |
|     print "Dumper macmap--------\n";
 | |
|     print Dumper \%macmap;
 | |
|     print "Dumper ipnodemap--------\n";
 | |
|     print Dumper \%ipnodemap;
 | |
|     print "installnic = $installnic\n";
 | |
| }
 | |
| 
 | |
| #if failed to pass pre-check, exit directly
 | |
| exit $rst if ($rst);
 | |
| 
 | |
| # record every status's start time and end time for each node
 | |
| # $node_status_time{$node}{$status}{start_time} = $start_time;
 | |
| my %node_status_time = () if ($performance);
 | |
| 
 | |
| if ($rollforward_time_of_replay) {
 | |
|     $monitor = 0;
 | |
| 
 | |
|     my $start_time_of_replay = time();
 | |
|     my $end_time_of_replay   = $start_time_of_replay;
 | |
|     if ($rollforward_time_of_replay =~ /(\d+)h(\d+)m/i) {
 | |
|         $start_time_of_replay -= ($1 * 3600 + $2 * 60)
 | |
|     } elsif ($rollforward_time_of_replay =~ /^(\d+)h*$/i) {
 | |
|         $start_time_of_replay -= $1 * 3600;
 | |
|     } elsif ($rollforward_time_of_replay =~ /^(\d+)m$/) {
 | |
|         $start_time_of_replay -= $1 * 60;
 | |
|     }
 | |
| 
 | |
|     $rst = do_replay($noderange, $start_time_of_replay, $end_time_of_replay);
 | |
|     exit $rst;
 | |
| }
 | |
| 
 | |
| #if no specific instruction, do monitor by default
 | |
| $rst = do_monitor($noderange, $maxwaittime);
 | |
| 
 | |
| exit $rst;
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Pre-check some static configuration. such as privision network interface, the definition of node
 | |
|     Arguments:
 | |
|         noderange:  The range of node
 | |
|     Returns:
 | |
|         0: pass
 | |
|         1: failed
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub do_pre_check {
 | |
|     my $noderange = shift;
 | |
| 
 | |
|     my $rst = 0;    #the return value of current function
 | |
| 
 | |
|     my @error = ();
 | |
|     my $sub_func_rst = obtain_install_nic(\$installnic, \@error);
 | |
|     if ($sub_func_rst) {
 | |
|         probe_utils->send_msg("stdout", "f", "Failed to obtain install NIC in current server");
 | |
|         probe_utils->send_msg("stdout", "d", "$_") foreach (@error);
 | |
|     } else {
 | |
|         probe_utils->send_msg("stdout", "i", "The install NIC in current server is $installnic");
 | |
|     }
 | |
|     $rst |= $sub_func_rst;
 | |
| 
 | |
|     @error = ();
 | |
|     $sub_func_rst = check_noderange($noderange, \@error);
 | |
|     if ($sub_func_rst) {
 | |
|         probe_utils->send_msg("stdout", "f", "There is something wrong in node definition");
 | |
|         probe_utils->send_msg("stdout", "d", "$_") foreach (@error);
 | |
|     } else {
 | |
|         probe_utils->send_msg("stdout", "o", "All nodes to be deployed are valid");
 | |
|     }
 | |
|     $rst |= $sub_func_rst;
 | |
| 
 | |
|     return $rst;
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Obtain the network interface of provision
 | |
|     Arguments: 
 | |
|         installnic_ref: (output attribute) the reference of scalar which save the provision network interface.
 | |
|         return_error_ref: (output attribute) the reference of array which save the error information.
 | |
|     Returns:
 | |
|         0: success 
 | |
|         1: failed
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub obtain_install_nic {
 | |
|     my $installnic_ref   = shift;
 | |
|     my $return_error_ref = shift;
 | |
| 
 | |
|     my $master_ip_in_site = `lsdef -t site -i master -c |awk -F"=" '{print \$2}'`;
 | |
|     chomp($master_ip_in_site);
 | |
|     unless ($master_ip_in_site) {
 | |
|         push @$return_error_ref, "'master' isn't configured in 'site' table";
 | |
|         return 1;
 | |
|     }
 | |
| 
 | |
|     $$installnic_ref = `ip addr |grep -B2 $master_ip_in_site|awk -F" " '/mtu/{gsub(/:/,"",\$2); print \$2}'`;
 | |
|     chomp($$installnic_ref);
 | |
|     if (!$$installnic_ref) {
 | |
|         push @$return_error_ref, "The value of 'master' in 'site' table is $master_ip_in_site, can't get corresponding network interface in current server";
 | |
|         return 1;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Check if the definition of all nodes are valid
 | |
|         If all nodes definition are valid, initialize below global attributes which will be used as a map in "Monitor" or "Replay" 
 | |
|         %macmap, %ipnodemap
 | |
|                 
 | |
|     Arguments:
 | |
|         node_range: node range
 | |
|     Returns:
 | |
|         0 : pass
 | |
|         1 : failed
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub check_noderange {
 | |
|     my $node_range = shift;
 | |
|     my $error_ref  = shift;
 | |
| 
 | |
|     my @cmdoutput = `lsdef $node_range -i ip,mac -c 2>&1`;
 | |
|     my $rst       = 0;
 | |
|     my %nodecheckrst;
 | |
| 
 | |
|     foreach (@cmdoutput) {
 | |
|         chomp($_);
 | |
|         $_ =~ s/^\s+|\s+$//g;
 | |
|         if ($_ =~ /^Error: Could not find an object named '(.+)' of type .+/i) {
 | |
|             $rst = 1;
 | |
|             push @$error_ref, "Node $1 without node definition";
 | |
|         } elsif ($_ =~ /(\S+):\s+mac=(.*)/i) {
 | |
|             my $node = $1;
 | |
|             my $mac  = $2;
 | |
|             if ($mac) {
 | |
|                 $nodecheckrst{$node}{"mac"} = $mac;
 | |
|             } else {
 | |
|                 $rst = 1;
 | |
|                 push @$error_ref, "Node $node without 'mac' attribute definition";
 | |
|             }
 | |
|         } elsif ($_ =~ /(\S+):\s+ip=(.*)/i) {
 | |
|             my $node = $1;
 | |
|             my $ip   = $2;
 | |
|             if ($ip) {
 | |
|                 $nodecheckrst{$node}{"ip"} = $ip;
 | |
|             } else {
 | |
|                 my $nodeip = xCAT::NetworkUtils->getipaddr($node);
 | |
|                 if ($nodeip) {
 | |
|                     $nodecheckrst{$node}{"ip"} = $nodeip;
 | |
|                 } else {
 | |
|                     $rst = 1;
 | |
|                     push @$error_ref, "Node $node without ip definition";
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (!$rst) {
 | |
|         foreach my $node (keys %nodecheckrst) {
 | |
|             my @macs = split(/\|/, $nodecheckrst{$node}{"mac"});
 | |
|             foreach my $mac (@macs) {
 | |
| 
 | |
|                 #[NOTE] don't support 2 adapters in the same network now. TODO
 | |
| 
 | |
|                 if ($mac =~ /\!\*NOIP\*/) {
 | |
|                     $mac =~ s/\!\*NOIP\*//g;
 | |
|                     $macmap{$mac}{"ip"}   = "NOIP";
 | |
|                     $macmap{$mac}{"node"} = $node;
 | |
|                 } elsif ($mac =~ /(\w{2}:\w{2}:\w{2}:\w{2}:\w{2}:\w{2})\!(.+)/) {
 | |
|                     $mac = $1;
 | |
|                     my $tmp_node = $2;
 | |
|                     $macmap{$mac}{"node"} = $node;
 | |
|                     my $tmp_ip = xCAT::NetworkUtils->getipaddr($tmp_node);
 | |
|                     if ($tmp_ip) {
 | |
|                         $macmap{$mac}{"ip"}  = $tmp_ip;
 | |
|                         $ipnodemap{ $nodecheckrst{$node}{"ip"} } = $node;
 | |
|                     } else {
 | |
|                         $macmap{$mac}{"ip"} = "NOIP";
 | |
|                     }
 | |
|                 } else {
 | |
|                     $macmap{$mac}{"node"} = $node;
 | |
|                     $macmap{$mac}{"ip"}   = $nodecheckrst{$node}{"ip"};
 | |
|                     $ipnodemap{ $nodecheckrst{$node}{"ip"} } = $node;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return $rst;
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Initailize a very important hash "%node_state" which will save the state information of every node 
 | |
|     Arguments:
 | |
|         noderange: (input attribute) The range of node
 | |
|         node_state_ref: (output attribute) the reference of hash "%node_state" 
 | |
|             The strucuture of  hash "%node_state" are :
 | |
|                 $node_state{<node>}{statehistory}  Array.  save the latest loop provision states 
 | |
|                 $node_state{<node>}{done}          Scalar.  the flag of if the node have finished the provision                
 | |
|                 $node_state{<node>}{allstatehistory}   Array. save the history states before the latest loop provision. Used in debug mode.  
 | |
|                 $node_state{<node>}{log}           Array. save all related logs of node. Used in debug mode. 
 | |
|     Returns:
 | |
|         NULL
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub init_node_state {
 | |
|     my $noderange      = shift;
 | |
|     my $node_state_ref = shift;
 | |
| 
 | |
|     my @nodes = probe_utils->parse_node_range($noderange);
 | |
|     foreach my $node (@nodes) {
 | |
|         @{ $node_state_ref->{$node}{statehistory} } = ();
 | |
|         %{ $node_state_ref->{$node}{errors} } = ();
 | |
|         $node_state_ref->{$node}{done} = 0;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Implement the replay feature.
 | |
|     Arguments:
 | |
|         noderange : the range of node
 | |
|         start_time_of_replay: the start time point of scaning log
 | |
|         end_time_of_replay: the end time point of scaning log
 | |
|    
 | |
|     Returns:
 | |
|         0: success 
 | |
|         1: failed
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub do_replay {
 | |
|     my $noderange            = shift;
 | |
|     my $start_time_of_replay = shift;
 | |
|     my $end_time_of_replay   = shift;
 | |
| 
 | |
|     my $rc = 0;
 | |
| 
 | |
|     #handle INT/TERM  signal
 | |
|     my $terminal = 0;
 | |
|     $SIG{TERM} = $SIG{INT} = sub {
 | |
|         $terminal = 1;
 | |
|     };
 | |
| 
 | |
|     my $timestr = scalar(localtime($start_time_of_replay));
 | |
|     probe_utils->send_msg("stdout", "d", "Starting to scan logs which are later than '$timestr', please waiting for a while.............");
 | |
| 
 | |
|     my %node_state;
 | |
|     init_node_state($noderange, \%node_state);
 | |
|     if ($debug) {
 | |
|         print "Dumper node_state-------\n";
 | |
|         print Dumper \%node_state;
 | |
|     }
 | |
| 
 | |
|     my $log_parse = LogParse->new($verbose, $::REPLAY);
 | |
|     my @candidate_mn_hostname_in_log = $log_parse->obtain_candidate_mn_hostname_in_log();
 | |
| 
 | |
|     while ($start_time_of_replay < $end_time_of_replay) {
 | |
|         my @valid_one_second_log_set;
 | |
|         my $rst = $log_parse->obtain_one_second_logs($start_time_of_replay, \@valid_one_second_log_set);
 | |
|         if ($rst) {
 | |
|             probe_utils->send_msg("stdout", "d", "Failed to obtain logs from log files");
 | |
|             $rc = 1; 
 | |
|             last;
 | |
|         }
 | |
| 
 | |
|         foreach my $log_ref (@valid_one_second_log_set) {
 | |
|             dispatch_log_to_handler($log_ref, \@candidate_mn_hostname_in_log, \%node_state);
 | |
|         }
 | |
| 
 | |
|         $start_time_of_replay = $log_parse->obtain_next_second();
 | |
|     
 | |
|         # receive terminal signal from customer
 | |
|         if ($terminal) {
 | |
|             probe_utils->send_msg("stdout", "d", "Get INT or TERM signal!!!");
 | |
|             probe_utils->send_msg("stdout", "w", "Haven't scaned all valid logs, report based on the logs have been scaned");
 | |
|             last;
 | |
|         }
 | |
|     }
 | |
|     $log_parse->destory();
 | |
| 
 | |
|     conclusion_report(\%node_state);
 | |
|     return $rc;
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Calculate the provision of every node. offer a report to customer
 | |
|     Arguments:
 | |
|         node_state_ref: The reference of hash "%node_state". refer to function "init_node_state" for the structure of "%node_state"
 | |
|     Returns:
 | |
|         0: success
 | |
|         1: failed
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub conclusion_report {
 | |
|     my $node_state_ref = shift;
 | |
| 
 | |
|     probe_utils->send_msg("stdout", "", "======================  Summary  =====================");
 | |
| 
 | |
|     if ($debug) {
 | |
|         print "---->the result of %node_state<------\n";
 | |
|         print Dumper $node_state_ref;
 | |
|     }
 | |
| 
 | |
|     if ($verbose) {
 | |
|         probe_utils->send_msg("stdout", "d", "----------node state history----------");
 | |
|         foreach my $node (keys %$node_state_ref) {
 | |
|             my $allhistorystate;
 | |
|             my $historystate;
 | |
|             probe_utils->send_msg("stdout", "d", "[$node]:");
 | |
|             if (@{ $node_state_ref->{$node}{allstatehistory} }) {
 | |
|                 $allhistorystate .= "$::STATE_DESC{$_}=>" foreach (@{ $node_state_ref->{$node}{allstatehistory} });
 | |
|                 $allhistorystate =~ s/=>$//g;
 | |
|                 probe_utils->send_msg("stdout", "d", "Setps executed prior to last provisioning attempt:");
 | |
|                 probe_utils->send_msg("stdout", "d", "$allhistorystate");
 | |
|             }
 | |
| 
 | |
|             $historystate .= "$::STATE_DESC{$_}=>" foreach (@{ $node_state_ref->{$node}{statehistory} });
 | |
|             $historystate =~ s/=>$//g;
 | |
|             probe_utils->send_msg("stdout", "d", "Steps executed for last provisioning attempt:");
 | |
|             probe_utils->send_msg("stdout", "d", "$historystate");
 | |
|         }
 | |
|         probe_utils->send_msg("stdout", "d", "--------------------------------------");
 | |
|     }
 | |
| 
 | |
|     my %failed_node;
 | |
|     foreach my $node (keys %$node_state_ref) {
 | |
|         my $stop_stage = 0;
 | |
| 
 | |
|         my $start_rpower   = 0;
 | |
|         my $isntalling     = 0;
 | |
|         my $postbootscript = 0;
 | |
|         my $completed      = 0;
 | |
| 
 | |
|         #calculate node provision result
 | |
|         #the max value of all state is the final stop stage
 | |
|         foreach (@{ $node_state_ref->{$node}{statehistory} }) {
 | |
|             $stop_stage     = $_ if ($stop_stage < $_);
 | |
|             $start_rpower   = 1  if ($_ == $::STATE_POWER_ON);
 | |
|             $isntalling     = 1  if ($_ == $::STATE_INSTALLING);
 | |
|             $postbootscript = 1  if ($_ == $::STATE_POSTBOOTSCRIPT);
 | |
|             $completed      = 1  if ($_ == $::STATE_COMPLETED)
 | |
|         }
 | |
| 
 | |
|         # Cover limited non-privision error when replay
 | |
|         # 1 if power on target node successfully and there is 'running postbootscript' in node state history, but without "installing" state,
 | |
|         #   It is very possible to just do reboot process
 | |
|         # 2 if there isn't reboot operation for target node during the rollback time window 
 | |
|         #   That means there isn't provision process happened 
 | |
| 
 | |
|         unless ($monitor) {
 | |
|             if (! $start_rpower) {
 | |
|                 $failed_node{$node}{non_provision_prediction} = "Without provision process during rollback time window";
 | |
|                 next;
 | |
|             } elsif (!$isntalling && $postbootscript && !$completed) {
 | |
|                 $failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk";
 | |
|                 next;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         # if not completed or completed but with postscript error, add node to failed_node hash
 | |
|         if ($stop_stage != $::STATE_COMPLETED or @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }) {
 | |
|             $failed_node{$node}{provision_stop_point} = $stop_stage;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     my $is_success = 1;
 | |
|     if (%failed_node) {
 | |
|         $is_success = 0;
 | |
|         my $failed_node_num = keys %failed_node;
 | |
|         if ($failed_node_num > 1) {
 | |
|             probe_utils->send_msg("stdout", "d", "There are $failed_node_num node provision failures");
 | |
|         } elsif ($failed_node_num == 1) {
 | |
|             probe_utils->send_msg("stdout", "d", "There is $failed_node_num node provision failures");
 | |
|         }
 | |
| 
 | |
|         foreach my $node (keys %failed_node) {
 | |
|             if ($failed_node{$node}{non_provision_prediction}) {
 | |
|                 probe_utils->send_msg("stdout", "f", "$node : $failed_node{$node}{non_provision_prediction}");
 | |
|             } else {
 | |
|                 my $node_length = length($node);
 | |
|                 my $space_str = " " x ($length_node + 2);
 | |
|                 # if stop at stage before postscript, means there is error at current stage, print error message
 | |
|                 probe_utils->send_msg("stdout", "f", "$node : stop at stage '$::STATE_DESC{$failed_node{$node}{provision_stop_point}}'");
 | |
|                 if ($failed_node{$node}{provision_stop_point} < $::STATE_POSTSCRIPT) {
 | |
|                     foreach my $node_error (@{ $node_state_ref->{$node}{errors}{$failed_node{$node}{provision_stop_point}} }) {
 | |
|                         probe_utils->send_msg("stdout", "d", "$space_str $node_error");
 | |
|                     }
 | |
|                 } else {
 | |
|                     # if stop at postscript or after postscript, check whether has error from postscript, if has print
 | |
|                     for (my $stage = $::STATE_POSTSCRIPT; $stage <= $::STATE_COMPLETED; $stage++) {
 | |
|                         foreach my $node_error (@{ $node_state_ref->{$node}{errors}{$stage} }) {
 | |
|                              probe_utils->send_msg("stdout", "d", "$space_str $node_error");
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     } else {
 | |
|         probe_utils->send_msg("stdout", "o", "All nodes provisioned successfully");
 | |
|     }
 | |
| 
 | |
|     performance_calculation($is_success) if ($performance);
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Calculate the performance of provision (for each node)
 | |
|     Arguments:
 | |
|         performance: compact: calculate how much time spent for provision
 | |
|                      phase: calculate how much time spent for each status (DHCP, INSTALL, POSTSCRIPTS, POSTBOOTSCRIPTS)
 | |
|                      origin : show time point for each status
 | |
|                      reserve: reserve for other type
 | |
|     Returns:
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub performance_calculation {
 | |
|     my $is_success = shift;
 | |
|     #print Dumper(%node_status_time);
 | |
| 
 | |
|     return if (!%node_status_time);
 | |
| 
 | |
|     # Currently, only diskful is supported
 | |
|     my $provision_type = $::DISKFUL;
 | |
| 
 | |
|     my @status_for_time = ();
 | |
|     my $isnull = 0;
 | |
|     my @time_content;
 | |
|     # print table's first line
 | |
|     # @status_for_time: the status that needed to calculate time
 | |
|    
 | |
|     my @title_lines = (
 | |
|         [qw/NODE ELAPSED/],
 | |
|         [qw/NODE SVRBOOT INSTALL POST POSTBOOT ELAPSED/],
 | |
|         [qw/NODE RPOWER DHCP BOOTLOADER KERNEL INITRD INSTALL POST POSTBOOT COMPLETED/],
 | |
|         [qw/NODE RPOWER DHCP BOOTLOADER KERNEL INITRD NETBOOTING ROOTIMG POSTBOOT COMPLETED/],
 | |
|         [qw/NODE DHCP BOOTLOADER KERNEL INITRD NETBOOTING ROOTIMG POSTBOOT ELAPSED/]);
 | |
|  
 | |
|     if ($performance eq "compact") {
 | |
|         push @time_content, "Provision Time";
 | |
|         push @time_content, $title_lines[0];
 | |
|         @status_for_time = ($::STATE_COMPLETED);
 | |
|     }
 | |
| 
 | |
|     if ($performance eq "phase") {
 | |
|         push @time_content, "Time for Phases";
 | |
|         push @time_content, $title_lines[1];
 | |
|         @status_for_time = ($::STATE_DHCP, $::STATE_INSTALLRPM, $::STATE_POSTSCRIPT, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
 | |
|     }
 | |
| 
 | |
|     if ($performance eq "origin") {
 | |
|         push @time_content, "Start Time for Stage";
 | |
|         if ($provision_type == $::DISKFUL){
 | |
|             push @time_content, $title_lines[2];
 | |
|             @status_for_time = ($::STATE_POWER_ON, $::STATE_DHCP, $::STATE_BOOTLODER, $::STATE_KERNEL, $::STATE_INITRD, $::STATE_INSTALLRPM, $::STATE_POSTSCRIPT, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
 | |
|         } else { # reserved for diskless
 | |
|             push @time_content, $title_lines[3];
 | |
|             @status_for_time = ($::STATE_POWER_ON, $::STATE_DHCP, $::STATE_BOOTLODER, $::STATE_KERNEL, $::STATE_INITRD, $::STATE_NETBOOTING, $::STATE_ROOTIMG, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if ($performance eq "reserve") {
 | |
|         push @time_content, $title_lines[4];
 | |
|         @status_for_time = ($::STATE_DHCP, $::STATE_BOOTLODER, $::STATE_KERNEL, $::STATE_INITRD, $::STATE_NETBOOTING, $::STATE_ROOTIMG, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
 | |
|     }
 | |
| 
 | |
|     # calculate time for each node and status
 | |
|     foreach my $node (keys %node_status_time) {
 | |
| 
 | |
|         my @timeinfo = ();
 | |
|         push @timeinfo, $node;
 | |
| 
 | |
|         if ($performance eq "origin") {
 | |
|             foreach my $status (@status_for_time) {
 | |
|                 if ($node_status_time{$node}{$status}{time_point}) {
 | |
|                     if ($status == $::STATE_DHCP or $status == $::STATE_BOOTLODER) {
 | |
|                         # If there is no start time of kernel, means osdeploy start after kernel download
 | |
|                         if (!$node_status_time{$node}{$::STATE_KERNEL}{start_time}) {
 | |
|                             push @timeinfo, "NULL";
 | |
|                             $isnull = 1;
 | |
|                         # If there is start time of kernel, but dhcp or bootloader's time value larger than kernel. It means dhcp and bootloader is after kernel, not the value we wanted.
 | |
|                         } elsif ($node_status_time{$node}{$status}{start_time} > $node_status_time{$node}{$::STATE_KERNEL}{start_time}) {
 | |
|                             push @timeinfo, "NULL";
 | |
|                             $isnull = 1;      
 | |
|                         } else {
 | |
|                             push @timeinfo, $node_status_time{$node}{$status}{time_point}; 
 | |
|                         }
 | |
|                     } elsif ($status >= $::STATE_INSTALLRPM and (!$node_status_time{$node}{$status-1}{start_time} and !$node_status_time{$node}{$status-1}{end_time})) {
 | |
|                         push @timeinfo, "NULL";
 | |
|                         $isnull = 1;
 | |
|                     } else {
 | |
|                         push @timeinfo, $node_status_time{$node}{$status}{time_point};
 | |
|                     }
 | |
|                 } else {
 | |
|                     push @timeinfo, "NULL";
 | |
|                     $isnull = 1;
 | |
|                 }
 | |
|             }
 | |
|             push @time_content, [ @timeinfo ];
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         # get the start time and end time for each step
 | |
|         foreach my $status (@status_for_time) {
 | |
|             my $tmp_status;
 | |
|             my $tmp_detail_status;
 | |
|             if ($performance eq "phase") {
 | |
|                 # when phase, if the status is DHCP, use power on time as it's start time
 | |
|                 $tmp_detail_status = $::STATE_DHCP;
 | |
|             } else {
 | |
|                 # if not phase, power on time is the start time for each steps
 | |
|                 $tmp_detail_status = $::STATE_COMPLETED;
 | |
|             }
 | |
| 
 | |
|             if ($status <= $tmp_detail_status or $status == $::STATE_COMPLETED) {
 | |
|                 $tmp_status = $::STATE_POWER_ON;
 | |
|             } else {
 | |
|                 $tmp_status = $status;
 | |
|             }
 | |
| 
 | |
|             my $tmp_start_time = $node_status_time{$node}{$tmp_status}{start_time};
 | |
|             my $tmp_end_time = $node_status_time{$node}{$status}{end_time};
 | |
|             $tmp_end_time = $node_status_time{$node}{$status}{start_time} if ($status != $::STATE_DHCP and $status != $::STATE_INSTALLRPM and $status != $::STATE_POSTSCRIPT and $status != $::STATE_POSTBOOTSCRIPT and $node_status_time{$node}{$status}{start_time});
 | |
| 
 | |
|             if ($tmp_start_time && $tmp_end_time) {
 | |
|                 if ($status >= $::STATE_INSTALLRPM and (!$node_status_time{$node}{$status-1}{start_time} and !$node_status_time{$node}{$status-1}{end_time})) {
 | |
|                     push @timeinfo, "NULL";
 | |
|                     $isnull = 1;
 | |
|                 } else {
 | |
|                     push @timeinfo, probe_utils->convert_second_to_time($tmp_end_time - $tmp_start_time);
 | |
|                 } 
 | |
|             } else {
 | |
|                 push @timeinfo, "NULL";
 | |
|                 $isnull = 1;
 | |
|             }
 | |
|         }
 | |
|         push @time_content, [ @timeinfo ];
 | |
|     }
 | |
| 
 | |
|     probe_utils->print_table(\@time_content, 1);
 | |
| 
 | |
|     if ($performance eq "phase") {
 | |
|         print "SVRBOOT  : Time from rpower to acquire ip\n";
 | |
|         print "INSTALL  : Time to download all rpm/deb packages\n";
 | |
|         print "POST     : Postscript execution time\n";
 | |
|         print "POSTBOOT : Postbootscript execution time\n";
 | |
|         print "ELAPSED  : Total provision time\n";
 | |
|     }
 | |
| 
 | |
|     if ($isnull and $is_success) {
 | |
|         my $command_input = "xcatprobe -w $program_name";
 | |
|         $command_input .= " -n $noderange" if ($noderange);
 | |
|         $command_input .= " -p $performance";
 | |
|         $command_input .= " -V" if ($verbose);
 | |
|         my $osdeploy_end_time = time();
 | |
|         my $time_for_replay = $osdeploy_end_time - $osdeploy_start_time;
 | |
|         my $time_hour = ($time_for_replay - $time_for_replay%3600) / 3600 + 1;
 | |
|         $command_input .= " -r $time_hour" . "h";
 | |
|         probe_utils->send_msg("stdout", "", "Did not get correct time, please run '$command_input' to get correct time");
 | |
|     }
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Implement the monitor feature
 | |
|     Arguments:
 | |
|         noderange : the range of node
 | |
|         maxwaittime:  the max waiting time for monitor. 
 | |
|     Returns:
 | |
|         0: success
 | |
|         1: failed
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub do_monitor {
 | |
|     my $noderange   = shift;
 | |
|     my $maxwaittime = shift;
 | |
| 
 | |
|     my $rst      = 0;
 | |
|     my $terminal = 0;
 | |
| 
 | |
|     $osdeploy_start_time = time() if ($performance);
 | |
| 
 | |
|     $SIG{TERM} = $SIG{INT} = sub {
 | |
|         $terminal = 1;
 | |
|     };
 | |
| 
 | |
|     my $startline =
 | |
|       "-------------------------------------------------------------
 | |
| Start capturing every message during OS provision process....
 | |
| -------------------------------------------------------------
 | |
| ";
 | |
|     probe_utils->send_msg("stdout", "", "$startline");
 | |
| 
 | |
|     my @openfilepids;
 | |
|     my @openfilefds;
 | |
|     my %fd_filetype_map;
 | |
| 
 | |
|     {    #a very important brace to hold a code block
 | |
|         my $log_parse         = LogParse->new($verbose, $::MONITOR);
 | |
|         my $candidate_log_ref = $log_parse->obtain_log_file_list();
 | |
| 
 | |
|         #open candidate log file to obtain realtime log
 | |
|         if (%$candidate_log_ref) {
 | |
|             foreach my $logfile (keys %$candidate_log_ref) {
 | |
|                 my $pid;
 | |
|                 my $fd;
 | |
|                 if (!($pid = open($fd, "tail -f -n 0 $candidate_log_ref->{$logfile}{file} 2>&1 |"))) {
 | |
|                     probe_utils->send_msg("stdout", "f", "Can't open $candidate_log_ref->{$logfile}{file} to get logs");
 | |
|                     $rst = 1;
 | |
|                     last;
 | |
|                 } else {
 | |
|                     push @openfilepids, $pid;
 | |
|                     push @openfilefds,  $fd;
 | |
|                     $fd_filetype_map{$fd} = $candidate_log_ref->{$logfile}{type};
 | |
|                 }
 | |
|             }
 | |
|         } else {
 | |
|             probe_utils->send_msg("stdout", "f", "There are no valid log files to be scanned");
 | |
|             $rst = 1;
 | |
|         }
 | |
| 
 | |
|         last if ($rst);
 | |
| 
 | |
|         my %node_state;
 | |
|         init_node_state($noderange, \%node_state);
 | |
| 
 | |
|         my $select = new IO::Select;
 | |
|         $select->add(\*$_) foreach (@openfilefds);
 | |
|         $| = 1;
 | |
| 
 | |
|         my @hdls;
 | |
|         my $starttime = time();
 | |
|         my @candidate_mn_hostname_in_log = $log_parse->obtain_candidate_mn_hostname_in_log();
 | |
| 
 | |
|         #read log realtimely, then handle each log
 | |
|         for (; ;) {
 | |
|             if (@hdls = $select->can_read(0)) {
 | |
|                 foreach my $hdl (@hdls) {
 | |
|                     my $line = "";
 | |
|                     chomp($line = <$hdl>);
 | |
|                     my $log_content_ref = $log_parse->obtain_log_content($fd_filetype_map{$hdl}, $line);
 | |
|                     dispatch_log_to_handler($log_content_ref, \@candidate_mn_hostname_in_log, \%node_state);
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             # stop reading log at below 3 scenarios
 | |
|             # 1 receive terminal signal from customer
 | |
|             if ($terminal) {
 | |
|                 probe_utils->send_msg("stdout", "d", "Get INT or TERM signal from STDIN");
 | |
|                 last;
 | |
| 
 | |
|                 # 2 all node have finished the provision
 | |
|             } elsif (all_monitor_node_done(\%node_state)) {
 | |
|                 probe_utils->send_msg("stdout", "o", "All nodes specified to monitor, have finished OS provision process");
 | |
|                 last;
 | |
| 
 | |
|                 # 3 exceed the max waiting time
 | |
|             } elsif (time() - $starttime > ($maxwaittime * 60)) {
 | |
|                 probe_utils->send_msg("stdout", "i", "$maxwaittime minutes have expired, stop monitoring");
 | |
|                 last;
 | |
|             } else {
 | |
|                 sleep 0.01;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         conclusion_report(\%node_state);
 | |
|         $log_parse->destory();
 | |
|     }
 | |
| 
 | |
|     # close all running sub process
 | |
|     my $existrunningpid = 0;
 | |
|     $existrunningpid = 1 if (@openfilepids);
 | |
|     my $trytime = 0;
 | |
|     while ($existrunningpid) {
 | |
| 
 | |
|         #send terminal signal to all running process at same time
 | |
|         if ($try < 5) {    #try INT 5 up to 5 times
 | |
|             foreach my $pid (@openfilepids) {
 | |
|                 kill 'INT', $pid if ($pid);
 | |
|             }
 | |
|         } elsif ($try < 10) {    #try TERM 5 up to 5 times
 | |
|             foreach my $pid (@openfilepids) {
 | |
|                 kill 'TERM', $pid if ($pid);
 | |
|             }
 | |
|         } else {                 #try KILL 1 time
 | |
|             foreach my $pid (@openfilepids) {
 | |
|                 kill 'KILL', $pid if ($pid);
 | |
|             }
 | |
|         }
 | |
|         ++$try;
 | |
|         sleep 1;
 | |
| 
 | |
|         #To check how many process exit, set the flag of exited process to 0
 | |
|         for (my $i = 0 ; $i <= $#openfilepids ; $i++) {
 | |
|             $openfilepids[$i] = 0 if (waitpid($openfilepids[$i], WNOHANG));
 | |
|         }
 | |
| 
 | |
|         #To check if there are processes still running, if there are, try kill again in next loop
 | |
|         $existrunningpid = 0;
 | |
|         $existrunningpid |= $_ foreach (@openfilepids);
 | |
| 
 | |
|         #just try 10 times, if still can't kill some process, give up
 | |
|         if ($try > 10) {
 | |
|             my $leftpid;
 | |
|             foreach my $pid (@openfilepids) {
 | |
|                 $leftpid .= "$pid " if ($pid);
 | |
|             }
 | |
|             probe_utils->send_msg("stdout", "d", "Can't stop process $leftpid, please handle manually.");
 | |
|             last;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # close all openning file descriptors
 | |
|     close($_) foreach (@openfilefds);
 | |
| 
 | |
|     return $rst;
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Check if all node have been finished the provision process
 | |
|     Arguments:
 | |
|         node_state_ref: The reference of hash "%node_state". refer to function "init_node_state" for the structure of "%node_state"
 | |
|     Returns:
 | |
|         0: success
 | |
|         1: failed
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub all_monitor_node_done {
 | |
|     my $node_state_ref = shift;
 | |
|     my $done           = 1;
 | |
| 
 | |
|     foreach my $node (keys %$node_state_ref) {
 | |
|         if ($node_state_ref->{$node}{done} == 0) {
 | |
|             $done = 0;
 | |
|             last;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return $done;
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Handle one line DHCP log
 | |
|     Arguments:
 | |
|         log_ref: (input attribute) the reference of hash which save one line dhcp log. 
 | |
|         node_state_ref: (output attribute), the reference of hash "%node_state". refer to function "init_node_state" for the structure of "%node_state" 
 | |
|     Returns:
 | |
|         NULL
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub handle_dhcp_msg {
 | |
|     my $log_ref        = shift;
 | |
|     my $node_state_ref = shift;
 | |
|     if ($log_ref->{msg} =~ /DHCPDISCOVER\s+from\s+(.+)\s+via\s+([^:]+)(.*)/i) {
 | |
|         my $mac = $1;
 | |
|         my $nic = $2;
 | |
| 
 | |
|         if (exists $macmap{$mac}) {
 | |
|             my $node   = $macmap{$mac}{"node"};
 | |
|             my $record = "Receive DHCPDISCOVER via $nic";
 | |
|             probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
 | |
|             push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
 | |
|         }
 | |
|     } elsif ($log_ref->{msg} =~ /DHCPOFFER\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/i) {
 | |
|         my $ip  = $1;
 | |
|         my $mac = $2;
 | |
|         my $nic = $3;
 | |
| 
 | |
|         if (exists $macmap{$mac}) {
 | |
|             my $node   = $macmap{$mac}{"node"};
 | |
|             my $record = "Send DHCPOFFER on $ip back to $mac via $nic";
 | |
|             probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
 | |
|             push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
 | |
|         }
 | |
|     } elsif ($log_ref->{msg} =~ /DHCPREQUEST\s+for\s+(.+)\s+[\(\)0-9\.]*\s*from\s+(.+)\s+via\s+(.+)/) {
 | |
|         my $ip  = $1;
 | |
|         my $mac = $2;
 | |
|         my $nic = $3;
 | |
|  
 | |
|         if (exists $macmap{$mac}) {
 | |
|             my $node   = $macmap{$mac}{"node"};
 | |
|             my $record = $log_ref->{msg};
 | |
|             probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
 | |
|             push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
 | |
|         }
 | |
|     } elsif ($log_ref->{msg} =~ /DHCPACK\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/) {
 | |
|         my $ip  = $1;
 | |
|         my $mac = $2;
 | |
|         my $nic = $3;
 | |
| 
 | |
|         if (exists $macmap{$mac}) {
 | |
|             my $node   = $macmap{$mac}{"node"};
 | |
|             my $record = "Send DHCPACK on $ip back to $mac via $nic";
 | |
|             probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
 | |
|             push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
 | |
| 
 | |
|             if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) {
 | |
|                 my $warn_msg = "The ip($ip) assigned to $mac via DHCP is different from the ip($macmap{$mac}{'ip'}) in node definition.";
 | |
| 
 | |
|                 #probe_utils->send_msg("stdout", "w", "$warn_msg") if ($monitor);
 | |
|                 probe_utils->send_msg("stdout", "w", "$warn_msg");
 | |
|             }
 | |
| 
 | |
|             $ipnodemap{$ip} = $node;
 | |
|             set_node_state($node_state_ref, $node, $::STATE_DHCP);
 | |
|             if ($performance and !$node_status_time{$node}{$::STATE_DHCP}{end_time}) {
 | |
|                 $node_status_time{$node}{$::STATE_DHCP}{time_point} = $log_ref->{time_record};
 | |
|                 $node_status_time{$node}{$::STATE_DHCP}{end_time} = $log_ref->{time};
 | |
|             }
 | |
|         }
 | |
|     } elsif ($log_ref->{msg} =~ /BOOTREQUEST\s+from\s+(.+)\s+via\s+([^:]+)(.*)/) {
 | |
|         my $mac = $1;
 | |
|         my $nic = $2;
 | |
|         if (exists $macmap{$mac}) {
 | |
|             my $node   = $macmap{$mac}{"node"};
 | |
|             my $record = "Receive BOOTREQUEST from $mac via $nic";
 | |
|             probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
 | |
|             push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
 | |
|         }
 | |
|     } elsif ($log_ref->{msg} =~ /BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) {
 | |
|         my $ip  = $1;
 | |
|         my $mac = $2;
 | |
|         my $nic = $3;
 | |
| 
 | |
|         if (exists $macmap{$mac}) {
 | |
|             my $node   = $macmap{$mac}{"node"};
 | |
|             my $record = "Send BOOTREPLY on $ip back to $mac via $nic";
 | |
|             probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
 | |
|             push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
 | |
| 
 | |
|             if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) {
 | |
|                 my $warn_msg = "The ip($ip) assigned to $mac via DHCP is different from the ip($macmap{$mac}{'ip'}) in node definition.";
 | |
|                 probe_utils->send_msg("stdout", "w", "$warn_msg");
 | |
|             }
 | |
| 
 | |
|             $ipnodemap{$ip} = $node;
 | |
|             set_node_state($node_state_ref, $node, $::STATE_DHCP);
 | |
|             if ($performance and !$node_status_time{$node}{$::STATE_DHCP}{end_time}) {
 | |
|                 $node_status_time{$node}{$::STATE_DHCP}{end_time} = $log_ref->{time};
 | |
|                 $node_status_time{$node}{$::STATE_DHCP}{time_point} = $log_ref->{time_record};
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Handle one line TFTP log
 | |
|     Arguments:
 | |
|         log_ref: (input attribute) the reference of hash which save one line TFTP log.
 | |
|         node_state_ref: (output attribute), the reference of hash "%node_state". refer to function "init_node_state" for the structure of "%node_state"
 | |
|     Returns:
 | |
|         NULL
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub handle_tftp_msg {
 | |
|     my $log_ref        = shift;
 | |
|     my $node_state_ref = shift;
 | |
| 
 | |
|     if ($log_ref->{msg} =~ /RRQ\s+from\s+(.+)\s+filename\s+(.+)/i) {
 | |
|         my $ip     = $1;
 | |
|         my $file   = $2;
 | |
|         my $record = "Via TFTP download $file";
 | |
|         if (exists($node_state_ref->{ $ipnodemap{$ip} })) {
 | |
|             probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $log_ref->{time_record} $record") if ($monitor);
 | |
|             push(@{ $node_state_ref->{ $ipnodemap{$ip} }{log} }, $log_ref->{msg}) if ($debug);
 | |
| 
 | |
|             if ($file =~ /xcat\/xnba.*/i or $file =~ /\/boot\/grub2\/powerpc-ieee1275\//i or $file =~ /\/yb\/node\/yaboot\-/i) {
 | |
|                 set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_BOOTLODER);
 | |
|                 if ($performance) {
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_BOOTLODER}{start_time} = $log_ref->{time} if (!$node_status_time{$ipnodemap{$ip}}{$::STATE_BOOTLODER}{start_time});
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_BOOTLODER}{time_point} = $log_ref->{time_record} if (!$node_status_time{$ipnodemap{$ip}}{$::STATE_BOOTLODER}{time_point});
 | |
|                 }
 | |
|             } elsif ($file =~ /vmlinuz|inst64|linux/) {
 | |
|                 set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KERNEL);
 | |
|                 if ($performance) {
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_KERNEL}{start_time} = $log_ref->{time};
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_KERNEL}{time_point} = $log_ref->{time_record};
 | |
|                 }
 | |
|             } elsif ($file =~ /initrd/i) {
 | |
|                 set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INITRD);
 | |
|                 if ($performance) {
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_INITRD}{start_time} = $log_ref->{time};
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_INITRD}{time_point} = $log_ref->{time_record};
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Handle one line HTTP log
 | |
|     Arguments:
 | |
|         log_ref: (input attribute) the reference of hash which save one line HTTP log.
 | |
|         node_state_ref: (output attribute), the reference of hash "%node_state". refer to function "init_node_state" for the structure of "%node_state"
 | |
|     Returns:
 | |
|         NULL
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub handle_http_msg {
 | |
|     my $log_ref        = shift;
 | |
|     my $node_state_ref = shift;
 | |
|     my $ip             = $log_ref->{sender};
 | |
| 
 | |
|     if (exists($node_state_ref->{ $ipnodemap{$ip} })) {
 | |
| 
 | |
|         if ($log_ref->{msg} =~ /"GET\s+(.+)\s+HTTP.+" (\d+)/ or $log_ref->{msg} =~ /"HEAD\s+(.+)\s+HTTP.+" (\d+)/) {
 | |
|             my $file   = $1;
 | |
|             my $http_code = $2;
 | |
|             my $record = "Via HTTP get $file";
 | |
| 
 | |
|             probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $log_ref->{time_record} $record") if ($monitor);
 | |
|             push(@{ $node_state_ref->{ $ipnodemap{$ip} }{log} }, $log_ref->{msg}) if ($debug);
 | |
| 
 | |
|             if ($file =~ /vmlinuz|inst64/i or ($file =~ /linux/i and $file =~ /osimage/i)) {
 | |
|                 set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KERNEL);
 | |
|                 if ($performance) {
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_KERNEL}{start_time} = $log_ref->{time};
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_KERNEL}{time_point} = $log_ref->{time_record};
 | |
|                 }
 | |
|                 push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KERNEL} }, "$record failed with $http_code") if ($http_code >= 400);
 | |
|             } elsif ($file =~ /initrd/i and $file =~ /osimage/i) {
 | |
|                 set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INITRD);
 | |
|                 if ($performance) {
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_INITRD}{start_time} = $log_ref->{time};
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_INITRD}{time_point} = $log_ref->{time_record};
 | |
|                 }
 | |
|                 push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INITRD} }, "$record failed with $http_code") if ($http_code >= 400);
 | |
|             } elsif (($file =~ /^\/install\/autoinst\//i) and ($file !~ /getinstdisk$/i) and ($file !~ /\.pre$/i) and ($file !~ /\.post$/i)) {
 | |
|                 set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KICKSTART);
 | |
|                 if ($performance) {
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_KICKSTART}{start_time} = $log_ref->{time};
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_KICKSTART}{time_point} = $log_ref->{time_record};
 | |
|                 }
 | |
|                 push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KICKSTART} }, "$record failed with $http_code") if ($http_code >= 400);
 | |
|             } elsif ($file =~ /\.deb$/i or $file =~ /\/Packages\/.+\.rpm$/ or $file =~ /\/suse\/noarch\/.+\.rpm$/i) {
 | |
|                 set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INSTALLRPM);
 | |
|                 if ($performance) {
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{end_time} = $log_ref->{time};
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{start_time} =  $log_ref->{time}  unless ( $node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{start_time} );
 | |
|                     $node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{time_point} = $log_ref->{time_record} unless ( $node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{time_point} );
 | |
|                 }
 | |
|                 push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INSTALLRPM} }, "$record failed with $http_code") if ($http_code >= 400);
 | |
|             } elsif ($file =~ /rootimg/) {
 | |
|                  set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_ROOTIMG);
 | |
|                  if ($performance) {
 | |
|                      $node_status_time{$ipnodemap{$ip}}{$::STATE_ROOTIMG}{start_time} = $log_ref->{time};
 | |
|                      $node_status_time{$ipnodemap{$ip}}{$::STATE_ROOTIMG}{time_point} = $log_ref->{time_record};
 | |
|                  }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Handle one line log comes from cluster.log
 | |
|     Arguments:
 | |
|         log_ref: (input attribute) the reference of hash which save one line log comes from cluster.log.
 | |
|         node_state_ref: (output attribute), the reference of hash "%node_state". refer to function "init_node_state" for the structure of "%node_state"
 | |
|     Returns:
 | |
|         NULL
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub handle_cluster_msg {
 | |
|     my $log_ref        = shift;
 | |
|     my $node_state_ref = shift;
 | |
| 
 | |
|     my $log_msg = $log_ref->{msg};
 | |
|     my @split_log = split(/\s+/, $log_msg);
 | |
| 
 | |
|     if ($split_log[1] eq "Allowing") {
 | |
|         my $command     = $split_log[2];
 | |
|         my $nodes_str   = $split_log[4];
 | |
|         my $sub_command = $split_log[5];
 | |
| 
 | |
|         if ($command eq "rinstall" or $command eq "rnetboot" or ($command eq "rpower" and $sub_command =~ /on|boot|reset/) or ($command eq "xdsh" and $log_msg =~ /reboot|shutdown -r/)) {
 | |
|             my @nodes = probe_utils->parse_node_range($nodes_str);
 | |
|             foreach my $node (@nodes) {
 | |
|                 if (exists $node_state_ref->{$node}) {
 | |
|                     probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} Use command $command to reboot node $node") if ($monitor);
 | |
|                     push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
 | |
|                     set_node_state($node_state_ref, $node, $::STATE_POWER_ON);
 | |
|                     if ($performance) {
 | |
|                         $node_status_time{$node}{$::STATE_POWER_ON}{start_time} = $log_ref->{time};
 | |
|                         $node_status_time{$node}{$::STATE_POWER_ON}{time_point} = $log_ref->{time_record};
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     } elsif ($log_ref->{msg} =~ /(.+)\s+status:\s+(.+)\s+statustime:/) {
 | |
|         my $nodes  = $1;
 | |
|         my $status = $2;
 | |
|         my $record = "Node status is changed to $status";
 | |
|         my @split_node = split(" ", $nodes);
 | |
| 
 | |
|         foreach my $node (@split_node) {
 | |
|             if (exists $node_state_ref->{$node}) {
 | |
|                 probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
 | |
|                 push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
 | |
| 
 | |
|                 if ($status eq "installing") {
 | |
|                     set_node_state($node_state_ref, $node, $::STATE_INSTALLING);
 | |
|                     if ($performance) {
 | |
|                         $node_status_time{$node}{$::STATE_INSTALLING}{start_time} = $log_ref->{time};
 | |
|                         $node_status_time{$node}{$::STATE_INSTALLING}{time_point} = $log_ref->{time_record};
 | |
|                     }
 | |
|                 } elsif ($status eq "powering-on") {
 | |
|                     set_node_state($node_state_ref, $node, $::STATE_POWERINGON);
 | |
|                     if ($performance) {
 | |
|                         $node_status_time{$node}{$::STATE_POWERINGON}{start_time} = $log_ref->{time};
 | |
|                         $node_status_time{$node}{$::STATE_POWERINGON}{time_point} = $log_ref->{time_record};
 | |
|                     }
 | |
|                 } elsif ($status eq "booting") {
 | |
|                     set_node_state($node_state_ref, $node, $::STATE_BOOTING);
 | |
|                     if ($performance) {
 | |
|                         $node_status_time{$node}{$::STATE_BOOTING}{start_time} = $log_ref->{time};
 | |
|                         $node_status_time{$node}{$::STATE_BOOTING}{time_point} = $log_ref->{time_record};
 | |
|                     }
 | |
|                 } elsif ($status eq "netbooting") {
 | |
|                     set_node_state($node_state_ref, $node, $::STATE_NETBOOTING);
 | |
|                     if ($performance) {
 | |
|                         $node_status_time{$node}{$::STATE_NETBOOTING}{start_time} = $log_ref->{time};
 | |
|                         $node_status_time{$node}{$::STATE_NETBOOTING}{time_point} = $log_ref->{time_record};
 | |
|                     }
 | |
|                 } elsif ($status eq "failed") {
 | |
|                     $node_state_ref->{$node}{done} = 1;
 | |
|                 } 
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Handle one line log comes from computes.log
 | |
|     Arguments:
 | |
|         log_ref: (input attribute) the reference of hash which save one line log comes from computes.log.
 | |
|         node_state_ref: (output attribute), the reference of hash "%node_state". refer to function "init_node_state" for the structure of "%node_state"
 | |
|     Returns:
 | |
|         NULL
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub handle_compute_msg {
 | |
|     my $log_ref        = shift;
 | |
|     my $node_state_ref = shift;
 | |
|     my $node           = $log_ref->{sender};
 | |
| 
 | |
|     if (exists $node_state_ref->{$node}) {
 | |
|         probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $log_ref->{msg}") if ($monitor);
 | |
|         push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
 | |
|         if ($log_ref->{msg} =~ /Running postscript:/i) {
 | |
|             set_node_state($node_state_ref, $node, $::STATE_POSTSCRIPT);
 | |
|             if ($performance) {
 | |
|                 $node_status_time{$node}{$::STATE_POSTSCRIPT}{start_time} = $log_ref->{time} unless ($node_status_time{$node}{$::STATE_POSTSCRIPT}{start_time});
 | |
|                 $node_status_time{$node}{$::STATE_POSTSCRIPT}{time_point} = $log_ref->{time_record} unless ($node_status_time{$node}{$::STATE_POSTSCRIPT}{time_point});
 | |
|             }
 | |
|         } elsif ($log_ref->{msg} =~ /postscript (.+) return with (\d+)/) {
 | |
|             my $script_name = $1;
 | |
|             my $return_code = $2;
 | |
|             if ($return_code != 0) {
 | |
|                 my $error_str = "postscript $script_name return with $return_code";
 | |
|                 # when monitor, will show 2 same messages, so filter to show only one
 | |
|                 unless (grep {$_ eq $error_str} @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }) {
 | |
|                     push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }, $error_str;
 | |
|                 }
 | |
|             }
 | |
|             $node_status_time{$node}{$::STATE_POSTSCRIPT}{end_time} = $log_ref->{time} if ($performance);
 | |
|         } elsif ($log_ref->{msg} =~ /Running postbootscript:/i) {
 | |
|             set_node_state($node_state_ref, $node, $::STATE_POSTBOOTSCRIPT);
 | |
|             if ($performance) {
 | |
|                 $node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{start_time} = $log_ref->{time} unless ($node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{start_time});
 | |
|                 $node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{time_point} = $log_ref->{time_record} unless ($node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{time_point});
 | |
|             }
 | |
|         } elsif ($log_ref->{msg} =~ /postbootscript (.+) return with (\d+)/) {
 | |
|             my $script_name = $1;
 | |
|             my $return_code = $2;
 | |
|             if ($return_code != 0) {
 | |
|                 my $error_str = "postbootscript $script_name return with $return_code";
 | |
|                 # when monitor, will show 2 same messages, so filter to show only one
 | |
|                 unless (grep {$_ eq $error_str} @{ $node_state_ref->{$node}{errors}{$::STATE_POSTBOOTSCRIPT} }) {
 | |
|                     push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTBOOTSCRIPT} }, $error_str;
 | |
|                 }
 | |
|             }
 | |
|             $node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{end_time} = $log_ref->{time} if ($performance);
 | |
|         } elsif ($log_ref->{msg} =~ /provision completed/) {
 | |
|             set_node_state($node_state_ref, $node, $::STATE_COMPLETED);
 | |
|             $node_state_ref->{$node}{done} = 1;
 | |
|             probe_utils->send_msg("stdout", "o", "[$node] $log_ref->{time_record} provision completed") if ($monitor);
 | |
|             if ($performance) {
 | |
|                 $node_status_time{$node}{$::STATE_COMPLETED}{start_time} = $log_ref->{time};
 | |
|                 $node_status_time{$node}{$::STATE_COMPLETED}{time_point} = $log_ref->{time_record};
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Dispatch log to related handler 
 | |
|     Arguments:
 | |
|         log_ref: (input attribute) the reference of hash which save one line log comes from computes.log.
 | |
|         candidate_mn_hostname_in_log_ref: (input attribute) The reference of array which save the candidate host name of MN 
 | |
|         node_state_ref: (output attribute), the reference of hash "%node_state". refer to function "init_node_state" for the structure of "%node_state"
 | |
|     Returns:
 | |
|         NULL
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub dispatch_log_to_handler {
 | |
|     my $log_ref                          = shift;
 | |
|     my $candidate_mn_hostname_in_log_ref = shift;
 | |
|     my $node_state_ref                   = shift;
 | |
| 
 | |
|     if ($log_ref->{label} == $::LOGLABEL_DHCPD) {
 | |
|         handle_dhcp_msg($log_ref, $node_state_ref);
 | |
|     } elsif ($log_ref->{label} == $::LOGLABEL_TFTP) {
 | |
|         handle_tftp_msg($log_ref, $node_state_ref);
 | |
|     } elsif ($log_ref->{label} == $::LOGLABEL_XCAT) {
 | |
|         if (grep(/$log_ref->{sender}/, @$candidate_mn_hostname_in_log_ref)) {
 | |
|             handle_cluster_msg($log_ref, $node_state_ref);
 | |
|         } else {
 | |
|             handle_compute_msg($log_ref, $node_state_ref);
 | |
|         }
 | |
|     } elsif ($log_ref->{label} == $::LOGLABEL_HTTP) {
 | |
|         handle_http_msg($log_ref, $node_state_ref);
 | |
|     }
 | |
| }
 | |
| 
 | |
| #------------------------------------------
 | |
| 
 | |
| =head3
 | |
|     Description:
 | |
|         Set node state in hash %node_state
 | |
|     Arguments:
 | |
|         node_state_ref: (input/output attribute), the reference of hash "%node_state". refer to function "init_node_state" for the structure of "%node_state"
 | |
|         node : (input attribute) The node name
 | |
|         newstate : (input attribute) The new state of node 
 | |
|     Returns:
 | |
|         NULL
 | |
| =cut
 | |
| 
 | |
| #------------------------------------------
 | |
| sub set_node_state {
 | |
|     my $node_state_ref = shift;
 | |
|     my $node           = shift;
 | |
|     my $newstate       = shift;
 | |
| 
 | |
|     if ($newstate == $::STATE_POWER_ON) {
 | |
|         push @{ $node_state_ref->{$node}{allstatehistory} }, @{ $node_state_ref->{$node}{statehistory} };
 | |
|         @{ $node_state_ref->{$node}{statehistory} } = ();
 | |
|         %{ $node_state_ref->{$node}{errors} } = ();
 | |
|         push @{ $node_state_ref->{$node}{statehistory} }, $newstate;
 | |
|         $node_status_time{$node} = ();
 | |
|     } else {
 | |
|         my $index = @{ $node_state_ref->{$node}{statehistory} } - 1;
 | |
| 
 | |
|         if ($node_state_ref->{$node}{statehistory}->[$index] != $newstate) {
 | |
|             push @{ $node_state_ref->{$node}{statehistory} }, $newstate;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 |