mirror of
https://github.com/xcat2/xcat-core.git
synced 2025-05-25 05:02:05 +00:00
1079 lines
38 KiB
Perl
Executable File
1079 lines
38 KiB
Perl
Executable File
#! /usr/bin/perl
|
|
# IBM(c) 2016 EPL license http://www.eclipse.org/legal/epl-v10.html
|
|
|
|
BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; }
|
|
|
|
use lib "$::XCATROOT/probe/lib/perl";
|
|
|
|
use probe_utils;
|
|
use xCAT::NetworkUtils;
|
|
use File::Basename;
|
|
use IO::Select;
|
|
use Time::Local;
|
|
use Data::Dumper;
|
|
use Getopt::Long qw(:config no_ignore_case);
|
|
|
|
# Name under which this probe was invoked; interpolated into usage text and messages.
my $program_name = basename($0);

# Values filled in from the command line (or derived in the main flow).
my ($help, $test, $noderange, $installnic);
my $replaylog;    # start-time argument of the "replay deployment log" feature

my $maxwaittime = 60;          # unit is minute
my $output      = "stdout";    # first argument handed to probe_utils->send_msg
my $verbose     = 0;
my $rst         = 0;           # exit code of the script
my $terminal    = 0;           # set to 1 by the INT/TERM signal handler
my $monitor     = 0;           # set to 1 while realtime monitoring is running

# %rawdata : per-node record of everything observed
#   $rawdata{nodename}{"history"}       array, the log history of the node
#   $rawdata{nodename}{"state"}         latest state of the node (state machine)
#   $rawdata{nodename}{"statehistory"}  array, every state the node went through
my %rawdata;

# %macmap : MAC address lookup
#   $macmap{mac_addr}{"ip"}   = "x.x.x.x"
#   $macmap{mac_addr}{"node"} = "nodename"
my %macmap;

# %ipnodemap : IP address lookup
#   $ipnodemap{ip_addr} = "nodename"
my %ipnodemap;

# %monitor_nodes : nodes being watched
#   $monitor_nodes{nodename}{"status"}  useless now
#   $monitor_nodes{nodename}{"rst"}
my %monitor_nodes;
|
|
|
|
# Provision state machine.
# %state_set maps each state name to its numeric id.
my %state_set = (
    unknown                  => 0,
    server_reboot            => 1,
    loaded_kernel_and_initrd => 2,
    kernel_and_initrd_got_ip => 3,
    install_os_packages      => 4,
    run_postscript           => 5,
    run_postbootscript       => 6,
    done                     => 7,
);

# Numeric id back to state name (ids are unique, so the hash can simply be inverted).
my %state_set_reverse = reverse %state_set;

# Known-good state sequences. Each entry has the ordered list of state ids
# ("process") and whether that sequence is a plain "reboot" or a full "deploy".
my %valid_process = (
    1 => {
        type    => "reboot",
        process => [ @state_set{qw(unknown done)} ],
    },
    2 => {
        type    => "reboot",
        process => [ @state_set{qw(unknown server_reboot done)} ],
    },
    3 => {
        type    => "reboot",
        process => [ @state_set{qw(unknown server_reboot run_postbootscript done)} ],
    },
    4 => {
        type    => "deploy",
        process => [
            @state_set{
                qw(unknown server_reboot loaded_kernel_and_initrd kernel_and_initrd_got_ip
                  install_os_packages run_postscript server_reboot run_postbootscript done)
            }
        ],
    },
    5 => {
        type    => "deploy",
        process => [
            @state_set{
                qw(unknown loaded_kernel_and_initrd kernel_and_initrd_got_ip
                  install_os_packages run_postscript server_reboot run_postbootscript done)
            }
        ],
    },
);
|
|
|
|
# Advance a node's state according to an observed event.
# Arguments:
#     state_ref: reference to the scalar holding the node's current state id
#     condition: event name ("dhcp", "tftp", "http", "postscript", "booted", "poweron")
# Returns:
#     1 : the state was changed (the referenced scalar now holds the new id)
#     0 : the event is not a valid transition from the current state
sub reset_state {
    my ($state_ref, $condition) = @_;

    # current state name => { event => next state name }
    my %next_state_for = (
        unknown => {
            dhcp   => "server_reboot",
            booted => "done",
            tftp   => "loaded_kernel_and_initrd",
            http   => "loaded_kernel_and_initrd",
        },
        server_reboot => {
            booted     => "done",
            postscript => "run_postbootscript",
            tftp       => "loaded_kernel_and_initrd",
            http       => "loaded_kernel_and_initrd",
        },
        loaded_kernel_and_initrd => { dhcp       => "kernel_and_initrd_got_ip" },
        kernel_and_initrd_got_ip => { http       => "install_os_packages" },
        install_os_packages      => { postscript => "run_postscript" },
        run_postscript           => { dhcp       => "server_reboot" },
        run_postbootscript       => { booted     => "done" },
        done                     => {
            dhcp    => "server_reboot",
            poweron => "server_reboot",
        },
    );

    # State ids are compared numerically, exactly as the ids are stored.
    my ($current) = grep { $$state_ref == $state_set{$_} } keys %state_set;
    return 0 unless (defined($current));

    my $target = $next_state_for{$current}{$condition};
    return 0 unless (defined($target));

    $$state_ref = $state_set{$target};
    return 1;
}
|
|
|
|
|
|
# Names under which this server may appear in the host column of syslog lines:
# the short hostname and "<short>.<domain>".
chomp(my $svr_hostname_short  = `hostname -s`);
chomp(my $svr_hostname_domain = `hostname -d`);
my @candidate_svr_hostname_inlog = (
    $svr_hostname_short,
    "$svr_hostname_short.$svr_hostname_domain",
);
|
|
|
|
|
|
# Usage text printed for -h and on invalid invocation.
# The monitor synopsis lists -t, which the Options section documents.
$::USAGE = "Usage:
    $program_name -h
    $program_name -T
    $program_name -n <node_range> [-t <max_waiting_time>] [-V]
    $program_name -n <node_range> -r <xxhxxm> [-V]

Description:
    Probe for OS provision process. Realtime monitor or replay history of OS provision process.
    If realtime monitor, run this before 'rpower' node.
    Currently, hierarchical structure is not supported.

Options:
    -h : Get usage information of $program_name
    -T : Verify if $program_name can work, reserved option for probe framework
    -V : Output more information for debug
    -n : The range of nodes for monitor or replay log.
    -t : The maximum time in minutes to wait when doing monitor, default is 60.
    -r : Replay history log for probe provisioning. Input a start time when probe should begin.
         Supported time formats are xxhxxm, xxh, or xxm. If units not specified, hour will be used by default.
";
|
|
|
|
#------------------------------------------
|
|
|
|
=head3
|
|
Description:
|
|
Check if all nodes are valid
|
|
Arguments:
|
|
node_range: node range
|
|
Returns:
|
|
0 : pass
|
|
1 : failed
|
|
=cut
|
|
|
|
#------------------------------------------
|
|
# Validate the nodes of a noderange and fill the global lookup tables.
#
# Runs "lsdef <node_range> -i ip,mac -c" and parses its output line by line.
# For every node without an error:
#   - each MAC (the mac attribute may hold several, separated by '|') is
#     recorded in %macmap with its node name and IP; a MAC carrying the
#     "!*NOIP*" suffix gets the placeholder IP "NOIP";
#   - when the definition has no ip attribute the node name is resolved with
#     xCAT::NetworkUtils->getipaddr;
#   - the node is registered in %monitor_nodes with "rst" = 0.
# Arguments:
#     node_range: node range
# Returns:
#     0 : pass
#     1 : failed (unknown node, missing MAC, unresolvable node, or no valid node at all)
sub check_noderange {
    my $node_range = shift;
    my @cmdoutput  = `lsdef $node_range -i ip,mac -c 2>&1`;
    my $rst        = 0;
    my %nodecheckrst;

    foreach my $line (@cmdoutput) {
        chomp($line);
        $line =~ s/^\s+|\s+$//g;

        # Capture everything between the quotes: node names may contain
        # characters such as '-' or '.' that \w does not match.
        if ($line =~ /^Error: Could not find an object named '([^']+)' .+/i) {
            $nodecheckrst{$1}{"error"} = "Could not find node definition";
        } elsif ($line =~ /(\S+):\s+mac=(.*)/i) {
            my ($node, $mac) = ($1, $2);
            if ($mac) {
                $nodecheckrst{$node}{"mac"} = $mac;
            } else {
                $nodecheckrst{$node}{"error"} = "Node $node doesn't have MAC address";
            }
        } elsif ($line =~ /(\S+):\s+ip=(.*)/i) {
            my ($node, $ip) = ($1, $2);
            $nodecheckrst{$node}{"ip"} = $ip if ($ip);
        }
    }

    foreach my $node (keys %nodecheckrst) {
        if (exists($nodecheckrst{$node}{error})) {
            probe_utils->send_msg("$output", "d", "$node : $nodecheckrst{$node}{error}");
            $rst = 1;
            next;
        }

        my $noerror = 1;
        my @macs = split(/\|/, $nodecheckrst{$node}{"mac"});
        foreach my $mac (@macs) {

            #[NOTE] don't support 2 adapters in the same network now. TODO

            if ($mac =~ /\!\*NOIP\*/) {

                # Strip the marker so the key is the bare MAC address.
                $mac =~ s/\!\*NOIP\*//g;
                $macmap{$mac}{"ip"}   = "NOIP";
                $macmap{$mac}{"node"} = $node;
                next;
            }

            $macmap{$mac}{"node"} = $node;
            if ($nodecheckrst{$node}{"ip"}) {
                $macmap{$mac}{"ip"} = $nodecheckrst{$node}{"ip"};
                next;
            }

            # No ip attribute in the definition: fall back to name resolution.
            my $nodeip = xCAT::NetworkUtils->getipaddr($node);
            if ($nodeip) {
                $macmap{$mac}{"ip"} = $nodeip;
            } else {
                $noerror = 0;
                $rst     = 1;
                probe_utils->send_msg("$output", "f", "$node : can't be resolved to an IP address");
            }
        }

        $monitor_nodes{$node}{"rst"} = 0 if ($noerror);
    }

    unless (%monitor_nodes) {
        probe_utils->send_msg("$output", "d", "There is no valid node to handle");
        $rst = 1;
    }

    return $rst;
}
|
|
|
|
#------------------------------------------
|
|
|
|
=head3
|
|
Description:
|
|
Handle one line log come from dhcp log file
|
|
Arguments:
|
|
msg: one line dhcp log
|
|
installnic: target network interfaces
|
|
Returns:
|
|
0 : pass
|
|
1 : failed
|
|
=cut
|
|
|
|
#------------------------------------------
|
|
# Record a DHCP/BOOTP event for the node owning $mac.
# Does nothing when the MAC is not in %macmap. Otherwise the record is
# appended to the node's history (and printed while monitoring).
# Returns the node name, or nothing when the MAC is unknown.
sub _record_dhcp_event {
    my ($mac, $record) = @_;

    return unless (exists $macmap{$mac});

    my $node = $macmap{$mac}{"node"};
    probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor);
    push(@{ $rawdata{$node}{"history"} }, $record);
    return $node;
}

# Handle a message in which the server hands an address to a known MAC
# (DHCPACK / BOOTREPLY): record it, remember the ip->node mapping, feed the
# "dhcp" event to the state machine and warn when the assigned IP differs
# from the one in the node definition.
sub _confirm_dhcp_lease {
    my ($mac, $ip, $record) = @_;

    return unless (exists $macmap{$mac});

    my $node = _record_dhcp_event($mac, $record);
    $ipnodemap{$ip} = $node;
    push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "dhcp"));

    # IP addresses and the "NOIP" placeholder are strings: they must be
    # compared with 'ne'. A numeric '!=' turns "NOIP" into 0 and compares only
    # the leading numeric part of each address.
    my $defined_ip = $macmap{$mac}{"ip"};
    if (defined($defined_ip) and $defined_ip ne "NOIP" and $defined_ip ne $ip) {
        my $warn_msg = "The ip($ip) assigned to $mac via DHCP is different from the ip($defined_ip) in node definition.";
        probe_utils->send_msg("$output", "w", "$warn_msg") if ($monitor);
        push(@{ $rawdata{$node}{"history"} }, $warn_msg);
    }
    return;
}

# Handle one line of dhcpd log.
# Recognises DHCPDISCOVER, DHCPOFFER, DHCPREQUEST (except lines containing
# "unknown lease" or "ignored"), DHCPACK, BOOTREQUEST and BOOTREPLY. Only
# messages whose MAC is present in %macmap are recorded.
# NOTE(review): the OFFER/ACK patterns capture everything between "to" and
# "via" as the MAC; confirm they still match when dhcpd appends a client
# hostname after the MAC.
# Arguments:
#     msg: one line dhcp log
# Returns:
#     0 : always
sub handle_dhcp_msg {
    my $msg = shift;

    if ($msg =~ /.+DHCPDISCOVER\s+from\s+(.+)\s+via\s+([^:]+)(.*)/i) {
        my ($mac, $nic) = ($1, $2);
        _record_dhcp_event($mac, "Receive DHCPDISCOVER via $nic");
    } elsif ($msg =~ /.+DHCPOFFER\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/i) {
        my ($ip, $mac, $nic) = ($1, $2, $3);
        _record_dhcp_event($mac, "Send DHCPOFFER on $ip back to $mac via $nic");
    } elsif ($msg !~ /unknown lease/ && $msg !~ /ignored/ && $msg =~ /.+DHCPREQUEST\s+for\s+(.+)\s\((.+)\)\s+from\s+(.+)\s+via\s+(.+)/) {

        # $2 is the server address in parentheses; it is not used.
        my ($ip, $mac, $nic) = ($1, $3, $4);
        _record_dhcp_event($mac, "Receive DHCPREQUEST from $mac for $ip via $nic");
    } elsif ($msg =~ /.+DHCPACK\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/) {
        my ($ip, $mac, $nic) = ($1, $2, $3);
        _confirm_dhcp_lease($mac, $ip, "Send DHCPACK on $ip back to $mac via $nic");
    } elsif ($msg =~ /.+BOOTREQUEST\s+from\s+(.+)\s+via\s+([^:]+)(.*)/) {
        my ($mac, $nic) = ($1, $2);
        _record_dhcp_event($mac, "Receive BOOTREQUEST from $mac via $nic");
    } elsif ($msg =~ /.+BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) {
        my ($ip, $mac, $nic) = ($1, $2, $3);
        _confirm_dhcp_lease($mac, $ip, "Send BOOTREPLY on $ip back to $mac via $nic");
    }

    return 0;
}
|
|
|
|
#------------------------------------------
|
|
|
|
=head3
|
|
Description:
|
|
Handle one line log come from tftp log file
|
|
Arguments:
|
|
msg: one line tftp log
|
|
Returns:
|
|
0 : pass
|
|
1 : failed
|
|
=cut
|
|
|
|
#------------------------------------------
|
|
# Handle one line of tftp log.
# For a read request ("RRQ from <ip> filename <file>") coming from an IP
# already mapped to a monitored node, record the download in the node's
# history and feed the "tftp" event to the state machine.
# Arguments:
#     msg: one line tftp log
# Returns:
#     0 : always (explicit, matching the other handle_*_msg routines)
sub handle_tftp_msg {
    my $msg = shift;

    if ($msg =~ /RRQ\s+from\s+(.+)\s+filename\s+(.+)/i) {
        my ($ip, $file) = ($1, $2);
        my $record = "Via TFTP download $file";

        # %ipnodemap is filled by the dhcp handler; an IP without a lease
        # seen so far has no node and is ignored.
        my $node = $ipnodemap{$ip};
        if (defined($node) && exists($rawdata{$node})) {
            probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor);
            push(@{ $rawdata{$node}{"history"} }, $record);
            push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "tftp"));
        }
    }

    return 0;
}
|
|
|
|
#------------------------------------------
|
|
|
|
=head3
|
|
Description:
|
|
Handle one line log come from http log file
|
|
Arguments:
|
|
msg: one line http log
|
|
Returns:
|
|
0 : pass
|
|
1 : failed
|
|
=cut
|
|
|
|
#------------------------------------------
|
|
# Handle one line of http access log.
# For a GET request coming from an IP already mapped to a monitored node,
# record the fetched path in the node's history and feed the "http" event to
# the state machine.
# Arguments:
#     msg: one line http log
# Returns:
#     0 : always
sub handle_http_msg {
    my $msg = shift;

    # Every dot of the dotted-quad is escaped so that only a literal '.'
    # separates the four numbers.
    if ($msg =~ /(\d+\.\d+\.\d+\.\d+)\s.+GET\s+(.+)\s+HTTP.+/) {
        my ($ip, $file) = ($1, $2);
        my $record = "Via HTTP get $file";

        # %ipnodemap is filled by the dhcp handler; unknown IPs are ignored.
        my $node = $ipnodemap{$ip};
        if (defined($node) && exists($rawdata{$node})) {
            probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor);
            push(@{ $rawdata{$node}{"history"} }, $record);
            push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "http"));
        }
    }

    return 0;
}
|
|
|
|
#------------------------------------------
|
|
|
|
=head3
|
|
Description:
|
|
Handle one line log come from cluster.log
|
|
Arguments:
|
|
msg: one line log
|
|
Returns:
|
|
0 : pass
|
|
1 : failed
|
|
=cut
|
|
|
|
#------------------------------------------
|
|
# Handle one line coming from cluster.log.
# Only node status updates written by xcat are of interest, e.g.:
#   Aug 7 22:30:31 c910f02c01p09 xcat: c910f02c04p04 status: booted statustime: 08-07-2016 22:30:31
# For a monitored node the status change is recorded; "booted" marks the node
# as finished and feeds the "booted" event to the state machine,
# "powering-on" feeds the "poweron" event.
# Arguments:
#     line: one line log
# Returns:
#     0 : always
sub handle_cluster_msg {
    my $line   = shift;
    my @fields = split(/\s+/, $line);

    # The tag "xcat..." sits in the 5th or 6th whitespace separated column.
    return 0 unless (($fields[4] =~ /^xcat/i) || ($fields[5] =~ /^xcat/i));

    # Keep only "<node> status: <status> statustime: ..." lines.
    return 0 unless (($fields[6] =~ /^status:$/i) && ($fields[8] =~ /^statustime:$/));

    my ($node, $status) = @fields[ 5, 7 ];
    return 0 unless (exists($rawdata{$node}));

    my $record = "Node status is changed to $status";
    probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor);
    push(@{ $rawdata{$node}{"history"} }, $record);

    #one node finish deployment
    if ($status eq "booted") {
        $monitor_nodes{$node}{"rst"} = 1 if (defined($monitor_nodes{$node}));
        push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "booted"));
    }

    if ($status eq "powering-on") {
        push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "poweron"));
    }

    return 0;
}
|
|
|
|
#-----------------------------------------
|
|
|
|
=head3
|
|
Description:
|
|
Handle one line log come from computes.log
|
|
Arguments:
|
|
msg: one line compute log
|
|
Returns:
|
|
0 : pass
|
|
1 : failed
|
|
=cut
|
|
|
|
#-----------------------------------------
|
|
# Handle one line coming from computes.log (messages sent by compute nodes).
# The sender column is either a node name or an IP address; an IP is
# translated to a node name through %ipnodemap. Messages of monitored nodes
# are recorded, and a "Running postscript" message feeds the "postscript"
# event to the state machine.
# Arguments:
#     line: one line compute log
# Returns:
#     0 : always
sub handle_compute_msg {
    my $line = shift;

    # "<...> hh:mm:ss <sender> xcat..."
    return 0 unless ($line =~ /.+\d{2}:\d{2}:\d{2}\s+(.+)\s+(xcat.+)/i);
    my ($sender, $msg) = ($1, $2);

    my $node = xCAT::NetworkUtils->isIpaddr($sender) ? $ipnodemap{$sender} : $sender;
    return 0 unless ($node ne "" && exists($rawdata{$node}));

    probe_utils->send_msg("$output", "d", "[$node] $msg") if ($monitor);
    push(@{ $rawdata{$node}{"history"} }, $msg);

    #node start to run postscript or postbootscript
    if ($msg =~ /Running postscript/) {
        push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "postscript"));
    }

    return 0;
}
|
|
|
|
#------------------------------------------
|
|
|
|
=head3
|
|
Description:
|
|
Test if all nodes have finished job
|
|
Arguments:
|
|
One global attribute %monitor_nodes;
|
|
Returns:
|
|
1: all nodes finished
|
|
0: not all nodes finished
|
|
=cut
|
|
|
|
#------------------------------------------
|
|
# Tell whether every monitored node has finished.
# A node counts as unfinished while its "rst" entry in %monitor_nodes is 0.
# Returns:
#     1 : all nodes finished
#     0 : at least one node is not finished
sub all_monitor_node_done {
    my @pending = grep { $monitor_nodes{$_}{"rst"} == 0 } keys %monitor_nodes;
    return @pending ? 0 : 1;
}
|
|
|
|
#------------------------------------------
|
|
|
|
=head3
|
|
Description:
|
|
Dump monitor history, categorised by node name.
|
|
Arguments:
|
|
NULL
|
|
Returns:
|
|
=cut
|
|
|
|
#------------------------------------------
|
|
# Print the summary of the OS provision process for every node in %rawdata.
#
# With $verbose set, the recorded log history of the node is printed first;
# runs of consecutive "Via HTTP" records are condensed to the first record,
# a "Via HTTP .........." marker and the last two records of the run.
#
# Then the node's state history is matched against each sequence in
# %valid_process. %match_result maps "number of matched steps" to the list of
# process ids reaching that number; the best match decides the verdict:
#   - no match at all           : deployment failed, unknown process
#   - several equally good ones : deployment failed, candidates are listed
#   - a single one              : completed/failed for "deploy" or "reboot"
# When a whole process is matched and history remains, the matched part is
# removed and matching restarts on the remainder ($notfirstloop), skipping
# the initial "unknown" step of each process.
# Arguments:
#     NULL
# Returns:
#     nothing meaningful
sub dump_history {

    my $title = "
=============================================================
= The summary of os provision:
=============================================================
";
    print "$title\n";

    foreach my $node (keys %rawdata) {

        # Human readable list of every state the node went through.
        my $statelist = "";
        foreach my $state (@{ $rawdata{$node}{statehistory} }) {
            $statelist .= "$state_set_reverse{$state} ";
        }

        if ($verbose) {
            print "[$node]\n";

            my $httphit = 0;
            my @httptmp;
            foreach my $line (@{ $rawdata{$node}{"history"} }) {
                if ($line =~ /Via HTTP/) {
                    if ($httphit) {

                        # keep only the last two records of the run
                        shift @httptmp if ($#httptmp > 0);
                        push @httptmp, $line;
                    } else {
                        print "\t$line\n";
                        $httphit = 1;
                    }
                } else {
                    if ($#httptmp > -1) {
                        print "\tVia HTTP ..........\n";
                        print "\t$_\n" foreach (@httptmp);
                    }
                    @httptmp = ();
                    $httphit = 0;
                    print "\t$line\n";
                }
            }

            # A history ending with HTTP records still has buffered lines.
            if ($#httptmp > -1) {
                print "\tVia HTTP ..........\n";
                print "\t$_\n" foreach (@httptmp);
            }

            probe_utils->send_msg("$output", "d", "[$node] state history: $statelist");
        }

        my @tmpnodestatehistory = @{ $rawdata{$node}{statehistory} };

        my %match_result;
        my $procidx      = 0;
        my $newloop      = 0;
        my $notfirstloop = 0;
        while (@tmpnodestatehistory) {
            undef %match_result if ($notfirstloop);
            $newloop = 0;
            foreach my $type (keys %valid_process) {

                # After the first complete cycle the history no longer
                # starts with "unknown", so step 0 of the process is skipped.
                $procidx = $notfirstloop ? 1 : 0;

                my $proclen = scalar(@{ $valid_process{$type}{process} });
                my $i;
                for ($i = 0 ; $i < scalar(@tmpnodestatehistory) ; $i++) {
                    if ($procidx < $proclen) {
                        if ($tmpnodestatehistory[$i] == $valid_process{$type}{process}[$procidx]) {
                            ++$procidx;
                        } else {

                            # Must test the variable: the bareword
                            # 'notfirstloop' is a string and always true.
                            --$procidx if ($notfirstloop);
                            push @{ $match_result{$procidx} }, $type;
                            last;
                        }
                    } else {

                        # The whole process matched and history remains:
                        # drop the matched part and start matching again.
                        splice(@tmpnodestatehistory, 0, $i);
                        $newloop      = 1;
                        $notfirstloop = 1;
                        last;
                    }
                }
                if ($i == scalar(@tmpnodestatehistory)) {

                    # the whole history was consumed by this process
                    push @{ $match_result{$procidx} }, $type;
                    next;
                }
                last if ($newloop);
            }
            last if (!$newloop);
        }

        my $max_match = 0;
        foreach my $key (keys %match_result) {
            $max_match = $key if ($key > $max_match);
        }

        if ($max_match == 0) {
            probe_utils->send_msg("$output", "f", "[$node] deployment failed");
            probe_utils->send_msg("$output", "d", "\t$node did unknown process, state change history is $statelist");
        } elsif (scalar(@{ $match_result{$max_match} }) > 1) {
            probe_utils->send_msg("$output", "f", "[$node] deployment failed");
            probe_utils->send_msg("$output", "d", "\tThere are more than one possible process satisfy $node situation");
            foreach my $proc (@{ $match_result{$max_match} }) {
                my $type = $valid_process{$proc}{type};
                next unless ($type eq "deploy" or $type eq "reboot");

                my $passed = $state_set_reverse{ $valid_process{$proc}{process}[ $max_match - 1 ] };
                my $failed = $state_set_reverse{ $valid_process{$proc}{process}[$max_match] };
                probe_utils->send_msg("$output", "d", "\tpossible process \"$type\", pass $passed stage, something wrong during $failed stage");
            }
        } else {
            my $proc       = $match_result{$max_match}[0];
            my $type       = $valid_process{$proc}{type};
            my $last_state = $valid_process{$proc}{process}[ $max_match - 1 ];
            my $completed  = ($last_state == $state_set{done});
            my $passed     = $state_set_reverse{$last_state};
            my $failed     = $state_set_reverse{ $valid_process{$proc}{process}[$max_match] };

            if ($type eq "deploy") {
                if ($completed) {
                    probe_utils->send_msg("$output", "o", "[$node] deployment completed");
                } else {
                    probe_utils->send_msg("$output", "f", "[$node] deployment failed, pass $passed stage, something wrong during $failed stage");
                }
            } elsif ($type eq "reboot") {
                if ($completed) {
                    probe_utils->send_msg("$output", "f", "[$node] reboot completed, without deployment process");
                } else {
                    probe_utils->send_msg("$output", "f", "[$node] reboot failed, without deployment process, stop at $passed stage, something wrong during $failed stage");
                }
            }
        }
    }
}
|
|
|
|
#------------------------------------------
|
|
|
|
=head3
|
|
Description:
|
|
Monitor the process of os provision
|
|
Returns:
|
|
0: pass
|
|
1: failed
|
|
=cut
|
|
|
|
#------------------------------------------
|
|
# Monitor the process of os provision in real time.
#
# Follows /var/log/messages, the xCAT cluster and computes logs and the http
# access log with "tail -f -n 0" and dispatches every new line to the
# matching handle_*_msg routine. Monitoring stops when
#   - an INT or TERM signal is received,
#   - every node in %monitor_nodes has finished, or
#   - $maxwaittime minutes have expired.
# The summary is then printed with dump_history and the tail processes are
# stopped. A missing log file only produces a warning; failing to start tail
# on an existing one aborts monitoring.
# NOTE(review): lines are read with buffered readline on handles polled via
# select; confirm no line can stay unnoticed in the read buffer.
# Returns:
#     0: pass
#     1: failed
sub do_monitor {
    $SIG{TERM} = $SIG{INT} = sub {
        $terminal = 1;
    };

    my $rst = 0;
    my $startline =
      "-------------------------------------------------------------
Start capturing every message during OS provision process......
-------------------------------------------------------------
";

    print("$startline\n");

    my $varlogmsg  = "/var/log/messages";
    my $clusterlog = "/var/log/xcat/cluster.log";
    my $computelog = "/var/log/xcat/computes.log";

    # The access log location depends on the distribution.
    my $httplog;
    foreach my $candidate ("/var/log/httpd/access_log", "/var/log/apache2/access_log", "/var/log/apache2/access.log") {
        if (-e $candidate) {
            $httplog = $candidate;
            last;
        }
    }

    # pids of the tail processes; stay undefined for logs that are not followed
    my ($varlogpid, $clusterpid, $httppid, $computerpid);

  SETUP: {
        if (!-e $varlogmsg) {
            probe_utils->send_msg("$output", "w", "$varlogmsg doesn't exist");
        } elsif (!($varlogpid = open(VARLOGMSGFILE, "tail -f -n 0 $varlogmsg 2>&1 |"))) {
            probe_utils->send_msg("$output", "f", "Can't open $varlogmsg to get logs");
            $rst = 1;
            last SETUP;
        }

        if (!-e "$clusterlog") {
            probe_utils->send_msg("$output", "w", "$clusterlog doesn't exist");
            probe_utils->send_msg("$output", "i", "If using SLES11 or xCAT2.11.x, ignore above warning");
        } elsif (!($clusterpid = open(CLUSTERLOGFILE, "tail -f -n 0 $clusterlog 2>&1 |"))) {
            probe_utils->send_msg("$output", "f", "Can't open $clusterlog to get logs");
            $rst = 1;
            last SETUP;
        }

        if (!defined($httplog)) {

            # none of the known locations exists
            probe_utils->send_msg("$output", "w", "HTTP access log doesn't exist");
        } elsif (!-e "$httplog") {
            probe_utils->send_msg("$output", "w", "$httplog doesn't exist");
        } elsif (!($httppid = open(HTTPLOGFILE, "tail -f -n 0 $httplog 2>&1 |"))) {
            probe_utils->send_msg("$output", "f", "Can't open $httplog to get logs");
            $rst = 1;
            last SETUP;
        }

        if (!-e "$computelog") {
            probe_utils->send_msg("$output", "w", "$computelog doesn't exist");
            probe_utils->send_msg("$output", "i", "If sles11 or xCAT2.11.x, ignore above warning");
        } elsif (!($computerpid = open(COMPUTERFILE, "tail -f -n 0 $computelog 2>&1 |"))) {
            probe_utils->send_msg("$output", "f", "Can't open $computelog to get logs");
            $rst = 1;
            last SETUP;
        }

        # Only handles that were really opened are watched.
        my $select = new IO::Select;
        $select->add(\*VARLOGMSGFILE)  if ($varlogpid);
        $select->add(\*CLUSTERLOGFILE) if ($clusterpid);
        $select->add(\*HTTPLOGFILE)    if ($httppid);
        $select->add(\*COMPUTERFILE)   if ($computerpid);
        $| = 1;

        my $starttime = time();
        $monitor = 1;
        while (1) {
            if (my @hdls = $select->can_read(0)) {
                foreach my $hdl (@hdls) {
                    my $line = readline($hdl);

                    # EOF: the tail process is gone, stop polling its pipe
                    # instead of feeding undefined lines to the handlers.
                    unless (defined($line)) {
                        $select->remove($hdl);
                        next;
                    }
                    chomp($line);

                    if ($hdl == \*VARLOGMSGFILE) {

                        # syslog carries several sources; column 5 (or 6)
                        # holds the program tag.
                        my @tmp = split(/\s+/, $line);
                        if ($tmp[4] =~ /dhcpd/i && $line =~ /$installnic/) {
                            handle_dhcp_msg("$line");
                        } elsif ($tmp[4] =~ /in.tftpd/i) {
                            handle_tftp_msg("$line");
                        } elsif (($tmp[4] =~ /^xcat/i) || ($tmp[5] =~ /^xcat/i)) {

                            # xcat lines logged by this server are cluster
                            # messages, the others come from compute nodes
                            if (grep(/^$tmp[3]$/, @candidate_svr_hostname_inlog)) {
                                handle_cluster_msg("$line");
                            } else {
                                handle_compute_msg("$line");
                            }
                        }
                    } elsif ($hdl == \*CLUSTERLOGFILE) {
                        handle_cluster_msg("$line");
                    } elsif ($hdl == \*HTTPLOGFILE) {
                        handle_http_msg("$line");
                    } elsif ($hdl == \*COMPUTERFILE) {
                        handle_compute_msg("$line");
                    }
                }
            }

            if ($terminal || (%monitor_nodes && all_monitor_node_done())) {
                if ($terminal) {
                    probe_utils->send_msg("$output", "d", "Get INT or TERM signal from STDIN");
                } else {
                    probe_utils->send_msg("$output", "o", "All nodes specified to monitor, have finished OS provision process");
                }
                last;
            }

            if (time() - $starttime > ($maxwaittime * 60)) {
                probe_utils->send_msg("$output", "i", "$maxwaittime minutes have expired, stop monitoring");
                last;
            }

            # The builtin sleep only takes whole seconds (0.01 would be
            # truncated to 0 and make this loop spin); the 4-argument select
            # gives a real 10 ms pause.
            select(undef, undef, undef, 0.01);
        }
        dump_history();
    }

    kill 'INT', $varlogpid   if ($varlogpid);
    kill 'INT', $clusterpid  if ($clusterpid);
    kill 'INT', $httppid     if ($httppid);
    kill 'INT', $computerpid if ($computerpid);
    close(VARLOGMSGFILE)  if ($varlogpid);
    close(CLUSTERLOGFILE) if ($clusterpid);
    close(HTTPLOGFILE)    if ($httppid);
    close(COMPUTERFILE)   if ($computerpid);

    return $rst;
}
|
|
|
|
# Collect, from the log files and their rotated copies, every relevant log
# line written at or after a reference time.
#
# Files handled: /var/log/xcat/cluster.log, /var/log/messages,
# /var/log/xcat/computes.log and the http access log when one exists. For each
# of them the rotated copies in the same directory are visited newest first
# ("ls -lt"). Inside a file a binary search on byte offsets locates the
# reference time; lines from there on are kept when they are dhcpd lines
# mentioning $installnic, tftpd lines, xcat lines or http GET lines. An older
# copy is visited only when the first line of the current file is not older
# than the reference time.
#
# The collected lines are sorted by time, stripped of the sort helper column
# and de-duplicated; each resulting element is "<epoch seconds> <log line>".
# Arguments:
#     ref_timestamp:        reference time in epoch seconds
#     year:                 passed through to probe_utils->convert_to_epoch_seconds
#     epoch_seconds_of_now: passed through to probe_utils->convert_to_epoch_seconds
#     bthistory_ref:        array reference receiving the result
sub get_valid_logs {
    my ($ref_timestamp, $year, $epoch_seconds_of_now, $bthistory_ref) = @_;

    my @orglogfilelist = ("/var/log/xcat/cluster.log",
        "/var/log/messages",
        "/var/log/xcat/computes.log");

    my $httplog;
    foreach my $candidate ("/var/log/httpd/access_log", "/var/log/apache2/access_log", "/var/log/apache2/access.log") {
        if (-e $candidate) {
            $httplog = $candidate;
            last;
        }
    }

    # Without this guard an undefined entry would make the "ls" below list
    # the current directory and treat its files as logs.
    push @orglogfilelist, $httplog if (defined($httplog));

    # Epoch seconds of an already split log line. Access logs carry the time
    # in the 4th column as "[stamp"; syslog style files in the first three.
    my $epoch_of = sub {
        my ($fields_ref, $ishttp) = @_;
        if ($ishttp) {
            my $stamp = $fields_ref->[3];
            $stamp =~ s/^\[(.+)/$1/g;
            return probe_utils->convert_to_epoch_seconds($stamp);
        }
        my $stamp = join(" ", @{$fields_ref}[ 0 .. 2 ]);
        return probe_utils->convert_to_epoch_seconds($stamp, $year, $epoch_seconds_of_now);
    };

    foreach my $f (@orglogfilelist) {
        my $filename = basename("$f");
        $filename =~ s/(.+)\.(.+)/$1/g;
        my $path_only = dirname("$f");
        my @rotatefiles;

        #TODO using opendir to refine below code
        my @alltargetfiles = `ls -lt $path_only |awk -F" " '/ $filename/ {print \$9}'`;
        foreach my $samenamefile (@alltargetfiles) {
            chomp($samenamefile);
            push @rotatefiles, "$path_only/$samenamefile";
        }

        my $ishttplog = ($filename =~ /access/) ? 1 : 0;

        foreach my $file (@rotatefiles) {
            my $fd;
            my $filetype = `file $file 2>&1`;
            chomp($filetype);
            if ($filetype =~ /ASCII/) {
                if (!open($fd, '<', $file)) {
                    print "open $file failed\n";
                    next;
                }
            } else {

                #TODO handle compression files
                # Until then the scan of this log family stops at the first
                # file that is not plain text.
                last;
            }

            my $historynum = 0;    # byte offset from which lines are collected
            my $line       = <$fd>;
            unless (defined($line)) {

                # An empty file (e.g. just rotated) tells nothing about the
                # older copies: keep looking at them.
                close($fd);
                next;
            }
            chomp($line);

            my $needrotate     = 0;
            my $logindex       = 0;
            my @splitline      = split(/\s+/, $line);
            my $timestampepoch = $epoch_of->(\@splitline, $ishttplog);

            if ($ref_timestamp <= $timestampepoch) {

                # The whole file is recent enough; the previous copy may
                # hold relevant lines as well.
                $needrotate = 1;
            } else {

                # Binary search for the first line at the reference time.
                seek($fd, 0, 2);
                my $tail     = tell($fd);
                my $head     = 0;
                my $lasttail = $tail;
                while ($head <= $tail) {
                    my $middle = int(($tail - $head) / 2) + $head;
                    seek($fd, $middle, 0);

                    # $middle usually points inside a line: skip the rest of
                    # it and examine the next complete line.
                    $line = <$fd>;
                    $middle += length($line) if (defined($line));
                    last unless (defined($line = <$fd>));

                    @splitline      = split(/\s+/, $line);
                    $timestampepoch = $epoch_of->(\@splitline, $ishttplog);

                    if ($ref_timestamp == $timestampepoch) {
                        $historynum = $middle;
                        last;
                    } elsif ($ref_timestamp < $timestampepoch) {
                        $tail = $middle;
                        last if ($tail == $lasttail);
                        $lasttail = $tail;
                    } else {
                        $head = $middle;
                    }
                }
                $historynum = $head unless ($historynum);
            }

            seek($fd, $historynum, 0);
            while (my $entry = <$fd>) {
                chomp($entry);
                @splitline      = split(/\s+/, $entry);
                $timestampepoch = $epoch_of->(\@splitline, $ishttplog);

                if (($splitline[4] =~ /dhcpd/i && $entry =~ /$installnic/)
                    || ($splitline[4] =~ /in.tftpd/i)
                    || (($splitline[4] =~ /^xcat/i) || ($splitline[5] =~ /^xcat/i))
                    || ($splitline[5] =~ /GET/ && $splitline[7] =~ /HTTP/)) {

                    # "<epoch> <file><index> <line>": the second column only
                    # keeps the original order of lines sharing one second.
                    # The index is zero padded because the list is sorted as
                    # strings ("10" would otherwise sort before "2").
                    my $log = sprintf("%s %s%010d %s", $timestampepoch, $filename, $logindex, $entry);
                    $logindex++;
                    push @$bthistory_ref, $log;
                }
            }
            close($fd);
            last unless ($needrotate);
        }
    }

    #sort logs depending on time
    my @sort_ht = sort(@$bthistory_ref);
    foreach my $entry (@sort_ht) {

        # drop the helper column again
        $entry =~ s/^(\d+) (\S+) (.+)/$1 $3/g;
    }

    #delete duplicate logs
    my %count;
    @$bthistory_ref = grep { ++$count{$_} < 2; } @sort_ht;
}
|
|
|
|
# Replay the provision process from the history logs.
# Collects the relevant log lines written since the reference time with
# get_valid_logs, feeds each of them to the matching handle_*_msg routine in
# the same way realtime monitoring does, then prints the summary.
# Arguments:
#     ref_timestamp: start time of the replay in epoch seconds
# Returns:
#     0 : always
sub do_replay {
    my $ref_timestamp = shift;

    my $timestr = scalar(localtime($ref_timestamp));
    print "Start logs search after '$timestr', waiting for a while.............\n";

    # Only the year field of the broken-down current time is needed.
    my $year                 = (localtime(time()))[5];
    my $epoch_seconds_of_now = time();
    my @bthistory;
    get_valid_logs($ref_timestamp, $year, $epoch_seconds_of_now, \@bthistory);

    for my $entry (@bthistory) {

        # every element starts with the epoch seconds added by get_valid_logs
        $entry =~ s/(\d+) (.+)/$2/g;
        my @field = split(/\s+/, $entry);

        if ($field[4] =~ /dhcpd/i && $entry =~ /$installnic/) {
            handle_dhcp_msg("$entry");
            next;
        }

        if ($field[4] =~ /in.tftpd/i) {
            handle_tftp_msg("$entry");
            next;
        }

        if (($field[4] =~ /^xcat/i) || ($field[5] =~ /^xcat/i)) {

            # lines logged by this server are cluster messages, the others
            # come from compute nodes
            if (grep(/^$field[3]$/, @candidate_svr_hostname_inlog)) {
                handle_cluster_msg("$entry");
            } else {
                handle_compute_msg("$entry");
            }
            next;
        }

        if ($field[5] =~ /GET/ && $field[7] =~ /HTTP/) {
            handle_http_msg("$entry");
        }
    }

    dump_history();
    return 0;
}
|
|
|
|
#-------------------------------------
|
|
# main process
|
|
#-------------------------------------
|
|
# Parse the command line.
# -t takes a value (minutes), hence "t=i": declared as a plain flag, "-t 30"
# would set the wait time to 1 and leave "30" as a stray argument.
if (
    !GetOptions("--help|h|?" => \$help,
        "T"   => \$test,
        "V"   => \$verbose,
        "t=i" => \$maxwaittime,
        "r=s" => \$replaylog,
        "n=s" => \$noderange))
{
    probe_utils->send_msg("$output", "f", "Invalid parameter for $program_name");
    probe_utils->send_msg("$output", "d", "$::USAGE");
    exit 1;
}

if ($help) {
    if ($output ne "stdout") {
        probe_utils->send_msg("$output", "d", "$::USAGE");
    } else {
        print "$::USAGE";
    }
    exit 0;
}

# -T is the self-test hook reserved for the probe framework.
if ($test) {
    probe_utils->send_msg("$output", "o", "Probe for OS provision process, realtime monitor of OS provision process.");
    exit 0;
}

unless ($noderange) {
    probe_utils->send_msg("$output", "f", "A noderange is required");
    probe_utils->send_msg("$output", "d", "$::USAGE");
    exit 1;
}

# Replay mode: turn "xxhxxm", "xxh", "xxm" or a bare number of hours into the
# epoch time at which the replay starts. The patterns are anchored so that
# malformed values are rejected, and units are accepted in either case.
my $epoch_starttime = time();
if ($replaylog) {
    if ($replaylog =~ /^(\d+)h(\d+)m$/i) {
        $epoch_starttime -= ($1 * 3600 + $2 * 60);
    } elsif ($replaylog =~ /^(\d+)h?$/i) {
        $epoch_starttime -= $1 * 3600;
    } elsif ($replaylog =~ /^(\d+)m$/i) {
        $epoch_starttime -= $1 * 60;
    } else {
        probe_utils->send_msg("$output", "f", "Unsupported time format for history log replay");
        print "$::USAGE";
        exit 1;
    }
}

# The same message is reported with the "f" or "o" flag depending on the
# outcome of the check.
my $msg = "All pre_defined nodes are valid";
my $rc  = check_noderange($noderange);
if ($rc) {
    probe_utils->send_msg("$output", "f", $msg);
    $rst = 1;
} else {
    probe_utils->send_msg("$output", "o", $msg);
}

# Find the network interface carrying the 'master' address of the site
# table; dhcpd log lines are filtered on that interface name.
unless ($installnic) {
    my $masteripinsite = `tabdump site | awk -F',' '/^"master",/ { gsub(/"/, "", \$2) ; print \$2 }'`;
    chomp($masteripinsite);
    $installnic = `ip addr |grep -B2 $masteripinsite|awk -F" " '/mtu/{gsub(/:/,"",\$2); print \$2}'`;
    chomp($installnic);
    if (!$installnic) {
        probe_utils->send_msg("$output", "f", "The value of 'master' in 'site' table is $masteripinsite, can't get corresponding network interface");
        $rst = 1;
    } else {
        probe_utils->send_msg("$output", "i", "The installation network interface is $installnic");
    }
}

exit $rst if ($rst);

# Every monitored node starts the state machine in the "unknown" state.
foreach my $node (keys %monitor_nodes) {
    $rawdata{$node}{state} = $state_set{unknown};
    push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state});
}

if ($replaylog) {
    $rst = do_replay($epoch_starttime);
    exit $rst;
}

$rst = do_monitor();

exit $rst;
|
|
|
|
|