2
0
mirror of https://github.com/xcat2/xcat-core.git synced 2025-05-25 05:02:05 +00:00

1079 lines
38 KiB
Perl
Executable File

#! /usr/bin/perl
# IBM(c) 2016 EPL license http://www.eclipse.org/legal/epl-v10.html
BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; }
use lib "$::XCATROOT/probe/lib/perl";
use probe_utils;
use xCAT::NetworkUtils;
use File::Basename;
use IO::Select;
use Time::Local;
use Data::Dumper;
use Getopt::Long qw(:config no_ignore_case);
# ---- Command line options and script-wide state ----
my $program_name = basename("$0");
my $help;
my $test;
my $maxwaittime = 60; # maximum time to keep monitoring, in minutes
my $output = "stdout";
my $verbose = 0;
my $rst = 0;
my $noderange;
my $terminal = 0; # set to 1 by the INT/TERM handler installed in do_monitor
my $installnic;
my $monitor = 0; # set to 1 by do_monitor; handlers print each record only when it is set
my $replaylog; # start-time offset given with -r, used by the deployment log replay feature
my %rawdata;
#-%rawdata structure-------
# $rawdata{nodename}{"history"} #array, the log history of current node
# $rawdata{nodename}{"state"} #the latest state of current node, used by the state machine
# $rawdata{nodename}{"statehistory"} #array, every state the node went through, used by the state machine
#--------------------------
my %macmap;
#-%macmap structure-------
# $macmap{mac_addr}{"ip"}="x.x.x.x" (or "NOIP" for an adapter flagged !*NOIP*)
# $macmap{mac_addr}{"node"}="nodename"
#-------------------------
my %ipnodemap;
#-%ipnodemap structure-------
# $ipnodemap{ip_addr}="nodename"
#---------------------------
my %monitor_nodes;
#- %monitor_nodes structure-------
# $monitor_nodes{nodename}{"status"} #currently unused
# $monitor_nodes{nodename}{"rst"} #0 until the node is reported as booted, then 1
#---------------------------------
# Provision state machine.
#
# %state_set maps a state name to its number; the numbers follow the order
# in which a full deployment walks through the states.
my %state_set = do {
    my $number = 0;
    map { $_ => $number++ } qw(
      unknown
      server_reboot
      loaded_kernel_and_initrd
      kernel_and_initrd_got_ip
      install_os_packages
      run_postscript
      run_postbootscript
      done
    );
};

# %state_set_reverse maps a state number back to its name.
my %state_set_reverse = reverse %state_set;

# %valid_process lists the state sequences that are recognised.
#   $valid_process{id}{process}  array of state numbers, in order
#   $valid_process{id}{type}     "reboot" or "deploy"
my %valid_process = do {
    my @known_flows = (
        [ reboot => qw(unknown done) ],
        [ reboot => qw(unknown server_reboot done) ],
        [ reboot => qw(unknown server_reboot run_postbootscript done) ],
        [ deploy => qw(unknown server_reboot loaded_kernel_and_initrd kernel_and_initrd_got_ip install_os_packages run_postscript server_reboot run_postbootscript done) ],
        [ deploy => qw(unknown loaded_kernel_and_initrd kernel_and_initrd_got_ip install_os_packages run_postscript server_reboot run_postbootscript done) ],
    );
    my $id = 0;
    map {
        my ($type, @path) = @$_;
        (++$id => { process => [ @state_set{@path} ], type => $type });
    } @known_flows;
};
# Advance a node's state in response to an observed event.
# Arguments:
#   state_ref : reference to the scalar holding the node's current state number
#   condition : the event seen ("dhcp", "tftp", "http", "postscript",
#               "booted" or "poweron")
# Returns:
#   1 : the event is a valid transition, the referenced state was updated
#   0 : no transition applies, the state is left untouched
sub reset_state {
    my ($state_ref, $condition) = @_;

    # [ current state, event, next state ]; rules are tried top to bottom
    # and the first one that applies wins.
    my @transitions = (
        [ "unknown",                  "dhcp",       "server_reboot" ],
        [ "unknown",                  "booted",     "done" ],
        [ "unknown",                  "tftp",       "loaded_kernel_and_initrd" ],
        [ "unknown",                  "http",       "loaded_kernel_and_initrd" ],
        [ "server_reboot",            "booted",     "done" ],
        [ "server_reboot",            "postscript", "run_postbootscript" ],
        [ "server_reboot",            "tftp",       "loaded_kernel_and_initrd" ],
        [ "server_reboot",            "http",       "loaded_kernel_and_initrd" ],
        [ "loaded_kernel_and_initrd", "dhcp",       "kernel_and_initrd_got_ip" ],
        [ "kernel_and_initrd_got_ip", "http",       "install_os_packages" ],
        [ "install_os_packages",      "postscript", "run_postscript" ],
        [ "run_postscript",           "dhcp",       "server_reboot" ],
        [ "run_postbootscript",       "booted",     "done" ],
        [ "done",                     "dhcp",       "server_reboot" ],
        [ "done",                     "poweron",    "server_reboot" ],
    );

    foreach my $rule (@transitions) {
        my ($from, $event, $to) = @$rule;
        next unless ($$state_ref == $state_set{$from} && $condition eq $event);
        $$state_ref = $state_set{$to};
        return 1;
    }
    return 0;
}
# Names under which this server may show up in the hostname column of a
# syslog line: the short hostname and "<short>.<domain>".
my $svr_hostname_short  = `hostname -s`;
my $svr_hostname_domain = `hostname -d`;
chomp($svr_hostname_short, $svr_hostname_domain);
my @candidate_svr_hostname_inlog = (
    $svr_hostname_short,
    "$svr_hostname_short.$svr_hostname_domain",
);
# Usage text printed for -h and after an invalid invocation.
$::USAGE = "Usage:
$program_name -h
$program_name -T
$program_name -n <node_range> [-t <max_waiting_time>] [-V]
$program_name -n <node_range> -r <xxhxxm> [-V]
Description:
Probe for OS provision process. Realtime monitor or replay history of OS provision process.
If realtime monitor, run this before 'rpower' node.
Currently, hierarchical structure is not supported.
Options:
-h : Get usage information of $program_name
-T : Verify if $program_name can work, reserved option for probe framework
-V : Output more information for debug
-n : The range of nodes for monitor or replay log.
-t : The maximum time in minutes to wait when doing monitor, default is 60.
-r : Replay history log for probe provisioning. Input a start time when probe should begin.
Supported time formats are xxhxxm, xxh, or xxm. If units not specified, hour will be used by default.
";
#------------------------------------------
=head3
    Description:
        Check if all nodes are valid.
        Runs 'lsdef <node_range> -i ip,mac -c' and, for every node that has
        a MAC address and an IP (from the node definition or from name
        resolution), fills %macmap and registers the node in %monitor_nodes.
    Arguments:
        node_range: node range
    Returns:
        0 : pass
        1 : failed (a node is undefined, has no MAC, can't be resolved to
            an IP, or no valid node is left)
=cut
#------------------------------------------
sub check_noderange {
    my $node_range = shift;
    my @cmdoutput  = `lsdef $node_range -i ip,mac -c 2>&1`;
    my $rst        = 0;
    my %nodecheckrst;

    foreach my $line (@cmdoutput) {
        chomp($line);
        $line =~ s/^\s+|\s+$//g;

        # Capture everything between the quotes: node names may contain
        # characters such as '-' or '.' that \w does not match.
        if ($line =~ /^Error: Could not find an object named '([^']+)' .+/i) {
            $nodecheckrst{$1}{"error"} = "Could not find node definition";
        } elsif ($line =~ /(\S+):\s+mac=(.*)/i) {
            my $node = $1;
            my $mac  = $2;
            if ($mac) {
                $nodecheckrst{$node}{"mac"} = $mac;
            } else {
                $nodecheckrst{$node}{"error"} = "Node $node doesn't have MAC address";
            }
        } elsif ($line =~ /(\S+):\s+ip=(.*)/i) {
            my $node = $1;
            my $ip   = $2;
            if ($ip) {
                $nodecheckrst{$node}{"ip"} = $ip;
            }
        }
    }

    foreach my $node (keys %nodecheckrst) {
        if (exists($nodecheckrst{$node}{error})) {
            probe_utils->send_msg("$output", "d", "$node : $nodecheckrst{$node}{error}");
            $rst = 1;
            next;
        }

        my $noerror = 1;

        # The mac attribute may hold several '|' separated addresses.
        my @macs = split(/\|/, $nodecheckrst{$node}{"mac"});
        foreach my $mac (@macs) {

            #[NOTE] don't support 2 adapters in the same network now. TODO
            if ($mac =~ /\!\*NOIP\*/) {
                $mac =~ s/\!\*NOIP\*//g;
                $macmap{$mac}{"ip"}   = "NOIP";
                $macmap{$mac}{"node"} = $node;
            } else {
                $macmap{$mac}{"node"} = $node;
                if ($nodecheckrst{$node}{"ip"}) {
                    $macmap{$mac}{"ip"} = $nodecheckrst{$node}{"ip"};
                } else {

                    # No ip attribute in the definition: fall back to
                    # resolving the node name.
                    my $nodeip = xCAT::NetworkUtils->getipaddr($node);
                    if ($nodeip) {
                        $macmap{$mac}{"ip"} = $nodeip;
                    } else {
                        $noerror = 0;
                        $rst     = 1;
                        probe_utils->send_msg("$output", "f", "$node : can't be resolved to an IP address");
                    }
                }
            }
        }
        $monitor_nodes{$node}{"rst"} = 0 if ($noerror);
    }

    unless (%monitor_nodes) {
        probe_utils->send_msg("$output", "d", "There is no valid node to handle");
        $rst = 1;
    }
    return $rst;
}
#------------------------------------------
=head3
    Description:
        Handle one line of dhcpd log.
        Only messages whose MAC address is known in %macmap are recorded.
        DHCPACK and BOOTREPLY additionally map the assigned IP to the node,
        feed a "dhcp" event to the state machine, and warn when the
        assigned IP differs from the one in the node definition.
    Arguments:
        msg: one line dhcp log
    Returns:
        0 : always
=cut
#------------------------------------------

# Print (only while monitoring live) and store one history record of a node.
sub _dhcp_add_history {
    my ($node, $record) = @_;
    probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor);
    push(@{ $rawdata{$node}{"history"} }, $record);
}

# Bookkeeping shared by DHCPACK and BOOTREPLY: remember which node owns the
# IP, advance the state machine and verify the IP against the definition.
sub _dhcp_lease_granted {
    my ($node, $mac, $ip) = @_;
    $ipnodemap{$ip} = $node;
    push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "dhcp"));

    # IP addresses and "NOIP" are strings: they must be compared with 'ne'.
    # A numeric '!=' only looks at the leading number of each operand.
    if ($macmap{$mac}{"ip"} ne "NOIP" and $macmap{$mac}{"ip"} ne $ip) {
        my $warn_msg = "The ip($ip) assigned to $mac via DHCP is different from the ip($macmap{$mac}{'ip'}) in node definition.";
        probe_utils->send_msg("$output", "w", "$warn_msg") if ($monitor);
        push(@{ $rawdata{$node}{"history"} }, $warn_msg);
    }
}

sub handle_dhcp_msg {
    my $msg = shift;

    if ($msg =~ /.+DHCPDISCOVER\s+from\s+(.+)\s+via\s+([^:]+)(.*)/i) {
        my $mac = $1;
        my $nic = $2;
        if (exists $macmap{$mac}) {
            _dhcp_add_history($macmap{$mac}{"node"}, "Receive DHCPDISCOVER via $nic");
        }
    } elsif ($msg =~ /.+DHCPOFFER\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/i) {
        my $ip  = $1;
        my $mac = $2;
        my $nic = $3;
        if (exists $macmap{$mac}) {
            _dhcp_add_history($macmap{$mac}{"node"}, "Send DHCPOFFER on $ip back to $mac via $nic");
        }
    } elsif ($msg !~ /unknown lease/ && $msg !~ /ignored/ && $msg =~ /.+DHCPREQUEST\s+for\s+(.+)\s\((.+)\)\s+from\s+(.+)\s+via\s+(.+)/) {
        my $ip  = $1;
        my $mac = $3;
        my $nic = $4;
        if (exists $macmap{$mac}) {
            _dhcp_add_history($macmap{$mac}{"node"}, "Receive DHCPREQUEST from $mac for $ip via $nic");
        }
    } elsif ($msg =~ /.+DHCPACK\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/) {
        my $ip  = $1;
        my $mac = $2;
        my $nic = $3;
        if (exists $macmap{$mac}) {
            my $node = $macmap{$mac}{"node"};
            _dhcp_add_history($node, "Send DHCPACK on $ip back to $mac via $nic");
            _dhcp_lease_granted($node, $mac, $ip);
        }
    } elsif ($msg =~ /.+BOOTREQUEST\s+from\s+(.+)\s+via\s+([^:]+)(.*)/) {
        my $mac = $1;
        my $nic = $2;
        if (exists $macmap{$mac}) {
            _dhcp_add_history($macmap{$mac}{"node"}, "Receive BOOTREQUEST from $mac via $nic");
        }
    } elsif ($msg =~ /.+BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) {
        my $ip  = $1;
        my $mac = $2;
        my $nic = $3;
        if (exists $macmap{$mac}) {
            my $node = $macmap{$mac}{"node"};
            _dhcp_add_history($node, "Send BOOTREPLY on $ip back to $mac via $nic");
            _dhcp_lease_granted($node, $mac, $ip);
        }
    }
    return 0;
}
#------------------------------------------
=head3
    Description:
        Handle one line of tftp log.
        A read request (RRQ) coming from an IP that %ipnodemap maps to a
        monitored node is recorded in the node history and fed to the
        state machine as a "tftp" event.
    Arguments:
        msg: one line tftp log
    Returns:
        0 : always
=cut
#------------------------------------------
sub handle_tftp_msg {
    my $msg = shift;
    if ($msg =~ /RRQ\s+from\s+(.+)\s+filename\s+(.+)/i) {
        my $ip     = $1;
        my $file   = $2;
        my $record = "Via TFTP download $file";

        # The IP is only known once a DHCPACK/BOOTREPLY for it has been seen.
        my $node = $ipnodemap{$ip};
        if (defined($node) && exists($rawdata{$node})) {
            probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor);
            push(@{ $rawdata{$node}{"history"} }, $record);
            push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "tftp"));
        }
    }

    # Explicit status, as documented and as the other handlers do.
    return 0;
}
#------------------------------------------
=head3
    Description:
        Handle one line of http access log.
        A GET request coming from an IP that %ipnodemap maps to a monitored
        node is recorded in the node history and fed to the state machine
        as an "http" event.
    Arguments:
        msg: one line http log
    Returns:
        0 : always
=cut
#------------------------------------------
sub handle_http_msg {
    my $msg = shift;

    # Every dot of the dotted-quad is escaped so that only a literal '.'
    # separates the octets.
    if ($msg =~ /(\d+\.\d+\.\d+\.\d+)\s.+GET\s+(.+)\s+HTTP.+/) {
        my $ip     = $1;
        my $file   = $2;
        my $record = "Via HTTP get $file";

        # The IP is only known once a DHCPACK/BOOTREPLY for it has been seen.
        my $node = $ipnodemap{$ip};
        if (defined($node) && exists($rawdata{$node})) {
            probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor);
            push(@{ $rawdata{$node}{"history"} }, $record);
            push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "http"));
        }
    }
    return 0;
}
#------------------------------------------
=head3
    Description:
        Handle one line of cluster.log.
        Only xcat status-change lines about a node present in %rawdata are
        processed: the change is recorded in the node history, "booted"
        marks the node as finished and "powering-on" / "booted" are fed to
        the state machine.
    Arguments:
        line: one line log
    Returns:
        0 : always
=cut
#------------------------------------------
sub handle_cluster_msg {
    my $line   = shift;
    my @fields = split(/\s+/, $line);

    # Not written by xcat: nothing to do.
    return 0 unless (($fields[4] =~ /^xcat/i) || ($fields[5] =~ /^xcat/i));

    #log like: Aug 7 22:30:31 c910f02c01p09 xcat: c910f02c04p04 status: booted statustime: 08-07-2016 22:30:31
    return 0 unless (($fields[6] =~ /^status:$/i) && ($fields[8] =~ /^statustime:$/));

    my $node   = $fields[5];
    my $status = $fields[7];
    return 0 unless (exists($rawdata{$node}));

    my $record = "Node status is changed to $status";
    probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor);
    push(@{ $rawdata{$node}{"history"} }, $record);

    if ($status eq "booted") {

        # this node has finished its deployment
        $monitor_nodes{$node}{"rst"} = 1 if (defined($monitor_nodes{$node}));
        if (reset_state(\$rawdata{$node}{state}, "booted")) {
            push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state});
        }
    }
    if ($status eq "powering-on") {
        if (reset_state(\$rawdata{$node}{state}, "poweron")) {
            push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state});
        }
    }
    return 0;
}
#-----------------------------------------
=head3
    Description:
        Handle one line of computes.log.
        The sender is either a node name or an IP address that is looked
        up in %ipnodemap. Messages of nodes present in %rawdata are
        recorded, and "Running postscript" is fed to the state machine as
        a "postscript" event.
    Arguments:
        line: one line compute log
    Returns:
        0 : always
=cut
#-----------------------------------------
sub handle_compute_msg {
    my $line = shift;

    return 0 unless ($line =~ /.+\d{2}:\d{2}:\d{2}\s+(.+)\s+(xcat.+)/i);
    my ($sender, $msg) = ($1, $2);

    # A sender given as an IP address is translated through %ipnodemap.
    my $node = xCAT::NetworkUtils->isIpaddr($sender) ? $ipnodemap{$sender} : $sender;
    return 0 if ($node eq "" || !exists($rawdata{$node}));

    probe_utils->send_msg("$output", "d", "[$node] $msg") if ($monitor);
    push(@{ $rawdata{$node}{"history"} }, $msg);

    # the node starts to run postscripts or postbootscripts
    if ($msg =~ /Running postscript/) {
        if (reset_state(\$rawdata{$node}{state}, "postscript")) {
            push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state});
        }
    }
    return 0;
}
#------------------------------------------
=head3
    Description:
        Test if all nodes have finished their job
    Arguments:
        None; reads the global %monitor_nodes
    Returns:
        1: all nodes finished (also when %monitor_nodes is empty)
        0: not all nodes finished
=cut
#------------------------------------------
sub all_monitor_node_done {

    # A node whose "rst" is still 0 has not been reported as booted yet.
    my @pending = grep { $monitor_nodes{$_}{"rst"} == 0 } keys %monitor_nodes;
    return @pending ? 0 : 1;
}
#------------------------------------------
=head3
    Description:
        Print the summary of the OS provision, node by node.
        With -V the collected log history and the state history of each
        node are printed first (long runs of HTTP requests are collapsed).
        The state history of each node is then compared with every
        sequence in %valid_process, and the best match decides whether the
        node is reported as deployed, rebooted or failed at some stage.
    Arguments:
        NULL
    Returns:
        nothing meaningful
=cut
#------------------------------------------
sub dump_history {
    my $title = "
=============================================================
= The summary of os provision:
=============================================================
";
    print "$title\n";

    foreach my $node (keys %rawdata) {

        # Names of all states the node went through, each followed by a space.
        my $statelist = join("", map { "$state_set_reverse{$_} " } @{ $rawdata{$node}{statehistory} });

        if ($verbose) {
            print "[$node]\n";

            # Of a run of consecutive HTTP records only the first one and
            # the last two are printed; the rest is replaced by dots.
            my $httphit = 0;
            my @httptmp;
            foreach my $line (@{ $rawdata{$node}{"history"} }) {
                if ($line =~ /Via HTTP/) {
                    if ($httphit) {
                        shift @httptmp if ($#httptmp > 0);
                        push @httptmp, $line;
                    } else {
                        print "\t$line\n";
                        $httphit = 1;
                    }
                } else {
                    if ($#httptmp > -1) {
                        print "\tVia HTTP ..........\n";
                        print "\t$_\n" foreach (@httptmp);
                    }
                    @httptmp = ();
                    $httphit = 0;
                    print "\t$line\n";
                }
            }
            probe_utils->send_msg("$output", "d", "[$node] state history: $statelist");
        }

        # Match the state history against every valid process.
        # $match_result{N} lists the processes that matched N stages.
        my @tmpnodestatehistory = @{ $rawdata{$node}{statehistory} };
        my %match_result;
        my $procidx      = 0;
        my $newloop      = 0;
        my $notfirstloop = 0;
        while (@tmpnodestatehistory) {
            undef %match_result if ($notfirstloop);
            $newloop = 0;
            foreach my $type (keys %valid_process) {

                # After a complete process has been consumed, the remaining
                # history no longer starts with "unknown": skip that stage.
                $procidx = $notfirstloop ? 1 : 0;
                my $proclen = scalar(@{ $valid_process{$type}{process} });
                my $i;
                for ($i = 0 ; $i < scalar(@tmpnodestatehistory) ; $i++) {
                    if ($procidx < $proclen) {
                        if ($tmpnodestatehistory[$i] == $valid_process{$type}{process}[$procidx]) {
                            ++$procidx;
                        } else {

                            # The skipped "unknown" stage must not be counted
                            # as matched, but only on passes after the first.
                            --$procidx if ($notfirstloop);
                            push @{ $match_result{$procidx} }, $type;
                            last;
                        }
                    } else {

                        # A whole process matched and history is left over:
                        # drop the consumed part and start a new pass.
                        splice(@tmpnodestatehistory, 0, $i);
                        $newloop      = 1;
                        $notfirstloop = 1;
                        last;
                    }
                }
                if ($i == scalar(@tmpnodestatehistory)) {
                    push @{ $match_result{$procidx} }, $type;
                    next;
                }
                last if ($newloop);
            }
            last if (!$newloop);
        }

        my $max_match = 0;
        foreach my $key (keys %match_result) {
            $max_match = $key if ($key > $max_match);
        }

        if ($max_match == 0) {
            probe_utils->send_msg("$output", "f", "[$node] deployment failed");
            probe_utils->send_msg("$output", "d", "\t$node did unknown process, state change history is $statelist");
        } elsif (scalar(@{ $match_result{$max_match} }) > 1) {
            probe_utils->send_msg("$output", "f", "[$node] deployment failed");
            probe_utils->send_msg("$output", "d", "\tThere are more than one possible process satisfy $node situation");
            foreach my $proc (@{ $match_result{$max_match} }) {
                my $passed  = $state_set_reverse{ $valid_process{$proc}{process}[ $max_match - 1 ] };
                my $failing = $state_set_reverse{ $valid_process{$proc}{process}[$max_match] };
                if ($valid_process{$proc}{type} eq "deploy") {
                    probe_utils->send_msg("$output", "d", "\tpossible process \"deploy\", pass $passed stage, something wrong during $failing stage");
                } elsif ($valid_process{$proc}{type} eq "reboot") {
                    probe_utils->send_msg("$output", "d", "\tpossible process \"reboot\", pass $passed stage, something wrong during $failing stage");
                }
            }
        } else {

            # Exactly one process is the best match.
            my $proc      = $match_result{$max_match}[0];
            my $type      = $valid_process{$proc}{type};
            my $laststate = $valid_process{$proc}{process}[ $max_match - 1 ];
            my $passed    = $state_set_reverse{$laststate};
            my $failing   = $state_set_reverse{ $valid_process{$proc}{process}[$max_match] };
            my $finished  = ($laststate == $state_set{done});
            if ($type eq "deploy" && $finished) {
                probe_utils->send_msg("$output", "o", "[$node] deployment completed");
            } elsif ($type eq "deploy") {
                probe_utils->send_msg("$output", "f", "[$node] deployment failed, pass $passed stage, something wrong during $failing stage");
            } elsif ($type eq "reboot" && $finished) {
                probe_utils->send_msg("$output", "f", "[$node] reboot completed, without deployment process");
            } elsif ($type eq "reboot") {
                probe_utils->send_msg("$output", "f", "[$node] reboot failed, without deployment process, stop at $passed stage, something wrong during $failing stage");
            }
        }
    }
}
#------------------------------------------
=head3
    Description:
        Monitor the process of os provision.
        Follows /var/log/messages, the xCAT cluster and computes logs and
        the http access log with 'tail -f', dispatches every new line to
        the matching handler, and stops when all monitored nodes are
        finished, when INT/TERM is received or when $maxwaittime minutes
        have expired. The summary is printed before returning.
    Returns:
        0: pass
        1: failed (one of the existing log files could not be followed)
=cut
#------------------------------------------
sub do_monitor {
    $SIG{TERM} = $SIG{INT} = sub {
        $terminal = 1;
    };

    my $rst = 0;
    my $startline =
"-------------------------------------------------------------
Start capturing every message during OS provision process......
-------------------------------------------------------------
";
    print("$startline\n");

    my $varlogmsg  = "/var/log/messages";
    my $clusterlog = "/var/log/xcat/cluster.log";
    my $computelog = "/var/log/xcat/computes.log";
    my $httplog;
    if (-e "/var/log/httpd/access_log") {
        $httplog = "/var/log/httpd/access_log";
    } elsif (-e "/var/log/apache2/access_log") {
        $httplog = "/var/log/apache2/access_log";
    } elsif (-e "/var/log/apache2/access.log") {
        $httplog = "/var/log/apache2/access.log";
    }

    my $varlogpid;
    my $clusterpid;
    my $httppid;
    my $computerpid;

    # Bare block: 'last' leaves it early when a log can't be followed, so
    # that the cleanup below always runs.
    {
        if (!-e $varlogmsg) {
            probe_utils->send_msg("$output", "w", "$varlogmsg doesn't exist");
        } else {
            if (!($varlogpid = open(VARLOGMSGFILE, "tail -f -n 0 $varlogmsg 2>&1 |"))) {
                probe_utils->send_msg("$output", "f", "Can't open $varlogmsg to get logs");
                $rst = 1;
                last;
            }
        }

        if (!-e "$clusterlog") {
            probe_utils->send_msg("$output", "w", "$clusterlog doesn't exist");
            probe_utils->send_msg("$output", "i", "If using SLES11 or xCAT2.11.x, ignore above warning");
        } else {
            if (!($clusterpid = open(CLUSTERLOGFILE, "tail -f -n 0 $clusterlog 2>&1 |"))) {
                probe_utils->send_msg("$output", "f", "Can't open $clusterlog to get logs");
                $rst = 1;
                last;
            }
        }

        if (!-e "$httplog") {
            probe_utils->send_msg("$output", "w", "$httplog doesn't exist");
        } else {
            if (!($httppid = open(HTTPLOGFILE, "tail -f -n 0 $httplog 2>&1 |"))) {
                probe_utils->send_msg("$output", "f", "Can't open $httplog to get logs");
                $rst = 1;
                last;
            }
        }

        if (!-e "$computelog") {
            probe_utils->send_msg("$output", "w", "$computelog doesn't exist");
            probe_utils->send_msg("$output", "i", "If sles11 or xCAT2.11.x, ignore above warning");
        } else {
            if (!($computerpid = open(COMPUTERFILE, "tail -f -n 0 $computelog 2>&1 |"))) {
                probe_utils->send_msg("$output", "f", "Can't open $computelog to get logs");
                $rst = 1;
                last;
            }
        }

        my $select = new IO::Select;
        $select->add(\*VARLOGMSGFILE);
        $select->add(\*CLUSTERLOGFILE);
        $select->add(\*HTTPLOGFILE);
        $select->add(\*COMPUTERFILE);
        $| = 1;

        my $line = "";
        my @hdls;
        my $hdl;
        my $starttime = time();
        $monitor = 1;

        for (; ;) {
            if (@hdls = $select->can_read(0)) {
                foreach $hdl (@hdls) {
                    if ($hdl == \*VARLOGMSGFILE) {
                        chomp($line = <VARLOGMSGFILE>);
                        my @tmp = split(/\s+/, $line);
                        if ($tmp[4] =~ /dhcpd/i && $line =~ /$installnic/) {
                            handle_dhcp_msg("$line");
                        } elsif ($tmp[4] =~ /in.tftpd/i) {
                            handle_tftp_msg("$line");
                        } elsif (($tmp[4] =~ /^xcat/i) || ($tmp[5] =~ /^xcat/i)) {

                            # xcat lines logged under this server's own
                            # hostname are cluster messages, all others
                            # come from compute nodes.
                            if (grep(/^$tmp[3]$/, @candidate_svr_hostname_inlog)) {
                                handle_cluster_msg("$line");
                            } else {
                                handle_compute_msg("$line");
                            }
                        }
                    } elsif ($hdl == \*CLUSTERLOGFILE) {
                        chomp($line = <CLUSTERLOGFILE>);
                        handle_cluster_msg("$line");
                    } elsif ($hdl == \*HTTPLOGFILE) {
                        chomp($line = <HTTPLOGFILE>);
                        handle_http_msg("$line");
                    } elsif ($hdl == \*COMPUTERFILE) {
                        chomp($line = <COMPUTERFILE>);
                        handle_compute_msg("$line");
                    }
                }
            }

            if ($terminal || (%monitor_nodes && all_monitor_node_done())) {
                if ($terminal) {
                    probe_utils->send_msg("$output", "d", "Get INT or TERM signal from STDIN");
                } else {
                    probe_utils->send_msg("$output", "o", "All nodes specified to monitor, have finished OS provision process");
                }
                last;
            }

            if (time() - $starttime > ($maxwaittime * 60)) {
                probe_utils->send_msg("$output", "i", "$maxwaittime minutes have expired, stop monitoring");
                last;
            }

            # Pause for 10ms. The builtin sleep() only takes whole seconds
            # ('sleep 0.01' is 'sleep 0'), which turned this loop into a
            # busy wait; the 4-argument select() accepts fractions.
            select(undef, undef, undef, 0.01);
        }
        &dump_history;
    }

    # Stop the 'tail' children and release the pipes that were opened.
    kill 'INT', $varlogpid   if ($varlogpid);
    kill 'INT', $clusterpid  if ($clusterpid);
    kill 'INT', $httppid     if ($httppid);
    kill 'INT', $computerpid if ($computerpid);
    close(VARLOGMSGFILE)  if ($varlogpid);
    close(CLUSTERLOGFILE) if ($clusterpid);
    close(HTTPLOGFILE)    if ($httppid);
    close(COMPUTERFILE)   if ($computerpid);
    return $rst;
}
# Convert the timestamp of one log line, already split on whitespace, to
# epoch seconds. HTTP access logs carry it in field 3 ("[date"), the other
# logs in fields 0..2.
sub _log_line_epoch {
    my ($fields_ref, $ishttplog, $year, $epoch_seconds_of_now) = @_;
    if ($ishttplog) {
        my $stamp = $fields_ref->[3];
        $stamp =~ s/^\[(.+)/$1/g;
        return probe_utils->convert_to_epoch_seconds($stamp);
    }
    my $timestamp = join(" ", @{$fields_ref}[ 0 .. 2 ]);
    return probe_utils->convert_to_epoch_seconds($timestamp, $year, $epoch_seconds_of_now);
}

#------------------------------------------
# Description:
#     Collect, from the current and rotated log files, every dhcpd, tftpd,
#     xcat and HTTP GET line written at or after a reference time.
# Arguments:
#     ref_timestamp        : reference time, in epoch seconds
#     year                 : passed through to
#                            probe_utils->convert_to_epoch_seconds
#     epoch_seconds_of_now : passed through as well
#     bthistory_ref        : reference to the array receiving the result;
#                            on return it holds lines "<epoch> <log line>",
#                            sorted by time, duplicates removed
# Returns:
#     nothing meaningful
#------------------------------------------
sub get_valid_logs {
    my $ref_timestamp        = shift;
    my $year                 = shift;
    my $epoch_seconds_of_now = shift;
    my $bthistory_ref        = shift;

    my @orglogfilelist = ("/var/log/xcat/cluster.log",
        "/var/log/messages",
        "/var/log/xcat/computes.log");
    my $httplog;
    if (-e "/var/log/httpd/access_log") {
        $httplog = "/var/log/httpd/access_log";
    } elsif (-e "/var/log/apache2/access_log") {
        $httplog = "/var/log/apache2/access_log";
    } elsif (-e "/var/log/apache2/access.log") {
        $httplog = "/var/log/apache2/access.log";
    }

    # Without this guard an undefined entry made the loop below list and
    # scan the files of the current directory.
    push @orglogfilelist, $httplog if ($httplog);

    foreach my $f (@orglogfilelist) {
        my $filename = basename("$f");
        $filename =~ s/(.+)\.(.+)/$1/g;
        my $path_only = dirname("$f");
        my @rotatefiles;

        #TODO using opendir to refine below code
        # 'ls -lt' lists the newest file first: the live log, then its
        # rotations from the most recent to the oldest.
        my @alltargetfiles = `ls -lt $path_only |awk -F" " '/ $filename/ {print \$9}'`;
        foreach my $samenamefile (@alltargetfiles) {
            chomp($samenamefile);
            push @rotatefiles, "$path_only/$samenamefile";
        }

        my $ishttplog = 0;
        $ishttplog = 1 if ($filename =~ /access/);

        foreach my $file (@rotatefiles) {
            my $fd;
            my $filetype = `file $file 2>&1`;
            chomp($filetype);
            if ($filetype =~ /ASCII/) {
                if (!open($fd, '<', $file)) {
                    print "open $file failed\n";
                    next;
                }
            } else {

                #TODO handle compression files
                # Nothing can be read from this file: stop going through
                # the older rotations, as an unreadable first line does.
                last;
            }

            my $line;
            my $historynum = 0;    # byte offset where the wanted logs start
            last unless ($line = <$fd>);
            chomp($line);
            my $needrotate = 0;
            my $logindex   = 0;
            my @splitline  = split(/\s+/, $line);
            my $timestampepoch = _log_line_epoch(\@splitline, $ishttplog, $year, $epoch_seconds_of_now);

            if ($ref_timestamp <= $timestampepoch) {

                # Even the first line is recent enough: take the whole
                # file and look at the next older rotation as well.
                $needrotate = 1;
            } else {

                # Binary search for the offset of the first line that is
                # not older than the reference time.
                seek($fd, 0, 2);
                my $tail     = tell($fd);
                my $head     = 0;
                my $lasttail = $tail;
                while ($head <= $tail) {
                    my $middle = int(($tail - $head) / 2) + $head;
                    seek($fd, $middle, 0);

                    # Skip the (probably partial) line the seek landed in.
                    $line = <$fd>;
                    $middle += length($line);
                    last unless ($line = <$fd>);
                    @splitline = split(/\s+/, $line);
                    $timestampepoch = _log_line_epoch(\@splitline, $ishttplog, $year, $epoch_seconds_of_now);
                    if ($ref_timestamp == $timestampepoch) {
                        $historynum = $middle;
                        last;
                    } elsif ($ref_timestamp < $timestampepoch) {
                        $tail = $middle;
                        last if ($tail == $lasttail);
                        $lasttail = $tail;
                    } else {
                        $head = $middle;
                    }
                }
                $historynum = $head unless ($historynum);
            }

            seek($fd, $historynum, 0);
            while (<$fd>) {
                chomp;
                @splitline = split(/\s+/, $_);
                $timestampepoch = _log_line_epoch(\@splitline, $ishttplog, $year, $epoch_seconds_of_now);
                if (($splitline[4] =~ /dhcpd/i && $_ =~ /$installnic/)
                    || ($splitline[4] =~ /in.tftpd/i)
                    || (($splitline[4] =~ /^xcat/i) || ($splitline[5] =~ /^xcat/i))
                    || ($splitline[5] =~ /GET/ && $splitline[7] =~ /HTTP/)) {

                    # "<epoch> <file><index> <line>": the index keeps lines
                    # of one file, written in the same second, in their
                    # original order. It is zero padded because the sort
                    # below compares strings ("10" would sort before "9").
                    my $log = sprintf("%s %s%010d %s", $timestampepoch, $filename, $logindex, $_);
                    $logindex++;
                    push @$bthistory_ref, $log;
                }
            }
            close($fd);
            last unless ($needrotate);
        }
    }

    #sort logs depending on time, then drop the "<file><index>" sort key
    my @sort_ht = sort(@$bthistory_ref);
    for (my $i = 0 ; $i <= $#sort_ht ; $i++) {
        $sort_ht[$i] =~ s/^(\d+) (\S+) (.+)/$1 $3/g;
    }

    #delete duplicate logs
    my %count;
    @$bthistory_ref = grep { ++$count{$_} < 2; } @sort_ht;
}
#------------------------------------------
# Description:
#     Replay the OS provision process from the existing log files.
#     Collects the relevant log lines written since a reference time, feeds
#     each one to the matching handler and prints the summary.
# Arguments:
#     ref_timestamp : time from which logs are replayed, in epoch seconds
# Returns:
#     0 : always
#------------------------------------------
sub do_replay {
    my ($ref_timestamp) = @_;

    my $timestr = scalar(localtime($ref_timestamp));
    print "Start logs search after '$timestr', waiting for a while.............\n";

    my $year                 = (localtime(time()))[5];
    my $epoch_seconds_of_now = time();
    my @bthistory;
    get_valid_logs($ref_timestamp, $year, $epoch_seconds_of_now, \@bthistory);

    foreach my $line (@bthistory) {

        # Drop the leading epoch seconds added by get_valid_logs.
        $line =~ s/(\d+) (.+)/$2/g;
        my @field = split(/\s+/, $line);

        if ($field[4] =~ /dhcpd/i && $line =~ /$installnic/) {
            handle_dhcp_msg("$line");
            next;
        }
        if ($field[4] =~ /in.tftpd/i) {
            handle_tftp_msg("$line");
            next;
        }
        if (($field[4] =~ /^xcat/i) || ($field[5] =~ /^xcat/i)) {

            # xcat lines logged under this server's own hostname are
            # cluster messages, all others come from compute nodes.
            if (grep(/^$field[3]$/, @candidate_svr_hostname_inlog)) {
                handle_cluster_msg("$line");
            } else {
                handle_compute_msg("$line");
            }
            next;
        }
        handle_http_msg("$line") if ($field[5] =~ /GET/ && $field[7] =~ /HTTP/);
    }

    dump_history();
    return 0;
}
#-------------------------------------
# main process
#   1. parse the command line
#   2. validate the node range and find the installation network interface
#   3. replay the existing logs (-r) or monitor the provision in real time
#-------------------------------------
if (
    !GetOptions("--help|h|?" => \$help,
        "T"   => \$test,
        "V"   => \$verbose,
        "t=i" => \$maxwaittime,    # takes a value: the number of minutes to wait
        "r=s" => \$replaylog,
        "n=s" => \$noderange))
{
    probe_utils->send_msg("$output", "f", "Invalid parameter for $program_name");
    probe_utils->send_msg("$output", "d", "$::USAGE");
    exit 1;
}

if ($help) {
    if ($output ne "stdout") {
        probe_utils->send_msg("$output", "d", "$::USAGE");
    } else {
        print "$::USAGE";
    }
    exit 0;
}

if ($test) {
    probe_utils->send_msg("$output", "o", "Probe for OS provision process, realtime monitor of OS provision process.");
    exit 0;
}

unless ($noderange) {
    probe_utils->send_msg("$output", "f", "A noderange is required");
    probe_utils->send_msg("$output", "d", "$::USAGE");
    exit 1;
}

# -r gives how far back the replay starts: xxhxxm, xxh, xxm, or a bare
# number which is taken as hours.
my $epoch_starttime = time();
if ($replaylog) {
    if ($replaylog =~ /^(\d+)h(\d+)m$/i) {
        $epoch_starttime -= ($1 * 3600 + $2 * 60)
    } elsif ($replaylog =~ /^(\d+)h*$/i) {
        $epoch_starttime -= $1 * 3600;
    } elsif ($replaylog =~ /^(\d+)m$/) {
        $epoch_starttime -= $1 * 60;
    } else {
        probe_utils->send_msg("$output", "f", "Unsupported time format for history log replay");
        print "$::USAGE";
        exit 1;
    }
}

# The same check description is reported with either the "f" or the "o" flag.
my $msg = "All pre_defined nodes are valid";
my $rc  = check_noderange($noderange);
if ($rc) {
    probe_utils->send_msg("$output", "f", $msg);
    $rst = 1;
} else {
    probe_utils->send_msg("$output", "o", $msg);
}

# The installation NIC is the interface carrying the 'master' IP of the
# site table; dhcpd log lines are filtered on its name.
unless ($installnic) {
    my $masteripinsite = `tabdump site | awk -F',' '/^"master",/ { gsub(/"/, "", \$2) ; print \$2 }'`;
    chomp($masteripinsite);
    $installnic = `ip addr |grep -B2 $masteripinsite|awk -F" " '/mtu/{gsub(/:/,"",\$2); print \$2}'`;
    chomp($installnic);
    if (!$installnic) {
        probe_utils->send_msg("$output", "f", "The value of 'master' in 'site' table is $masteripinsite, can't get corresponding network interface");
        $rst = 1;
    } else {
        probe_utils->send_msg("$output", "i", "The installation network interface is $installnic");
    }
}

exit $rst if ($rst);

# Every monitored node starts the state machine in the "unknown" state.
foreach my $node (keys %monitor_nodes) {
    $rawdata{$node}{state} = $state_set{unknown};
    push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state});
}

if ($replaylog) {
    $rst = do_replay($epoch_starttime);
    exit $rst;
}

$rst = do_monitor();
exit $rst;