From 09a661c8003af49905935f7f63a55c1aea32f94a Mon Sep 17 00:00:00 2001 From: XuWei Date: Tue, 13 Dec 2016 21:58:27 -0500 Subject: [PATCH] enhance xcatprobe osdeploy functions --- xCAT-probe/lib/perl/LogParse.pm | 29 +++++++--- xCAT-probe/subcmds/osdeploy | 95 ++++++++++++++++++++++++--------- 2 files changed, 92 insertions(+), 32 deletions(-) diff --git a/xCAT-probe/lib/perl/LogParse.pm b/xCAT-probe/lib/perl/LogParse.pm index a7c6e5379..026c1adfe 100644 --- a/xCAT-probe/lib/perl/LogParse.pm +++ b/xCAT-probe/lib/perl/LogParse.pm @@ -249,7 +249,7 @@ sub obtain_one_second_logs { while (<$fd>) { chomp; $self->debuglogger("[$loglabel]read: $_"); - my $log_content_ref = $self->obtain_log_content($self->{log_open_info}->{$loglabel}{filetype}, $_); + my $log_content_ref = $self->obtain_log_content($self->{log_open_info}->{$loglabel}{filetype}, $_, 0); #if read the log whoes time bigger than target time, stop to read $self->debuglogger("\t$log_content_ref->{time} $the_time_to_load"); @@ -472,13 +472,15 @@ sub obtain_log_content { my $self = shift; my $log_type = shift; my $original_log = shift; + my $is_monitor = shift; my %log_content = (); my @split_line = split(/\s+/, $original_log); if ($log_type == $::LOGTYPE_RSYSLOG) { if ($split_line[0] =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(.+)-(.+)/) { - $log_content{time} = $self->convert_to_epoch_seconds($split_line[0]); + $log_content{time_record} = "$4:$5:$6"; + $log_content{time} = $self->convert_to_epoch_seconds($split_line[0], $is_monitor); if (!xCAT::NetworkUtils->isIpaddr($split_line[1])) { my @sender_tmp = split(/\./, $split_line[1]); $log_content{sender} = $sender_tmp[0]; @@ -505,7 +507,8 @@ sub obtain_log_content { } } else { my $timestamp = join(" ", @split_line[ 0 .. 2 ]); - $log_content{time} = $self->convert_to_epoch_seconds($timestamp); + $log_content{time_record} = $split_line[2]; + $log_content{time} = $self->convert_to_epoch_seconds($timestamp, $is_monitor); if (!xCAT::NetworkUtils->isIpaddr($split_line[3])) { my @sender_tmp = split(/\./, $split_line[3]); $log_content{sender} = $sender_tmp[0]; @@ -533,7 +536,10 @@ sub obtain_log_content { } } elsif ($log_type == $::LOGTYPE_HTTP) { $split_line[3] =~ s/^\[(.+)/$1/g; - $log_content{time} = $self->convert_to_epoch_seconds($split_line[3]); + if ($split_line[3] =~ /(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+)/) { + $log_content{time_record} = "$4:$5:$6"; + } + $log_content{time} = $self->convert_to_epoch_seconds($split_line[3], $is_monitor); if (!xCAT::NetworkUtils->isIpaddr($split_line[0])) { my @sender_tmp = split(/\./, $split_line[0]); $log_content{sender} = $sender_tmp[0]; @@ -561,6 +567,7 @@ sub obtain_log_content { sub convert_to_epoch_seconds { my $self = shift; my $timestr = shift; + my $is_monitor = shift; my $yday; my $mday; @@ -581,15 +588,23 @@ sub convert_to_epoch_seconds { ($mday, $dday, $h, $m, $s) = ($1, $2, $3, $4, $5); $yday = $self->{current_ref_year}; $epoch_seconds = timelocal($s, $m, $h, $dday, $monthsmap{$mday}, $yday); - if ($epoch_seconds > $self->{current_ref_time}) { - --$yday; - $epoch_seconds = timelocal($s, $m, $h, $dday, $monthsmap{$mday}, $yday); + if ($is_monitor) { + if ($epoch_seconds < $self->{current_ref_time}) { + ++$yday; + $epoch_seconds = timelocal($s, $m, $h, $dday, $monthsmap{$mday}, $yday); + } + } else { + if ($epoch_seconds > $self->{current_ref_time}) { + --$yday; + $epoch_seconds = timelocal($s, $m, $h, $dday, $monthsmap{$mday}, $yday); + } } # The time format looks like "15/Aug/2016:01:10:24" } elsif ($timestr =~ /(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+)/) { $epoch_seconds = timelocal($6, $5, $4, $1, $monthsmap{$2}, $3); } + return $epoch_seconds; } diff --git a/xCAT-probe/subcmds/osdeploy b/xCAT-probe/subcmds/osdeploy index 63231b5e2..4b64f67cd 100755 --- a/xCAT-probe/subcmds/osdeploy +++ b/xCAT-probe/subcmds/osdeploy @@ -330,6 +330,7 @@ sub init_node_state { my @nodes = probe_utils->parse_node_range($noderange); foreach my $node (@nodes) { @{ $node_state_ref->{$node}{statehistory} } = (); + %{ $node_state_ref->{$node}{errors} } = (); $node_state_ref->{$node}{done} = 0; } } @@ -463,7 +464,6 @@ sub conclusion_report { foreach (@{ $node_state_ref->{$node}{statehistory} }) { $stop_stage = $_ if ($stop_stage < $_); $start_rpower = 1 if ($_ == $::STATE_POWER_ON); - $power_on = 1 if ($_ == $::STATE_POWERINGON); $isntalling = 1 if ($_ == $::STATE_INSTALLING); $postbootscript = 1 if ($_ == $::STATE_POSTBOOTSCRIPT); } @@ -476,19 +476,29 @@ sub conclusion_report { # 3 There isn't reboot operation for target node during the rollback time window # That means there isn't provision process happened - if ($start_rpower && !$power_on) { - $failed_node{$node}{non_provision_prediction} = "Trigger target node reboot failed"; - } elsif ($start_rpower && $power_on && !$isntalling && $postbootscript) { - $failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk"; - } elsif (! $start_rpower){ - $failed_node{$node}{non_provision_prediction} = "Without provision process during rollback time window"; + if ($monitor) { + if (!$isntalling && $postbootscript) { + $failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk"; + next; + } } else { - if ($stop_stage != $::STATE_COMPLETED) { - $failed_node{$node}{provision_stop_point} = $stop_stage; + if (! $start_rpower) { + $failed_node{$node}{non_provision_prediction} = "Without provision process during rollback time window"; + next; + } elsif (!$isntalling && $postbootscript) { + $failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk"; + next; } } - } + if ($stop_stage != $::STATE_COMPLETED) { + $failed_node{$node}{provision_stop_point} = $stop_stage; + } + + if (@{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }) { + $failed_node{$node}{provision_stop_point} = $stop_stage; + } + } if (%failed_node) { my $failed_node_num = keys %failed_node; @@ -503,6 +513,17 @@ sub conclusion_report { probe_utils->send_msg("stdout", "f", "$node : $failed_node{$node}{non_provision_prediction}"); } else { probe_utils->send_msg("stdout", "f", "$node : stop at stage '$::STATE_DESC{$failed_node{$node}{provision_stop_point}}'"); + if ($failed_node{$node}{provision_stop_point} < $::STATE_POSTSCRIPT) { + foreach my $node_error (@{ $node_state_ref->{$node}{errors}{$failed_node{$node}{provision_stop_point}} }) { + probe_utils->send_msg("stdout", "d", "$node_error"); + } + } else { + for (my $stage = $::STATE_POSTSCRIPT; $stage <= $::STATE_COMPLETED; $stage++) { + foreach my $node_error (@{ $node_state_ref->{$node}{errors}{$stage} }) { + probe_utils->send_msg("stdout", "d", "$node_error"); + } + } + } } } } else { @@ -590,7 +611,7 @@ Start capturing every message during OS provision process.... foreach my $hdl (@hdls) { my $line = ""; chomp($line = <$hdl>); - my $log_content_ref = $log_parse->obtain_log_content($fd_filetype_map{$hdl}, $line); + my $log_content_ref = $log_parse->obtain_log_content($fd_filetype_map{$hdl}, $line, $monitor); dispatch_log_to_handler($log_content_ref, \@candidate_mn_hostname_in_log, \%node_state); } } @@ -718,7 +739,7 @@ sub handle_dhcp_msg { if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Receive DHCPDISCOVER via $nic"; - probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor); push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug); } } elsif ($log_ref->{msg} =~ /DHCPOFFER\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/i) { @@ -729,7 +750,7 @@ sub handle_dhcp_msg { if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Send DHCPOFFER on $ip back to $mac via $nic"; - probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor); push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug); } } elsif ($log_ref->{msg} =~ /DHCPREQUEST\s+for\s+(.+)\s+[\(\)0-9\.]*\s*from\s+(.+)\s+via\s+(.+)/) { @@ -740,7 +761,7 @@ sub handle_dhcp_msg { if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = $log_ref->{msg}; - probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor); push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug); } } elsif ($log_ref->{msg} =~ /DHCPACK\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/) { @@ -751,7 +772,7 @@ sub handle_dhcp_msg { if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Send DHCPACK on $ip back to $mac via $nic"; - probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor); push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug); if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) { @@ -770,7 +791,7 @@ sub handle_dhcp_msg { if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Receive BOOTREQUEST from $mac via $nic"; - probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor); push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug); } } elsif ($log_ref->{msg} =~ /BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) { @@ -781,7 +802,7 @@ sub handle_dhcp_msg { if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Send BOOTREPLY on $ip back to $mac via $nic"; - probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor); push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug); if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) { @@ -817,7 +838,7 @@ sub handle_tftp_msg { my $file = $2; my $record = "Via TFTP download $file"; if (exists($node_state_ref->{ $ipnodemap{$ip} })) { - probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $record") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $log_ref->{time_record} $record") if ($monitor); push(@{ $node_state_ref->{ $ipnodemap{$ip} }{log} }, $log_ref->{msg}) if ($debug); if ($file =~ /xcat\/xnba.*/i or $file =~ /\/boot\/grub2\/powerpc-ieee1275\//i or $file =~ /\/yb\/node\/yaboot\-/i) { @@ -852,21 +873,26 @@ sub handle_http_msg { if (exists($node_state_ref->{ $ipnodemap{$ip} })) { - if ($log_ref->{msg} =~ /GET\s+(.+)\s+HTTP.+/ or $log_ref->{msg} =~ /HEAD\s+(.+)\s+HTTP.+/) { + if ($log_ref->{msg} =~ /"GET\s+(.+)\s+HTTP.+" (\d+)/ or $log_ref->{msg} =~ /"HEAD\s+(.+)\s+HTTP.+" (\d+)/) { my $file = $1; + my $http_code = $2; my $record = "Via HTTP get $file"; - probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $record") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $log_ref->{time_record} $record") if ($monitor); push(@{ $node_state_ref->{ $ipnodemap{$ip} }{log} }, $log_ref->{msg}) if ($debug); if ($file =~ /vmlinuz|inst64/i or ($file =~ /linux/i and $file =~ /osimage/i)) { set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KERNEL); + push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KERNEL} }, "$record failed with $http_code") if ($http_code >= 400); } elsif ($file =~ /initrd/i and $file =~ /osimage/i) { set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INITRD); + push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INITRD} }, "$record failed with $http_code") if ($http_code >= 400); } elsif (($file =~ /^\/install\/autoinst\//i) and ($file !~ /getinstdisk$/i) and ($file !~ /\.pre$/i) and ($file !~ /\.post$/i)) { set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KICKSTART); + push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KICKSTART} }, "$record failed with $http_code") if ($http_code >= 400); } elsif ($file =~ /\.deb$/i or $file =~ /\/Packages\/.+\.rpm$/ or $file =~ /\/suse\/noarch\/.+\.rpm$/i) { set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INSTALLRPM); + push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INSTALLRPM} }, "$record failed with $http_code") if ($http_code >= 400); } } } @@ -897,11 +923,11 @@ sub handle_cluster_msg { my $nodes_str = $split_log[4]; my $sub_command = $split_log[5]; - if ($command eq "rinstall" or $command eq "rnetboot" or ($command eq "rpower" and $sub_command =~ /on|boot|reset/)) { + if ($command eq "rinstall" or $command eq "rnetboot" or ($command eq "rpower" and $sub_command =~ /on|boot|reset/) or ($command eq "xdsh" and $log_msg =~ /reboot|shutdown -r/)) { my @nodes = probe_utils->parse_node_range($nodes_str); foreach my $node (@nodes) { if (exists $node_state_ref->{$node}) { - probe_utils->send_msg("stdout", "d", "[$node] Use command $command to reboot node $node") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} Use command $command to reboot node $node") if ($monitor); push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug); set_node_state($node_state_ref, $node, $::STATE_POWER_ON); } @@ -915,7 +941,7 @@ sub handle_cluster_msg { foreach my $node (@split_node) { if (exists $node_state_ref->{$node}) { - probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor); push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug); if ($status eq "installing") { @@ -951,16 +977,34 @@ sub handle_compute_msg { my $node = $log_ref->{sender}; if (exists $node_state_ref->{$node}) { - probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{msg}") if ($monitor); + probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $log_ref->{msg}") if ($monitor); push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug); if ($log_ref->{msg} =~ /Running postscript:/i) { set_node_state($node_state_ref, $node, $::STATE_POSTSCRIPT); + } elsif ($log_ref->{msg} =~ /postscript (.+) return with (\d+)/) { + my $script_name = $1; + my $return_code = $2; + if ($return_code != 0) { + my $error_str = "postscript $script_name return with $return_code"; + unless (grep {$_ eq $error_str} @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }) { + push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }, $error_str; + } + } } elsif ($log_ref->{msg} =~ /Running postbootscript:/i) { set_node_state($node_state_ref, $node, $::STATE_POSTBOOTSCRIPT); + } elsif ($log_ref->{msg} =~ /postbootscript (.+) return with (\d+)/) { + my $script_name = $1; + my $return_code = $2; + if ($return_code != 0) { + my $error_str = "postbootscript $script_name return with $return_code"; + unless (grep {$_ eq $error_str} @{ $node_state_ref->{$node}{errors}{$::STATE_POSTBOOTSCRIPT} }) { + push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTBOOTSCRIPT} }, $error_str; + } + } } elsif ($log_ref->{msg} =~ /provision completed/) { set_node_state($node_state_ref, $node, $::STATE_COMPLETED); $node_state_ref->{$node}{done} = 1; - probe_utils->send_msg("stdout", "o", "[$node] provision completed") if ($monitor); + probe_utils->send_msg("stdout", "o", "[$node] $log_ref->{time_record} provision completed") if ($monitor); } } } @@ -1021,6 +1065,7 @@ sub set_node_state { if ($newstate == $::STATE_POWER_ON) { push @{ $node_state_ref->{$node}{allstatehistory} }, @{ $node_state_ref->{$node}{statehistory} }; @{ $node_state_ref->{$node}{statehistory} } = (); + %{ $node_state_ref->{$node}{errors} } = (); push @{ $node_state_ref->{$node}{statehistory} }, $newstate; } else { my $index = @{ $node_state_ref->{$node}{statehistory} } - 1;