Merge pull request #2308 from xuweibj/osd213

enhance xcatprobe osdeploy functions
2025-10-26 00:45:38 +00:00 · 2016-12-16 16:44:13 +08:00
parent 82df12452e 34ab88985c
commit 9a3f08e28e
5 changed files with 106 additions and 39 deletions
--- a/xCAT-probe/lib/perl/LogParse.pm
+++ b/xCAT-probe/lib/perl/LogParse.pm
@@ -23,6 +23,7 @@ use File::Basename;
    Arguments:
        Public attributes:
            $self->{verbose}:scalar, Offer verbose information, used for handling feature logic
+            $self->{load_type}:scalar, $::MONITOR or $::REPLAY, used for distinguishing monitor and replay.
        private attributes:
            $self->{log_open_info}: reference of a hash, used to save the log file operating information. 
            $self->{current_ref_year}: scalar, the year information of current time. such as 2016. Used for log time parsing
@@ -50,6 +51,7 @@ sub new {
    my $self  = {};
    my $class = shift;
    $self->{verbose} = shift;
+    $self->{load_type} = shift;

    my %log_open_info;
    $self->{log_open_info} = \%log_open_info;
@@ -478,6 +480,7 @@ sub obtain_log_content {

    if ($log_type == $::LOGTYPE_RSYSLOG) {
        if ($split_line[0] =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(.+)-(.+)/) {
+            $log_content{time_record} = "$4:$5:$6";
            $log_content{time} = $self->convert_to_epoch_seconds($split_line[0]);
            if (!xCAT::NetworkUtils->isIpaddr($split_line[1])) {
                my @sender_tmp = split(/\./, $split_line[1]);
@@ -505,7 +508,8 @@ sub obtain_log_content {
            }
        } else {
            my $timestamp = join(" ", @split_line[ 0 .. 2 ]);
-            $log_content{time}   = $self->convert_to_epoch_seconds($timestamp);
+            $log_content{time_record} = $split_line[2]; 
+            $log_content{time} = $self->convert_to_epoch_seconds($timestamp);
            if (!xCAT::NetworkUtils->isIpaddr($split_line[3])) {
                my @sender_tmp = split(/\./, $split_line[3]);
                $log_content{sender} = $sender_tmp[0];
@@ -533,6 +537,9 @@ sub obtain_log_content {
        }
    } elsif ($log_type == $::LOGTYPE_HTTP) {
        $split_line[3] =~ s/^\[(.+)/$1/g;
+        if ($split_line[3] =~ /(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+)/) {
+            $log_content{time_record} = "$4:$5:$6";
+        }
        $log_content{time}   = $self->convert_to_epoch_seconds($split_line[3]);
        if (!xCAT::NetworkUtils->isIpaddr($split_line[0])) {
            my @sender_tmp = split(/\./, $split_line[0]);
@@ -581,15 +588,23 @@ sub convert_to_epoch_seconds {
        ($mday, $dday, $h, $m, $s) = ($1, $2, $3, $4, $5);
        $yday = $self->{current_ref_year};
        $epoch_seconds = timelocal($s, $m, $h, $dday, $monthsmap{$mday}, $yday);
-        if ($epoch_seconds > $self->{current_ref_time}) {
-            --$yday;
-            $epoch_seconds = timelocal($s, $m, $h, $dday, $monthsmap{$mday}, $yday);
+        if ($self->{load_type} == $::MONITOR) {
+            if ($epoch_seconds < $self->{current_ref_time}) {
+                ++$yday;
+                $epoch_seconds = timelocal($s, $m, $h, $dday, $monthsmap{$mday}, $yday);
+            }
+        } else {
+            if ($epoch_seconds > $self->{current_ref_time}) {
+                --$yday;
+                $epoch_seconds = timelocal($s, $m, $h, $dday, $monthsmap{$mday}, $yday);
+            }
        }

        # The time format looks like "15/Aug/2016:01:10:24"
    } elsif ($timestr =~ /(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+)/) {
        $epoch_seconds = timelocal($6, $5, $4, $1, $monthsmap{$2}, $3);
    }
+
    return $epoch_seconds;
 }

--- a/xCAT-probe/lib/perl/probe_global_constant.pm
+++ b/xCAT-probe/lib/perl/probe_global_constant.pm
@@ -2,6 +2,10 @@
 # IBM(c) 2007 EPL license http://www.eclipse.org/legal/epl-v10.html
 package probe_global_constant;

+#The type of load log
+$::MONITOR = 0;
+$::REPLAY  = 1;
+
 #The type of log
 $::LOGTYPE_RSYSLOG = 0;    #rsyslog
 $::LOGTYPE_HTTP    = 1;    #apache log
--- a/xCAT-probe/subcmds/discovery
+++ b/xCAT-probe/subcmds/discovery
@@ -914,7 +914,7 @@ sub do_monitor {
    my %fd_filetype_map;

    {   #a very important brace to hold a code block
-        my $log_parse         = LogParse->new($verbose);
+        my $log_parse         = LogParse->new($verbose, $::MONITOR);
        my $candidate_log_ref = $log_parse->obtain_log_file_list();

        #open candidate log file to obtain realtime log
@@ -1071,7 +1071,7 @@ sub do_replay {
        print Dumper \%node_state;
    }

-    my $log_parse = LogParse->new($verbose);
+    my $log_parse = LogParse->new($verbose, $::REPLAY);
    my @candidate_mn_hostname_in_log = $log_parse->obtain_candidate_mn_hostname_in_log();

    while ($start_time_of_replay < $end_time_of_replay) {
--- a/xCAT-probe/subcmds/osdeploy
+++ b/xCAT-probe/subcmds/osdeploy
@@ -330,6 +330,7 @@ sub init_node_state {
    my @nodes = probe_utils->parse_node_range($noderange);
    foreach my $node (@nodes) {
        @{ $node_state_ref->{$node}{statehistory} } = ();
+        %{ $node_state_ref->{$node}{errors} } = ();
        $node_state_ref->{$node}{done} = 0;
    }
 }
@@ -374,7 +375,7 @@ sub do_replay {
        print Dumper \%node_state;
    }

-    my $log_parse = LogParse->new($verbose);
+    my $log_parse = LogParse->new($verbose, $::REPLAY);
    my @candidate_mn_hostname_in_log = $log_parse->obtain_candidate_mn_hostname_in_log();

    while ($start_time_of_replay < $end_time_of_replay) {
@@ -454,7 +455,6 @@ sub conclusion_report {
        my $stop_stage = 0;

        my $start_rpower   = 0;
-        my $power_on       = 0;
        my $isntalling     = 0;
        my $postbootscript = 0;

@@ -463,32 +463,36 @@ sub conclusion_report {
        foreach (@{ $node_state_ref->{$node}{statehistory} }) {
            $stop_stage     = $_ if ($stop_stage < $_);
            $start_rpower   = 1  if ($_ == $::STATE_POWER_ON);
-            $power_on       = 1  if ($_ == $::STATE_POWERINGON);
            $isntalling     = 1  if ($_ == $::STATE_INSTALLING);
            $postbootscript = 1  if ($_ == $::STATE_POSTBOOTSCRIPT);
        }

-        # Cover limited non-privision error
-        # 1 if xcatd receive reboot command to do provision (such like rpower, rnetboot, rinstall...) but the node status didn't change to "powering-on"
-        #   that means reboot target node failed.
-        # 2 if power on target node successfully and there is 'running postbootscript' in node state history, but without "installing" state,
+        # Cover limited non-privision error 
+        # 1 if power on target node successfully and there is 'running postbootscript' in node state history, but without "installing" state,
        #   It is very possible to just do reboot process
-        # 3 There isn't reboot operation for target node during the rollback time window 
+        # 2 When replay, if there isn't reboot operation for target node during the rollback time window 
        #   That means there isn't provision process happened 

-        if ($start_rpower && !$power_on) {
-            $failed_node{$node}{non_provision_prediction} = "Trigger target node reboot failed";
-        } elsif ($start_rpower && $power_on && !$isntalling && $postbootscript) {
-            $failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk";
-        } elsif (! $start_rpower){
-            $failed_node{$node}{non_provision_prediction} = "Without provision process during rollback time window"; 
+        if ($monitor) {
+            if (!$isntalling && $postbootscript) {
+                $failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk";
+                next;
+            }
        } else {
-            if ($stop_stage != $::STATE_COMPLETED) {
-                $failed_node{$node}{provision_stop_point} = $stop_stage;
+            if (! $start_rpower) {
+                $failed_node{$node}{non_provision_prediction} = "Without provision process during rollback time window";
+                next;
+            } elsif (!$isntalling && $postbootscript) {
+                $failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk";
+                next;
            }
        }
-    }

+        # if not completed or completed but with postscript error, add node to failed_node hash
+        if ($stop_stage != $::STATE_COMPLETED or @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }) {
+            $failed_node{$node}{provision_stop_point} = $stop_stage;
+        }
+    }

    if (%failed_node) {
        my $failed_node_num = keys %failed_node;
@@ -502,7 +506,22 @@ sub conclusion_report {
            if ($failed_node{$node}{non_provision_prediction}) {
                probe_utils->send_msg("stdout", "f", "$node : $failed_node{$node}{non_provision_prediction}");
            } else {
+                my $node_length = length($node);
+                my $space_str = " " x ($length_node + 2);
+                # if stop at stage before postscript, means there is error at current stage, print error message
                probe_utils->send_msg("stdout", "f", "$node : stop at stage '$::STATE_DESC{$failed_node{$node}{provision_stop_point}}'");
+                if ($failed_node{$node}{provision_stop_point} < $::STATE_POSTSCRIPT) {
+                    foreach my $node_error (@{ $node_state_ref->{$node}{errors}{$failed_node{$node}{provision_stop_point}} }) {
+                        probe_utils->send_msg("stdout", "d", "$space_str $node_error");
+                    }
+                } else {
+                    # if stop at postscript or after postscript, check whether has error from postscript, if has print
+                    for (my $stage = $::STATE_POSTSCRIPT; $stage <= $::STATE_COMPLETED; $stage++) {
+                        foreach my $node_error (@{ $node_state_ref->{$node}{errors}{$stage} }) {
+                             probe_utils->send_msg("stdout", "d", "$space_str $node_error");
+                        }
+                    }
+                }
            }
        }
    } else {
@@ -548,7 +567,7 @@ Start capturing every message during OS provision process....
    my %fd_filetype_map;

    {    #a very important brace to hold a code block
-        my $log_parse         = LogParse->new($verbose);
+        my $log_parse         = LogParse->new($verbose, $::MONITOR);
        my $candidate_log_ref = $log_parse->obtain_log_file_list();

        #open candidate log file to obtain realtime log
@@ -718,7 +737,7 @@ sub handle_dhcp_msg {
        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Receive DHCPDISCOVER via $nic";
-            probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor);
+            probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
            push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
        }
    } elsif ($log_ref->{msg} =~ /DHCPOFFER\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/i) {
@@ -729,7 +748,7 @@ sub handle_dhcp_msg {
        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Send DHCPOFFER on $ip back to $mac via $nic";
-            probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor);
+            probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
            push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
        }
    } elsif ($log_ref->{msg} =~ /DHCPREQUEST\s+for\s+(.+)\s+[\(\)0-9\.]*\s*from\s+(.+)\s+via\s+(.+)/) {
@@ -740,7 +759,7 @@ sub handle_dhcp_msg {
        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = $log_ref->{msg};
-            probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor);
+            probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
            push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
        }
    } elsif ($log_ref->{msg} =~ /DHCPACK\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/) {
@@ -751,7 +770,7 @@ sub handle_dhcp_msg {
        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Send DHCPACK on $ip back to $mac via $nic";
-            probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor);
+            probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
            push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);

            if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) {
@@ -770,7 +789,7 @@ sub handle_dhcp_msg {
        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Receive BOOTREQUEST from $mac via $nic";
-            probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor);
+            probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
            push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
        }
    } elsif ($log_ref->{msg} =~ /BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) {
@@ -781,7 +800,7 @@ sub handle_dhcp_msg {
        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Send BOOTREPLY on $ip back to $mac via $nic";
-            probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor);
+            probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
            push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);

            if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) {
@@ -817,7 +836,7 @@ sub handle_tftp_msg {
        my $file   = $2;
        my $record = "Via TFTP download $file";
        if (exists($node_state_ref->{ $ipnodemap{$ip} })) {
-            probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $record") if ($monitor);
+            probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $log_ref->{time_record} $record") if ($monitor);
            push(@{ $node_state_ref->{ $ipnodemap{$ip} }{log} }, $log_ref->{msg}) if ($debug);

            if ($file =~ /xcat\/xnba.*/i or $file =~ /\/boot\/grub2\/powerpc-ieee1275\//i or $file =~ /\/yb\/node\/yaboot\-/i) {
@@ -852,21 +871,26 @@ sub handle_http_msg {

    if (exists($node_state_ref->{ $ipnodemap{$ip} })) {

-        if ($log_ref->{msg} =~ /GET\s+(.+)\s+HTTP.+/ or $log_ref->{msg} =~ /HEAD\s+(.+)\s+HTTP.+/) {
+        if ($log_ref->{msg} =~ /"GET\s+(.+)\s+HTTP.+" (\d+)/ or $log_ref->{msg} =~ /"HEAD\s+(.+)\s+HTTP.+" (\d+)/) {
            my $file   = $1;
+            my $http_code = $2;
            my $record = "Via HTTP get $file";

-            probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $record") if ($monitor);
+            probe_utils->send_msg("stdout", "d", "[$ipnodemap{$ip}] $log_ref->{time_record} $record") if ($monitor);
            push(@{ $node_state_ref->{ $ipnodemap{$ip} }{log} }, $log_ref->{msg}) if ($debug);

            if ($file =~ /vmlinuz|inst64/i or ($file =~ /linux/i and $file =~ /osimage/i)) {
                set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KERNEL);
+                push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KERNEL} }, "$record failed with $http_code") if ($http_code >= 400);
            } elsif ($file =~ /initrd/i and $file =~ /osimage/i) {
                set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INITRD);
+                push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INITRD} }, "$record failed with $http_code") if ($http_code >= 400);
            } elsif (($file =~ /^\/install\/autoinst\//i) and ($file !~ /getinstdisk$/i) and ($file !~ /\.pre$/i) and ($file !~ /\.post$/i)) {
                set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KICKSTART);
+                push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KICKSTART} }, "$record failed with $http_code") if ($http_code >= 400);
            } elsif ($file =~ /\.deb$/i or $file =~ /\/Packages\/.+\.rpm$/ or $file =~ /\/suse\/noarch\/.+\.rpm$/i) {
                set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INSTALLRPM);
+                push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INSTALLRPM} }, "$record failed with $http_code") if ($http_code >= 400);
            }
        }
    }
@@ -897,11 +921,11 @@ sub handle_cluster_msg {
        my $nodes_str   = $split_log[4];
        my $sub_command = $split_log[5];

-        if ($command eq "rinstall" or $command eq "rnetboot" or ($command eq "rpower" and $sub_command =~ /on|boot|reset/)) {
+        if ($command eq "rinstall" or $command eq "rnetboot" or ($command eq "rpower" and $sub_command =~ /on|boot|reset/) or ($command eq "xdsh" and $log_msg =~ /reboot|shutdown -r/)) {
            my @nodes = probe_utils->parse_node_range($nodes_str);
            foreach my $node (@nodes) {
                if (exists $node_state_ref->{$node}) {
-                    probe_utils->send_msg("stdout", "d", "[$node] Use command $command to reboot node $node") if ($monitor);
+                    probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} Use command $command to reboot node $node") if ($monitor);
                    push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
                    set_node_state($node_state_ref, $node, $::STATE_POWER_ON);
                }
@@ -915,7 +939,7 @@ sub handle_cluster_msg {

        foreach my $node (@split_node) {
            if (exists $node_state_ref->{$node}) {
-                probe_utils->send_msg("stdout", "d", "[$node] $record") if ($monitor);
+                probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
                push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);

                if ($status eq "installing") {
@@ -951,16 +975,36 @@ sub handle_compute_msg {
    my $node           = $log_ref->{sender};

    if (exists $node_state_ref->{$node}) {
-        probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{msg}") if ($monitor);
+        probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $log_ref->{msg}") if ($monitor);
        push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
        if ($log_ref->{msg} =~ /Running postscript:/i) {
            set_node_state($node_state_ref, $node, $::STATE_POSTSCRIPT);
+        } elsif ($log_ref->{msg} =~ /postscript (.+) return with (\d+)/) {
+            my $script_name = $1;
+            my $return_code = $2;
+            if ($return_code != 0) {
+                my $error_str = "postscript $script_name return with $return_code";
+                # when monitor, will show 2 same messages, so filter to show only one
+                unless (grep {$_ eq $error_str} @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }) {
+                    push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }, $error_str;
+                }
+            }
        } elsif ($log_ref->{msg} =~ /Running postbootscript:/i) {
            set_node_state($node_state_ref, $node, $::STATE_POSTBOOTSCRIPT);
+        } elsif ($log_ref->{msg} =~ /postbootscript (.+) return with (\d+)/) {
+            my $script_name = $1;
+            my $return_code = $2;
+            if ($return_code != 0) {
+                my $error_str = "postbootscript $script_name return with $return_code";
+                # when monitor, will show 2 same messages, so filter to show only one
+                unless (grep {$_ eq $error_str} @{ $node_state_ref->{$node}{errors}{$::STATE_POSTBOOTSCRIPT} }) {
+                    push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTBOOTSCRIPT} }, $error_str;
+                }
+            }
        } elsif ($log_ref->{msg} =~ /provision completed/) {
            set_node_state($node_state_ref, $node, $::STATE_COMPLETED);
            $node_state_ref->{$node}{done} = 1;
-            probe_utils->send_msg("stdout", "o", "[$node] provision completed") if ($monitor);
+            probe_utils->send_msg("stdout", "o", "[$node] $log_ref->{time_record} provision completed") if ($monitor);
        }
    }
 }
@@ -1021,6 +1065,7 @@ sub set_node_state {
    if ($newstate == $::STATE_POWER_ON) {
        push @{ $node_state_ref->{$node}{allstatehistory} }, @{ $node_state_ref->{$node}{statehistory} };
        @{ $node_state_ref->{$node}{statehistory} } = ();
+        %{ $node_state_ref->{$node}{errors} } = ();
        push @{ $node_state_ref->{$node}{statehistory} }, $newstate;
    } else {
        my $index = @{ $node_state_ref->{$node}{statehistory} } - 1;
--- a/xCAT-probe/xcatprobe
+++ b/xCAT-probe/xcatprobe
@@ -141,7 +141,10 @@ sub format_cmd_output {

        if ($flag) {
            my $leftspace = $maxlen - length($finalmsg[$i]);
-            my $spacenum = (($leftspace >= $flaglen) ? ($leftspace - $flaglen) : ($screenwidth - length($finalmsg[$i]) + $maxlen - $flaglen));
+            my $spacenum = 0;
+            if($flag !~ /debug/i) {
+                $spacenum = (($leftspace >= $flaglen) ? ($leftspace - $flaglen) : ($screenwidth - length($finalmsg[$i]) + $maxlen - $flaglen));
+            }
            my $spacestr = " " x $spacenum;
            print "$finalmsg[$i]$spacestr";