diff --git a/xCAT-probe/lib/perl/probe_global_constant.pm b/xCAT-probe/lib/perl/probe_global_constant.pm
index db83a833d..454d58f36 100644
--- a/xCAT-probe/lib/perl/probe_global_constant.pm
+++ b/xCAT-probe/lib/perl/probe_global_constant.pm
@@ -27,12 +27,14 @@ $::STATE_BOOTLODER = 4;
 $::STATE_KERNEL = 5;
 $::STATE_INITRD = 6;
 $::STATE_KICKSTART = 7;
-$::STATE_INSTALLING = 8;
-$::STATE_INSTALLRPM = 9;
-$::STATE_POSTSCRIPT = 10;
-$::STATE_BOOTING = 11;
-$::STATE_POSTBOOTSCRIPT = 12;
-$::STATE_COMPLETED = 13;
+$::STATE_NETBOOTING = 8;
+$::STATE_ROOTIMG = 9;
+$::STATE_INSTALLING = 10;
+$::STATE_INSTALLRPM = 11;
+$::STATE_POSTSCRIPT = 12;
+$::STATE_BOOTING = 13;
+$::STATE_POSTBOOTSCRIPT = 14;
+$::STATE_COMPLETED = 15;
 
 #The description of every important stage of provision process
 %::STATE_DESC = (
@@ -77,5 +79,7 @@ $::STATE_DISCOVER_COMPLETED = 10;
     $::STATE_DISCOVER_COMPLETED => "discovery_complete",
 );
 
+$::DISKFUL = 1;
+$::DISKLESS = 2;
 
 1;
diff --git a/xCAT-probe/lib/perl/probe_utils.pm b/xCAT-probe/lib/perl/probe_utils.pm
index 99b59e46b..2400212c2 100644
--- a/xCAT-probe/lib/perl/probe_utils.pm
+++ b/xCAT-probe/lib/perl/probe_utils.pm
@@ -550,4 +550,33 @@ sub is_ntp_ready{
     return 0;
 }
 
+#------------------------------------------
+
+=head3
+    Description:
+        Convert a duration in seconds to a clock-style string
+    Arguments:
+        second_in : the duration in seconds (undefined or negative is treated as 0)
+    Returns:
+        hh:mm:ss, hours are zero padded to two digits and never wrap (e.g. 100:00:00)
+=cut
+
+#------------------------------------------
+sub convert_second_to_time {
+    my $second_in = shift;
+
+    # Also support the probe_utils->convert_second_to_time(...) calling style
+    $second_in = shift if (($second_in) && ($second_in =~ /probe_utils/));
+
+    # A missing or negative duration used to be rendered as garbage such as 99:59:55
+    $second_in = 0 if (!defined($second_in) || $second_in < 0);
+    $second_in = int($second_in);
+
+    my $hours   = int($second_in / 3600);
+    my $minutes = int(($second_in % 3600) / 60);
+    my $seconds = $second_in % 60;
+
+    return sprintf("%02d:%02d:%02d", $hours, $minutes, $seconds);
+}
+
 1;
diff --git a/xCAT-probe/subcmds/osdeploy b/xCAT-probe/subcmds/osdeploy
index 16a33f75c..c04c3af27 100755
--- a/xCAT-probe/subcmds/osdeploy
+++ b/xCAT-probe/subcmds/osdeploy
@@ -47,8 +47,8 @@ my $debug = 0;
 my $program_name = basename("$0");
 $::USAGE = "Usage:
     $program_name -h
-    $program_name -n [-t ] [-V]
-    $program_name -n -r [-V]
+    $program_name -n [-t ] [-p ] [-V]
+    $program_name -n -r [-p ] [-V]
 
 Description:
     Probe operating system provision process. Supports two modes - 'Realtime monitor' and 'Replay history'.
@@ -65,6 +65,9 @@ Options:
     -r : Trigger 'Replay history' mode. Follow the duration of rolling back. Units are 'h' (hour) or 'm' (minute)
          Supported format examples: 3h30m (3 hours and 30 minutes ago), 2h (2 hours ago), 40m (40 minutes ago) and 3 (3 hours ago).
          If unit is not specified, hour will be used by default.
+    -p : Performance of provision for each node and all.
+         Supported level: 1 (show how much time spent for provision),
+         2 (show how much time spent for DHCP, Download RPM packages, Run Postscripts, Run Postbootscripts).
 ";
 
 
@@ -82,6 +85,7 @@ if (
     !GetOptions("--help|h|?" => \$help,
         "T" => \$test,
         "V" => \$verbose,
+        "p=s" => \$performance,
         "t=s" => \$maxwaittime,
         "r=s" => \$rollforward_time_of_replay,
         "n=s" => \$noderange))
@@ -107,6 +111,11 @@ unless ($noderange) {
     exit 1;
 }
 
+my $command_input = "xcatprobe -w $program_name";
+$command_input .= " -n $noderange" if ($noderange);
+$command_input .= " -p $performance" if ($performance);
+$command_input .= " -V" if ($verbose);
+
 if ($rollforward_time_of_replay) {
     if (($rollforward_time_of_replay !~ /(\d+)h(\d+)m/i) && ($rollforward_time_of_replay !~ /^(\d+)h*$/i) && ($rollforward_time_of_replay !~ /^(\d+)m$/i)) {
         probe_utils->send_msg("stdout", "f", "Unsupported time format for option '-r'");
@@ -127,6 +136,10 @@ if ($debug) {
 #if failed to pass pre-check, exit directly
 exit $rst if ($rst);
 
+# record every status's start time and end time for each node
+# $node_status_time{$node}{$status}{start_time} = $start_time;
+my %node_status_time = ();
+my $provision_type = $::DISKFUL;
 
 if ($rollforward_time_of_replay) {
     $monitor = 0;
@@ -468,6 +481,7 @@ sub conclusion_report {
         my $start_rpower = 0;
         my $isntalling = 0;
         my $postbootscript = 0;
+        my $completed = 0;
 
         #calculate node provision result
         #the max value of all state is the final stop stage
@@ -476,6 +490,7 @@ sub conclusion_report {
             $start_rpower = 1 if ($_ == $::STATE_POWER_ON);
             $isntalling = 1 if ($_ == $::STATE_INSTALLING);
             $postbootscript = 1 if ($_ == $::STATE_POSTBOOTSCRIPT);
+            $completed = 1 if ($_ == $::STATE_COMPLETED);
         }
 
         # Cover limited non-privision error
@@ -484,16 +499,11 @@ sub conclusion_report {
         # 2 When replay, if there isn't reboot operation for target node during the rollback time window
         # That means there isn't provision process happened
 
-        if ($monitor) {
-            if (!$isntalling && $postbootscript) {
-                $failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk";
-                next;
-            }
-        } else {
+        unless ($monitor) {
             if (! $start_rpower) {
                 $failed_node{$node}{non_provision_prediction} = "Without provision process during rollback time window";
                 next;
-            } elsif (!$isntalling && $postbootscript) {
+            } elsif (!$isntalling && $postbootscript && !$completed) {
                 $failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk";
                 next;
             }
@@ -505,7 +515,9 @@ sub conclusion_report {
         }
     }
 
+    my $is_success = 1;
     if (%failed_node) {
+        $is_success = 0;
         my $failed_node_num = keys %failed_node;
         if ($failed_node_num > 1) {
             probe_utils->send_msg("stdout", "d", "There are $failed_node_num node provision failures");
@@ -538,11 +550,144 @@ sub conclusion_report {
     } else {
         probe_utils->send_msg("stdout", "o", "All nodes provisioned successfully");
     }
+
+    performance_calculation($is_success) if ($performance);
+
     return 0;
 }
 
 #------------------------------------------
 
+=head3
+    Description:
+        Calculate and print the time spent on provision for each node and for all nodes
+    Arguments:
+        is_success: 1 if all nodes were provisioned successfully, 0 otherwise
+        The global $performance selects the columns: 1 = total time; 2 = DHCP, RPM, POST, POSTBOOT; 3 = reserved
+    Returns:
+        None (the report is printed to stdout)
+=cut
+
+#------------------------------------------
+sub performance_calculation {
+    my $is_success = shift;
+
+    return if ((keys %node_status_time == 1 and $node_status_time{all}) or !%node_status_time);
+
+    probe_utils->send_msg("stdout", "", "-----------node provision performance-----------");
+
+    my %all_spend_time = ();
+    my @status_for_time = ();
+    my $warninfo;
+    my $isnull = 0;
+
+    my $length_node = 20;
+    my $space = " " x ($length_node - 4);
+
+    # print table's first line
+    # @status_for_time: the status that needed to calculate time
+    if ($provision_type == $::DISKFUL and $performance == 3) {
+        print "NODE $space DHCP BOOTLOADER KERNEL INITRD KICKSTART RPM POST POSTBOOT COMPLETED\n";
+        @status_for_time = ($::STATE_DHCP, $::STATE_BOOTLODER, $::STATE_KERNEL, $::STATE_INITRD, $::STATE_KICKSTART, $::STATE_INSTALLRPM, $::STATE_POSTSCRIPT, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
+    } elsif ($provision_type == $::DISKFUL and $performance == 2) {
+        print "NODE $space DHCP RPM POST POSTBOOT COMPLETED\n";
+        @status_for_time = ($::STATE_DHCP, $::STATE_INSTALLRPM, $::STATE_POSTSCRIPT, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
+    } elsif ($provision_type == $::DISKLESS and $performance == 3) {
+        print "NODE $space DHCP BOOTLOADER KERNEL INITRD NETBOOTING ROOTIMG POSTBOOT COMPLETED\n";
+        @status_for_time = ($::STATE_DHCP, $::STATE_BOOTLODER, $::STATE_KERNEL, $::STATE_INITRD, $::STATE_NETBOOTING, $::STATE_ROOTIMG, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
+    } else {    # level 1, and any unsupported level/type pair, reports total time only
+        print "NODE $space COMPLETED\n";
+        @status_for_time = ($::STATE_COMPLETED);
+    }
+
+    # calculate time for each node and status
+    foreach my $node (keys %node_status_time) {
+        next if ($node eq "all");
+
+        # if did not get start time of power on, use osdeploy start time and show warning info
+        my $estimated_start = 0;    # per node, so one inexact node does not blank out the others
+        if ((!$node_status_time{$node}{$::STATE_POWER_ON}{start_time}) and $node_status_time{all}) {
+            $node_status_time{$node}{$::STATE_POWER_ON}{start_time} = $node_status_time{all};
+            $warninfo = "Did not get start time from log during running, use xcatprobe start time";
+            $estimated_start = 1;
+        }
+
+        if ($all_spend_time{start_time}) {
+            $all_spend_time{start_time} = ($all_spend_time{start_time} <= $node_status_time{$node}{$::STATE_POWER_ON}{start_time}) ? $all_spend_time{start_time} : $node_status_time{$node}{$::STATE_POWER_ON}{start_time};
+        } else {
+            $all_spend_time{start_time} = $node_status_time{$node}{$::STATE_POWER_ON}{start_time};
+        }
+
+        if ($node_status_time{$node}{$::STATE_COMPLETED}{start_time}) {
+            $all_spend_time{end_time} = ($all_spend_time{end_time} >= $node_status_time{$node}{$::STATE_COMPLETED}{start_time}) ? $all_spend_time{end_time} : $node_status_time{$node}{$::STATE_COMPLETED}{start_time};
+        }
+
+        my @timeinfo = ();
+        foreach my $status (@status_for_time) {
+            my $tmp_status;
+            my $tmp_detail_status;
+            if ($performance == 2 and $provision_type == $::DISKFUL) {
+                $tmp_detail_status = $::STATE_DHCP;
+            } else {
+                $tmp_detail_status = $::STATE_COMPLETED;
+            }
+
+            # for some status, use time of power on as status's start time
+            if ($status <= $tmp_detail_status or $status == $::STATE_COMPLETED) {
+                $tmp_status = $::STATE_POWER_ON;
+                if ($estimated_start) {
+                    push @timeinfo, "NULL* ";
+                    $isnull = 1;
+                    $all_spend_time{$status}{rst} = "NULL* ";
+                    next;
+                }
+            } else {
+                $tmp_status = $status;
+            }
+
+            my $tmp_end_time = $node_status_time{$node}{$status}{end_time};
+            $tmp_end_time = $node_status_time{$node}{$status}{start_time} if ($status != $::STATE_DHCP and $status != $::STATE_INSTALLRPM and $status != $::STATE_POSTSCRIPT and $status != $::STATE_POSTBOOTSCRIPT and $node_status_time{$node}{$status}{start_time});
+
+            if ($node_status_time{$node}{$tmp_status}{start_time} && $tmp_end_time) {
+                if ($all_spend_time{$status}{start_time}) {
+                    $all_spend_time{$status}{start_time} = ($all_spend_time{$status}{start_time} <= $node_status_time{$node}{$tmp_status}{start_time}) ? $all_spend_time{$status}{start_time} : $node_status_time{$node}{$tmp_status}{start_time};
+                } else {
+                    $all_spend_time{$status}{start_time} = $node_status_time{$node}{$tmp_status}{start_time};
+                }
+                $all_spend_time{$status}{end_time} = ($all_spend_time{$status}{end_time} >= $tmp_end_time) ? $all_spend_time{$status}{end_time} : $tmp_end_time;
+                push @timeinfo, probe_utils->convert_second_to_time($tmp_end_time - $node_status_time{$node}{$tmp_status}{start_time});
+            } else {
+                push @timeinfo, "NULL* ";
+                $all_spend_time{$status}{rst} = "NULL* ";
+                $isnull = 1;
+            }
+        }
+
+        my $time_str = join(" ", @timeinfo);
+        $space = " " x ($length_node - length($node));
+        print "$node $space $time_str\n";
+    }
+
+    my @time_rst = ();
+    foreach my $status (@status_for_time) {
+        if (defined($all_spend_time{$status}{rst})) {
+            push @time_rst, $all_spend_time{$status}{rst};
+        } else {
+            my $status_time = probe_utils->convert_second_to_time($all_spend_time{$status}{end_time} - $all_spend_time{$status}{start_time});
+            push @time_rst, $status_time;
+        }
+    }
+
+    my $time_str = join(" ", @time_rst);
+    $space = " " x ($length_node - 3);
+    print "ALL $space $time_str\n";
+    probe_utils->send_msg("stdout", "", "------------------------------------------------");
+    probe_utils->send_msg("stdout", "", "$warninfo") if ($warninfo);
+    probe_utils->send_msg("stdout", "", "* : Did not get correct time, please run '$command_input -r xxhxxm' to get correct time") if ($isnull and $is_success);
+}
+
+#------------------------------------------
+
 =head3
     Description:
         Implement the monitor feature
@@ -573,6 +718,8 @@ Start capturing every message during OS provision process....
 ";
     probe_utils->send_msg("stdout", "", "$startline");
 
+    $node_status_time{all} = time();
+
     my @openfilepids;
     my @openfilefds;
     my %fd_filetype_map;
@@ -766,7 +913,7 @@ sub handle_dhcp_msg {
         my $ip = $1;
         my $mac = $2;
         my $nic = $3;
-
+
         if (exists $macmap{$mac}) {
             my $node = $macmap{$mac}{"node"};
             my $record = $log_ref->{msg};
@@ -793,6 +940,7 @@ sub handle_dhcp_msg {
                 $ipnodemap{$ip} = $node;
 
                 set_node_state($node_state_ref, $node, $::STATE_DHCP);
+                $node_status_time{$node}{$::STATE_DHCP}{end_time} = $log_ref->{time} if ($performance and !$node_status_time{$node}{$::STATE_DHCP}{end_time});
             }
         } elsif ($log_ref->{msg} =~ /BOOTREQUEST\s+from\s+(.+)\s+via\s+([^:]+)(.*)/) {
             my $mac = $1;
@@ -802,6 +950,7 @@ sub handle_dhcp_msg {
                 my $record = "Receive BOOTREQUEST from $mac via $nic";
                 probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
                 push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
+                $node_status_time{$node}{$::STATE_DHCP}{start_time} = $log_ref->{time} if ($performance);
             }
         } elsif ($log_ref->{msg} =~ /BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) {
             my $ip = $1;
@@ -821,6 +970,7 @@ sub handle_dhcp_msg {
                 $ipnodemap{$ip} = $node;
 
                 set_node_state($node_state_ref, $node, $::STATE_DHCP);
+                $node_status_time{$node}{$::STATE_DHCP}{end_time} = $log_ref->{time} if ($performance);
             }
         }
     }
@@ -852,10 +1002,13 @@ sub handle_tftp_msg {
 
         if ($file =~ /xcat\/xnba.*/i or $file =~ /\/boot\/grub2\/powerpc-ieee1275\//i or $file =~ /\/yb\/node\/yaboot\-/i) {
             set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_BOOTLODER);
+            $node_status_time{$ipnodemap{$ip}}{$::STATE_BOOTLODER}{start_time} = $log_ref->{time} if ($performance);
         } elsif ($file =~ /vmlinuz|inst64|linux/) {
             set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KERNEL);
+            $node_status_time{$ipnodemap{$ip}}{$::STATE_KERNEL}{start_time} = $log_ref->{time} if ($performance);
         } elsif ($file =~ /initrd/i) {
             set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INITRD);
+            $node_status_time{$ipnodemap{$ip}}{$::STATE_INITRD}{start_time} = $log_ref->{time} if ($performance);
         }
     }
 }
@@ -892,16 +1045,29 @@ sub handle_http_msg {
 
         if ($file =~ /vmlinuz|inst64/i or ($file =~ /linux/i and $file =~ /osimage/i)) {
             set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KERNEL);
+            $node_status_time{$ipnodemap{$ip}}{$::STATE_KERNEL}{start_time} = $log_ref->{time} if ($performance);
             push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KERNEL} }, "$record failed with $http_code") if ($http_code >= 400);
         } elsif ($file =~ /initrd/i and $file =~ /osimage/i) {
             set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INITRD);
+            $node_status_time{$ipnodemap{$ip}}{$::STATE_INITRD}{start_time} = $log_ref->{time} if ($performance);
             push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INITRD} }, "$record failed with $http_code") if ($http_code >= 400);
         } elsif (($file =~ /^\/install\/autoinst\//i) and ($file !~ /getinstdisk$/i) and ($file !~ /\.pre$/i) and ($file !~ /\.post$/i)) {
             set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KICKSTART);
+            $node_status_time{$ipnodemap{$ip}}{$::STATE_KICKSTART}{start_time} = $log_ref->{time} if ($performance);
             push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KICKSTART} }, "$record failed with $http_code") if ($http_code >= 400);
         } elsif ($file =~ /\.deb$/i or $file =~ /\/Packages\/.+\.rpm$/ or $file =~ /\/suse\/noarch\/.+\.rpm$/i) {
             set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INSTALLRPM);
+            if ($performance) {
+                if ($node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{start_time}) {
+                    $node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{end_time} = $log_ref->{time};
+                } else {
+                    $node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{start_time} = $log_ref->{time};
+                }
+            }
             push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INSTALLRPM} }, "$record failed with $http_code") if ($http_code >= 400);
+        } elsif ($file =~ /rootimg/) {
+            set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_ROOTIMG);
+            $node_status_time{$ipnodemap{$ip}}{$::STATE_ROOTIMG}{start_time} = $log_ref->{time} if ($performance);
         }
     }
 }
@@ -939,6 +1105,7 @@ sub handle_cluster_msg {
                 probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} Use command $command to reboot node $node") if ($monitor);
                 push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
                 set_node_state($node_state_ref, $node, $::STATE_POWER_ON);
+                $node_status_time{$node}{$::STATE_POWER_ON}{start_time} = $log_ref->{time} if ($performance);
             }
         }
     }
@@ -955,10 +1122,16 @@ sub handle_cluster_msg {
 
         if ($status eq "installing") {
             set_node_state($node_state_ref, $node, $::STATE_INSTALLING);
+            $node_status_time{$node}{$::STATE_INSTALLING}{start_time} = $log_ref->{time} if ($performance);
         } elsif ($status eq "powering-on") {
             set_node_state($node_state_ref, $node, $::STATE_POWERINGON);
+            $node_status_time{$node}{$::STATE_POWERINGON}{start_time} = $log_ref->{time} if ($performance);
         } elsif ($status eq "booting") {
             set_node_state($node_state_ref, $node, $::STATE_BOOTING);
+            $node_status_time{$node}{$::STATE_BOOTING}{start_time} = $log_ref->{time} if ($performance);
+        } elsif ($status eq "netbooting") {
+            set_node_state($node_state_ref, $node, $::STATE_NETBOOTING);
+            $node_status_time{$node}{$::STATE_NETBOOTING}{start_time} = $log_ref->{time} if ($performance);
         } elsif ($status eq "failed") {
             $node_state_ref->{$node}{done} = 1;
         }
@@ -990,6 +1163,11 @@ sub handle_compute_msg {
     push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
     if ($log_ref->{msg} =~ /Running postscript:/i) {
         set_node_state($node_state_ref, $node, $::STATE_POSTSCRIPT);
+        if ($performance) {
+            if (!$node_status_time{$node}{$::STATE_POSTSCRIPT}{start_time}) {
+                $node_status_time{$node}{$::STATE_POSTSCRIPT}{start_time} = $log_ref->{time};
+            }
+        }
     } elsif ($log_ref->{msg} =~ /postscript (.+) return with (\d+)/) {
         my $script_name = $1;
         my $return_code = $2;
@@ -1000,8 +1178,14 @@ sub handle_compute_msg {
                 push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }, $error_str;
             }
         }
+        $node_status_time{$node}{$::STATE_POSTSCRIPT}{end_time} = $log_ref->{time} if ($performance);
     } elsif ($log_ref->{msg} =~ /Running postbootscript:/i) {
         set_node_state($node_state_ref, $node, $::STATE_POSTBOOTSCRIPT);
+        if ($performance) {
+            if (!$node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{start_time}) {
+                $node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{start_time} = $log_ref->{time};
+            }
+        }
     } elsif ($log_ref->{msg} =~ /postbootscript (.+) return with (\d+)/) {
         my $script_name = $1;
         my $return_code = $2;
@@ -1012,10 +1196,12 @@ sub handle_compute_msg {
                 push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTBOOTSCRIPT} }, $error_str;
             }
         }
+        $node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{end_time} = $log_ref->{time} if ($performance);
     } elsif ($log_ref->{msg} =~ /provision completed/) {
         set_node_state($node_state_ref, $node, $::STATE_COMPLETED);
         $node_state_ref->{$node}{done} = 1;
         probe_utils->send_msg("stdout", "o", "[$node] $log_ref->{time_record} provision completed") if ($monitor);
+        $node_status_time{$node}{$::STATE_COMPLETED}{start_time} = $log_ref->{time} if ($performance);
     }
 }
 
@@ -1078,6 +1264,7 @@ sub set_node_state {
         @{ $node_state_ref->{$node}{statehistory} } = ();
         %{ $node_state_ref->{$node}{errors} } = ();
         push @{ $node_state_ref->{$node}{statehistory} }, $newstate;
+        $node_status_time{$node} = {};
     } else {
         my $index = @{ $node_state_ref->{$node}{statehistory} } - 1;