2
0
mirror of https://github.com/xcat2/xcat-core.git synced 2025-06-14 18:30:23 +00:00

xcatprobe osdeploy performance

This commit is contained in:
XuWei
2017-01-23 00:46:26 -05:00
parent 4c72fee543
commit 97ca01394d
3 changed files with 255 additions and 16 deletions

View File

@ -27,12 +27,14 @@ $::STATE_BOOTLODER = 4;
$::STATE_KERNEL = 5;
$::STATE_INITRD = 6;
$::STATE_KICKSTART = 7;
$::STATE_INSTALLING = 8;
$::STATE_INSTALLRPM = 9;
$::STATE_POSTSCRIPT = 10;
$::STATE_BOOTING = 11;
$::STATE_POSTBOOTSCRIPT = 12;
$::STATE_COMPLETED = 13;
$::STATE_NETBOOTING = 8;
$::STATE_ROOTIMG = 9;
$::STATE_INSTALLING = 10;
$::STATE_INSTALLRPM = 11;
$::STATE_POSTSCRIPT = 12;
$::STATE_BOOTING = 13;
$::STATE_POSTBOOTSCRIPT = 14;
$::STATE_COMPLETED = 15;
#The description of every important stage of provision process
%::STATE_DESC = (
@ -77,5 +79,7 @@ $::STATE_DISCOVER_COMPLETED = 10;
$::STATE_DISCOVER_COMPLETED => "discovery_complete",
);
$::DISKFUL = 1;
$::DISKLESS = 2;
1;

View File

@ -550,4 +550,52 @@ sub is_ntp_ready{
return 0;
}
#------------------------------------------

=head3
    Description:
        Convert a duration in seconds to an "hh:mm:ss" string.
        Minutes and seconds are always two digits. Hours are zero-padded to
        at least two digits and widen as needed (e.g. 360000 -> "100:00:00")
        instead of wrapping around at 100.
        May be called as a plain function or as a class method
        (probe_utils->convert_second_to_time($seconds)).
    Arguments:
        second_in : the duration in seconds. Fractional parts are truncated.
                    Undefined, empty or zero input yields "00:00:00".
                    A negative duration is rendered with a leading "-".
    Returns:
        The formatted string, e.g. "01:01:01" for 3661.
=cut

#------------------------------------------
sub convert_second_to_time {
    my $second_in = shift;

    # When invoked as a class method the first argument is the package name;
    # discard it and take the real value from the next argument.
    $second_in = shift if (($second_in) && ($second_in =~ /probe_utils/));

    # Undefined, empty and zero input all describe a zero-length duration.
    return "00:00:00" unless ($second_in);

    my $total = int($second_in);

    # Format the magnitude and re-attach the sign afterwards, otherwise Perl's
    # modulo (result takes the sign of the right operand) produces nonsense.
    my $sign = "";
    if ($total < 0) {
        $sign  = "-";
        $total = -$total;
    }

    my $hours   = int($total / 3600);
    my $minutes = int(($total % 3600) / 60);
    my $seconds = $total % 60;

    return sprintf("%s%02d:%02d:%02d", $sign, $hours, $minutes, $seconds);
}
1;

View File

@ -47,8 +47,8 @@ my $debug = 0;
my $program_name = basename("$0");
$::USAGE = "Usage:
$program_name -h
$program_name -n <node_range> [-t <max_waiting_time>] [-V]
$program_name -n <node_range> -r <roll_back_duration> [-V]
$program_name -n <node_range> [-t <max_waiting_time>] [-p <level>] [-V]
$program_name -n <node_range> -r <roll_back_duration> [-p <level>] [-V]
Description:
Probe operating system provision process. Supports two modes - 'Realtime monitor' and 'Replay history'.
@ -65,6 +65,9 @@ Options:
-r : Trigger 'Replay history' mode. Follow the duration of rolling back. Units are 'h' (hour) or 'm' (minute)
Supported format examples: 3h30m (3 hours and 30 minutes ago), 2h (2 hours ago), 40m (40 minutes ago) and 3 (3 hours ago).
If unit is not specified, hour will be used by default.
-p : Performance of provision for each node and all.
Supported level: 1 (show how much time spent for provision),
2 (show how much time spent for DHCP, Download RPM packages, Run Postscripts, Run Postbootscripts).
";
@ -82,6 +85,7 @@ if (
!GetOptions("--help|h|?" => \$help,
"T" => \$test,
"V" => \$verbose,
"p=s" => \$performance,
"t=s" => \$maxwaittime,
"r=s" => \$rollforward_time_of_replay,
"n=s" => \$noderange))
@ -107,6 +111,11 @@ unless ($noderange) {
exit 1;
}
my $command_input = "xcatprobe -w $program_name";
$command_input .= " -n $noderange" if ($noderange);
$command_input .= " -p $performance" if ($performance);
$command_input .= " -V" if ($verbose);
if ($rollforward_time_of_replay) {
if (($rollforward_time_of_replay !~ /(\d+)h(\d+)m/i) && ($rollforward_time_of_replay !~ /^(\d+)h*$/i) && ($rollforward_time_of_replay !~ /^(\d+)m$/i)) {
probe_utils->send_msg("stdout", "f", "Unsupported time format for option '-r'");
@ -127,6 +136,10 @@ if ($debug) {
#if failed to pass pre-check, exit directly
exit $rst if ($rst);
# record every status's start time and end time for each node
# $node_status_time{$node}{$status}{start_time} = $start_time;
# Declared unconditionally: "my ... if (COND)" has undefined behaviour in Perl,
# and the hash is written to elsewhere in this file whether or not -p was given.
my %node_status_time = ();

# Provision type used to pick the columns of the performance table.
my $provision_type = $::DISKFUL;
if ($rollforward_time_of_replay) {
$monitor = 0;
@ -468,6 +481,7 @@ sub conclusion_report {
my $start_rpower = 0;
my $isntalling = 0;
my $postbootscript = 0;
my $completed = 0;
#calculate node provision result
#the max value of all state is the final stop stage
@ -476,6 +490,7 @@ sub conclusion_report {
$start_rpower = 1 if ($_ == $::STATE_POWER_ON);
$isntalling = 1 if ($_ == $::STATE_INSTALLING);
$postbootscript = 1 if ($_ == $::STATE_POSTBOOTSCRIPT);
$completed = 1 if ($_ == $::STATE_COMPLETED)
}
# Cover limited non-provision errors
@ -484,16 +499,11 @@ sub conclusion_report {
# 2 When replay, if there isn't reboot operation for target node during the rollback time window
# That means there isn't provision process happened
if ($monitor) {
if (!$isntalling && $postbootscript) {
$failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk";
next;
}
} else {
unless ($monitor) {
if (! $start_rpower) {
$failed_node{$node}{non_provision_prediction} = "Without provision process during rollback time window";
next;
} elsif (!$isntalling && $postbootscript) {
} elsif (!$isntalling && $postbootscript && !$completed) {
$failed_node{$node}{non_provision_prediction} = "Target node just reboot from disk";
next;
}
@ -505,7 +515,9 @@ sub conclusion_report {
}
}
my $is_success = 1;
if (%failed_node) {
$is_success = 0;
my $failed_node_num = keys %failed_node;
if ($failed_node_num > 1) {
probe_utils->send_msg("stdout", "d", "There are $failed_node_num node provision failures");
@ -538,11 +550,144 @@ sub conclusion_report {
} else {
probe_utils->send_msg("stdout", "o", "All nodes provisioned successfully");
}
performance_calculation($is_success) if ($performance);
return 0;
}
#------------------------------------------

=head3
    Description:
        Print the provision performance table: one row per node found in
        %node_status_time, followed by an "ALL" row that spans the earliest
        start and the latest end seen for each column.
        The columns depend on the file-level $performance and $provision_type:
            1: time spent for the whole provision (COMPLETED only)
            2: (diskful) DHCP, RPM, POST, POSTBOOT, COMPLETED
            3: every recorded status for diskful or diskless provision
        A cell is printed as "NULL* " when the start or end time needed for it
        was not captured.
    Arguments:
        is_success: true when all nodes provisioned successfully. Only used to
                    decide whether to print the hint about re-running with -r.
    Returns:
        Nothing meaningful; the table is written to stdout.
=cut

#------------------------------------------
sub performance_calculation {
    my $is_success = shift;

    #print Dumper(%node_status_time);
    # Nothing to report when no timing was recorded at all, or when the only
    # entry is the "all" key that holds the probe's own start time.
    return if ((keys %node_status_time == 1 and $node_status_time{all}) or !%node_status_time);

    probe_utils->send_msg("stdout", "", "-----------node provision performance-----------");

    my %all_spend_time  = ();
    my @status_for_time = ();
    my $warninfo;
    my $isnull      = 0;
    my $length_node = 20;
    my $space       = " " x ($length_node - 4);

    # print table's first line
    # @status_for_time: the status that needed to calculate time
    # NOTE(review): no branch matches level 2 with a diskless provision or any
    # level other than 1-3; in that case no header and no columns are printed.
    if ($provision_type == $::DISKFUL and $performance == 3) {
        print "NODE $space DHCP BOOTLOADER KERNEL INITRD KICKSTART RPM POST POSTBOOT COMPLETED\n";
        @status_for_time = ($::STATE_DHCP, $::STATE_BOOTLODER, $::STATE_KERNEL, $::STATE_INITRD, $::STATE_KICKSTART, $::STATE_INSTALLRPM, $::STATE_POSTSCRIPT, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
    } elsif ($provision_type == $::DISKFUL and $performance == 2) {
        print "NODE $space DHCP RPM POST POSTBOOT COMPLETED\n";
        @status_for_time = ($::STATE_DHCP, $::STATE_INSTALLRPM, $::STATE_POSTSCRIPT, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
    } elsif ($provision_type == $::DISKLESS and $performance == 3) {
        print "NODE $space DHCP BOOTLOADER KERNEL INITRD NETBOOTING ROOTIMG POSTBOOT COMPLETED\n";
        @status_for_time = ($::STATE_DHCP, $::STATE_BOOTLODER, $::STATE_KERNEL, $::STATE_INITRD, $::STATE_NETBOOTING, $::STATE_ROOTIMG, $::STATE_POSTBOOTSCRIPT, $::STATE_COMPLETED);
    } elsif ($performance == 1) {
        print "NODE $space COMPLETED\n";
        @status_for_time = ($::STATE_COMPLETED);
    }

    # Statuses numerically at or below this threshold (and COMPLETED) are
    # measured from the power-on time; the others from their own start time.
    # The threshold does not change per node or per status, so compute it once.
    my $tmp_detail_status;
    if ($performance == 2 and $provision_type == $::DISKFUL) {
        $tmp_detail_status = $::STATE_DHCP;
    } else {
        $tmp_detail_status = $::STATE_COMPLETED;
    }

    # calculate time for each node and status
    foreach my $node (keys %node_status_time) {
        next if ($node eq "all");

        # if did not get start time of power on, use osdeploy start time and show warning info
        # The flag is per node: a node with a missing power-on time must not
        # turn the cells of the other nodes into "NULL* ".
        my $missing_power_on = 0;
        if ((!$node_status_time{$node}{$::STATE_POWER_ON}{start_time}) and $node_status_time{all}) {
            $node_status_time{$node}{$::STATE_POWER_ON}{start_time} = $node_status_time{all};
            $warninfo = "Did not get start time from log during running, use xcatprobe start time";
            $missing_power_on = 1;
        }

        # Track the overall window (earliest power on, latest completion).
        # It is not printed by this sub.
        my $power_on_time = $node_status_time{$node}{$::STATE_POWER_ON}{start_time};
        if ($all_spend_time{start_time}) {
            $all_spend_time{start_time} = ($all_spend_time{start_time} <= $power_on_time) ? $all_spend_time{start_time} : $power_on_time
              if (defined $power_on_time);
        } else {
            $all_spend_time{start_time} = $power_on_time;
        }
        my $completed_time = $node_status_time{$node}{$::STATE_COMPLETED}{start_time};
        if ($completed_time) {
            $all_spend_time{end_time} = (defined($all_spend_time{end_time}) and $all_spend_time{end_time} >= $completed_time) ? $all_spend_time{end_time} : $completed_time;
        }

        my @timeinfo = ();
        foreach my $status (@status_for_time) {
            my $tmp_status;

            # for some status, use time of power on as status's start time
            if ($status <= $tmp_detail_status or $status == $::STATE_COMPLETED) {
                $tmp_status = $::STATE_POWER_ON;
                if ($missing_power_on) {
                    push @timeinfo, "NULL* ";
                    $isnull = 1;
                    $all_spend_time{$status}{rst} = "NULL* ";
                    next;
                }
            } else {
                $tmp_status = $status;
            }

            # DHCP, RPM install, postscripts and postbootscripts record an
            # explicit end time; every other status ends at its own start time.
            my $tmp_end_time;
            $tmp_end_time = $node_status_time{$node}{$status}{end_time} if ($node_status_time{$node}{$status}{end_time});
            $tmp_end_time = $node_status_time{$node}{$status}{start_time} if ($status != $::STATE_DHCP and $status != $::STATE_INSTALLRPM and $status != $::STATE_POSTSCRIPT and $status != $::STATE_POSTBOOTSCRIPT and $node_status_time{$node}{$status}{start_time});

            my $tmp_start_time = $node_status_time{$node}{$tmp_status}{start_time};
            if ($tmp_start_time && $tmp_end_time) {
                if ($all_spend_time{$status}{start_time}) {
                    $all_spend_time{$status}{start_time} = ($all_spend_time{$status}{start_time} <= $tmp_start_time) ? $all_spend_time{$status}{start_time} : $tmp_start_time;
                } else {
                    $all_spend_time{$status}{start_time} = $tmp_start_time;
                }
                $all_spend_time{$status}{end_time} = (defined($all_spend_time{$status}{end_time}) and $all_spend_time{$status}{end_time} >= $tmp_end_time) ? $all_spend_time{$status}{end_time} : $tmp_end_time;
                push @timeinfo, probe_utils->convert_second_to_time($tmp_end_time - $tmp_start_time);
            } else {
                push @timeinfo, "NULL* ";
                $all_spend_time{$status}{rst} = "NULL* ";
                $isnull = 1;
            }
        }

        my $time_str = join(" ", @timeinfo);
        $space = " " x ($length_node - length($node));
        print "$node $space $time_str\n";
    }

    # Summary row: a column is "NULL* " as soon as one node could not be timed
    # for it, otherwise it spans the earliest start and the latest end.
    my @time_rst = ();
    foreach my $status (@status_for_time) {
        my $rst = $all_spend_time{$status}{rst};
        if (defined($rst) and $rst eq "NULL* ") {
            push @time_rst, $rst;
        } else {
            my $status_time = probe_utils->convert_second_to_time($all_spend_time{$status}{end_time} - $all_spend_time{$status}{start_time});
            push @time_rst, $status_time;
        }
    }

    my $time_str = join(" ", @time_rst);
    $space = " " x ($length_node - 3);
    print "ALL $space $time_str\n";

    probe_utils->send_msg("stdout", "", "------------------------------------------------");
    probe_utils->send_msg("stdout", "", "$warninfo") if ($warninfo);
    probe_utils->send_msg("stdout", "", "* : Did not get correct time, please run '$command_input -r xxhxxm' to get correct time") if ($isnull and $is_success);
}
#------------------------------------------
=head3
Description:
Implement the monitor feature
@ -573,6 +718,8 @@ Start capturing every message during OS provision process....
";
probe_utils->send_msg("stdout", "", "$startline");
$node_status_time{all} = time();
my @openfilepids;
my @openfilefds;
my %fd_filetype_map;
@ -766,7 +913,7 @@ sub handle_dhcp_msg {
my $ip = $1;
my $mac = $2;
my $nic = $3;
if (exists $macmap{$mac}) {
my $node = $macmap{$mac}{"node"};
my $record = $log_ref->{msg};
@ -793,6 +940,7 @@ sub handle_dhcp_msg {
$ipnodemap{$ip} = $node;
set_node_state($node_state_ref, $node, $::STATE_DHCP);
$node_status_time{$node}{$::STATE_DHCP}{end_time} = $log_ref->{time} if ($performance and !$node_status_time{$node}{$::STATE_DHCP}{end_time});
}
} elsif ($log_ref->{msg} =~ /BOOTREQUEST\s+from\s+(.+)\s+via\s+([^:]+)(.*)/) {
my $mac = $1;
@ -802,6 +950,7 @@ sub handle_dhcp_msg {
my $record = "Receive BOOTREQUEST from $mac via $nic";
probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} $record") if ($monitor);
push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
$node_status_time{$node}{$::STATE_DHCP}{start_time} = $log_ref->{time} if ($performance);
}
} elsif ($log_ref->{msg} =~ /BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) {
my $ip = $1;
@ -821,6 +970,7 @@ sub handle_dhcp_msg {
$ipnodemap{$ip} = $node;
set_node_state($node_state_ref, $node, $::STATE_DHCP);
$node_status_time{$node}{$::STATE_DHCP}{end_time} = $log_ref->{time} if ($performance);
}
}
}
@ -852,10 +1002,13 @@ sub handle_tftp_msg {
if ($file =~ /xcat\/xnba.*/i or $file =~ /\/boot\/grub2\/powerpc-ieee1275\//i or $file =~ /\/yb\/node\/yaboot\-/i) {
set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_BOOTLODER);
$node_status_time{$ipnodemap{$ip}}{$::STATE_BOOTLODER}{start_time} = $log_ref->{time} if ($performance);
} elsif ($file =~ /vmlinuz|inst64|linux/) {
set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KERNEL);
$node_status_time{$ipnodemap{$ip}}{$::STATE_KERNEL}{start_time} = $log_ref->{time} if ($performance);
} elsif ($file =~ /initrd/i) {
set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INITRD);
$node_status_time{$ipnodemap{$ip}}{$::STATE_INITRD}{start_time} = $log_ref->{time} if ($performance);
}
}
}
@ -892,16 +1045,29 @@ sub handle_http_msg {
if ($file =~ /vmlinuz|inst64/i or ($file =~ /linux/i and $file =~ /osimage/i)) {
set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KERNEL);
$node_status_time{$ipnodemap{$ip}}{$::STATE_KERNEL}{start_time} = $log_ref->{time} if ($performance);
push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KERNEL} }, "$record failed with $http_code") if ($http_code >= 400);
} elsif ($file =~ /initrd/i and $file =~ /osimage/i) {
set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INITRD);
$node_status_time{$ipnodemap{$ip}}{$::STATE_INITRD}{start_time} = $log_ref->{time} if ($performance);
push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INITRD} }, "$record failed with $http_code") if ($http_code >= 400);
} elsif (($file =~ /^\/install\/autoinst\//i) and ($file !~ /getinstdisk$/i) and ($file !~ /\.pre$/i) and ($file !~ /\.post$/i)) {
set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_KICKSTART);
$node_status_time{$ipnodemap{$ip}}{$::STATE_KICKSTART}{start_time} = $log_ref->{time} if ($performance);
push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_KICKSTART} }, "$record failed with $http_code") if ($http_code >= 400);
} elsif ($file =~ /\.deb$/i or $file =~ /\/Packages\/.+\.rpm$/ or $file =~ /\/suse\/noarch\/.+\.rpm$/i) {
set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_INSTALLRPM);
if ($performance) {
if ($node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{start_time}) {
$node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{end_time} = $log_ref->{time};
} else {
$node_status_time{$ipnodemap{$ip}}{$::STATE_INSTALLRPM}{start_time} = $log_ref->{time};
}
}
push (@{ $node_state_ref->{ $ipnodemap{$ip} }{errors}{$::STATE_INSTALLRPM} }, "$record failed with $http_code") if ($http_code >= 400);
} elsif ($file =~ /rootimg/) {
set_node_state($node_state_ref, $ipnodemap{$ip}, $::STATE_ROOTIMG);
$node_status_time{$ipnodemap{$ip}}{$::STATE_ROOTIMG}{start_time} = $log_ref->{time} if ($performance);
}
}
}
@ -939,6 +1105,7 @@ sub handle_cluster_msg {
probe_utils->send_msg("stdout", "d", "[$node] $log_ref->{time_record} Use command $command to reboot node $node") if ($monitor);
push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
set_node_state($node_state_ref, $node, $::STATE_POWER_ON);
$node_status_time{$node}{$::STATE_POWER_ON}{start_time} = $log_ref->{time} if ($performance);
}
}
}
@ -955,10 +1122,16 @@ sub handle_cluster_msg {
if ($status eq "installing") {
set_node_state($node_state_ref, $node, $::STATE_INSTALLING);
$node_status_time{$node}{$::STATE_INSTALLING}{start_time} = $log_ref->{time} if ($performance);
} elsif ($status eq "powering-on") {
set_node_state($node_state_ref, $node, $::STATE_POWERINGON);
$node_status_time{$node}{$::STATE_POWERINGON}{start_time} = $log_ref->{time} if ($performance);
} elsif ($status eq "booting") {
set_node_state($node_state_ref, $node, $::STATE_BOOTING);
$node_status_time{$node}{$::STATE_BOOTING}{start_time} = $log_ref->{time} if ($performance);
} elsif ($status eq "netbooting") {
set_node_state($node_state_ref, $node, $::STATE_NETBOOTING);
$node_status_time{$node}{$::STATE_NETBOOTING}{start_time} = $log_ref->{time} if ($performance);
} elsif ($status eq "failed") {
$node_state_ref->{$node}{done} = 1;
}
@ -990,6 +1163,11 @@ sub handle_compute_msg {
push(@{ $node_state_ref->{$node}{log} }, $log_ref->{msg}) if ($debug);
if ($log_ref->{msg} =~ /Running postscript:/i) {
set_node_state($node_state_ref, $node, $::STATE_POSTSCRIPT);
if ($performance) {
if (!$node_status_time{$node}{$::STATE_POSTSCRIPT}{start_time}) {
$node_status_time{$node}{$::STATE_POSTSCRIPT}{start_time} = $log_ref->{time};
}
}
} elsif ($log_ref->{msg} =~ /postscript (.+) return with (\d+)/) {
my $script_name = $1;
my $return_code = $2;
@ -1000,8 +1178,14 @@ sub handle_compute_msg {
push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTSCRIPT} }, $error_str;
}
}
$node_status_time{$node}{$::STATE_POSTSCRIPT}{end_time} = $log_ref->{time} if ($performance);
} elsif ($log_ref->{msg} =~ /Running postbootscript:/i) {
set_node_state($node_state_ref, $node, $::STATE_POSTBOOTSCRIPT);
if ($performance) {
if (!$node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{start_time}) {
$node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{start_time} = $log_ref->{time};
}
}
} elsif ($log_ref->{msg} =~ /postbootscript (.+) return with (\d+)/) {
my $script_name = $1;
my $return_code = $2;
@ -1012,10 +1196,12 @@ sub handle_compute_msg {
push @{ $node_state_ref->{$node}{errors}{$::STATE_POSTBOOTSCRIPT} }, $error_str;
}
}
$node_status_time{$node}{$::STATE_POSTBOOTSCRIPT}{end_time} = $log_ref->{time} if ($performance);
} elsif ($log_ref->{msg} =~ /provision completed/) {
set_node_state($node_state_ref, $node, $::STATE_COMPLETED);
$node_state_ref->{$node}{done} = 1;
probe_utils->send_msg("stdout", "o", "[$node] $log_ref->{time_record} provision completed") if ($monitor);
$node_status_time{$node}{$::STATE_COMPLETED}{start_time} = $log_ref->{time};
}
}
}
@ -1078,6 +1264,7 @@ sub set_node_state {
@{ $node_state_ref->{$node}{statehistory} } = ();
%{ $node_state_ref->{$node}{errors} } = ();
push @{ $node_state_ref->{$node}{statehistory} }, $newstate;
$node_status_time{$node} = ();
} else {
my $index = @{ $node_state_ref->{$node}{statehistory} } - 1;