#! /usr/bin/perl # IBM(c) 2016 EPL license http://www.eclipse.org/legal/epl-v10.html BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; } use lib "$::XCATROOT/probe/lib/perl"; use probe_utils; use xCAT::NetworkUtils; use File::Basename; use IO::Select; use Time::Local; use Data::Dumper; use Getopt::Long qw(:config no_ignore_case); my $program_name = basename("$0"); my $help; my $test; my $maxwaittime = 60; #unit is minute my $output = "stdout"; my $verbose = 0; my $rst = 0; my $noderange; my $terminal = 0; my $installnic; my $monitor = 0; my $replaylog; ##used by feature replay deploymen log my %rawdata; #-%rawdata structure------- # $rawdata{nodename}{"history"} #array, the log history of current node # $rawdata{nodename}{"state"} #the latest status of current node, used for State Machine # $rawdata{nodename}{"statehistory"} #array, the history status of current node, used for State Machine #-------------------------- my %macmap; #-%macmap structure------- # $macmap{mac_addr}{"ip"}="x.x.x.x" # $macmap{mac_addr}{"node"}="nodename" #------------------------- my %ipnodemap; #-%ipnodemap structure------- # $ipnodemap{ip_addr}="nodename" #--------------------------- my %monitor_nodes; #- %monitor_nodes structure------- # $monitor_nodes{nodename}{"status"} #useless now # $monitor_nodes{nodename}{"rst"} #--------------------------------- # provision state machine my %state_set = ( "unknown" => 0, "server_reboot" => 1, "loaded_kernel_and_initrd" => 2, "kernel_and_initrd_got_ip" => 3, "install_os_packages" => 4, "run_postscript" => 5, "run_postbootscript" => 6, "done" => 7, ); my %state_set_reverse = ( "0" => "unknown", "1" => "server_reboot", "2" => "loaded_kernel_and_initrd", "3" => "kernel_and_initrd_got_ip", "4" => "install_os_packages", "5" => "run_postscript", "6" => "run_postbootscript", "7" => "done", ); my %valid_process; $valid_process{1}{process} = [ $state_set{unknown}, $state_set{done} ]; $valid_process{1}{type} = "reboot"; $valid_process{2}{process} = [ $state_set{unknown}, $state_set{server_reboot}, $state_set{done} ]; $valid_process{2}{type} = "reboot"; $valid_process{3}{process} = [ $state_set{unknown}, $state_set{server_reboot}, $state_set{run_postbootscript}, $state_set{done} ]; $valid_process{3}{type} = "reboot"; $valid_process{4}{process} = [ $state_set{unknown}, $state_set{server_reboot}, $state_set{loaded_kernel_and_initrd}, $state_set{kernel_and_initrd_got_ip}, $state_set{install_os_packages}, $state_set{run_postscript}, $state_set{server_reboot}, $state_set{run_postbootscript}, $state_set{done} ]; $valid_process{4}{type} = "deploy"; $valid_process{5}{process} = [ $state_set{unknown}, $state_set{loaded_kernel_and_initrd}, $state_set{kernel_and_initrd_got_ip}, $state_set{install_os_packages}, $state_set{run_postscript}, $state_set{server_reboot}, $state_set{run_postbootscript}, $state_set{done} ]; $valid_process{5}{type} = "deploy"; sub reset_state { my $state_ref = shift; my $condition = shift; my $reset_flag = 1; if ($$state_ref == $state_set{unknown} && $condition eq "dhcp") { $$state_ref = $state_set{server_reboot}; } elsif ($$state_ref == $state_set{unknown} && $condition eq "booted") { $$state_ref = $state_set{done}; } elsif (($$state_ref == $state_set{unknown} && $condition eq "tftp") || ($$state_ref == $state_set{unknown} && $condition eq "http")) { $$state_ref = $state_set{loaded_kernel_and_initrd}; } elsif ($$state_ref == $state_set{server_reboot} && $condition eq "booted") { $$state_ref = $state_set{done}; } elsif ($$state_ref == $state_set{server_reboot} && $condition eq "postscript") { $$state_ref = $state_set{run_postbootscript}; } elsif (($$state_ref == $state_set{server_reboot} && $condition eq "tftp") || ($$state_ref == $state_set{server_reboot} && $condition eq "http")) { $$state_ref = $state_set{loaded_kernel_and_initrd}; } elsif ($$state_ref == $state_set{loaded_kernel_and_initrd} && $condition eq "dhcp") { $$state_ref = $state_set{kernel_and_initrd_got_ip}; } elsif ($$state_ref == $state_set{kernel_and_initrd_got_ip} && $condition eq "http") { $$state_ref = $state_set{install_os_packages}; } elsif ($$state_ref == $state_set{install_os_packages} && $condition eq "postscript") { $$state_ref = $state_set{run_postscript}; } elsif ($$state_ref == $state_set{run_postscript} && $condition eq "dhcp") { $$state_ref = $state_set{server_reboot}; } elsif ($$state_ref == $state_set{run_postbootscript} && $condition eq "booted") { $$state_ref = $state_set{done}; } elsif ($$state_ref == $state_set{done} && $condition eq "dhcp") { $$state_ref = $state_set{server_reboot}; } elsif ($$state_ref == $state_set{done} && $condition eq "poweron") { $$state_ref = $state_set{server_reboot}; } else { $reset_flag = 0; } return $reset_flag; } my @candidate_svr_hostname_inlog; my $svr_hostname_short = `hostname -s`; chomp($svr_hostname_short); my $svr_hostname_domain = `hostname -d`; chomp($svr_hostname_domain); push(@candidate_svr_hostname_inlog, $svr_hostname_short); push(@candidate_svr_hostname_inlog, "$svr_hostname_short.$svr_hostname_domain"); $::USAGE = "Usage: $program_name -h $program_name -T $program_name -n [-V] $program_name -n -r [-V] Description: Do probe for os provision process. Realtime monitor or replay history of os provision process. If do realtime monitor, please run this before rpower node. Unsupport hierarchial structure now. Options: -h : Get usage information of $program_name -T : To verify if $program_name can work, reserve option for probe framework -V : Output more information for debug -n : The range of node to be monitor or replay log. -t : The maximum time to wait when doing monitor, the unit is minute, default is 60 minutes. -r : Replay history log to probe provision. need input a start time when probe should begin from. Support time format are xxhxxm, xxh, or xxm. h means hour, m means minute. If there isn't unit input, using hour by default. "; #------------------------------------------ =head3 Description: Check if all nodes are valid Arguments: node_range: node range Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub check_noderange { my $node_range = shift; my @cmdoutput = `lsdef $node_range -i ip,mac -c 2>&1`; my $rst = 0; my %nodecheckrst; foreach (@cmdoutput) { chomp($_); $_ =~ s/^\s+|\s+$//g; if ($_ =~ /^Error: Could not find an object named '(\w+)' .+/i) { $nodecheckrst{$1}{"error"} = "Could not find node definition"; } elsif ($_ =~ /(\S+):\s+mac=(.*)/i) { my $node = $1; my $mac = $2; if ($mac) { $nodecheckrst{$node}{"mac"} = $mac; } else { $nodecheckrst{$node}{"error"} = "Node $1 doesn't have MAC address"; } } elsif ($_ =~ /(\S+):\s+ip=(.*)/i) { my $node = $1; my $ip = $2; if ($ip) { $nodecheckrst{$node}{"ip"} = $ip; } } } foreach my $node (keys %nodecheckrst) { if (exists($nodecheckrst{$node}{error})) { probe_utils->send_msg("$output", "d", "$node : $nodecheckrst{$node}{error}"); $rst = 1; next; } my $noerror=1; my @macs = split(/\|/, $nodecheckrst{$node}{"mac"}); foreach my $mac (@macs) { #[NOTE] don't support 2 adapters in the same network now. TODO if ($mac =~ /\!\*NOIP\*/) { $mac =~ s/\!\*NOIP\*//g; $macmap{$mac}{"ip"} = "NOIP"; $macmap{$mac}{"node"} = $node; } else { $macmap{$mac}{"node"} = $node; if($nodecheckrst{$node}{"ip"}){ $macmap{$mac}{"ip"} = $nodecheckrst{$node}{"ip"}; }else{ my $nodeip = xCAT::NetworkUtils->getipaddr($node); if($nodeip){ $macmap{$mac}{"ip"} = $nodeip; }else{ $noerror = 0; $rst = 1; probe_utils->send_msg("$output", "f", "$node : can't be resolved to a IP address"); } } } } $monitor_nodes{$node}{"rst"} = 0 if($noerror); } unless (%monitor_nodes) { probe_utils->send_msg("$output", "d", "There is no valid node to handle"); $rst = 1; } return $rst; } #------------------------------------------ =head3 Description: Handle one line log come from dhcp log file Arguments: msg: one line http log installnic: target network interfaces Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub handle_dhcp_msg { my $msg = shift; if ($msg =~ /.+DHCPDISCOVER\s+from\s+(.+)\s+via\s+([^:]+)(.*)/i) { my $mac = $1; my $nic = $2; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Receive DHCPDISCOVER via $nic"; probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $record); } } elsif ($msg =~ /.+DHCPOFFER\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/i) { my $ip = $1; my $mac = $2; my $nic = $3; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Send DHCPOFFER on $ip back to $mac via $nic"; probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $record); } } elsif ($msg !~ /unknown lease/ && $msg !~ /ignored/ && $msg =~ /.+DHCPREQUEST\s+for\s+(.+)\s\((.+)\)\s+from\s+(.+)\s+via\s+(.+)/) { my $ip = $1; my $server = $2; my $mac = $3; my $nic = $4; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Receive DHCPREQUEST from $mac for $ip via $nic"; probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $record); } } elsif ($msg =~ /.+DHCPACK\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/) { my $ip = $1; my $mac = $2; my $nic = $3; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Send DHCPACK on $ip back to $mac via $nic"; probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $record); $ipnodemap{$ip} = $node; push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "dhcp")); if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) { my $warn_msg = "The ip($ip) assigned to $mac via DHCP is different with the ip($macmap{$mac}{'ip'}) in node definition."; probe_utils->send_msg("$output", "w", "$warn_msg") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $warn_msg); } } } elsif ($msg =~ /.+BOOTREQUEST\s+from\s+(.+)\s+via\s+([^:]+)(.*)/) { my $mac = $1; my $nic = $2; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Receive BOOTREQUEST from $mac via $nic"; probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $record); } } elsif ($msg =~ /.+BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) { my $ip = $1; my $mac = $2; my $nic = $3; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Send BOOTREPLY on $ip back to $mac via $nic"; probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $record); $ipnodemap{$ip} = $node; push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "dhcp")); if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) { my $warn_msg = "The ip($ip) assigned to $mac via DHCP is different with the ip($macmap{$mac}{'ip'}) in node definition."; probe_utils->send_msg("$output", "w", "$warn_msg") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $warn_msg); } } } return 0; } #------------------------------------------ =head3 Description: Handle one line log come from tftp log file Arguments: msg: one line tftp log Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub handle_tftp_msg { my $msg = shift; if ($msg =~ /RRQ\s+from\s+(.+)\s+filename\s+(.+)/i) { my $ip = $1; my $file = $2; my $record = "Via TFTP download $file"; if (exists($rawdata{"$ipnodemap{$ip}"})) { probe_utils->send_msg("$output", "d", "[$ipnodemap{$ip}] $record") if ($monitor); push(@{ $rawdata{ $ipnodemap{$ip} }{"history"} }, $record); push(@{ $rawdata{ $ipnodemap{$ip} }{statehistory} }, $rawdata{ $ipnodemap{$ip} }{state}) if (reset_state(\$rawdata{ $ipnodemap{$ip} }{state}, "tftp")); } } } #------------------------------------------ =head3 Description: Handle one line log come from http log file Arguments: msg: one line http log Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub handle_http_msg { my $msg = shift; if ($msg =~ /(\d+\.\d+.\d+.\d+)\s.+GET\s+(.+)\s+HTTP.+/) { my $ip = $1; my $file = $2; my $record = "Via HTTP get $file"; if (exists($rawdata{"$ipnodemap{$ip}"})) { probe_utils->send_msg("$output", "d", "[$ipnodemap{$ip}] $record") if ($monitor); push(@{ $rawdata{ $ipnodemap{$ip} }{"history"} }, $record); push(@{ $rawdata{ $ipnodemap{$ip} }{statehistory} }, $rawdata{ $ipnodemap{$ip} }{state}) if (reset_state(\$rawdata{ $ipnodemap{$ip} }{state}, "http")); } } return 0; } #------------------------------------------ =head3 Description: Handle one line log come from cluster.log Arguments: msg: one line log Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub handle_cluster_msg { my $line = shift; my $node = ""; my $msg; my $status; my @splitline = split(/\s+/, $line); if (($splitline[4] =~ /^xcat/i) || ($splitline[5] =~ /^xcat/i)) { #log like: Aug 7 22:30:31 c910f02c01p09 xcat: c910f02c04p04 status: booted statustime: 08-07-2016 22:30:31 if (($splitline[6] =~ /^status:$/i) && ($splitline[8] =~ /^statustime:$/)) { $node = $splitline[5]; $status = $splitline[7]; if (exists($rawdata{$node})) { my $record = "Node status is changed to $status"; probe_utils->send_msg("$output", "d", "[$node] $record") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $record); #one node finish deployment if ($status eq "booted") { $monitor_nodes{$node}{"rst"} = 1 if (defined($monitor_nodes{$node})); push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "booted")); } if ($status eq "powering-on") { push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "poweron")); } } } } return 0; } #----------------------------------------- =head3 Description: Handle one line log come from computes.log Arguments: msg: one line compute log Returns: 0 : pass 1 : failed =cut #----------------------------------------- sub handle_compute_msg { my $line = shift; my $sender; my $node; my $msg; if ($line =~ /.+\d{2}:\d{2}:\d{2}\s+(.+)\s+(xcat.+)/i) { $sender = $1; $msg = $2; if (!xCAT::NetworkUtils->isIpaddr($sender)) { $node = $sender; } else { $node = $ipnodemap{$sender}; } if ($node ne "" && exists($rawdata{$node})) { probe_utils->send_msg("$output", "d", "[$node] $msg") if ($monitor); push(@{ $rawdata{$node}{"history"} }, $msg); #node start to run postscript or postbootscript if ($msg =~ /Running postscript/) { push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}) if (reset_state(\$rawdata{$node}{state}, "postscript")); } } } return 0; } #------------------------------------------ =head3 Description: Test if all nodes have finished job Arguments: One golble attribute %monitor_nodes; Returns: 1: all nodes finished 0: not all nodes finished =cut #------------------------------------------ sub all_monitor_node_done { my $done = 1; foreach my $node (keys %monitor_nodes) { if ($monitor_nodes{$node}{"rst"} == 0) { $done = 0; last; } } return $done; } #------------------------------------------ =head3 Description: Dump monitor history, categorised by mac address. Arguments: NULL Returns: =cut #------------------------------------------ sub dump_history { my $title = " ============================================================= = The summary of os provision: ============================================================= "; print "$title\n"; foreach $node (keys %rawdata) { if ($verbose) { print "[$node]\n"; my $httphit = 0; my @httptmp; foreach my $line (@{ $rawdata{$node}{"history"} }) { if ($line =~ /Via HTTP/) { if ($httphit) { shift @httptmp if ($#httptmp > 0); push @httptmp, $line; } else { print "\t$line\n"; $httphit = 1; } } else { if ($#httptmp > -1) { print "\tVia HTTP ..........\n"; print "\t$_\n" foreach (@httptmp); } @httptmp = (); $httphit = 0; print "\t$line\n"; } } } my @tmpnodestatehistory = @{ $rawdata{$node}{statehistory} }; #print "state history = @tmpnodestatehistory\n"; my %match_result; my $procidx = 0; my $newloop = 0; my $notfirstloop = 0; while (@tmpnodestatehistory) { undef %match_result if ($notfirstloop); $newloop = 0; foreach my $type (keys %valid_process) { if ($notfirstloop) { $procidx = 1; } else { $procidx = 0; } my $proclen = scalar(@{ $valid_process{$type}{process} }); my $i; for ($i = 0 ; $i < scalar(@tmpnodestatehistory) ; $i++) { if ($procidx < $proclen) { if ($tmpnodestatehistory[$i] == $valid_process{$type}{process}[$procidx]) { ++$procidx; } else { --$procidx if (notfirstloop); push @{ $match_result{$procidx} }, $type; last; } } else { splice(@tmpnodestatehistory, 0, $i); $newloop = 1; $notfirstloop = 1; last; } } if ($i == scalar(@tmpnodestatehistory)) { push @{ $match_result{$procidx} }, $type; next; } last if ($newloop); } last if (!$newloop); } #print "------------result---------------\n"; #print Dumper %match_result; my $max_match = 0; foreach my $key (keys %match_result) { $max_match = $key if ($key > $max_match); } my $formatprefix; if ($max_match == 0) { my $statelist = ""; for (my $i = 0 ; $i < scalar(@{ $rawdata{$node}{statehistory} }) ; $i++) { $statelist .= "$state_set_reverse{$rawdata{$node}{statehistory}[$i]} "; } probe_utils->send_msg("$output", "f", "[$node] deployment failed"); probe_utils->send_msg("$output", "d", "\t$node did unknown process, state change history is $statelist"); } elsif (scalar(@{ $match_result{$max_match} }) > 1) { probe_utils->send_msg("$output", "f", "[$node] deployment failed"); probe_utils->send_msg("$output", "d", "\tThere are more than one possible process satisfy $node situation"); foreach my $proc (@{ $match_result{$max_match} }) { if ($valid_process{$proc}{type} eq "deploy") { probe_utils->send_msg("$output", "d", "\tpossible process \"deploy\", pass $state_set_reverse{$valid_process{$proc}{process}[$max_match-1]} stage, something wrong during $state_set_reverse{$valid_process{$proc}{process}[$max_match]} stage"); } elsif ($valid_process{$proc}{type} eq "reboot") { probe_utils->send_msg("$output", "d", "\tpossible process \"reboot\", pass $state_set_reverse{$valid_process{$proc}{process}[$max_match-1]} stage, something wrong during $state_set_reverse{$valid_process{$proc}{process}[$max_match]} stage"); } } } else { if (($valid_process{ $match_result{$max_match}[0] }{type} eq "deploy") && ($valid_process{ $match_result{$max_match}[0] }{process}[ $max_match - 1 ] == $state_set{done})) { probe_utils->send_msg("$output", "o", "[$node] deployment completed"); } elsif (($valid_process{ $match_result{$max_match}[0] }{type} eq "deploy") && ($valid_process{ $match_result{$max_match}[0] }{process}[ $max_match - 1 ] != $state_set{done})) { probe_utils->send_msg("$output", "f", "[$node] deployment failed, pass $state_set_reverse{$valid_process{$match_result{$max_match}[0]}{process}[$max_match-1]} stage, something wrong during $state_set_reverse{$valid_process{$match_result{$max_match}[0]}{process}[$max_match]} stage") } elsif (($valid_process{ $match_result{$max_match}[0] }{type} eq "reboot") && ($valid_process{ $match_result{$max_match}[0] }{process}[ $max_match - 1 ] == $state_set{done})) { probe_utils->send_msg("$output", "f", "[$node] reboot completed, without deployment process"); } elsif (($valid_process{ $match_result{$max_match}[0] }{type} eq "reboot") && ($valid_process{ $match_result{$max_match}[0] }{process}[ $max_match - 1 ] != $state_set{done})) { probe_utils->send_msg("$output", "f", "[$node] reboot failed,without deployment process, stop at $state_set_reverse{$valid_process{$match_result{$max_match}[0]}{process}[$max_match-1]} stage, something wrong during $state_set_reverse{$valid_process{$match_result{$max_match}[0]}{process}[$max_match]} stage"); } } } } #------------------------------------------ =head3 Description: Monitor the process of os provision Returns: 0: pass 1: failed =cut #------------------------------------------ sub do_monitor { $SIG{TERM} = $SIG{INT} = sub { $terminal = 1; }; my $rst = 0; my $startline = "------------------------------------------------------------- Start to capture every message during os provision process...... ------------------------------------------------------------- "; print("$startline\n"); my $varlogmsg = "/var/log/messages"; my $clusterlog = "/var/log/xcat/cluster.log"; my $computelog = "/var/log/xcat/computes.log"; my $httplog; if (-e "/var/log/httpd/access_log") { $httplog = "/var/log/httpd/access_log"; } elsif (-e "/var/log/apache2/access_log") { $httplog = "/var/log/apache2/access_log"; } elsif (-e "/var/log/apache2/access.log") { $httplog = "/var/log/apache2/access.log"; } my $varlogpid; my $clusterpid; my $httppid; my $computerpid; { if (!-e $varlogmsg) { probe_utils->send_msg("$output", "w", "$varlogmsg doesn't exist"); } else { if (!($varlogpid = open(VARLOGMSGFILE, "tail -f -n 0 $varlogmsg 2>&1 |"))) { probe_utils->send_msg("$output", "f", "Can't open $varlogmsg to get logs"); $rst = 1; last; } } if (!-e "$clusterlog") { probe_utils->send_msg("$output", "w", "$clusterlog doesn't exist"); probe_utils->send_msg("$output", "i", "If sles11 or xCAT2.11.x is using, please ignore above warning"); } else { if (!($clusterpid = open(CLUSTERLOGFILE, "tail -f -n 0 $clusterlog 2>&1 |"))) { probe_utils->send_msg("$output", "f", "Can't open $clusterlog to get logs"); $rst = 1; last; } } if (!-e "$httplog") { probe_utils->send_msg("$output", "w", "$httplog doesn't exist"); } else { if (!($httppid = open(HTTPLOGFILE, "tail -f -n 0 $httplog 2>&1 |"))) { probe_utils->send_msg("$output", "f", "Can't open $httplog to get logs"); $rst = 1; last; } } if (!-e "$computelog") { probe_utils->send_msg("$output", "w", "$computelog doesn't exist"); probe_utils->send_msg("$output", "i", "If sles11 or xCAT2.11.x is using, please ignore above warning"); } else { if (!($computerpid = open(COMPUTERFILE, "tail -f -n 0 $computelog 2>&1 |"))) { probe_utils->send_msg("$output", "f", "Can't open $computelog to get logs"); $rst = 1; last; } } my $select = new IO::Select; $select->add(\*VARLOGMSGFILE); $select->add(\*CLUSTERLOGFILE); $select->add(\*HTTPLOGFILE); $select->add(\*COMPUTERFILE); $| = 1; my $line = ""; my @hdls; my $hdl; my $starttime = time(); $monitor = 1; for (; ;) { if (@hdls = $select->can_read(0)) { foreach $hdl (@hdls) { if ($hdl == \*VARLOGMSGFILE) { chomp($line = ); my @tmp = split(/\s+/, $line); if ($tmp[4] =~ /dhcpd:/i && $line =~ /$installnic/) { handle_dhcp_msg("$line"); } elsif ($tmp[4] =~ /in.tftpd/i) { handle_tftp_msg("$line"); } elsif (($tmp[4] =~ /^xcat/i) || ($tmp[5] =~ /^xcat/i)) { if (grep(/^$tmp[3]$/, @candidate_svr_hostname_inlog)) { handle_cluster_msg("$line"); } else { handle_compute_msg("$line"); } } } elsif ($hdl == \*CLUSTERLOGFILE) { chomp($line = ); handle_cluster_msg("$line"); } elsif ($hdl == \*HTTPLOGFILE) { chomp($line = ); handle_http_msg("$line"); } elsif ($hdl == \*COMPUTERFILE) { chomp($line = ); handle_compute_msg("$line"); } } } if ($terminal || (%monitor_nodes && all_monitor_node_done())) { if ($terminal) { probe_utils->send_msg("$output", "d", "Get INT or TERM signal from STDIN"); } else { probe_utils->send_msg("$output", "o", "All nodes need to monitor have finished os provision process"); } last; } if (time() - $starttime > ($maxwaittime * 60)) { probe_utils->send_msg("$output", "i", "$maxwaittime minutes are expired, stop monitor"); last; } sleep 0.01; } &dump_history; } kill 'INT', $varlogpid if ($varlogpid); kill 'INT', $clusterpid if ($clusterpid); kill 'INT', $httppid if ($httppid); kill 'INT', $computerpid if ($computerpid); close(VARLOGMSGFILE) if (VARLOGMSGFILE); close(CLUSTERLOGFILE) if (CLUSTERLOGFILE); close(HTTPLOGFILE) if (HTTPLOGFILE); close(COMPUTERFILE) if (COMPUTERFILE); return $rst; } sub get_valid_logs { my $ref_timestamp = shift; my $year = shift; my $epoch_seconds_of_now = shift; my $bthistory_ref = shift; my $nics = "eth0"; my @orglogfilelist = ("/var/log/xcat/cluster.log", "/var/log/messages", "/var/log/xcat/computes.log"); my $httplog; if (-e "/var/log/httpd/access_log") { $httplog = "/var/log/httpd/access_log"; } elsif (-e "/var/log/apache2/access_log") { $httplog = "/var/log/apache2/access_log"; } elsif (-e "/var/log/apache2/access.log") { $httplog = "/var/log/apache2/access.log"; } push @orglogfilelist, $httplog; foreach my $f (@orglogfilelist) { my $filename = basename("$f"); $filename =~ s/(.+)\.(.+)/$1/g; my $path_only = dirname("$f"); my @rotatefiles; #TODO using opendir to refine below code my @alltargetfiles = `ls -lt $path_only |awk -F" " '/ $filename/ {print \$9}'`; foreach my $samenamefile (@alltargetfiles) { chomp($samenamefile); push @rotatefiles, "$path_only/$samenamefile"; } my $ishttplog = 0; $ishttplog = 1 if ($filename =~ /access[\._]log/); foreach my $file (@rotatefiles) { my $fd; my $filetype = `file $file 2>&1`; chomp($filetype); if ($filetype =~ /ASCII text/) { if (!open($fd, "$file")) { print "open $files failed\n"; next; } } else { #TODO handle compression files } #print "--->load $file\n"; my $line; my $historynum = 0; last unless ($line = <$fd>); chomp($line); my $needrotate = 0; my $logindex = 0; my @splitline = split(/\s+/, $line); my $timestamp; my $timestampepoch; if ($ishttplog) { $splitline[3] =~ s/^\[(.+)/$1/g; $timestampepoch = probe_utils->convert_to_epoch_seconds($splitline[3]); } else { $timestamp = join(" ", @splitline[ 0 .. 2 ]); $timestampepoch = probe_utils->convert_to_epoch_seconds($timestamp, $year, $epoch_seconds_of_now); } if ($ref_timestamp <= $timestampepoch) { $needrotate = 1; } else { seek($fd, 0, 2); my $tail = tell; my $head = 0; my $lasttail = $tail; my $i = 0; while ($head <= $tail) { my $middle = int(($tail - $head) / 2) + $head; seek($fd, $middle, 0); $line = <$fd>; $middle += length($line); last unless ($line = <$fd>); @splitline = split(/\s+/, $line); if ($ishttplog) { $splitline[3] =~ s/^\[(.+)/$1/g; $timestampepoch = probe_utils->convert_to_epoch_seconds($splitline[3]); } else { $timestamp = join(" ", @splitline[ 0 .. 2 ]); $timestampepoch = probe_utils->convert_to_epoch_seconds($timestamp, $year, $epoch_seconds_of_now); } if ($ref_timestamp == $timestampepoch) { $historynum = $middle; last; } elsif ($ref_timestamp < $timestampepoch) { $tail = $middle; last if ($tail == $lasttail); $lasttail = $tail; } else { $head = $middle; } } $historynum = $head unless ($historynum); } seek($fd, $historynum, 0); while (<$fd>) { chomp; @splitline = split(/\s+/, $_); if ($ishttplog) { $splitline[3] =~ s/^\[(.+)/$1/g; $timestampepoch = probe_utils->convert_to_epoch_seconds($splitline[3]); } else { $timestamp = join(" ", @splitline[ 0 .. 2 ]); $timestampepoch = probe_utils->convert_to_epoch_seconds($timestamp, $year, $epoch_seconds_of_now); } if (($splitline[4] =~ /dhcpd:/i && $_ =~ /$nics/) || ($splitline[4] =~ /in.tftpd/i) || (($splitline[4] =~ /^xcat/i) || ($splitline[5] =~ /^xcat/i)) || ($splitline[5] =~ /GET/ && $splitline[7] =~ /HTTP/)) { my $log = "$timestampepoch $filename$logindex $_"; $logindex++; push @$bthistory_ref, $log; } } close($fd); last unless ($needrotate); } } #sort logs depending on time my @sort_ht = sort(@$bthistory_ref); for ($i = 0 ; $i <= $#sort_ht ; $i++) { $sort_ht[$i] =~ s/^(\d+) (\S+) (.+)/$1 $3/g; } #delete duplicate logs my %count; @$bthistory_ref = grep { ++$count{$_} < 2; } @sort_ht; } sub do_replay { my $ref_timestamp = shift; my $timestr = scalar(localtime($ref_timestamp)); print "Start to search logs after '$timestr', please waiting for a while.............\n"; my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = localtime(time()); my $epoch_seconds_of_now = time(); my @bthistory; get_valid_logs($ref_timestamp, $year, $epoch_seconds_of_now, \@bthistory); foreach my $line (@bthistory) { $line =~ s/(\d+) (.+)/$2/g; my @tmp = split(/\s+/, $line); if ($tmp[4] =~ /dhcpd:/i && $line =~ /$nics/) { handle_dhcp_msg("$line"); } elsif ($tmp[4] =~ /in.tftpd/i) { handle_tftp_msg("$line"); } elsif (($tmp[4] =~ /^xcat/i) || ($tmp[5] =~ /^xcat/i)) { if (grep(/^$tmp[3]$/, @candidate_svr_hostname_inlog)) { handle_cluster_msg("$line"); } else { handle_compute_msg("$line"); } } elsif ($tmp[5] =~ /GET/ && $tmp[7] =~ /HTTP/) { handle_http_msg("$line"); } } &dump_history; return 0; } #------------------------------------- # main process #------------------------------------- if ( !GetOptions("--help|h|?" => \$help, "T" => \$test, "V" => \$verbose, "t" => \$maxwaittime, "r=s" => \$replaylog, "n=s" => \$noderange)) { probe_utils->send_msg("$output", "f", "Invalid parameter for $program_name"); probe_utils->send_msg("$output", "d", "$::USAGE"); exit 1; } if ($help) { if ($output ne "stdout") { probe_utils->send_msg("$output", "d", "$::USAGE"); } else { print "$::USAGE"; } exit 0; } if ($test) { probe_utils->send_msg("$output", "o", "Do probe for os provision process, realtime monitor of os provision process."); exit 0; } unless ($noderange) { probe_utils->send_msg("$output", "f", "A noderange is needed"); probe_utils->send_msg("$output", "d", "$::USAGE"); exit 1; } my $epoch_starttime = time(); if ($replaylog) { if ($replaylog =~ /(\d+)h(\d+)m/i) { $epoch_starttime -= ($1 * 3600 + $2 * 60) } elsif ($replaylog =~ /^(\d+)h*$/i) { $epoch_starttime -= $1 * 3600; } elsif ($replaylog =~ /^(\d+)m$/) { $epoch_starttime -= $1 * 60; } else { probe_utils->send_msg("$output", "f", "Unsupport time format for replay history log"); print "$::USAGE"; exit 1; } } my $msg = "All pre_defined nodes are valid"; my $rc = check_noderange($noderange); if ($rc) { probe_utils->send_msg("$output", "f", $msg); $rst = 1; } else { probe_utils->send_msg("$output", "o", $msg); } unless ($installnic) { my $masteripinsite = `tabdump site | awk -F',' '/^"master",/ { gsub(/"/, "", \$2) ; print \$2 }'`; chomp($masteripinsite); $installnic = `ip addr |grep -B2 $masteripinsite|awk -F" " '/mtu/{gsub(/:/,"",\$2); print \$2}'`; chomp($installnic); if (!$installnic) { probe_utils->send_msg("$output", "f", "The value of master in site table is $masteripinsite, can't get corresponding network interface"); $rst = 1; } else { probe_utils->send_msg("$output", "i", "The installation network interface is $installnic"); } } exit $rst if ($rst); foreach my $node (keys %monitor_nodes) { $rawdata{$node}{state} = $state_set{unknown}; push(@{ $rawdata{$node}{statehistory} }, $rawdata{$node}{state}); } if ($replaylog) { $rst = do_replay($epoch_starttime); exit $rst; } $rst = do_monitor(); exit $rst;