#! /usr/bin/perl # IBM(c) 2016 EPL license http://www.eclipse.org/legal/epl-v10.html BEGIN { $::XCATROOT = $ENV{'XCATROOT'} ? $ENV{'XCATROOT'} : -d '/opt/xcat' ? '/opt/xcat' : '/usr'; } use lib "$::XCATROOT/probe/lib/perl"; use probe_utils; use xCAT::NetworkUtils; use File::Basename; use IO::Select; use Getopt::Long qw(:config no_ignore_case); my $program_name = basename("$0"); my $help; my $test; my $output = "stdout"; my $verbose = 0; my $rst = 0; my $noderange; my %rawdata; my %ipnodemap; my %macmap; my $terminal = 0; my %monitor_nodes; $::USAGE = "Usage: $program_name -h $program_name -T $program_name -n [-V] Description: Do probe for os provision process, realtime monitor of os provision process. Please run this before rpower node. Options: -h : Get usage information of $program_name -T : To verify if $program_name can work, reserve option for probe framework -V : Output more information for debug -n : The range of monitored node. "; #------------------------------------------ =head3 Description: Check if all nodes are valid Arguments: node_range: node range Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub check_noderange { my $node_range = shift; my @cmdoutput = `lsdef $node_range -i ip,mac 2>&1`; my $rst = 0; my $currentnode = ""; my $ip = "NOIP"; my %nodecheckrst; my $mac_line; my @macs; foreach (@cmdoutput) { chomp($_); $_ =~ s/^\s+|\s+$//g; if ($_ =~ /^Error: Could not find an object named '(\w+)' .+/i) { $currentnode = $1; $nodecheckrst{$currentnode}{"error"} = "Could not find node definition"; $rst = 1; } elsif ($_ =~ /^\s*Object name: (\w+)/i) { # 'rst' is used to check whether the node process finished, 1 is finished. $monitor_nodes{$1}{"rst"} = 0; $currentnode = $1; $ip = "NOIP"; } elsif ($_ =~ /^ip=(.+)/i) { if ($1) { $ip = $1; } } elsif ($_ =~ /^mac=(.+)/i) { next unless ($1); $mac_line = $1; @macs = split(/\|/, $mac_line); foreach my $mac (@macs) { if ($mac =~ /\!\*NOIP\*/) { $mac =~ s/\!\*NOIP\*//g; $macmap{$mac}{"ip"} = "NOIP"; $macmap{$mac}{"node"} = $currentnode; } else { $macmap{$mac}{"ip"} = $ip; $macmap{$mac}{"node"} = $currentnode; } } } } foreach my $node (keys %nodecheckrst) { probe_utils->send_msg("$output", "d", "$node : $nodecheckrst{$node}{error}") if (exists($nodecheckrst{$node}{error})); } unless (%monitor_nodes) { probe_utils->send_msg("$output", "d", "There is no node to be monitored"); $rst = 1; } return $rst; } #------------------------------------------ =head3 Description: Handle one line log come from dhcp log file Arguments: msg: one line http log nics: target network interfaces Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub handle_dhcp_msg { my $msg = shift; if ($msg =~ /.+DHCPDISCOVER\s+from\s+(.+)\s+via\s+([^:]+)(.*)/i) { my $mac = $1; my $nic = $2; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Receive DHCPDISCOVER from [$node] $mac via $nic"; probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); } } elsif ($msg =~ /.+DHCPOFFER\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/i) { my $ip = $1; my $mac = $2; my $nic = $3; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Send DHCPOFFER on $ip back to [$node] $mac via $nic"; probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); } } elsif ($msg !~ /unknown lease/ && $msg !~ /ignored/ && $msg =~ /.+DHCPREQUEST\s+for\s+(.+)\s\((.+)\)\s+from\s+(.+)\s+via\s+(.+)/) { my $ip = $1; my $server = $2; my $mac = $3; my $nic = $4; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Receive DHCPREQUEST from [$node] $mac for $ip via $nic"; probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) { my $warn_msg = "The ip of [$node] $mac from DHCP $ip is different with definition $macmap{$mac}{'ip'}."; probe_utils->send_msg("$output", "w", "$warn_msg"); push(@{ $rawdata{$node}{"history"} }, $warn_msg); } } } elsif ($msg =~ /.+DHCPACK\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/) { my $ip = $1; my $mac = $2; my $nic = $3; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Send DHCPACK on $ip back to [$node] $mac via $nic"; probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); $ipnodemap{$ip} = $node; } } elsif ($msg =~ /.+BOOTREQUEST\s+from\s+(.+)\s+via\s+([^:]+)(.*)/) { my $mac = $1; my $nic = $2; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Receive BOOTREQUEST from [$node] $mac via $nic"; probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); } } elsif ($msg =~ /.+BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) { my $ip = $1; my $mac = $2; my $nic = $3; if (exists $macmap{$mac}) { my $node = $macmap{$mac}{"node"}; my $record = "Send BOOTREPLY on $ip back to [$node] $mac via $nic"; probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); $ipnodemap{$ip} = $node; if ($macmap{$mac}{"ip"} != "NOIP" and $macmap{$mac}{"ip"} != $ip) { my $warn_msg = "The ip of [$node] $mac from DHCP $ip is different with definition $macmap{$mac}{'ip'}."; probe_utils->send_msg("$output", "w", "$warn_msg"); push(@{ $rawdata{$node}{"history"} }, $warn_msg); } } } return 0; } #------------------------------------------ =head3 Description: Handle one line log come from tftp log file Arguments: msg: one line tftp log Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub handle_tftp_msg { my $msg = shift; if ($msg =~ /RRQ\s+from\s+(.+)\s+filename\s+(.+)/i) { my $ip = $1; my $file = $2; my $record = "[$ipnodemap{$ip}] Via TFTP $ip download $file"; if (exists($rawdata{"$ipnodemap{$ip}"})) { probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{ $ipnodemap{$ip} }{"history"} }, $record); } } } #------------------------------------------ =head3 Description: Handle one line log come from http log file Arguments: msg: one line http log Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub handle_http_msg { my $msg = shift; if ($msg =~ /(\d+\.\d+.\d+.\d+)\s.+GET\s+(.+)\s+HTTP.+/) { my $ip = $1; my $file = $2; my $record = "[$ipnodemap{$ip}] Via HTTP $ip GET $file"; if (exists($rawdata{"$ipnodemap{$ip}"})) { probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{ $ipnodemap{$ip} }{"history"} }, $record); } } return 0; } #------------------------------------------ =head3 Description: Handle one line log come from cluster.log Arguments: msg: one line log Returns: 0 : pass 1 : failed =cut #------------------------------------------ sub handle_cluster_msg { my $line = shift; my $node = ""; my $msg; my $status; if ($line =~ /.+\s+xcat:\s+(.+)\s+status:\s+(.+)\s+statustime:\s(.+)/) { $node = $1; $status = $2; if (exists($rawdata{$node})) { my $record = "Receive from $node : status is $status"; probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); } # When receive 'status is booted', check whether the $monitor_nodes{$node}{"status"} is installing. # If so, the node has finished its os provision. if (exists($rawdata{$node}) and ($status eq "booted")) { if ($monitor_nodes{$node}{"status"} eq "installing") { $record = "Node $node has finished it's os provision process"; probe_utils->send_msg("$output", "o", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); } else { $record = "NO installing process detected for node $node"; probe_utils->send_msg("$output", "f", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); } $monitor_nodes{$node}{"rst"} = 1 if (defined($monitor_nodes{$node})); } elsif (exists($rawdata{$node}) and ($status eq "failed")) { $monitor_nodes{$node}{"rst"} = 1 if (defined($monitor_nodes{$node})); probe_utils->send_msg("$output", "f", "Node $node has finished it's os provision process"); push(@{ $rawdata{$node}{"history"} }, "Node $node os provision failed"); } elsif (exists($rawdata{$node}) and ($status eq "installing")) { # record 'installing' status, to check when receive 'booted' status $monitor_nodes{$node}{"status"} = "installing"; } } return 0; } #----------------------------------------- =head3 Description: Handle one line log come from computes.log Arguments: msg: one line compute log Returns: 0 : pass 1 : failed =cut #----------------------------------------- sub handle_compute_msg { my $line = shift; my $sender; my $node; my $msg; if ($line =~ /.+\d{2}:\d{2}:\d{2}\s+(.+)\s+(xcat.+)/i) { $sender = $1; $msg = $2; if (!xCAT::NetworkUtils->isIpaddr($sender)) { $node = $sender; } else { $node = $ipnodemap{$sender}; } if ($node ne "" && exists($rawdata{$node})) { my $record = "Receive from $node : $msg"; probe_utils->send_msg("$output", "d", "$record"); push(@{ $rawdata{$node}{"history"} }, $record); } } return 0; } #------------------------------------------ =head3 Description: Test if all nodes have finished job Arguments: One golble attribute %monitor_nodes; Returns: 1: all nodes finished 0: not all nodes finished =cut #------------------------------------------ sub all_monitor_node_done { my $done = 1; foreach my $node (keys %monitor_nodes) { if ($monitor_nodes{$node}{"rst"} == 0) { $done = 0; last; } } return $done; } #------------------------------------------ =head3 Description: Dump monitor history, categorised by mac address. Arguments: NULL Returns: =cut #------------------------------------------ sub dump_history { my $title = " ============================================================= = The summary of os provision: ============================================================= "; print "$title\n"; foreach $node (keys %rawdata) { my $line_num = 0; my $http_num = 0; my $length_http; for (my $i = @{ $rawdata{$node}{"history"} } ; $i >= 0 ; $i--) { if (${ $rawdata{$node}{"history"} }[$i] =~ /Via HTTP/) { $length_http = $i; last; } } foreach my $line (@{ $rawdata{$node}{"history"} }) { # Print http message less than 10 lines if ($line =~ /Via HTTP/) { if (($http_num <= 4) or ($length_http - $line_num <= 4)) { probe_utils->send_msg("$output", "d", "\t$line"); } else { probe_utils->send_msg("$output", "d", "\t......") if ($http_num == 5); } $http_num++; } else { probe_utils->send_msg("$output", "d", "\t$line"); } $line_num++; } } } #------------------------------------------ =head3 Description: Monitor the process of os provision Returns: 0: pass 1: failed =cut #------------------------------------------ sub do_monitor { $SIG{TERM} = $SIG{INT} = sub { $terminal = 1; }; my $msg = "All pre_defined nodes are valid"; my $rc = check_noderange($noderange); if ($rc) { probe_utils->send_msg("$output", "f", $msg); $rst = 1; unless (%monitor_nodes) { return $rst; } } else { probe_utils->send_msg("$output", "o", $msg); } if (!$nics) { my $masteripinsite = `tabdump site | awk -F',' '/^"master",/ { gsub(/"/, "", \$2) ; print \$2 }'`; chomp($masteripinsite); $nics = `ip addr |grep -B2 $masteripinsite|awk -F" " '/mtu/{gsub(/:/,"",\$2); print \$2}'`; chomp($nics); if (!$nics) { probe_utils->send_msg("$output", "f", "The value of master in site table is $masteripinsite, can't get corresponding network interface"); return 1; } } my $rst = 0; my $startline = "------------------------------------------------------------- Start to capture every message during os provision process...... ------------------------------------------------------------- "; print("$startline\n"); my $varlogmsg = "/var/log/messages"; my $clusterlog = "/var/log/xcat/cluster.log"; my $computelog = "/var/log/xcat/computes.log"; my $httplog; if (-e "/var/log/httpd/access_log") { $httplog = "/var/log/httpd/access_log"; } elsif (-e "/var/log/apache2/access_log") { $httplog = "/var/log/apache2/access_log"; } elsif (-e "/var/log/apache2/access.log") { $httplog = "/var/log/apache2/access.log"; } my $varlogpid; my $clusterpid; my $httppid; my $computerpid; { if (!-e $varlogmsg) { probe_utils->send_msg("$output", "w", "$varlogmsg doesn't exist"); } else { if (!($varlogpid = open(VARLOGMSGFILE, "tail -f -n 0 $varlogmsg 2>&1 |"))) { probe_utils->send_msg("$output", "f", "Can't open $varlogmsg to get logs"); $rst = 1; last; } } if (!-e "$clusterlog") { probe_utils->send_msg("$output", "w", "$clusterlog doesn't exist"); } else { if (!($clusterpid = open(CLUSTERLOGFILE, "tail -f -n 0 $clusterlog 2>&1 |"))) { probe_utils->send_msg("$output", "f", "Can't open $clusterlog to get logs"); $rst = 1; last; } } if (!-e "$httplog") { probe_utils->send_msg("$output", "w", "$httplog doesn't exist"); } else { if (!($httppid = open(HTTPLOGFILE, "tail -f -n 0 $httplog 2>&1 |"))) { probe_utils->send_msg("$output", "f", "Can't open $httplog to get logs"); $rst = 1; last; } } if (!-e "$computelog") { probe_utils->send_msg("$output", "w", "$computelog doesn't exist"); } else { if (!($computerpid = open(COMPUTERFILE, "tail -f -n 0 $computelog 2>&1 |"))) { probe_utils->send_msg("$output", "f", "Can't open $computelog to get logs"); $rst = 1; last; } } my $select = new IO::Select; $select->add(\*VARLOGMSGFILE); $select->add(\*CLUSTERLOGFILE); $select->add(\*HTTPLOGFILE); $select->add(\*COMPUTERFILE); $| = 1; my $line = ""; my @hdls; my $hdl; my @candidate_svr_hostname_inlog; my $svr_hostname_short = `hostname -s`; chomp($svr_hostname_short); my $svr_hostname_domain = `hostname -d`; chomp($svr_hostname_domain); push(@candidate_svr_hostname_inlog, $svr_hostname_short); push(@candidate_svr_hostname_inlog, "$svr_hostname_short.$svr_hostname_domain"); for (; ;) { if (@hdls = $select->can_read(0)) { foreach $hdl (@hdls) { if ($hdl == \*VARLOGMSGFILE) { chomp($line = ); my @tmp = split(/\s+/, $line); if ($tmp[4] =~ /dhcpd:/i && $line =~ /$nics/) { handle_dhcp_msg("$line"); } elsif ($tmp[4] =~ /in.tftpd/i) { handle_tftp_msg("$line"); } elsif (($tmp[4] =~ /^xcat/i) || ($tmp[5] =~ /^xcat/i)) { if (grep(/^$tmp[3]$/, @candidate_svr_hostname_inlog)) { handle_cluster_msg("$line"); } else { handle_compute_msg("$line"); } } } elsif ($hdl == \*CLUSTERLOGFILE) { chomp($line = ); handle_cluster_msg("$line"); } elsif ($hdl == \*HTTPLOGFILE) { chomp($line = ); handle_http_msg("$line"); } elsif ($hdl == \*COMPUTERFILE) { chomp($line = ); handle_compute_msg("$line"); } } } if ($terminal || (%monitor_nodes && all_monitor_node_done())) { if ($terminal) { probe_utils->send_msg("$output", "d", "Get INT or TERM signal from STDIN"); } else { probe_utils->send_msg("$output", "o", "All nodes need to monitor have finished os provision process"); } last; } sleep 0.01; } &dump_history; } kill 'INT', $varlogpid if ($varlogpid); kill 'INT', $clusterpid if ($clusterpid); kill 'INT', $httppid if ($httppid); kill 'INT', $computerpid if ($computerpid); close(VARLOGMSGFILE) if (VARLOGMSGFILE); close(CLUSTERLOGFILE) if (CLUSTERLOGFILE); close(HTTPLOGFILE) if (HTTPLOGFILE); close(COMPUTERFILE) if (COMPUTERFILE); return $rst; } #------------------------------------- # main process #------------------------------------- if ( !GetOptions("--help|h|?" => \$help, "T" => \$test, "V" => \$verbose, "n=s" => \$noderange)) { probe_utils->send_msg("$output", "f", "Invalid parameter for $program_name"); probe_utils->send_msg("$output", "d", "$::USAGE"); exit 1; } if ($help) { if ($output ne "stdout") { probe_utils->send_msg("$output", "d", "$::USAGE"); } else { print "$::USAGE"; } exit 0; } if ($test) { probe_utils->send_msg("$output", "o", "Do probe for os provision process, realtime monitor of os provision process."); exit 0; } unless ($noderange) { probe_utils->send_msg("$output", "f", "Option -n is required"); probe_utils->send_msg("$output", "d", "$::USAGE"); exit 1; } $rst = do_monitor(); exit $rst;