mirror of
https://github.com/xcat2/xcat-core.git
synced 2025-05-29 09:13:08 +00:00
Merge pull request #1480 from xuweibj/osdeploy
xcatprobe osdeploy add monitoring computes.log
This commit is contained in:
commit
e3677ff9b4
@ -27,11 +27,11 @@ my %monitor_nodes;
|
||||
$::USAGE = "Usage:
|
||||
$program_name -h
|
||||
$program_name -T
|
||||
$program_name [-V]
|
||||
$program_name -n <node_range>
|
||||
$program_name -n <node_range> [-V]
|
||||
|
||||
Description:
|
||||
Do probe for os provision process, realtime monitor of os provision process.
|
||||
Please run this before rpower node.
|
||||
|
||||
Options:
|
||||
-h : Get usage information of $program_name
|
||||
@ -42,11 +42,13 @@ Options:
|
||||
|
||||
sub check_noderange{
|
||||
my $node_range = shift;
|
||||
my @cmdoutput = `lsdef $node_range 2>&1`;
|
||||
my @cmdoutput = `lsdef $node_range -i ip,mac 2>&1`;
|
||||
my $rst = 0;
|
||||
my $currentnode = "";
|
||||
my $ip = "NOIP";
|
||||
my %nodecheckrst;
|
||||
my $ip;
|
||||
my $mac_line;
|
||||
my @macs;
|
||||
|
||||
foreach (@cmdoutput) {
|
||||
chomp($_);
|
||||
@ -58,68 +60,126 @@ sub check_noderange{
|
||||
} elsif ($_ =~ /^\s*Object name: (\w+)/i) {
|
||||
$monitor_nodes{$1} = 0;
|
||||
$currentnode = $1;
|
||||
$ip = "NOIP";
|
||||
} elsif ($_ =~ /^ip=(.+)/i) {
|
||||
$ip = $1;
|
||||
} elsif ($_ =~ /^mac=(.+)\|(.+)!\*NOIP\*/i) {
|
||||
$macmap{$1}{"ip"} = $ip;
|
||||
$macmap{$2}{"ip"} = $ip;
|
||||
$macmap{$1}{"node"} = $currentnode;
|
||||
$macmap{$2}{"node"} = $currentnode;
|
||||
} elsif ($_ =~ /^mac=(.+)\|(.+)/i) {
|
||||
$macmap{$1}{"ip"} = $ip;
|
||||
$macmap{$2}{"ip"} = $ip;
|
||||
$macmap{$1}{"node"} = $currentnode;
|
||||
$macmap{$2}{"node"} = $currentnode;
|
||||
} elsif ($_ =~ /^mac=(.+)/i) {
|
||||
$macmap{$1}{"ip"} = $ip;
|
||||
$macmap{$1}{"node"} = $currentnode;
|
||||
$mac_line = $1;
|
||||
@macs = split(/\|/, $mac_line);
|
||||
foreach my $mac(@macs) {
|
||||
if ($mac =~ /\!\*NOIP\*/) {
|
||||
$mac =~ s/\!\*NOIP\*//g;
|
||||
$macmap{$mac}{"ip"} = "NOIP";
|
||||
$macmap{$mac}{"node"} = $currentnode;
|
||||
}
|
||||
else {
|
||||
$macmap{$mac}{"ip"} = $ip;
|
||||
$macmap{$mac}{"node"} = $currentnode;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach my $node (keys %nodecheckrst) {
|
||||
probe_utils->send_msg("$output", "d", "$node : $nodecheckrst{$node}{error}") if(exists($nodecheckrst{$node}{error}));
|
||||
}
|
||||
|
||||
unless (%monitor_nodes) {
|
||||
probe_utils->send_msg("$output", "d", "There is no node to be monitored");
|
||||
$rst = 1;
|
||||
}
|
||||
return $rst;
|
||||
}
|
||||
|
||||
# handle_dhcp_msg
#   Handle one dhcpd log line.
#
#   Recognizes DHCPDISCOVER, DHCPOFFER, DHCPREQUEST, DHCPACK, BOOTREQUEST
#   and BOOTREPLY messages.  A message is acted on only when its MAC address
#   is a key of %macmap; the event is then reported through
#   probe_utils->send_msg and appended to $rawdata{<node>}{"history"}.
#
#   DHCPREQUEST lines containing "unknown lease" or "ignored" are skipped.
#   For DHCPACK and BOOTREPLY the IP is stored in %ipnodemap (ip => node)
#   unless the MAC is defined with the "NOIP" marker.  For DHCPREQUEST and
#   BOOTREPLY a warning is issued when the IP seen in the log differs from
#   the IP recorded in %macmap.
#
#   Arguments:
#       $msg - the log line to parse
#   Returns:
#       0, always
sub handle_dhcp_msg {
    my $msg = shift;

    if ($msg =~ /.+DHCPDISCOVER\s+from\s+(.+)\s+via\s+([^:]+)(.*)/i) {
        my $mac = $1;
        my $nic = $2;

        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Receive DHCPDISCOVER from [$node] $mac via $nic";
            probe_utils->send_msg("$output", "d", "$record");
            push(@{ $rawdata{$node}{"history"} }, $record);
        }
    } elsif ($msg =~ /.+DHCPOFFER\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/i) {
        my $ip  = $1;
        my $mac = $2;
        my $nic = $3;

        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Send DHCPOFFER on $ip back to [$node] $mac via $nic";
            probe_utils->send_msg("$output", "d", "$record");
            push(@{ $rawdata{$node}{"history"} }, $record);
        }
    } elsif ($msg !~ /unknown lease/ && $msg !~ /ignored/ && $msg =~ /.+DHCPREQUEST\s+for\s+(.+)\s\((.+)\)\s+from\s+(.+)\s+via\s+(.+)/) {
        # Capture group 2 (the text in parentheses) is not needed here.
        my $ip  = $1;
        my $mac = $3;
        my $nic = $4;

        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Receive DHCPREQUEST from [$node] $mac for $ip via $nic";
            probe_utils->send_msg("$output", "d", "$record");
            push(@{ $rawdata{$node}{"history"} }, $record);

            # An undefined definition is treated like the "NOIP" marker.
            my $defined_ip = defined($macmap{$mac}{"ip"}) ? $macmap{$mac}{"ip"} : "NOIP";

            # IP addresses and "NOIP" are strings: they must be compared
            # with ne.  A numeric != only looks at the leading number, so
            # "10.0.0.1" and "10.0.0.2" would be considered equal.
            if ($defined_ip ne "NOIP" and $defined_ip ne $ip) {
                my $warn_msg = "The ip of [$node] $mac from DHCP $ip is different with definition $defined_ip.";
                probe_utils->send_msg("$output", "w", "$warn_msg");
                push(@{ $rawdata{$node}{"history"} }, $warn_msg);
            }
        }
    } elsif ($msg =~ /.+DHCPACK\s+on\s+(.+)\s+to\s+(.+)\s+via\s+(.+)/) {
        my $ip  = $1;
        my $mac = $2;
        my $nic = $3;

        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Send DHCPACK on $ip back to [$node] $mac via $nic";
            probe_utils->send_msg("$output", "d", "$record");
            push(@{ $rawdata{$node}{"history"} }, $record);

            my $defined_ip = defined($macmap{$mac}{"ip"}) ? $macmap{$mac}{"ip"} : "NOIP";

            # Remember which node owns this IP so that messages identified
            # only by IP can be mapped back to the node through %ipnodemap.
            if ($defined_ip ne "NOIP") {
                $ipnodemap{$ip} = $node;
            }
        }
    } elsif ($msg =~ /.+BOOTREQUEST\s+from\s+(.+)\s+via\s+([^:]+)(.*)/) {
        my $mac = $1;
        my $nic = $2;

        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Receive BOOTREQUEST from [$node] $mac via $nic";
            probe_utils->send_msg("$output", "d", "$record");
            push(@{ $rawdata{$node}{"history"} }, $record);
        }
    } elsif ($msg =~ /.+BOOTREPLY\s+for\s+(.+)\s+to\s+.+(\w\w:\w\w:\w\w:\w\w:\w\w:\w\w).+via\s+(.+)/) {
        my $ip  = $1;
        my $mac = $2;
        my $nic = $3;

        if (exists $macmap{$mac}) {
            my $node   = $macmap{$mac}{"node"};
            my $record = "Send BOOTREPLY on $ip back to [$node] $mac via $nic";
            probe_utils->send_msg("$output", "d", "$record");
            push(@{ $rawdata{$node}{"history"} }, $record);

            my $defined_ip = defined($macmap{$mac}{"ip"}) ? $macmap{$mac}{"ip"} : "NOIP";

            if ($defined_ip ne "NOIP") {
                $ipnodemap{$ip} = $node;

                # String comparison: see the note in the DHCPREQUEST branch.
                if ($defined_ip ne $ip) {
                    my $warn_msg = "The ip of [$node] $mac from DHCP $ip is different with definition $defined_ip.";
                    probe_utils->send_msg("$output", "w", "$warn_msg");
                    push(@{ $rawdata{$node}{"history"} }, $warn_msg);
                }
            }
        }
    }

    return 0;
}
|
||||
|
||||
@ -128,7 +188,7 @@ sub handle_http_msg {
|
||||
if ($msg =~ /(\d+\.\d+.\d+.\d+)\s.+GET\s+(.+)\s+HTTP.+/) {
|
||||
my $ip = $1;
|
||||
my $file = $2;
|
||||
my $record = "Via HTTP $ip download $file";
|
||||
my $record = "[$ipnodemap{$ip}] Via HTTP $ip GET $file";
|
||||
|
||||
if (exists($rawdata{"$ipnodemap{$ip}"})) {
|
||||
probe_utils->send_msg("$output", "d", "$record");
|
||||
@ -145,7 +205,7 @@ sub handle_cluster_msg {
|
||||
my $msg;
|
||||
my $status;
|
||||
|
||||
if ($line =~ /.+\d+:\d+:\d+\s+(.+)\s+(xcat.+)/i) {
|
||||
if ($line =~ /.+\d{2}:\d{2}:\d{2}\s+(.+)\s+(xcat.+)/i) {
|
||||
$sender = $1;
|
||||
$msg = $2;
|
||||
|
||||
@ -155,7 +215,7 @@ sub handle_cluster_msg {
|
||||
$node = $ipnodemap{$sender};
|
||||
}
|
||||
if ($node ne "" && exists($rawdata{$node})) {
|
||||
my $record = "Recv from $node : $msg";
|
||||
my $record = "Receive from $node : $msg";
|
||||
probe_utils->send_msg("$output", "d", "$record");
|
||||
push(@{ $rawdata{ $node }{"history"} }, $record);
|
||||
}
|
||||
@ -166,7 +226,7 @@ sub handle_cluster_msg {
|
||||
$status = $2;
|
||||
|
||||
if (exists($rawdata{$node})) {
|
||||
my $record = "Recv from $node : xcat: status is $status";
|
||||
my $record = "Receive from $node : status is $status";
|
||||
probe_utils->send_msg("$output", "d", "$record");
|
||||
push(@{ $rawdata{ $node }{"history"} }, $record);
|
||||
}
|
||||
@ -174,11 +234,39 @@ sub handle_cluster_msg {
|
||||
if (exists($rawdata{$node}) and ($status eq "booted")) {
|
||||
$monitor_nodes{$node} = 1 if (defined($monitor_nodes{$node}));
|
||||
probe_utils->send_msg("$output", "o", "Node $node has finished it's os provision process");
|
||||
} elsif (exists($rawdata{$node}) and ($status eq "failed")) {
|
||||
$monitor_nodes{$node} = 1 if (defined($monitor_nodes{$node}));
|
||||
probe_utils->send_msg("$output", "f", "Node $node has finished it's os provision process");
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
# handle_compute_msg
#   Handle one line of the compute-node log.
#
#   The line is expected to contain an HH:MM:SS time, followed by the
#   sender and then a message starting with "xcat" (case-insensitive).
#   Lines of any other shape are ignored.  When the sender is an IP address
#   it is translated to a node name through %ipnodemap; otherwise the sender
#   itself is taken as the node name.  If the node has an entry in %rawdata,
#   the message is reported through probe_utils->send_msg and appended to
#   the node's history.
#
#   Arguments:
#       $line - the log line to parse
#   Returns:
#       0, always
sub handle_compute_msg {
    my $line = shift;

    return 0 unless ($line =~ /.+\d{2}:\d{2}:\d{2}\s+(.+)\s+(xcat.+)/i);
    my $sender = $1;
    my $msg    = $2;

    my $node;
    if (xCAT::NetworkUtils->isIpaddr($sender)) {
        # May be undef: the IP has not been seen in a DHCPACK/BOOTREPLY yet.
        $node = $ipnodemap{$sender};
    } else {
        $node = $sender;
    }

    # Guard against an unmapped IP before using $node in a string
    # comparison or as a hash key.
    return 0 unless (defined($node) && $node ne "" && exists($rawdata{$node}));

    my $record = "Receive from $node : $msg";
    probe_utils->send_msg("$output", "d", "$record");
    push(@{ $rawdata{$node}{"history"} }, $record);

    return 0;
}
|
||||
|
||||
sub all_monitor_node_done {
|
||||
my $done = 1;
|
||||
foreach my $node (keys %monitor_nodes) {
|
||||
@ -211,6 +299,7 @@ sub dump_history {
|
||||
}
|
||||
}
|
||||
foreach my $line (@{ $rawdata{$node}{"history"} }) {
|
||||
# Print http message less than 10 lines
|
||||
if ($line =~ /Via HTTP/)
|
||||
{
|
||||
if (($http_num <= 4) or ($length_http - $line_num <= 4)){
|
||||
@ -245,7 +334,7 @@ sub do_monitor{
|
||||
}
|
||||
|
||||
if (!$nics) {
|
||||
my $masteripinsite = `tabdump site | awk -F',' '/master/ { gsub(/"/, "", \$2) ; print \$2 }'`;
|
||||
my $masteripinsite = `tabdump site | awk -F',' '/^"master",/ { gsub(/"/, "", \$2) ; print \$2 }'`;
|
||||
chomp($masteripinsite);
|
||||
$nics = `ip addr |grep -B2 $masteripinsite|awk -F" " '/mtu/{gsub(/:/,"",\$2); print \$2}'`;
|
||||
chomp($nics);
|
||||
@ -256,8 +345,14 @@ sub do_monitor{
|
||||
}
|
||||
|
||||
my $rst = 0;
|
||||
my $startline = "-------------------------------------------------------------
|
||||
Start to capture every message during os provision process......
|
||||
my $startline = "
|
||||
-------------------------------------------------------------
|
||||
___
|
||||
____ _ _____ _.-| | |\\__/,| (`\\
|
||||
__ __/ ___| / \\|_ _| { | | |x x |__ _) )
|
||||
\\ \\/ / | / _ \\ | | \"-.|___| _.( T ) ` /
|
||||
> <| |___ / ___ \\| | .--'-`-. _((_ `^--' /_< \\
|
||||
/_/\\_\\\\____/_/ \\_\\_| .+|______|__.-||__)`-'(((/ (((/
|
||||
-------------------------------------------------------------
|
||||
";
|
||||
|
||||
@ -265,6 +360,7 @@ Start to capture every message during os provision process......
|
||||
|
||||
my $varlogmsg = "/var/log/messages";
|
||||
my $clusterlog = "/var/log/xcat/cluster.log";
|
||||
my $computelog = "/var/log/xcat/computes.log";
|
||||
|
||||
my $httplog;
|
||||
if (-e "/var/log/httpd/access_log") {
|
||||
@ -278,58 +374,58 @@ Start to capture every message during os provision process......
|
||||
my $varlogpid;
|
||||
my $clusterpid;
|
||||
my $httppid;
|
||||
my $computerpid;
|
||||
|
||||
if (!($varlogpid = open(VARLOGMSGFILE, "tail -f $varlogmsg 2>&1 |"))) {
|
||||
if (!($varlogpid = open(VARLOGMSGFILE, "tail -f -n 0 $varlogmsg 2>&1 |"))) {
|
||||
probe_utils->send_msg("$output", "f", "Can't open $varlogmsg to get logs");
|
||||
$rst = 1;
|
||||
last;
|
||||
}
|
||||
if (!($clusterpid = open(CLUSTERLOGFILE, "tail -f $clusterlog 2>&1 |"))) {
|
||||
if (!($clusterpid = open(CLUSTERLOGFILE, "tail -f -n 0 $clusterlog 2>&1 |"))) {
|
||||
probe_utils->send_msg("$output", "f", "Can't open $clusterlog to get logs");
|
||||
$rst = 1;
|
||||
last;
|
||||
}
|
||||
if (!($httppid = open(HTTPLOGFILE, "tail -f $httplog 2>&1 |"))) {
|
||||
if (!($httppid = open(HTTPLOGFILE, "tail -f -n 0 $httplog 2>&1 |"))) {
|
||||
probe_utils->send_msg("$output", "f", "Can't open $httplog to get logs");
|
||||
$rst = 1;
|
||||
last;
|
||||
}
|
||||
if (!($computerpid = open(COMPUTERFILE, "tail -f -n 0 $computelog 2>&1 |"))) {
|
||||
probe_utils->send_msg("$output", "f", "Can't open $computelog to get logs");
|
||||
$rst = 1;
|
||||
last;
|
||||
}
|
||||
|
||||
my $select = new IO::Select;
|
||||
$select->add(\*VARLOGMSGFILE);
|
||||
$select->add(\*CLUSTERLOGFILE);
|
||||
$select->add(\*HTTPLOGFILE);
|
||||
$select->add(\*COMPUTERFILE);
|
||||
$| = 1;
|
||||
|
||||
my $line = "";
|
||||
my @hdls;
|
||||
my $hdl;
|
||||
my $oldlines = 10;
|
||||
my $varlogmsgcnt = 0;
|
||||
my $clusterlogcnt = 0;
|
||||
my $httplogcnt = 0;
|
||||
|
||||
for (; ;){
|
||||
if (@hdls = $select->can_read(0)) {
|
||||
foreach $hdl (@hdls) {
|
||||
if ($hdl == \*VARLOGMSGFILE) {
|
||||
chomp($line = <VARLOGMSGFILE>);
|
||||
++$varlogmsgcnt;
|
||||
last if ($varlogmsgcnt <= $oldlines);
|
||||
my @tmp = split(/\s+/, $line);
|
||||
if ($tmp[4] =~ /dhcpd:/i && $line =~ /$nics/) {
|
||||
handle_dhcp_msg("$line");
|
||||
}
|
||||
} elsif ($hdl == \*CLUSTERLOGFILE) {
|
||||
chomp($line = <CLUSTERLOGFILE>);
|
||||
++$clusterlogcnt;
|
||||
last if ($clusterlogcnt <= $oldlines);
|
||||
handle_cluster_msg("$line");
|
||||
} elsif ($hdl == \*HTTPLOGFILE) {
|
||||
chomp($line = <HTTPLOGFILE>);
|
||||
++$httplogcnt;
|
||||
last if ($httplogcnt <= $oldlines);
|
||||
handle_http_msg("$line");
|
||||
} elsif ($hdl == \*COMPUTERFILE) {
|
||||
chomp($line = <COMPUTERFILE>);
|
||||
handle_compute_msg("$line");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -345,12 +441,14 @@ Start to capture every message during os provision process......
|
||||
}
|
||||
&dump_history;
|
||||
|
||||
kill 'INT', $varlogpid if ($varlogpid);
|
||||
kill 'INT', $varlogpid if ($varlogpid);
|
||||
kill 'INT', $clusterpid if ($clusterpid);
|
||||
kill 'INT', $httppid if ($httppid);
|
||||
close(VARLOGMSGFILE) if (VARLOGMSGFILE);
|
||||
close(CLUSTERLOGFILE) if(CLUSTERLOGFILE);
|
||||
close(HTTPLOGFILE) if(HTTPLOGFILE);
|
||||
kill 'INT', $computerpid if ($computerpid);
|
||||
close(VARLOGMSGFILE) if (VARLOGMSGFILE);
|
||||
close(CLUSTERLOGFILE) if (CLUSTERLOGFILE);
|
||||
close(HTTPLOGFILE) if (HTTPLOGFILE);
|
||||
close(COMPUTERFILE) if (COMPUTERFILE);
|
||||
|
||||
return $rst;
|
||||
}
|
||||
@ -383,6 +481,12 @@ if ($test) {
|
||||
exit 0;
|
||||
}
|
||||
|
||||
unless ( $noderange ) {
|
||||
probe_utils->send_msg("$output", "f", "Option -n is required");
|
||||
probe_utils->send_msg("$output", "d", "$::USAGE");
|
||||
exit 1;
|
||||
}
|
||||
|
||||
$rst = do_monitor();
|
||||
|
||||
exit $rst;
|
||||
|
Loading…
x
Reference in New Issue
Block a user