mirror of
https://github.com/xcat2/xcat-core.git
synced 2025-05-30 09:36:41 +00:00
* Fix the issue where 'install monitor' exits unexpectedly (#4582). - add an eval block to cover all code that might die in do_installm_service - add more logs in install monitor to record useful information - add sleep 0.01 to avoid excessive CPU usage - fix the wrong timeout usage (alarm - die) - restore the default INT/TERM signal handlers so that the UDP listener does not quit when 'install monitor' is killed * Avoid SSL listener using 100% CPU when the max open file number is hit
This commit is contained in:
parent
d8969d7189
commit
cb4edc2bc1
@ -332,6 +332,7 @@ sub do_installm_service {
|
||||
my $socket;
|
||||
my $installpidfile;
|
||||
my $retry = 1;
|
||||
$SIG{TERM} = $SIG{INT} = 'DEFAULT';
|
||||
$SIG{USR2} = sub {
|
||||
if ($socket) { # do not mess with pid file except when we still have the socket.
|
||||
unlink("/var/run/xcat/installservice.pid"); close($socket); $quit = 1;
|
||||
@ -385,80 +386,91 @@ sub do_installm_service {
|
||||
open($installpidfile, ">", "/var/run/xcat/installservice.pid"); # if here, everyone else has unlinked installservicepid or doesn't care
|
||||
print $installpidfile $$;
|
||||
close($installpidfile);
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: install monitor process $$ start");
|
||||
until ($quit) {
|
||||
$SIG{ALRM} = sub { xCAT::MsgUtils->message("S", "XCATTIMEOUT"); die; };
|
||||
$SIG{ALRM} = sub { die "XCATTIMEOUT"; };
|
||||
my $conn;
|
||||
next unless $conn = $socket->accept;
|
||||
my $conn_peer_addr;
|
||||
my $node;
|
||||
my $validclient = 0;
|
||||
|
||||
# check if a rescanplugins request has come in
|
||||
my @rescans;
|
||||
if (@rescans = $rescanrselect->can_read(0)) {
|
||||
foreach my $rrequest (@rescans) {
|
||||
my $rescan_request = fd_retrieve($rrequest);
|
||||
if ($$rescan_request =~ /rescanplugins/) {
|
||||
scan_plugins('', '1');
|
||||
} else {
|
||||
print "ignoring unrecognized pipe request received by install monitor from ssl listener: $rescan_request \n";
|
||||
next unless $conn = $socket->accept;
|
||||
eval {
|
||||
# check if a rescanplugins request has come in
|
||||
my @rescans;
|
||||
if (@rescans = $rescanrselect->can_read(0)) {
|
||||
foreach my $rrequest (@rescans) {
|
||||
my $rescan_request = fd_retrieve($rrequest);
|
||||
if ($$rescan_request =~ /rescanplugins/) {
|
||||
scan_plugins('', '1');
|
||||
} else {
|
||||
xCAT::MsgUtils->trace(0, "W", "xcatd: ignoring unrecognized pipe request received by install monitor from ssl listener: $rescan_request.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
my $conn_peer_addr = $conn->peerhost();
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd received a connection request from $conn_peer_addr");
|
||||
$conn_peer_addr = $conn->peerhost();
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: received a connection request from $conn_peer_addr");
|
||||
|
||||
my $client_name;
|
||||
my $client_aliases;
|
||||
my @clients;
|
||||
if ($inet6support) {
|
||||
($client_name, $client_aliases) = gethostbyaddr($conn->peeraddr, AF_INET6);
|
||||
unless ($client_name) { ($client_name, $client_aliases) = gethostbyaddr($conn->peeraddr, AF_INET); }
|
||||
} else {
|
||||
($client_name, $client_aliases) = gethostbyaddr($conn->peeraddr, AF_INET);
|
||||
}
|
||||
|
||||
unless ($client_name) {
|
||||
my $addrfamily=sockaddr_family(getpeername($conn));
|
||||
my $myaddr=Socket::inet_ntop($addrfamily,$conn->peeraddr);
|
||||
xCAT::MsgUtils->message("SE", "xcatd received a connection request from unknown host with ip address $myaddr, please check whether the reverse name resolution works correctly. The connection request will be ignored");
|
||||
#print "xcatd received a connection request from unknown host with ip address $myaddr, please check whether the reverse name resolution works correctly. The connection request will be ignored\n";
|
||||
close($conn);
|
||||
next;
|
||||
}
|
||||
|
||||
$clients[0] = $client_name;
|
||||
if ($client_aliases) {
|
||||
push @clients, split(/\s+/, $client_aliases);
|
||||
}
|
||||
|
||||
my $validclient = 0;
|
||||
my $node;
|
||||
my $domain;
|
||||
|
||||
foreach my $client (@clients) {
|
||||
my @ndn = ($client);
|
||||
my $nd = xCAT::NetworkUtils->getNodeDomains(\@ndn);
|
||||
my %nodedomains = %{$nd};
|
||||
$domain = $nodedomains{$client};
|
||||
$client =~ s/\..*//;
|
||||
if ($domain) {
|
||||
$client =~ s/\.$domain//;
|
||||
my $client_name;
|
||||
my $client_aliases;
|
||||
my @clients;
|
||||
if ($inet6support) {
|
||||
($client_name, $client_aliases) = gethostbyaddr($conn->peeraddr, AF_INET6);
|
||||
unless ($client_name) {
|
||||
($client_name, $client_aliases) = gethostbyaddr($conn->peeraddr, AF_INET);
|
||||
}
|
||||
} else {
|
||||
($client_name, $client_aliases) = gethostbyaddr($conn->peeraddr, AF_INET);
|
||||
}
|
||||
|
||||
unless ($client_name) {
|
||||
die "XCATUNKOWNCLIENT"; # use die instead of next to avoid 'Exiting eval via next' message
|
||||
}
|
||||
|
||||
$clients[0] = $client_name;
|
||||
if ($client_aliases) {
|
||||
push @clients, split(/\s+/, $client_aliases);
|
||||
}
|
||||
|
||||
my $domain;
|
||||
my %handled_client=();
|
||||
foreach my $client (@clients) {
|
||||
next if (exists $handled_client{$client});
|
||||
$handled_client{$client}=1;
|
||||
my @ndn = ($client);
|
||||
my $nd = xCAT::NetworkUtils->getNodeDomains(\@ndn);
|
||||
my %nodedomains = %{$nd};
|
||||
$domain = $nodedomains{$client};
|
||||
$client =~ s/\..*//;
|
||||
}
|
||||
if ($domain) {
|
||||
$client =~ s/\.$domain//;
|
||||
} else {
|
||||
$client =~ s/\..*//;
|
||||
}
|
||||
|
||||
# ensure this is coming from a node IP at least
|
||||
($node) = noderange($client);
|
||||
if ($node) { # Means the source isn't valid
|
||||
$validclient = 1;
|
||||
last;
|
||||
# ensure this is coming from a node IP at least
|
||||
($node) = noderange($client);
|
||||
if ($node) { # Means the source isn't valid
|
||||
#$validclient = 1;
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: $conn_peer_addr is matched with node $node");
|
||||
last;
|
||||
}
|
||||
}
|
||||
unless ($node) {
|
||||
xCAT::MsgUtils->trace(0, "E", "xcatd: received a connection request from $conn_peer_addr($client_name), which can not be found in xCAT nodelist table. The connection request will be ignored");
|
||||
}
|
||||
};
|
||||
if ($@) {
|
||||
$node = undef;
|
||||
if ($@ =~ /XCATUNKOWNCLIENT/) {
|
||||
xCAT::MsgUtils->trace(0, "E", "xcatd: received a connection request from unknown host with ip address $conn_peer_addr, please check whether the reverse name resolution works correctly. The connection request will be ignored");
|
||||
} else {
|
||||
xCAT::MsgUtils->message("SE", "xcatd received a connection request from $client, which can not be found in xCAT nodelist table. The connection request will be ignored");
|
||||
#print "xcatd received a connection request from $client, which can not be found in xCAT nodelist table. The connection request will be ignored\n";
|
||||
xCAT::MsgUtils->trace(0, "E", "xcatd: possible BUG encountered by xCAT install monitor service: " . $@);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
unless ($validclient) {
|
||||
unless ($node) {
|
||||
close($conn);
|
||||
sleep 0.01;
|
||||
next;
|
||||
}
|
||||
my $tftpdir = xCAT::TableUtils->getTftpDir();
|
||||
@ -481,7 +493,6 @@ sub do_installm_service {
|
||||
#unless ($pid) { # fork off the nodeset and potential slowness
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: triggering \'nodeset $node next\'...");
|
||||
plugin_command(\%request, undef, \&build_response);
|
||||
|
||||
#exit(0);
|
||||
#}
|
||||
close($conn);
|
||||
@ -494,7 +505,7 @@ sub do_installm_service {
|
||||
node => [$node],
|
||||
arg => ["$newstat"],
|
||||
);
|
||||
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: triggering \'updatenodestat $node $newstat\'...");
|
||||
# node should be blocked, race condition may occur otherwise
|
||||
#my $pid=xCAT::Utils->xfork();
|
||||
#unless ($pid) { # fork off the nodeset and potential slowness
|
||||
@ -505,14 +516,17 @@ sub do_installm_service {
|
||||
}
|
||||
close($conn);
|
||||
} elsif ($text =~ /^unlocktftpdir/) { # TODO: only nodes in install state should be allowed
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: unlock tftpdir for $node...");
|
||||
mkpath("$tftpdir/xcat/$node");
|
||||
chmod 01777, "$tftpdir/xcat/$node";
|
||||
chmod 0666, glob("$tftpdir/xcat/$node/*");
|
||||
close($conn);
|
||||
} elsif ($text =~ /locktftpdir/) {
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: lock tftpdir for $node...");
|
||||
chmod 0755, "$tftpdir/xcat/$node";
|
||||
chmod 0644, glob("$tftpdir/xcat/$node/*");
|
||||
} elsif ($text =~ /^getpostscript/) {
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: handle getpostscript requesting from $node...");
|
||||
my $reply = plugin_command({ command => ['getpostscript'], _xcat_clienthost => [$node] }, undef, \&build_response);
|
||||
foreach (@{ $reply->{data} }) {
|
||||
print $conn $_;
|
||||
@ -520,10 +534,12 @@ sub do_installm_service {
|
||||
print $conn "#END OF SCRIPT\n";
|
||||
close($conn);
|
||||
} elsif ($text =~ /^syncfiles/) {
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: handle syncfiles requesting from $node...");
|
||||
plugin_command({ command => ['syncfiles'], _xcat_clienthost => [$node] }, undef, \&build_response);
|
||||
print $conn "syncfiles done\n";
|
||||
close($conn);
|
||||
} elsif ($text =~ /^setiscsiparms/) {
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: handle setiscsiparms requesting from $node...");
|
||||
$text =~ s/^setiscsiparms\s+//;
|
||||
my $kname;
|
||||
my $iname;
|
||||
@ -559,31 +575,40 @@ sub do_installm_service {
|
||||
arg => ["$text"],
|
||||
);
|
||||
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: triggering \'updatenodeappstat $node $text\'...");
|
||||
plugin_command(\%request, undef, \&build_response);
|
||||
close($conn);
|
||||
} elsif ($text =~ /basecustremv/) {
|
||||
|
||||
$text =~ s/basecustremv //;
|
||||
chomp $text;
|
||||
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: handle basecustremv requesting for $text...");
|
||||
# remove the BASECUST_REMOVAL line from /tftpboot/hostname.info file
|
||||
my $myfile = "/tftpboot/$text" . ".info";
|
||||
`/usr/bin/cat $myfile | /usr/bin/sed "/BASECUST_REMOVAL/d">/tmp/$text.nimtmp`;
|
||||
`/usr/bin/mv /tmp/$text.nimtmp $myfile`;
|
||||
close($conn);
|
||||
} else {
|
||||
sleep 0.01;
|
||||
chomp $text;
|
||||
xCAT::MsgUtils->trace(0, "E", "xcatd: install monitor does not support \'$text\', the connection request from $conn_peer_addr will be ignored.");
|
||||
close($conn); #close it to avoid the DDOS attack
|
||||
next;
|
||||
}
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: finish a connection request for $node from $conn_peer_addr");
|
||||
alarm(2);
|
||||
}
|
||||
alarm(0);
|
||||
};
|
||||
if ($@) {
|
||||
if ($@ =~ /XCATTIMEOUT/) {
|
||||
xCAT::MsgUtils->message("S", "xcatd: install monitor timed out talking to $node");
|
||||
xCAT::MsgUtils->trace(0, "W", "xcatd: install monitor timed out talking to $node($conn_peer_addr)");
|
||||
} else {
|
||||
xCAT::MsgUtils->message("S", "xcatd: possible BUG encountered by xCAT install monitor service: " . $@);
|
||||
xCAT::MsgUtils->trace(0, "E", "xcatd: possible BUG encountered by xCAT install monitor service: " . $@);
|
||||
close($conn);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if (open($installpidfile, "<", "/var/run/xcat/installservice.pid")) {
|
||||
my $pid = <$installpidfile>;
|
||||
@ -634,6 +659,7 @@ sub do_discovery_process {
|
||||
populate_vpd_hash();
|
||||
populate_mp_hash();
|
||||
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: Discovery worker process $$ start");
|
||||
while (not $quit) {
|
||||
my $msg = fd_retrieve($broker);
|
||||
if ((time() - $vintage) > 15) {
|
||||
@ -745,6 +771,8 @@ sub do_udp_service { # This function opens up a UDP port
|
||||
open($udppidfile, ">", "/var/run/xcat/udpservice.pid"); # if here, everyone else has unlinked udpservicepid or doesn't care
|
||||
print $udppidfile $$;
|
||||
close($udppidfile);
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: UDP listener process $$ start");
|
||||
|
||||
$select->add($socket);
|
||||
$udpcontext->{socket} = $socket;
|
||||
$select->add($sslctl);
|
||||
@ -945,7 +973,8 @@ unless (xCAT::Utils->isLinux()) { # messes up the output of the service cmd on l
|
||||
};
|
||||
}
|
||||
if ($@) {
|
||||
print "ERROR: $@";
|
||||
#print "ERROR: $@";
|
||||
xCAT::MsgUtils->trace(0, "E", "xcatd: possible BUG encountered: " . $@);
|
||||
xexit;
|
||||
}
|
||||
unless ($foreground) {
|
||||
@ -1229,7 +1258,7 @@ unless ($cmdlog_svrpid) {
|
||||
open($cmdlogpidfile, ">$cmdlogservicefile");
|
||||
print $cmdlogpidfile $$;
|
||||
close($cmdlogpidfile);
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: command log process $$ start");
|
||||
xCAT::MsgUtils->trace(0, "I", "xcatd: Command log writer process $$ start");
|
||||
|
||||
my $cmdlog_logfile_path = dirname($cmdlog_logfile);
|
||||
mkpath("$cmdlog_logfile_path") unless (-d "$cmdlog_logfile_path");
|
||||
@ -1411,7 +1440,7 @@ until ($quit) {
|
||||
if (@pendingconnections) {
|
||||
while ($listenwatcher->can_read(0)) { # grab everything we can, but don't spend any time waiting for more
|
||||
$tconn = $listener->accept;
|
||||
unless ($tconn) { next; }
|
||||
unless ($tconn) { sleep 0.01; next; } # increase max open file number might cause dead-loop here, sleep for a while
|
||||
push @pendingconnections, $tconn;
|
||||
}
|
||||
} else {
|
||||
|
Loading…
x
Reference in New Issue
Block a user