From ee1d71fe2fbe1d94ba9b6f24f9fdf9ed9ff803b3 Mon Sep 17 00:00:00 2001 From: chenglch Date: Thu, 21 Jan 2016 00:35:13 -0500 Subject: [PATCH] Partial issue #537, try to detect the error of db access process This patch adds changes to detect the error and enter direct db access mode so that other processes can temporarily access the data without the db access process. Direct access is not the suggested way, and currently nodelist (noderange) cannot support this mode. Partial issue: #537 --- perl-xCAT/xCAT/NodeRange.pm | 8 ++++-- perl-xCAT/xCAT/Table.pm | 54 +++++++++++++++++++++++++++++++------ perl-xCAT/xCAT/Utils.pm | 30 +++++++++++++++++++++ 3 files changed, 82 insertions(+), 10 deletions(-) diff --git a/perl-xCAT/xCAT/NodeRange.pm b/perl-xCAT/xCAT/NodeRange.pm index 978f6ed1e..85f7bb9cf 100644 --- a/perl-xCAT/xCAT/NodeRange.pm +++ b/perl-xCAT/xCAT/NodeRange.pm @@ -226,8 +226,12 @@ sub expandatom { } if ($grptab and (($glstamp < (time()-5)) or (not $didgrouplist and not scalar @grplist))) { $didgrouplist = 1; - $glstamp=time(); - @grplist = @{$grptab->getAllEntries()}; + $glstamp=time(); + my $grplist_ptr = $grptab->getAllEntries(); + if (!$grplist_ptr) { + return undef; + } + @grplist = @{$grplist_ptr}; } my $isdynamicgrp = 0; foreach my $grpdef_ref (@grplist) { diff --git a/perl-xCAT/xCAT/Table.pm b/perl-xCAT/xCAT/Table.pm index 706c08e77..68205c876 100644 --- a/perl-xCAT/xCAT/Table.pm +++ b/perl-xCAT/xCAT/Table.pm @@ -63,6 +63,7 @@ $DBI::dbi_debug=9; # increase the debug output use strict; use Scalar::Util qw/weaken/; +use xCAT::Utils; require xCAT::Schema; require xCAT::NodeRange; use Text::Balanced qw(extract_bracketed); @@ -94,24 +95,45 @@ sub dbc_submit { $request->{'wantarray'} = wantarray(); my $clisock; my $tries=300; + my $retdata; + my $err; while($tries and !($clisock = IO::Socket::UNIX->new(Peer => $dbsockpath, Type => SOCK_STREAM, Timeout => 120) ) ) { #print "waiting for clisock to be available\n"; + if ($tries % 10 == 0 and $dbworkerpid != 0 and not 
xCAT::Utils::is_process_exists($dbworkerpid)) { + $dbworkerpid = 0; + xCAT::MsgUtils->message("S","xcatd: DB access process is down, xcat is running in direct access mode. " + ."Please restart xcatd to avoid this error."); + return undef; + } sleep 0.1; $tries--; } - unless ($clisock) { + if ( $dbworkerpid !=0 and !$clisock) { use Carp qw/cluck/; cluck(); } - store_fd($request,$clisock); + eval { + store_fd($request,$clisock); + }; + if ($@) { + $err = $@; + xCAT::MsgUtils->message("S","xcatd: Error happened when sending data to DB access process ".$err); + return undef; + } #print $clisock $data; my $data=""; my $lastline=""; - my $retdata = fd_retrieve($clisock); + eval { + $retdata = fd_retrieve($clisock); + }; + if ($@) { + $err = $@; + xCAT::MsgUtils->message("S","xcatd: Error happened when receiving data from DB access process ".$err); + return undef; + } close($clisock); if (ref $retdata eq "SCALAR") { #bug detected #in the midst of the operation, die like it used to die - my $err; $$retdata =~ /\*XCATBUGDETECTED\*:(.*):\*XCATBUGDETECTED\*/s; $err = $1; die $err; @@ -2225,10 +2247,15 @@ sub getNodeAttribs { my $self = shift; if ($dbworkerpid) { #TODO: should this be moved outside of the DB worker entirely? I'm thinking so, but I don't dare do so right now... 
- #the benefit would be the potentially computationally intensive substitution logic would be moved out and less time inside limited - #db worker scope + #the benefit would be the potentially computationally intensive substitution logic would be moved out and less time inside limited + #db worker scope return dbc_call($self,'getNodeAttribs',@_); } + + if (!defined($self->{dbh})) { + xCAT::MsgUtils->message("S","xcatd: DBI is missing, Please check the db access process."); + return undef; + } my $node = shift; my @attribs; my %options = (); @@ -2699,6 +2726,12 @@ sub getAllEntries if ($dbworkerpid) { return dbc_call($self,'getAllEntries',@_); } + + if (!defined($self->{dbh})) { + xCAT::MsgUtils->message("S","xcatd: DBI is missing, Please check the db access process."); + return undef; + } + my $allentries = shift; my @rets; my $query; @@ -2995,13 +3028,18 @@ sub getAllAttribs if ($dbworkerpid) { return dbc_call($self,'getAllAttribs',@_); } + + if (!defined($self->{dbh})) { + xCAT::MsgUtils->message("S","xcatd: DBI is missing, Please check the db access process."); + return undef; + } #print "Being asked to dump ".$self->{tabname}."for something\n"; my @attribs = @_; my @results = (); if ($self->{_use_cache}) { if ($self->{_cachestamp} < (time()-5)) { #NEVER use a cache older than 5 seconds - $self->_refresh_cache(); - } + $self->_refresh_cache(); + } my @results; my $cacheline; CACHELINE: foreach $cacheline (@{$self->{_tablecache}}) { diff --git a/perl-xCAT/xCAT/Utils.pm b/perl-xCAT/xCAT/Utils.pm index de59edd5c..4236d5d76 100644 --- a/perl-xCAT/xCAT/Utils.pm +++ b/perl-xCAT/xCAT/Utils.pm @@ -4620,4 +4620,34 @@ sub lookupNetboot{ return $ret; } +#-------------------------------------------------------------------------------- + +=head3 is_process_exists + Check whether a process exists. 
+ Arguments: + process id (a positive integer) + Returns: + 1 process exists + 0 process does not exist, or the pid is not a positive integer + Globals: + none + Error: + none + Example: + xCAT::Utils::is_process_exists($pid); + Comments: + Probes with signal 0, so nothing is delivered to the process. +=cut + +#-------------------------------------------------------------------------------- +sub is_process_exists{ + my $pid = shift; + # Tolerate the method-call form xCAT::Utils->is_process_exists($pid). + $pid = shift if (defined($pid) and $pid =~ /xCAT::Utils/); + return 0 unless (defined($pid) and $pid =~ /^[1-9]\d*\z/); + # EPERM means the pid is in use but owned by another user. + return 1 if (kill(0, $pid) or $!{EPERM}); + return 0; +} + 1;