2
0
mirror of https://opendev.org/x/pyghmi synced 2025-02-18 19:44:28 +00:00

Retry login on dead session

Sometimes a BMC just up and drops an IPMI session without warning.

The only way to recover is to login, so incorporate a login into the
retry logic.

Now a couple of retries are done the 'cheap' way (retransmit) followed
by a full login attempt.

This does mean that SOL sessions are less likely to survive a shared
nic outage during PXE boot or similar,
however we just have to take that hit.

Change-Id: Ica11d29519b1c69cbd151bce9100f8a7517544ca
This commit is contained in:
Jarrod Johnson 2022-03-25 11:51:32 -04:00
parent 4b4ae0eb97
commit 6836bfffdd

View File

@ -84,6 +84,12 @@ MAX_IDLE = 29
# incorrect idle
def unmatched(response, netfn, cmd):
if not response:
return True
return response['command'] != cmd or response['netfn'] != netfn
def define_worker():
class _IOWorker(threading.Thread):
def join(self):
@ -497,6 +503,8 @@ class Session(object):
else:
self.privlevel = 4
self.autopriv = True
self.onlogpayload = None
self.onlogpayloadtype = None
self.logoutexpiry = None
self.autokeepalive = keepalive
self.maxtimeout = 3 # be aggressive about giving up on initial packet
@ -578,12 +586,18 @@ class Session(object):
while self.logonwaiters:
waiter = self.logonwaiters.pop()
waiter(parameter)
if self.onlogpayload:
self.lastpayload = self.onlogpayload
self.last_payload_type = self.onlogpayloadtype
self.onlogpayload = None
self.send_payload()
def _initsession(self):
# NOTE(jbjohnso): this number can be whatever we want.
# I picked 'xCAT' minus 1 so that a hexdump of packet
# would show xCAT
self.localsid = 2017673555
self.remseqnumber = None
self.confalgo = 0
self.aeskey = None
self.integrityalgo = 0
@ -742,8 +756,9 @@ class Session(object):
while self._isincommand():
_io_wait(self._isincommand(), self.sockaddr, self.evq)
def awaitresponse(self, retry):
while retry and self.lastresponse is None and self.logged:
def awaitresponse(self, retry, netfn, command):
while (retry and unmatched(self.lastresponse, netfn, command)
and (self.logged or self.onlogpayload)):
timeout = self.expiration - _monotonic_time()
_io_wait(timeout, self.sockaddr)
while self.iterwaiters:
@ -798,8 +813,9 @@ class Session(object):
# behavior of only the constructor needing a callback. From then on,
# synchronous usage of the class acts in a greenthread style governed
# by order of data on the network
self.awaitresponse(retry)
self.awaitresponse(retry, netfn + 1, command)
lastresponse = self.lastresponse
self.lastresponse = None
self.incommand = False
while self.evq:
self.evq.popleft().set()
@ -963,7 +979,6 @@ class Session(object):
if 'error' in response:
self.onlogon(response)
return
self.maxtimeout = 6 # we have a confirmed bmc, be more tenacious
if response['code'] == 0xcc and self.ipmi15only is not None:
# tried ipmi 2.0 against a 1.5 which should work, but some bmcs
# thought 'reserved' meant 'must be zero'
@ -1022,6 +1037,8 @@ class Session(object):
def _req_priv_level(self):
self.logged = 1
self.logoutexpiry = None
self.maxtimeout = 2 # Switch quickly to the relogin recovery
self.logontries = 1 # have one relogin waiting to heal
response = self.raw_command(netfn=0x6, command=0x3b,
data=[self.privlevel])
if response['code']:
@ -1331,7 +1348,7 @@ class Session(object):
iserver.pktqueue.append(qent)
iserver.process_pktqueue()
return
if (hasattr(self, 'remseqnumber')
if (self.remseqnumber is not None
and remseqnumber < self.remseqnumber):
return -5 # remote sequence number is too low, reject it
self.remseqnumber = remseqnumber
@ -1409,7 +1426,7 @@ class Session(object):
if sid != self.localsid: # session id mismatch, drop it
return
remseqnumber = struct.unpack("<I", bytes(data[10:14]))[0]
if (hasattr(self, 'remseqnumber')
if (self.remseqnumber is not None
and (remseqnumber < self.remseqnumber)
and (self.remseqnumber != 0xffffffff)):
return
@ -1715,11 +1732,16 @@ class Session(object):
self.nowait = True
self.timeout += 1
if self.timeout > self.maxtimeout:
response = {'error': 'timeout', 'code': 0xffff}
self.ipmicallback(response)
self.nowait = False
self._mark_broken()
return
if not self.logontries:
response = {'error': 'timeout', 'code': 0xffff}
self.ipmicallback(response)
self.nowait = False
self._mark_broken()
return
else:
self.onlogpayload = self.lastpayload
self.onlogpayloadtype = self.last_payload_type
self._relog()
elif self.sessioncontext == 'FAILED':
self.lastpayload = None
self.nowait = False