mirror of
https://github.com/xcat2/confluent.git
synced 2025-01-17 13:13:18 +00:00
Explicitly check IPMI console health
In theory, pyghmi should be doing a self-health check. It has been discovered at scale that this self-health check may encounter issues. For now, try to work around this by having another health check at the confluent level, deferred by console activity. It's also spaced far apart, so it should not significantly add to idle load (one check every ~5 minutes, spread out).
This commit is contained in:
parent
5d105c43e5
commit
ec02097b52
@ -306,6 +306,8 @@ class ConsoleHandler(object):
|
||||
self.reconnect = None
|
||||
self.connectstate = 'connecting'
|
||||
eventlet.spawn(self._connect)
|
||||
else:
|
||||
self._console.ping()
|
||||
|
||||
|
||||
|
||||
|
@ -25,6 +25,7 @@ import eventlet.greenpool as greenpool
|
||||
import eventlet.queue as queue
|
||||
import pyghmi.constants as pygconstants
|
||||
import pyghmi.exceptions as pygexc
|
||||
import random
|
||||
console = eventlet.import_patched('pyghmi.ipmi.console')
|
||||
ipmicommand = eventlet.import_patched('pyghmi.ipmi.command')
|
||||
import socket
|
||||
@ -203,12 +204,18 @@ class IpmiConsole(conapi.Console):
|
||||
self.bmc = connparams['bmc']
|
||||
self.port = connparams['port']
|
||||
self.connected = False
|
||||
self.checker = None
|
||||
# Cannot actually create console until 'connect', when we get callback
|
||||
|
||||
def __del__(self):
|
||||
self.solconnection = None
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.checker:
|
||||
# if we got data from remote, delay the ping check
|
||||
self.checker.cancel()
|
||||
self.checker = eventlet.spawn_after(290 + random.random() * 30,
|
||||
self.ping)
|
||||
if type(data) == dict:
|
||||
if 'error' in data:
|
||||
self.solconnection = None
|
||||
@ -245,8 +252,14 @@ class IpmiConsole(conapi.Console):
|
||||
self.connected = True
|
||||
except socket.gaierror as err:
|
||||
raise exc.TargetEndpointUnreachable(str(err))
|
||||
if self.checker:
|
||||
self.checker.cancel()
|
||||
self.checker = eventlet.spawn_after(290 + random.random() * 30,
|
||||
self.ping)
|
||||
|
||||
def close(self):
|
||||
if self.checker:
|
||||
self.checker.cancel()
|
||||
if self.solconnection is not None:
|
||||
# break the circular reference here
|
||||
self.solconnection.out_handler = _donothing
|
||||
@ -261,6 +274,12 @@ class IpmiConsole(conapi.Console):
|
||||
def send_break(self):
|
||||
self.solconnection.send_break()
|
||||
|
||||
def ping(self):
    """Explicitly check the health of the SOL console session.

    Sends raw IPMI command netfn 6 / command 0x4b with data (1, 1) over
    the console's IPMI session and hands the response to the SOL
    connection's ``_got_payload_instance_info`` handler, which decides
    what to do with it.

    This is invoked from a delayed green thread, so the connection may
    already have been torn down by the time it fires (other methods of
    this class set ``solconnection`` to None); in that case there is
    nothing to check and the call is a no-op.
    """
    # Snapshot the attribute so it cannot change between the None check
    # and the calls below.
    sol = self.solconnection
    if sol is None:
        return
    rsp = sol.ipmi_session.raw_command(netfn=6,
                                       command=0x4b,
                                       data=(1, 1))
    sol._got_payload_instance_info(rsp)
|
||||
|
||||
|
||||
def perform_requests(operator, nodes, element, cfg, inputdata):
|
||||
cryptit = cfg.decrypt
|
||||
|
Loading…
x
Reference in New Issue
Block a user