2
0
mirror of https://github.com/xcat2/confluent.git synced 2025-01-17 13:13:18 +00:00

Explicitly check IPMI console health

In theory, pyghmi should be doing a self-health check.  It has been discovered at scale that
this self-health check may encounter issues.  For now, try to work around this by having another
health check at the confluent level, deferred by console activity.  It's also spaced far apart
so it should not significantly add to idle load (one check every ~5 minutes, spread out).
This commit is contained in:
Jarrod Johnson 2016-05-19 14:44:28 -04:00
parent 5d105c43e5
commit ec02097b52
2 changed files with 21 additions and 0 deletions

View File

@ -306,6 +306,8 @@ class ConsoleHandler(object):
self.reconnect = None
self.connectstate = 'connecting'
eventlet.spawn(self._connect)
else:
self._console.ping()

View File

@ -25,6 +25,7 @@ import eventlet.greenpool as greenpool
import eventlet.queue as queue
import pyghmi.constants as pygconstants
import pyghmi.exceptions as pygexc
import random
console = eventlet.import_patched('pyghmi.ipmi.console')
ipmicommand = eventlet.import_patched('pyghmi.ipmi.command')
import socket
@ -203,12 +204,18 @@ class IpmiConsole(conapi.Console):
self.bmc = connparams['bmc']
self.port = connparams['port']
self.connected = False
self.checker = None
# Cannot actually create console until 'connect', when we get callback
# Finalizer: clear this console's reference to its SOL connection object
# when the console itself is being collected.
def __del__(self):
self.solconnection = None
def handle_data(self, data):
if self.checker:
# if we got data from remote, delay the ping check
self.checker.cancel()
self.checker = eventlet.spawn_after(290 + random.random() * 30,
self.ping)
if type(data) == dict:
if 'error' in data:
self.solconnection = None
@ -245,8 +252,14 @@ class IpmiConsole(conapi.Console):
self.connected = True
except socket.gaierror as err:
raise exc.TargetEndpointUnreachable(str(err))
if self.checker:
self.checker.cancel()
self.checker = eventlet.spawn_after(290 + random.random() * 30,
self.ping)
def close(self):
if self.checker:
self.checker.cancel()
if self.solconnection is not None:
# break the circular reference here
self.solconnection.out_handler = _donothing
@ -261,6 +274,12 @@ class IpmiConsole(conapi.Console):
def send_break(self):
    """Forward a break request to the underlying SOL connection."""
    sol = self.solconnection
    sol.send_break()
def ping(self):
    """Actively probe the health of the SOL session.

    Issues a raw IPMI request (netfn 6, command 0x4b, data (1, 1)) over the
    session that backs the console and hands the response to the
    connection's payload-instance-info handler, so the connection can
    evaluate the state of its own session.

    Does nothing when there is no active connection.  The periodic checker
    is re-armed in handle_data() before an error response clears
    ``self.solconnection``, so this method can legitimately fire after the
    connection has been dropped; that must not raise in the greenthread.
    """
    # Bind once: raw_command may yield to other greenthreads, and
    # self.solconnection can be reset to None while we wait for the BMC.
    sol = self.solconnection
    if sol is None:
        return
    # presumably IPMI "Get Payload Instance Info" for SOL (payload type 1),
    # instance 1 -- confirm against the IPMI specification
    rsp = sol.ipmi_session.raw_command(netfn=6,
                                       command=0x4b,
                                       data=(1, 1))
    sol._got_payload_instance_info(rsp)
def perform_requests(operator, nodes, element, cfg, inputdata):
cryptit = cfg.decrypt