From 50891f235e480070b015e047a13e6adb61f52ecf Mon Sep 17 00:00:00 2001
From: Jarrod Johnson
Date: Sat, 1 Feb 2014 18:49:36 -0500
Subject: [PATCH] Implement session loss recovery

When a console object reports disconnect, have mechanism to get back in.

IpmiConsole.write() waits for the SOL connection to come up, but stops
waiting once the session is marked broken.  If that happens before the
connection was ever established there is nothing to send to, so the
write is dropped instead of dereferencing a missing connection.
---
 confluent/consoleserver.py         | 20 ++++++++++++++++++--
 confluent/interface/console.py     | 12 ++++++++++++
 plugins/hardwaremanagement/ipmi.py | 17 ++++++++++++++---
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/confluent/consoleserver.py b/confluent/consoleserver.py
index 399ddb5a..0f6c9192 100644
--- a/confluent/consoleserver.py
+++ b/confluent/consoleserver.py
@@ -7,6 +7,7 @@
 #we track nodes that are actively being logged, watched, or have attached
 #there should be no more than one handler per node
 
+import confluent.interface.console as conapi
 import confluent.pluginapi as plugin
 import confluent.util as util
 import eventlet
@@ -17,9 +18,15 @@ _handled_consoles = {}
 class _ConsoleHandler(object):
     def __init__(self, node, configmanager):
         self.rcpts = {}
-        self._console = plugin.handle_path("/node/%s/_console/session" % node,
-            "create", configmanager)
+        self.cfgmgr = configmanager
+        self.node = node
         self.buffer = bytearray()
+        self._connect()
+
+    def _connect(self):
+        self._console = plugin.handle_path(
+            "/node/%s/_console/session" % self.node,
+            "create", self.cfgmgr)
         self._console.connect(self.get_console_output)
 
     def unregister_rcpt(self, handle):
@@ -44,6 +51,15 @@ class _ConsoleHandler(object):
         #also, timestamp data...
 
     def get_console_output(self, data):
+        # Spawn as a greenthread, return control as soon as possible
+        # to the console object
+        eventlet.spawn(self._handle_console_output, data)
+
+    def _handle_console_output(self, data):
+        if type(data) == int:
+            if data == conapi.ConsoleEvent.Disconnect:
+                self._connect()
+            return
         self.buffer += data
         #TODO: analyze buffer for registered events, examples:
         #   panics
diff --git a/confluent/interface/console.py b/confluent/interface/console.py
index 97410041..074a31de 100644
--- a/confluent/interface/console.py
+++ b/confluent/interface/console.py
@@ -21,3 +21,15 @@ class Console(object):
     def wait_for_data(self, timeout=600):
         raise NotImplementedException("Subclassing required")
 
+    def ping(self):
+        """This function is a hint to the console plugin that now would be a
+        nice time to assess health of console connection.  Plugins that see
+        a use for this should be periodically doing this on their own for
+        logging and such, this provides a hint that a user has taken an
+        explicit interest in the console output.  In my experience, this
+        correlates with some conditions that may suggest a loss of console
+        as well, so consoles can schedule a health check to run at this time.
+        No return is expected, any error condition can be reported by sending
+        ConsoleEvent.Disconnect, just like normal."""
+        pass
+
diff --git a/plugins/hardwaremanagement/ipmi.py b/plugins/hardwaremanagement/ipmi.py
index 9aa1198f..3d416d6f 100644
--- a/plugins/hardwaremanagement/ipmi.py
+++ b/plugins/hardwaremanagement/ipmi.py
@@ -1,5 +1,5 @@
 import collections
-import confluent.interface.console
+import confluent.interface.console as conapi
 import confluent.messages as msg
 import eventlet
 import eventlet.event
@@ -107,10 +107,11 @@ def get_conn_params(node, configdata):
     }
 
 
-class IpmiConsole(confluent.interface.console.Console):
+class IpmiConsole(conapi.Console):
     def __init__(self, node, config):
         crypt = config.decrypt
         config.decrypt = True
+        self.broken = False
         configdata = config.get_node_attributes([node],
             ['secret.ipmiuser', 'secret.ipmipassphrase',
              'secret.managementuser', 'secret.managementpassphrase',
@@ -125,7 +126,12 @@ class IpmiConsole(confluent.interface.console.Console):
 
     def handle_data(self, data):
         if type(data) == dict:
-            #TODO: convert dict into a confluent.interface.console.ConsoleEvent
+            disconnect = frozenset(('Session Disconnected', 'timeout'))
+            if 'error' in data and data['error'] in disconnect:
+                self.broken = True
+                self.datacallback(conapi.ConsoleEvent.Disconnect)
+            else:
+                raise Exception("Unrecognized pyghmi input %s" % repr(data))
         else:
             self.datacallback(data)
 
@@ -156,6 +162,11 @@ class IpmiConsole(confluent.interface.console.Console):
 
     def write(self, data):
         global chainpulled
+        while self.solconnection is None and not self.broken:
+            wait_on_ipmi()
+        if self.solconnection is None:
+            # Broken before SOL was established: nothing to send to.
+            return
         ipmiq.append((self.solconnection.send_data, (data,)))
         if not chainpulled:
             chainpulled = True