2
0
mirror of https://github.com/xcat2/confluent.git synced 2025-01-28 11:57:37 +00:00

Implement session loss recovery

When a console object reports a disconnect, have a mechanism to reconnect.
This commit is contained in:
Jarrod Johnson 2014-02-01 18:49:36 -05:00
parent dae131d40f
commit 50891f235e
3 changed files with 41 additions and 5 deletions

View File

@ -7,6 +7,7 @@
#we track nodes that are actively being logged, watched, or have attached
#there should be no more than one handler per node
import confluent.interface.console as conapi
import confluent.pluginapi as plugin
import confluent.util as util
import eventlet
@ -17,9 +18,15 @@ _handled_consoles = {}
class _ConsoleHandler(object):
def __init__(self, node, configmanager):
    """Set up the per-node handler state and open the console session.

    node: name of the node whose console this handler manages
    configmanager: configuration manager handed to the console plugin
                   whenever a session is created
    """
    self.rcpts = {}
    # Kept on the instance so _connect() can build a fresh session later
    # without the caller having to supply these again.
    self.cfgmgr = configmanager
    self.node = node
    self.buffer = bytearray()
    # _connect() is the only place a session is created; creating one here
    # as well would leave an extra, never-connected session behind.
    self._connect()
def _connect(self):
    """Create a console session for self.node and attach our callback.

    The session object is stored in self._console and is told to deliver
    its output to self.get_console_output.
    """
    session_path = "/node/%s/_console/session" % self.node
    self._console = plugin.handle_path(session_path, "create", self.cfgmgr)
    self._console.connect(self.get_console_output)
def unregister_rcpt(self, handle):
@ -44,6 +51,15 @@ class _ConsoleHandler(object):
#also, timestamp data...
def get_console_output(self, data):
    """Callback through which the console object hands us its output.

    The actual processing is spawned as a greenthread running
    self._handle_console_output, so that control returns to the console
    object as soon as possible.
    """
    eventlet.spawn(self._handle_console_output, data)
def _handle_console_output(self, data):
if type(data) == int:
if data == conapi.ConsoleEvent.Disconnect:
self._connect()
return
self.buffer += data
#TODO: analyze buffer for registered events, examples:
# panics

View File

@ -21,3 +21,15 @@ class Console(object):
def wait_for_data(self, timeout=600):
# Placeholder on the base Console class: it unconditionally raises (the
# timeout argument is never read), so a subclass has to supply the real
# implementation.
# NOTE(review): NotImplementedException is not a Python builtin (the builtin
# is NotImplementedError) -- confirm it is defined or imported in this
# module, otherwise this line fails with NameError instead.
raise NotImplementedException("Subclassing required")
def ping(self):
    """Hint that now is a good time to check the console connection.

    Plugins that have a use for this should already be assessing
    connection health periodically on their own, for logging and such.
    This call additionally tells the plugin that a user has taken an
    explicit interest in the console output.  In the original author's
    experience that correlates with conditions that may suggest the
    console has been lost, so a plugin can schedule a health check to
    run when this is called.

    No return value is expected.  Any error condition is reported the
    normal way, by sending ConsoleEvent.Disconnect.  This base
    implementation does nothing.
    """
    return None

View File

@ -1,5 +1,5 @@
import collections
import confluent.interface.console
import confluent.interface.console as conapi
import confluent.messages as msg
import eventlet
import eventlet.event
@ -107,10 +107,11 @@ def get_conn_params(node, configdata):
}
class IpmiConsole(confluent.interface.console.Console):
class IpmiConsole(conapi.Console):
def __init__(self, node, config):
crypt = config.decrypt
config.decrypt = True
self.broken = False
configdata = config.get_node_attributes([node],
['secret.ipmiuser', 'secret.ipmipassphrase',
'secret.managementuser', 'secret.managementpassphrase',
@ -125,7 +126,12 @@ class IpmiConsole(confluent.interface.console.Console):
def handle_data(self, data):
    """Relay input from the IPMI layer to self.datacallback.

    data: either console output, which is passed to self.datacallback
          unchanged, or a dict describing a condition.  The only dicts
          understood are those whose 'error' value is
          'Session Disconnected' or 'timeout'; these mark the console as
          broken and send ConsoleEvent.Disconnect to the callback.

    Raises Exception for any other dict.
    """
    # isinstance rather than an exact type comparison, so that dict
    # subclasses are treated as events instead of being forwarded as
    # console output.
    if not isinstance(data, dict):
        self.datacallback(data)
        return
    #TODO: convert dict into a confluent.interface.console.ConsoleEvent
    disconnect = frozenset(('Session Disconnected', 'timeout'))
    if data.get('error') in disconnect:
        # Record the loss first so anything polling self.broken stops
        # waiting, then tell the consumer about it.
        self.broken = True
        self.datacallback(conapi.ConsoleEvent.Disconnect)
    else:
        raise Exception("Unrecognized pyghmi input %s" % repr(data))
@ -156,6 +162,8 @@ class IpmiConsole(confluent.interface.console.Console):
def write(self, data):
global chainpulled
while self.solconnection is None and not self.broken:
wait_on_ipmi()
ipmiq.append((self.solconnection.send_data, (data,)))
if not chainpulled:
chainpulled = True