From 4f2b15dc8e32dc665da18c030451bd16690aa0df Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 4 Apr 2014 18:40:45 -0400 Subject: [PATCH] First pass at automatic reconfiguration enablement The ipmi plugin, at least, is not yet quite right. Need to continue debugging having a console session open, then changing the bmc to a bad address, then changing it back. I fixed some of the easier exceptions, but it is clearly still getting quite confused to the point where 3 or 4 cycles guarantees the console can not easily heal. --- confluent/consoleserver.py | 18 +++++++++++++++++ plugins/hardwaremanagement/ipmi.py | 32 ++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/confluent/consoleserver.py b/confluent/consoleserver.py index b76d362a..fab16fab 100644 --- a/confluent/consoleserver.py +++ b/confluent/consoleserver.py @@ -18,6 +18,8 @@ import random _handled_consoles = {} +_genwatchattribs = frozenset(('console.method',)) + class _ConsoleHandler(object): def __init__(self, node, configmanager): @@ -39,10 +41,17 @@ class _ConsoleHandler(object): self.shiftin = '0' self.users = {} self.connectstate = 'connecting' + self._attribwatcher = None + self._console = None eventlet.spawn(self._connect) + def _attribschanged(self, **kwargs): + eventlet.spawn(self._connect) def _connect(self): + if self._console: + self._console.close() + self._console = None self.connectstate = 'connecting' self._send_rcpts({'connectstate': self.connectstate}) if self.reconnect: @@ -51,6 +60,15 @@ class _ConsoleHandler(object): self._console = plugin.handle_path( "/nodes/%s/_console/session" % self.node, "create", self.cfgmgr) + if self._attribwatcher: + self.cfgmgr.remove_watcher(self._attribwatcher) + self._attribwatcher = None + if hasattr(self._console, "configattributes"): + attribstowatch = self._console.configattributes | _genwatchattribs + else: + attribstowatch = _genwatchattribs + self.cfgmgr.watch_attributes((self.node,), attribstowatch, + self._attribschanged) try: self._console.connect(self.get_console_output) except exc.TargetEndpointUnreachable: diff --git a/plugins/hardwaremanagement/ipmi.py b/plugins/hardwaremanagement/ipmi.py index 440519ab..44cb10fa 100644 --- a/plugins/hardwaremanagement/ipmi.py +++ b/plugins/hardwaremanagement/ipmi.py @@ -57,16 +57,18 @@ def get_conn_params(node, configdata): 'port': 623, } +_configattributes = ('secret.hardwaremanagementuser', + 'secret.hardwaremanagementpassphrase', + 'secret.ipmikg', 'hardwaremanagement.manager') class IpmiConsole(conapi.Console): + configattributes = frozenset(_configattributes) + def __init__(self, node, config): crypt = config.decrypt config.decrypt = True self.broken = False - configdata = config.get_node_attributes([node], - ['secret.hardwaremanagementuser', - 'secret.hardwaremanagementpassphrase', - 'secret.ipmikg', 'hardwaremanagement.manager']) + configdata = config.get_node_attributes([node], _configattributes) connparams = get_conn_params(node, configdata[node]) config.decrypt = crypt self.username = connparams['username'] @@ -77,10 +79,14 @@ class IpmiConsole(conapi.Console): self.connected = False # Cannot actually create console until 'connect', when we get callback + def __del__(self): + self.solconnection = None + def handle_data(self, data): if type(data) == dict: disconnect = frozenset(('Session Disconnected', 'timeout')) if 'error' in data and data['error'] in disconnect: + self.solconnection = None self.broken = True self.error = data['error'] if self.connected: @@ -92,6 +98,9 @@ class IpmiConsole(conapi.Console): def connect(self,callback): self.datacallback = callback + # we provide a weak reference to pyghmi as otherwise we'd + # have a circular reference and reference counting would never get + # out... try: self.solconnection = console.Console(bmc=self.bmc, port=self.port, userid=self.username, @@ -102,12 +111,22 @@ class IpmiConsole(conapi.Console): w = eventlet.event.Event() _ipmiwaiters.append(w) w.wait() + if self.broken: + break if self.broken: raise exc.TargetEndpointUnreachable(self.error) except socket.gaierror as err: raise exc.TargetEndpointUnreachable(str(err)) + def close(self): + if hasattr(self, 'solconnection') and self.solconnection is not None: + # break the circular reference here + self.solconnection.out_handler = None + self.solconnection = None + self.broken = True + self.error = "closed" + def write(self, data): self.solconnection.send_data(data) @@ -116,10 +135,7 @@ class IpmiIterator(object): def __init__(self, operator, nodes, element, cfg, inputdata): crypt = cfg.decrypt cfg.decrypt = True - configdata = cfg.get_node_attributes(nodes, - ['secret.hardwaremanagementuser', - 'secret.hardwaremanagementpassphrase', - 'secret.ipmikg', 'hardwaremanagement.manager']) + configdata = cfg.get_node_attributes(nodes, _configattributes) cfg.decrypt = crypt self.gpile = greenpool.GreenPile() for node in nodes: