From 68037473d0b9d2bf18a06e5803fb0fae382e4a04 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Wed, 26 Apr 2017 14:26:00 -0400 Subject: [PATCH] Replace login event mechanism Provide a different scheme that does not involve a wait(), in case the login flow dies without ever getting back to our thread. wait() has no timeout, so this copes by making sure we hang for no longer than 3 minutes, which is well beyond any time a login should possibly take. --- .../confluent/plugins/hardwaremanagement/ipmi.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py index f4278a90..ff956a9a 100644 --- a/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py +++ b/confluent_server/confluent/plugins/hardwaremanagement/ipmi.py @@ -342,10 +342,8 @@ class IpmiHandler(object): self.ipmicmd = None self.inputdata = inputdata tenant = cfg.tenant - self._logevt = None if ((node, tenant) not in persistent_ipmicmds or not persistent_ipmicmds[(node, tenant)].ipmi_session.logged): - self._logevt = threading.Event() try: persistent_ipmicmds[(node, tenant)].close_confluent() except KeyError: # was no previous session @@ -356,6 +354,14 @@ class IpmiHandler(object): userid=connparams['username'], password=connparams['passphrase'], kg=connparams['kg'], port=connparams['port'], onlogon=self.logged) + ipmisess = persistent_ipmicmds[(node, tenant)].ipmi_session + begin = util.monotonic_time() + while ((not (self.broken or self.loggedin)) and + (util.monotonic_time() - begin) < 180): + ipmisess.wait_for_rsp(180) + if not (self.broken or self.loggedin): + raise exc.TargetEndpointUnreachable( + "Login process to " + bmc + " died") except socket.gaierror as ge: if ge[0] == -2: raise exc.TargetEndpointUnreachable(ge[1]) @@ -374,12 +380,8 @@ class IpmiHandler(object): self.ipmicmd = ipmicmd self.loggedin = True 
self.ipmicmd.setup_confluent_keyhandler() - self._logevt.set() def handle_request(self): - if self._logevt is not None: - self._logevt.wait() - self._logevt = None if self.broken: if (self.error == 'timeout' or 'Insufficient resources' in self.error):