From 779b5c9ede8ffa4da0cde2713f427d08c3482c3b Mon Sep 17 00:00:00 2001
From: Jarrod Johnson
Date: Fri, 8 Mar 2019 16:02:52 -0500
Subject: [PATCH] Speed up cnos health and add stubs

Add concurrency to accelerate nodehealth and provide stubs
for the as-yet unimplemented functionality.
---
 .../plugins/hardwaremanagement/cnos.py        | 142 ++++++++++++++----
 1 file changed, 108 insertions(+), 34 deletions(-)

diff --git a/confluent_server/confluent/plugins/hardwaremanagement/cnos.py b/confluent_server/confluent/plugins/hardwaremanagement/cnos.py
index 3297be05..a5b643ad 100644
--- a/confluent_server/confluent/plugins/hardwaremanagement/cnos.py
+++ b/confluent_server/confluent/plugins/hardwaremanagement/cnos.py
@@ -24,6 +24,7 @@
 
 
 import eventlet
+import eventlet.queue as queue
 import confluent.exceptions as exc
 webclient = eventlet.import_patched('pyghmi.util.webclient')
 import confluent.messages as msg
@@ -51,45 +52,118 @@ def cnos_login(node, configmanager, creds):
         return wc
     raise exc.TargetEndpointBadCredentials('Unable to authenticate')
 
+def update(nodes, element, configmanager, inputdata):
+    """Stub: answer every node with a 'Not Implemented' error."""
+    for node in nodes:
+        yield msg.ConfluentNodeError(node, 'Not Implemented')
+
+def delete(nodes, element, configmanager, inputdata):
+    """Stub: answer every node with a 'Not Implemented' error."""
+    for node in nodes:
+        yield msg.ConfluentNodeError(node, 'Not Implemented')
+
+def create(nodes, element, configmanager, inputdata):
+    """Stub: answer every node with a 'Not Implemented' error."""
+    for node in nodes:
+        yield msg.ConfluentNodeError(node, 'Not Implemented')
+
 def retrieve(nodes, element, configmanager, inputdata):
+    """Yield the response messages for a read request.
+
+    Power state is always reported as 'on'.  Hardware health is collected
+    by one greenthread per node, so the switches are queried concurrently.
+    Any other element gets a 'Not Implemented' error for each node.
+    """
+    results = queue.LightQueue()
+    workers = set()
     if element == ['power', 'state']:
         for node in nodes:
             yield msg.PowerState(node=node, state='on')
-    if element == ['health', 'hardware']:
+        return
+    elif element == ['health', 'hardware']:
         creds = configmanager.get_node_attributes(
             nodes, ['secret.hardwaremanagementuser',
                     'secret.hardwaremanagementpassword'], decrypt=True)
         for node in nodes:
-            wc = cnos_login(node, configmanager, creds)
-            hinfo = wc.grab_json_response('/nos/api/sysinfo/globalhealthstatus')
-            summary = hinfo['status'].lower()
-            if summary == 'noncritical':
-                summary = 'warning'
-            yield msg.HealthSummary(summary, name=node)
-            state = None
-            badreadings = []
-            if summary != 'ok': # temperature or dump or fans or psu
-                wc.grab_json_response('/nos/api/sysinfo/panic_dump')
-                switchinfo = wc.grab_json_response('/nos/api/sysinfo/panic_dump')
-                if switchinfo:
-                    badreadings.append(SwitchSensor('Panicdump', ['Present'], health='warning'))
-                switchinfo = wc.grab_json_response('/nos/api/sysinfo/temperatures')
-                for temp in switchinfo:
-                    if temp == 'Temperature threshold':
-                        continue
-                    if switchinfo[temp]['State'] != 'OK':
-                        temphealth = switchinfo[temp]['State'].lower()
-                        if temphealth == 'noncritical':
-                            temphealth = 'warning'
-                        tempval = switchinfo[temp]['Temp']
-                        badreadings.append(SwitchSensor(temp, [], value=tempval, health=temphealth))
-                switchinfo = wc.grab_json_response('/nos/api/sysinfo/fans')
-                for fan in switchinfo:
-                    if switchinfo[fan]['speed-rpm'] < 100:
-                        badreadings.append(SwitchSensor(fan, [], value=switchinfo[fan]['speed-rpm'], health='critical'))
-                switchinfo = wc.grab_json_response('/nos/api/sysinfo/power')
-                for psu in switchinfo:
-                    if switchinfo[psu]['State'] != 'Normal ON':
-                        psuname = switchinfo[psu]['Name']
-                        badreadings.append(SwitchSensor(psuname, states=[switchinfo[psu]['State']], health='critical'))
-            yield msg.SensorReadings(badreadings, name=node)
+            workers.add(eventlet.spawn(retrieve_health, configmanager, creds,
+                                       node, results))
+    else:
+        for node in nodes:
+            yield msg.ConfluentNodeError(node, 'Not Implemented')
+        return
+    while workers:
+        try:
+            # The timeout has to be given by keyword: the first positional
+            # parameter of LightQueue.get() is 'block', so get(10) waits
+            # forever when the workers exit without queueing anything.
+            datum = results.get(timeout=10)
+            while datum:
+                yield datum
+                datum = results.get_nowait()
+        except queue.Empty:
+            pass
+        eventlet.sleep(0.001)
+        workers = {t for t in workers if not t.dead}
+    # Collect anything queued after the last pass of the loop above
+    try:
+        while True:
+            datum = results.get_nowait()
+            if datum:
+                yield datum
+    except queue.Empty:
+        pass
+
+def retrieve_health(configmanager, creds, node, results):
+    """Greenthread worker: queue the health messages for one node.
+
+    Puts a HealthSummary and then a SensorReadings message on results.  An
+    exception raised in a greenthread never reaches the generator consuming
+    results, so any failure is queued as a ConfluentNodeError instead.
+    """
+    try:
+        _retrieve_health(configmanager, creds, node, results)
+    except Exception as e:
+        results.put(msg.ConfluentNodeError(node, str(e)))
+
+def _retrieve_health(configmanager, creds, node, results):
+    """Query one switch and put its health messages on results."""
+    wc = cnos_login(node, configmanager, creds)
+    hinfo = wc.grab_json_response('/nos/api/sysinfo/globalhealthstatus')
+    summary = hinfo['status'].lower()
+    if summary == 'noncritical':
+        summary = 'warning'
+    results.put(msg.HealthSummary(summary, name=node))
+    badreadings = []
+    if summary != 'ok':  # temperature or dump or fans or psu
+        # NOTE(review): panic_dump is requested twice and the first reply is
+        # discarded; kept unchanged, confirm whether the extra call is needed
+        wc.grab_json_response('/nos/api/sysinfo/panic_dump')
+        switchinfo = wc.grab_json_response('/nos/api/sysinfo/panic_dump')
+        if switchinfo:
+            badreadings.append(
+                SwitchSensor('Panicdump', ['Present'], health='warning'))
+        switchinfo = wc.grab_json_response('/nos/api/sysinfo/temperatures')
+        for temp in switchinfo:
+            if temp == 'Temperature threshold':
+                continue
+            if switchinfo[temp]['State'] != 'OK':
+                temphealth = switchinfo[temp]['State'].lower()
+                if temphealth == 'noncritical':
+                    temphealth = 'warning'
+                tempval = switchinfo[temp]['Temp']
+                badreadings.append(
+                    SwitchSensor(temp, [], value=tempval, health=temphealth))
+        switchinfo = wc.grab_json_response('/nos/api/sysinfo/fans')
+        for fan in switchinfo:
+            if switchinfo[fan]['speed-rpm'] < 100:
+                badreadings.append(
+                    SwitchSensor(fan, [], value=switchinfo[fan]['speed-rpm'],
+                                 health='critical'))
+        switchinfo = wc.grab_json_response('/nos/api/sysinfo/power')
+        for psu in switchinfo:
+            if switchinfo[psu]['State'] != 'Normal ON':
+                psuname = switchinfo[psu]['Name']
+                badreadings.append(
+                    SwitchSensor(psuname, states=[switchinfo[psu]['State']],
+                                 health='critical'))
+    results.put(msg.SensorReadings(badreadings, name=node))