2
0
mirror of https://github.com/xcat2/confluent.git synced 2025-10-24 07:55:31 +00:00

Speed up cnos health and add stubs

Add concurrency to accelerate nodehealth and provide stubs
for the as-yet unimplemented functionality.
This commit is contained in:
Jarrod Johnson
2019-03-08 16:02:52 -05:00
parent 16abf7cb64
commit 656e82c3fe

View File

@@ -24,6 +24,7 @@
import eventlet
import eventlet.queue as queue
import confluent.exceptions as exc
webclient = eventlet.import_patched('pyghmi.util.webclient')
import confluent.messages as msg
@@ -52,45 +53,95 @@ def cnos_login(node, configmanager, creds):
return wc
raise exc.TargetEndpointBadCredentials('Unable to authenticate')
def update(nodes, element, configmanager, inputdata):
    """Stub handler for update requests.

    Update is not implemented by this plugin, so every node in ``nodes``
    gets a 'Not Implemented' error message.  ``element``,
    ``configmanager`` and ``inputdata`` are accepted but not used.

    :param nodes: iterable of node names the request targets
    :returns: generator of ``msg.ConfluentNodeError``, one per node
    """
    for nodename in nodes:
        yield msg.ConfluentNodeError(nodename, 'Not Implemented')
def delete(nodes, element, configmanager, inputdata):
    """Stub handler for delete requests.

    Delete is not implemented by this plugin, so every node in ``nodes``
    gets a 'Not Implemented' error message.  ``element``,
    ``configmanager`` and ``inputdata`` are accepted but not used.

    :param nodes: iterable of node names the request targets
    :returns: generator of ``msg.ConfluentNodeError``, one per node
    """
    for nodename in nodes:
        yield msg.ConfluentNodeError(nodename, 'Not Implemented')
def create(nodes, element, configmanager, inputdata):
    """Stub handler for create requests.

    Create is not implemented by this plugin, so every node in ``nodes``
    gets a 'Not Implemented' error message.  ``element``,
    ``configmanager`` and ``inputdata`` are accepted but not used.

    :param nodes: iterable of node names the request targets
    :returns: generator of ``msg.ConfluentNodeError``, one per node
    """
    for nodename in nodes:
        yield msg.ConfluentNodeError(nodename, 'Not Implemented')
def retrieve(nodes, element, configmanager, inputdata):
    """Yield response messages for a retrieve request against ``nodes``.

    Behavior depends on ``element``:

    * ``['power', 'state']``: a ``msg.PowerState`` with state 'on' is
      yielded for every node, without contacting the node.
    * ``['health', 'hardware']``: one ``retrieve_health`` greenthread is
      spawned per node; whatever the workers place on the shared queue is
      yielded as it arrives.
    * anything else: a 'Not Implemented' ``msg.ConfluentNodeError`` is
      yielded for every node.

    :param nodes: iterable of node names the request targets
    :param element: list of path components naming the requested resource
    :param configmanager: used to look up the management credentials
    :param inputdata: accepted for interface compatibility; not used
    :returns: generator of message objects
    """
    results = queue.LightQueue()
    workers = set()
    if element == ['power', 'state']:
        for node in nodes:
            yield msg.PowerState(node=node, state='on')
        return
    elif element == ['health', 'hardware']:
        creds = configmanager.get_node_attributes(
            nodes, ['secret.hardwaremanagementuser',
                    'secret.hardwaremanagementpassword'], decrypt=True)
        for node in nodes:
            workers.add(eventlet.spawn(retrieve_health, configmanager, creds,
                                       node, results))
    else:
        for node in nodes:
            yield msg.ConfluentNodeError(node, 'Not Implemented')
        return
    currtimeout = 10
    while workers:
        try:
            # The timeout must be passed by keyword: the first positional
            # argument of get() is 'block', so get(10) would wait forever
            # and hang this generator if a worker died without queuing
            # any result.
            datum = results.get(timeout=currtimeout)
            while datum:
                yield datum
                datum = results.get_nowait()
        except queue.Empty:
            pass
        eventlet.sleep(0.001)
        # Reap finished workers; iterate over a copy because the set is
        # modified inside the loop.
        for worker in list(workers):
            if worker.dead:
                workers.discard(worker)
    # All workers are done; flush anything queued after the last read.
    try:
        while True:
            datum = results.get_nowait()
            if datum:
                yield datum
    except queue.Empty:
        pass
def retrieve_health(configmanager, creds, node, results):
    """Collect the health of one switch and queue the resulting messages.

    Logs in through ``cnos_login``, reads the global health status and
    puts a ``msg.HealthSummary`` on ``results``.  When the summary is not
    'ok', the panic dump, temperature, fan and power endpoints are read
    and every abnormal item is recorded as a ``SwitchSensor``.  A
    ``msg.SensorReadings`` holding those sensors (possibly none) is always
    put on ``results`` last.

    :param configmanager: passed through to ``cnos_login``
    :param creds: credential attributes, passed through to ``cnos_login``
    :param node: name of the node to query
    :param results: queue receiving the generated messages
    """
    client = cnos_login(node, configmanager, creds)
    overall = client.grab_json_response('/nos/api/sysinfo/globalhealthstatus')
    summary = overall['status'].lower()
    if summary == 'noncritical':
        summary = 'warning'
    results.put(msg.HealthSummary(summary, name=node))
    badreadings = []
    if summary != 'ok':  # temperature or dump or fans or psu
        # NOTE(review): panic_dump is requested twice and the first
        # response is discarded; kept as-is -- confirm whether the extra
        # request is actually required.
        client.grab_json_response('/nos/api/sysinfo/panic_dump')
        dumpinfo = client.grab_json_response('/nos/api/sysinfo/panic_dump')
        if dumpinfo:
            badreadings.append(
                SwitchSensor('Panicdump', ['Present'], health='warning'))
        temps = client.grab_json_response('/nos/api/sysinfo/temperatures')
        for tempname in temps:
            # This key is not an individual sensor reading; skip it.
            if tempname == 'Temperature threshold':
                continue
            reading = temps[tempname]
            if reading['State'] != 'OK':
                temphealth = reading['State'].lower()
                if temphealth == 'noncritical':
                    temphealth = 'warning'
                tempval = reading['Temp']
                badreadings.append(
                    SwitchSensor(tempname, [], value=tempval,
                                 health=temphealth))
        fans = client.grab_json_response('/nos/api/sysinfo/fans')
        for fanname in fans:
            rpm = fans[fanname]['speed-rpm']
            # A fan reporting fewer than 100 RPM is flagged as critical.
            if rpm < 100:
                badreadings.append(
                    SwitchSensor(fanname, [], value=rpm, health='critical'))
        supplies = client.grab_json_response('/nos/api/sysinfo/power')
        for psukey in supplies:
            psu = supplies[psukey]
            if psu['State'] != 'Normal ON':
                psuname = psu['Name']
                badreadings.append(
                    SwitchSensor(psuname, states=[psu['State']],
                                 health='critical'))
    results.put(msg.SensorReadings(badreadings, name=node))