2
0
mirror of https://github.com/xcat2/confluent.git synced 2025-01-21 23:23:40 +00:00
Jarrod Johnson d836bf7298 Map health to nagios compatible codes
nodehealth adds granularity and health to the return code.
Rather than only reflecting failure running command, also
have nodehealth assign exit code according to the expectations
of nagios.
2018-10-30 13:56:09 -04:00

108 lines
3.5 KiB
Python
Executable File

#!/usr/bin/env python
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2015-2017 Lenovo
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import codecs
import optparse
import os
import signal
import sys
# Python ignores SIGPIPE by default; restore the default disposition so the
# process is ended by the signal when the reader of a pipe goes away.
# signal.SIGPIPE does not exist on every platform, hence the AttributeError.
try:
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
except AttributeError:
    pass

# Resolve <directory of this script>/../lib/python and add it to the module
# search path, but only when the resolved path starts with '/opt'.
path = os.path.dirname(os.path.realpath(__file__))
path = os.path.realpath(os.path.join(path, '..', 'lib', 'python'))
if path.startswith('/opt'):
    sys.path.append(path)

import confluent.client as client

# Only Python 2 needs stdout wrapped in a UTF-8 encoding writer.  On
# Python 3 sys.stdout is already a text stream; wrapping it would hand
# encoded bytes to that text stream and every print() would raise TypeError.
if sys.version_info[0] < 3:
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)
# The only command line input is a single positional noderange; when it is
# missing, show the usage text and exit with status 1.
argparser = optparse.OptionParser(usage="Usage: %prog <noderange>")
options, args = argparser.parse_args()
if not args:
    argparser.print_help()
    sys.exit(1)
noderange = args[0]
# Validation of the noderange is delegated to the confluent client helper.
client.check_globbing(noderange)
# Health value reported for a node -> process exit status.  Both 'critical'
# and 'failed' map to the same status, 2.
codemappings = dict(
    ok=0,
    warning=1,
    critical=2,
    failed=2,
)
def main():
    """Print the hardware health of every node in ``noderange``.

    Reads '/noderange/<noderange>/health/hardware' through a confluent
    client session.  For each node, once both a health value and a sensor
    list have been received, one line is printed in the form
    ``node: health (sensor:value...,...)``, or ``node: health`` when the
    sensor list is empty.

    The global ``exitcode`` is updated while responses are processed:

    * a response-level error ORs in its 'errorcode' (or 1 when absent);
    * a per-node error sets it to 3;
    * a node health value raises it to the matching ``codemappings`` entry
      when that is higher than the current value.  A health value that is
      not present in ``codemappings`` counts as 3 instead of raising
      KeyError.
    """
    # exitcode is read by the module-level sys.exit() call after main()
    # returns.  The other names are declared global only to leave the
    # module-level state exactly as it was.
    global session, exitcode, healthbynode, healthexplanations
    global health, node, sensor, explanation
    session = client.Command()
    exitcode = 0
    healthbynode = {}
    healthexplanations = {}
    for health in session.read(
            '/noderange/{0}/health/hardware'.format(noderange)):
        if 'error' in health:
            # Response-level failure: report it and move to the next response.
            sys.stderr.write(health['error'] + '\n')
            if 'errorcode' in health:
                exitcode |= health['errorcode']
            else:
                exitcode |= 1
            continue
        if 'databynode' not in health:
            continue
        health = health['databynode']
        for node in health:
            nodedata = health[node]
            if 'error' in nodedata:
                sys.stderr.write('{0}: Error: {1}\n'.format(
                    node, nodedata['error']))
                exitcode = 3
            if 'health' in nodedata:
                state = nodedata['health']['value']
                healthbynode[node] = state
                # An unmapped health value must not abort the whole run with
                # a KeyError; treat it like the per-node error status (3).
                statecode = codemappings.get(state, 3)
                if statecode > exitcode:
                    exitcode = statecode
            if 'sensors' in nodedata:
                healthexplanations[node] = []
                for sensor in nodedata['sensors']:
                    explanation = sensor['name'] + ':'
                    if sensor['value'] is not None:
                        explanation += str(sensor['value'])
                        if sensor['units'] is not None:
                            explanation += sensor['units']
                        # Separator between the reading and the state list.
                        if sensor['states']:
                            explanation += ','
                    if sensor['states']:
                        explanation += ','.join(sensor['states'])
                    healthexplanations[node].append(explanation)
            # Print only once both the health value and the sensor list for
            # this node are known.
            if node in healthbynode and node in healthexplanations:
                if healthexplanations[node]:
                    print(u'{0}: {1} ({2})'.format(
                        node, healthbynode[node],
                        ','.join(healthexplanations[node])))
                else:
                    print('{0}: {1}'.format(node, healthbynode[node]))
# Fallback status for the case where main() is interrupted before it has
# assigned the global exitcode (e.g. while the client session is still being
# created).  Previously the sys.exit() below raised NameError in that case.
# Non-zero, because no health data has been evaluated at that point; main()
# overwrites this value as soon as it initialises exitcode itself.
exitcode = 1
try:
    main()
except KeyboardInterrupt:
    # Emit a newline (presumably so the shell prompt starts on a fresh line
    # after ^C), then exit with whatever status was accumulated so far.
    print('')
sys.exit(exitcode)