confluent/confluent_server/confluent/discovery/core.py
Jarrod Johnson cb00c5d35d Place limits on discovery resource consumption
A situation of file handle exhaustion was seen. In the scenario observed,
multiple connections to the same target were open. So:

1) Back out the recheck block and replace it with a more comprehensively
placed semaphore.

2) Place a discovery pool limit of 500 to generally constrain things.

3) Further limit things to one detection thread per mac address.
2017-08-26 22:45:36 -04:00


# Copyright 2016-2017 Lenovo
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This manages the detection and auto-configuration of nodes.
# Discovery sources may implement scans and may be passive or may provide
# both.
# The phases and actions:
# - Detect - Notice the existence of a potentially supported target
#   - Potentially apply a secure replacement for default credential
#     (perhaps using some key identifier combined with some string
#     denoting temporary use, and use confluent master integrity key
#     to generate a password in a formulaic way?)
#   - Do some universal reconfiguration if applicable (e.g. if something is
#     part of an enclosure with an optionally enabled enclosure manager,
#     check and request enclosure manager enablement)
#   - Throughout all of this, at this phase no sensitive data is divulged,
#     only using credentials that are factory default or equivalent to
#     factory default
#   - Request transition to Locate
# - Locate - Use available cues to ascertain the physical location. This may
#   be mac address lookup through a switch or correlated by a server
#   enclosure manager. If the location data suggests a node identity,
#   then proceed to the 'verify' state
# - Verify - Given the current information and candidate upstream verifier,
#   verify the authenticity of the server's claim in an automated way
#   if possible. A few things may happen at this juncture:
#   - Verification outright fails (confirmed negative response)
#     - Audit log entry created, element is not *allowed* to proceed
#   - Verification not possible (neither good nor bad)
#     - If security policy is set to low, proceed to 'Manage'
#     - Otherwise, log the detection event and stop (user would then
#       manually bless the endpoint if applicable)
#   - Verification succeeds
#     - If security policy is set to strict (or manual, whichever word
#       works best), note the successful verification, but do not manage
#     - Otherwise, proceed to 'Manage'
# - Pre-configure - Given data up to this point, try to do some pre-config.
#   For example, if located and X, then check for S, enable S.
#   This happens regardless of verify, as verify may depend on S.
# - Manage
#   - Create the node if autonode (Deferred)
#   - If there is not a defined ip address, collect the current LLA and use
#     that value.
#   - If no username/password defined, generate a unique password, 20 bytes
#     long, written to pass most complexity rules (15 random bytes, base64,
#     retry until uppercase, lowercase, digit, and symbol all present); this
#     scheme is sketched in the illustrative example below.
#   - Apply defined configuration to endpoint
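# An illustrative sketch (not part of this module) of the password scheme
# described above; 15 random bytes base64-encode to 20 characters, and the
# candidate is retried until every character class is represented:
#
#     import base64
#     import os
#
#     def example_default_password():
#         while True:
#             candidate = base64.b64encode(os.urandom(15))
#             if (any(c.isupper() for c in candidate) and
#                     any(c.islower() for c in candidate) and
#                     any(c.isdigit() for c in candidate) and
#                     ('+' in candidate or '/' in candidate)):
#                 return candidate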
import confluent.config.configmanager as cfm
import confluent.discovery.protocols.pxe as pxe
#import confluent.discovery.protocols.ssdp as ssdp
import confluent.discovery.protocols.slp as slp
import confluent.discovery.handlers.imm as imm
import confluent.discovery.handlers.pxe as pxeh
import confluent.discovery.handlers.smm as smm
import confluent.discovery.handlers.xcc as xcc
import confluent.exceptions as exc
import confluent.log as log
import confluent.messages as msg
import confluent.networking.macmap as macmap
import confluent.noderange as noderange
import confluent.util as util
import traceback
import eventlet
import eventlet.greenpool
import eventlet.semaphore
class nesteddict(dict):
def __missing__(self, key):
v = self[key] = nesteddict()
return v
nodehandlers = {
'service:lenovo-smm': smm,
'service:management-hardware.Lenovo:lenovo-xclarity-controller': xcc,
'service:management-hardware.IBM:integrated-management-module2': imm,
'pxe-client': pxeh,
}
servicenames = {
'pxe-client': 'pxe-client',
'service:lenovo-smm': 'lenovo-smm',
'service:management-hardware.Lenovo:lenovo-xclarity-controller': 'lenovo-xcc',
'service:management-hardware.IBM:integrated-management-module2': 'lenovo-imm2',
}
servicebyname = {
'pxe-client': 'pxe-client',
'lenovo-smm': 'service:lenovo-smm',
'lenovo-xcc': 'service:management-hardware.Lenovo:lenovo-xclarity-controller',
'lenovo-imm2': 'service:management-hardware.IBM:integrated-management-module2',
}
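# Limit overall discovery concurrency; the pool bounds resource (e.g.
# filehandle) consumption, and runningevals tracks the in-flight evaluation
# for each mac address so a given endpoint is only evaluated once at a time.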
discopool = eventlet.greenpool.GreenPool(500)
runningevals = {}
# Passive-only auto-detection protocols:
# PXE
# Both passive and active:
# SLP (passive mode listens for SLP DA and unicast interrogation of the system)
# mDNS
# SSDP
# Also there are location providers:
# Switch
# chassis
# chassis may in turn describe more chassis
# We normalize discovered node data to the following pieces of information:
# * Detected node name (if available, from switch discovery or similar, or
#   an auto-generated node name)
# * Model number
# * Model name
# * Serial number
# * System UUID (in x86 space, specifically whichever UUID would be in DMI)
# * Network interfaces and addresses
# * Switch connectivity information
# * enclosure information
# * Management TLS fingerprint if validated (switch publication or enclosure)
# * System TLS fingerprint if validated (switch publication or system manager)
#TODO: by serial, by uuid, by node
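# Runtime discovery state, keyed for fast lookup:
#   known_info      - by mac address
#   known_services  - by service type, each holding a set of model numbers
#   known_serials   - by serial number
#   known_uuids     - by uuid, then by mac address
#   known_nodes     - by node name, then by mac address
#   unknown_info    - by mac address, for endpoints not yet tied to a node
#   pending_nodes   - by node name, for identified endpoints awaiting discovery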
known_info = {}
known_services = {}
known_serials = {}
known_uuids = nesteddict()
known_nodes = nesteddict()
unknown_info = {}
pending_nodes = {}
def enrich_pxe_info(info):
sn = None
mn = None
uuid = info.get('uuid', '')
if not uuid:
return info
for mac in known_uuids.get(uuid, {}):
if not sn and 'serialnumber' in known_uuids[uuid][mac]:
info['serialnumber'] = known_uuids[uuid][mac]['serialnumber']
if not mn and 'modelnumber' in known_uuids[uuid][mac]:
info['modelnumber'] = known_uuids[uuid][mac]['modelnumber']
def send_discovery_datum(info):
addresses = info.get('addresses', [])
yield msg.KeyValueData({'nodename': info.get('nodename', '')})
yield msg.KeyValueData({'ipaddrs': [x[0] for x in addresses]})
if info['handler'] == pxeh:
enrich_pxe_info(info)
sn = info.get('serialnumber', '')
mn = info.get('modelnumber', '')
uuid = info.get('uuid', '')
if uuid:
relatedmacs = []
for mac in known_uuids.get(uuid, {}):
if mac and mac != info.get('hwaddr', ''):
relatedmacs.append(mac)
if relatedmacs:
yield msg.KeyValueData({'relatedmacs': relatedmacs})
yield msg.KeyValueData({'serialnumber': sn})
yield msg.KeyValueData({'modelnumber': mn})
yield msg.KeyValueData({'uuid': uuid})
if 'enclosure.bay' in info:
yield msg.KeyValueData({'bay': int(info['enclosure.bay'])})
yield msg.KeyValueData({'macs': [info.get('hwaddr', '')]})
types = []
for infotype in info.get('services', []):
if infotype in servicenames:
types.append(servicenames[infotype])
yield msg.KeyValueData({'types': types})
def _info_matches(info, criteria):
model = criteria.get('by-model', None)
devtype = criteria.get('by-type', None)
node = criteria.get('by-node', None)
serial = criteria.get('by-serial', None)
status = criteria.get('by-state', None)
uuid = criteria.get('by-uuid', None)
if model and info.get('modelnumber', None) != model:
return False
if devtype and devtype not in info.get('services', []):
return False
if node and info.get('nodename', None) != node:
return False
if serial and info.get('serialnumber', None) != serial:
return False
if status and info.get('discostatus', None) != status:
return False
if uuid and info.get('uuid', None) != uuid:
return False
return True
def list_matching_nodes(criteria):
retnodes = []
for node in known_nodes:
for mac in known_nodes[node]:
info = known_info[mac]
if _info_matches(info, criteria):
retnodes.append(node)
break
retnodes.sort(key=noderange.humanify_nodename)
return [msg.ChildCollection(node + '/') for node in retnodes]
def list_matching_serials(criteria):
for serial in sorted(list(known_serials)):
info = known_serials[serial]
if _info_matches(info, criteria):
yield msg.ChildCollection(serial + '/')
def list_matching_uuids(criteria):
for uuid in sorted(list(known_uuids)):
for mac in known_uuids[uuid]:
info = known_uuids[uuid][mac]
if _info_matches(info, criteria):
yield msg.ChildCollection(uuid + '/')
break
def list_matching_states(criteria):
return [msg.ChildCollection(x) for x in ('discovered/', 'identified/',
'unidentified/')]
def list_matching_macs(criteria):
for mac in sorted(list(known_info)):
info = known_info[mac]
if _info_matches(info, criteria):
yield msg.ChildCollection(mac.replace(':', '-'))
def list_matching_types(criteria):
rettypes = []
for infotype in known_services:
typename = servicenames[infotype]
if ('by-model' not in criteria or
criteria['by-model'] in known_services[infotype]):
rettypes.append(typename)
return [msg.ChildCollection(typename + '/')
for typename in sorted(rettypes)]
def list_matching_models(criteria):
for model in sorted(list(detected_models())):
if ('by-type' not in criteria or
model in known_services[criteria['by-type']]):
yield msg.ChildCollection(model + '/')
def show_info(mac):
mac = mac.replace('-', ':')
if mac not in known_info:
raise exc.NotFoundException(mac + ' not a known mac address')
for i in send_discovery_datum(known_info[mac]):
yield i
list_info = {
'by-node': list_matching_nodes,
'by-serial': list_matching_serials,
'by-type': list_matching_types,
'by-model': list_matching_models,
'by-mac': list_matching_macs,
'by-state': list_matching_states,
'by-uuid': list_matching_uuids,
}
multi_selectors = set([
'by-type',
'by-model',
'by-state',
'by-uuid',
])
node_selectors = set([
'by-node',
'by-serial',
])
single_selectors = set([
'by-mac',
])
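# Discovery API paths alternate selector and value, e.g. (hypothetical mac):
#   by-type/lenovo-xcc/by-mac/00-00-00-00-00-01
# _parameterize_path turns those components into query parameters while
# narrowing which selectors may follow: a node selector may only be followed
# by a single selector, and a single selector ends the chain.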
def _parameterize_path(pathcomponents):
listrequested = False
childcoll = True
if len(pathcomponents) % 2 == 1:
listrequested = pathcomponents[-1]
pathcomponents = pathcomponents[:-1]
pathit = iter(pathcomponents)
keyparams = {}
validselectors = multi_selectors | node_selectors | single_selectors
for key, val in zip(pathit, pathit):
if key not in validselectors:
raise exc.NotFoundException('{0} is not valid here'.format(key))
if key == 'by-type':
keyparams[key] = servicebyname.get(val, None)
else:
keyparams[key] = val
validselectors.discard(key)
if key in single_selectors:
childcoll = False
validselectors = set([])
elif key in node_selectors:
validselectors = single_selectors | set([])
return validselectors, keyparams, listrequested, childcoll
def handle_api_request(configmanager, inputdata, operation, pathcomponents):
if operation == 'retrieve':
return handle_read_api_request(pathcomponents)
elif (operation in ('update', 'create') and
pathcomponents == ['discovery', 'rescan']):
if inputdata != {'rescan': 'start'}:
raise exc.InvalidArgumentException()
rescan()
return (msg.KeyValueData({'rescan': 'started'}),)
elif (operation in ('update', 'create')):
if 'node' not in inputdata:
raise exc.InvalidArgumentException('Missing node name in input')
_, queryparms, _, _ = _parameterize_path(pathcomponents[1:])
if 'by-mac' not in queryparms:
raise exc.InvalidArgumentException('Must target using "by-mac"')
mac = queryparms['by-mac'].replace('-', ':')
if mac not in known_info:
raise exc.NotFoundException('{0} not found'.format(mac))
info = known_info[mac]
handler = info['handler'].NodeHandler(info, configmanager)
eval_node(configmanager, handler, info, inputdata['node'],
manual=True)
return [msg.AssignedResource(inputdata['node'])]
raise exc.NotImplementedException(
'Unable to {0} to {1}'.format(operation, '/'.join(pathcomponents)))
def handle_read_api_request(pathcomponents):
# TODO(jjohnson2): This should be more generalized...
# odd indexes into components are 'by-'*, even indexes
# starting at 2 are parameters to previous index
subcats, queryparms, indexof, coll = _parameterize_path(pathcomponents[1:])
if len(pathcomponents) == 1:
dirlist = [msg.ChildCollection(x + '/') for x in sorted(list(subcats))]
dirlist.append(msg.ChildCollection('rescan'))
return dirlist
if not coll:
return show_info(queryparms['by-mac'])
if not indexof:
return [msg.ChildCollection(x + '/') for x in sorted(list(subcats))]
if indexof not in list_info:
raise exc.NotFoundException('{0} is not found'.format(indexof))
return list_info[indexof](queryparms)
def detected_services():
for srv in known_services:
yield servicenames[srv]
def detected_models():
knownmodels = set([])
for info in known_info:
info = known_info[info]
if 'modelnumber' in info and info['modelnumber'] not in knownmodels:
knownmodels.add(info['modelnumber'])
yield info['modelnumber']
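# Rechecks are serialized by rechecklock; if one is already in progress,
# additional requests are skipped rather than queued.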
def _recheck_nodes(nodeattribs, configmanager):
if rechecklock.locked():
# if already in progress, don't run again
# it may make sense to schedule a repeat, but will try the easier and less redundant way first
return
with rechecklock:
return _recheck_nodes_backend(nodeattribs, configmanager)
def _recheck_nodes_backend(nodeattribs, configmanager):
global rechecker
_map_unique_ids(nodeattribs)
# for the nodes whose attributes have changed, consider them as potential
# strangers
for node in nodeattribs:
if node in known_nodes:
for somemac in known_nodes[node]:
unknown_info[somemac] = known_nodes[node][somemac]
unknown_info[somemac]['discostatus'] = 'unidentified'
# Now we go through ones we did not find earlier
for mac in list(unknown_info):
try:
_recheck_single_unknown(configmanager, mac)
except Exception:
traceback.print_exc()
continue
# now we go through ones that were identified, but could not pass
# policy or hadn't been able to verify key
for nodename in pending_nodes:
info = pending_nodes[nodename]
try:
handler = info['handler'].NodeHandler(info, configmanager)
discopool.spawn_n(eval_node, configmanager, handler, info, nodename)
except Exception:
traceback.print_exc()
log.log({'error': 'Unexpected error during discovery of {0}, check debug '
'logs'.format(nodename)})
def _recheck_single_unknown(configmanager, mac):
global rechecker
global rechecktime
info = unknown_info.get(mac, None)
if not info:
return
if info['handler'] != pxeh and not info.get('addresses', None):
#log.log({'info': 'Missing address information in ' + repr(info)})
return
handler = info['handler'].NodeHandler(info, configmanager)
if handler.https_supported and not handler.https_cert:
if handler.cert_fail_reason == 'unreachable':
log.log(
{
'info': '{0} with hwaddr {1} is not reachable at {2}'
''.format(
handler.devname, info['hwaddr'], handler.ipaddr
)})
# addresses data is bad, delete the offending ip
info['addresses'] = [x for x in info.get('addresses', []) if x != handler.ipaddr]
# TODO(jjohnson2): rescan due to bad peer addr data?
# not just wait around for the next announce
return
log.log(
{
'info': '{0} with hwaddr {1} at address {2} is not yet running '
'https, will examine later'.format(
handler.devname, info['hwaddr'], handler.ipaddr
)})
if rechecker is not None and rechecktime > util.monotonic_time() + 300:
rechecker.cancel()
# if cancel did not result in dead, then we are in progress
if rechecker is None or rechecker.dead:
rechecktime = util.monotonic_time() + 300
rechecker = eventlet.spawn_after(300, _periodic_recheck,
configmanager)
return
nodename = get_nodename(configmanager, handler, info)
if nodename:
if handler.https_supported:
dp = configmanager.get_node_attributes([nodename],
('pubkeys.tls_hardwaremanager',))
lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
{}).get('value', None)
if util.cert_matches(lastfp, handler.https_cert):
info['nodename'] = nodename
known_nodes[nodename][info['hwaddr']] = info
info['discostatus'] = 'discovered'
return # already known, no need for more
discopool.spawn_n(eval_node, configmanager, handler, info, nodename)
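# safe_detected is the callback handed to the protocol snoopers/scanners; it
# spawns at most one evaluation greenthread per mac address at a time.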
def safe_detected(info):
if 'hwaddr' not in info:
return
if info['hwaddr'] in runningevals:
# Do not evaluate the same mac multiple times at once
return
runningevals[info['hwaddr']] = discopool.spawn(eval_detected, info)
def eval_detected(info):
try:
detected(info)
except Exception as e:
traceback.print_exc()
del runningevals[info['hwaddr']]
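# detected() normalizes an announcement from a protocol handler, matches it to
# a node handler, and then either defers the endpoint (https not up yet),
# records it as unknown, or proceeds toward eval_node and discovery.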
def detected(info):
global rechecker
global rechecktime
# later, manual and CMM discovery may act on SN and/or UUID
for service in info['services']:
if nodehandlers.get(service, None):
if service not in known_services:
known_services[service] = set([])
handler = nodehandlers[service]
info['handler'] = handler
break
else: # no nodehandler, ignore for now
return
try:
snum = info['attributes']['enclosure-serial-number'][0].strip()
if snum:
info['serialnumber'] = snum
known_serials[info['serialnumber']] = info
except (KeyError, IndexError):
pass
try:
info['modelnumber'] = info['attributes']['enclosure-machinetype-model'][0]
known_services[service].add(info['modelnumber'])
except (KeyError, IndexError):
pass
if info['hwaddr'] in known_info and 'addresses' in info:
# we should tee these up for parsing when an enclosure comes up
# also when switch config parameters change, should discard
# and there's also if wiring is fixed...
# of course could periodically revisit known_nodes
# replace potentially stale address info
#TODO(jjohnson2): remove this
# temporary workaround for XCC not doing SLP DA over dedicated port
# bz 93219, fix submitted, but not in builds yet
# strictly speaking, going ipv4 only legitimately is mistreated here,
# but that should be an edge case
oldaddr = known_info[info['hwaddr']].get('addresses', [])
for addr in info['addresses']:
if addr[0].startswith('fe80::'):
break
else:
for addr in oldaddr:
if addr[0].startswith('fe80::'):
info['addresses'].append(addr)
if known_info[info['hwaddr']].get(
'addresses', []) == info['addresses']:
# if the ip addresses match, then assume no changes
# now something resetting to defaults could, in theory
# have the same address, but need to be reset
# in that case, however, a user can clear pubkeys to force a check
return
known_info[info['hwaddr']] = info
cfg = cfm.ConfigManager(None)
handler = handler.NodeHandler(info, cfg)
handler.scan()
uuid = info.get('uuid', None)
if uuid:
known_uuids[uuid][info['hwaddr']] = info
if handler.https_supported and not handler.https_cert:
if handler.cert_fail_reason == 'unreachable':
log.log(
{
'info': '{0} with hwaddr {1} is not reachable at {2}'
''.format(
handler.devname, info['hwaddr'], handler.ipaddr
)})
info['addresses'] = [x for x in info.get('addresses', []) if x != handler.ipaddr]
return
log.log(
{'info': '{0} with hwaddr {1} at address {2} is not yet running '
'https, will examine later'.format(
handler.devname, info['hwaddr'], handler.ipaddr
)})
if rechecker is not None and rechecktime > util.monotonic_time() + 300:
rechecker.cancel()
if rechecker is None or rechecker.dead:
rechecktime = util.monotonic_time() + 300
rechecker = eventlet.spawn_after(300, _periodic_recheck, cfg)
unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
#TODO, eventlet spawn after to recheck sooner, or somehow else
# influence periodic recheck to shorten delay?
return
nodename = get_nodename(cfg, handler, info)
if nodename and handler.https_supported:
dp = cfg.get_node_attributes([nodename],
('pubkeys.tls_hardwaremanager',))
lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
{}).get('value', None)
if util.cert_matches(lastfp, handler.https_cert):
info['nodename'] = nodename
known_nodes[nodename][info['hwaddr']] = info
info['discostatus'] = 'discovered'
return # already known, no need for more
#TODO(jjohnson2): We might have to get UUID for certain searches...
#for now defer probe until inside eval_node. We might not have
#a nodename without probe in the future.
if nodename:
eval_node(cfg, handler, info, nodename)
else:
log.log(
{'info': 'Detected unknown {0} with hwaddr {1} at '
'address {2}'.format(
handler.devname, info['hwaddr'], handler.ipaddr
)})
info['discostatus'] = 'unidentified'
unknown_info[info['hwaddr']] = info
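# Resolve a node name for a detected endpoint: by uuid for non-https (pxe)
# targets, otherwise by TLS certificate fingerprint, falling back to a switch
# mac address lookup in either case.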
def get_nodename(cfg, handler, info):
if not handler.https_supported:
curruuid = info['uuid']
nodename = nodes_by_uuid.get(curruuid, None)
if nodename is None:
_map_unique_ids()
nodename = nodes_by_uuid.get(curruuid, None)
if nodename is None:
# TODO: if there are too many matches on port for a
                # given type, error! Can't just arbitrarily limit,
# shared nic with vms is possible and valid
nodename = macmap.find_node_by_mac(info['hwaddr'], cfg)
return nodename
currcert = handler.https_cert
if not currcert:
info['discofailure'] = 'nohttps'
return None
currprint = util.get_fingerprint(currcert)
nodename = nodes_by_fprint.get(currprint, None)
if not nodename:
nodename = macmap.find_node_by_mac(info['hwaddr'], cfg)
return nodename
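# Probe the endpoint and, if the candidate node name actually refers to an
# enclosure, narrow to the node whose enclosure.bay matches the reported bay
# before attempting discovery.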
def eval_node(cfg, handler, info, nodename, manual=False):
try:
handler.probe() # unicast interrogation as possible to get more data
# for now, we search switch only, ideally we search cmm, smm, and
# switch concurrently
# do some preconfig, for example, to bring a SMM online if applicable
handler.preconfig()
except Exception as e:
unknown_info[info['hwaddr']] = info
info['discostatus'] = 'unidentified'
        errorstr = 'An error occurred during discovery, check the ' \
'trace and stderr logs, mac was {0} and ip was {1}' \
', the node or the containing enclosure was {2}' \
''.format(info['hwaddr'], handler.ipaddr, nodename)
traceback.print_exc()
if manual:
raise exc.InvalidArgumentException(errorstr)
log.log({'error': errorstr})
return
# first, if had a bay, it was in an enclosure. If it was discovered by
# switch, it is probably the enclosure manager and not
# the node directly. switch is ambiguous and we should leave it alone
if 'enclosure.bay' in info and handler.is_enclosure:
unknown_info[info['hwaddr']] = info
info['discostatus'] = 'unidentified'
log.log({'error': 'Something that is an enclosure reported a bay, '
'not possible'})
if manual:
raise exc.InvalidArgumentException()
return
nl = list(cfg.filter_node_attributes('enclosure.manager=' + nodename))
if not handler.is_enclosure and nl:
# The specified node is an enclosure (has nodes mapped to it), but
# what we are talking to is *not* an enclosure
if 'enclosure.bay' not in info:
unknown_info[info['hwaddr']] = info
info['discostatus'] = 'unidentified'
errorstr = '{2} with mac {0} is in {1}, but unable to ' \
'determine bay number'.format(info['hwaddr'],
nodename,
handler.ipaddr)
if manual:
raise exc.InvalidArgumentException(errorstr)
log.log({'error': errorstr})
return
# search for nodes fitting our description using filters
# lead with the most specific to have a small second pass
nl = cfg.filter_node_attributes(
'enclosure.bay={0}'.format(info['enclosure.bay']), nl)
nl = list(nl)
if len(nl) != 1:
info['discofailure'] = 'ambigconfig'
if len(nl):
errorstr = 'The following nodes have duplicate ' \
'enclosure attributes: ' + ','.join(nl)
else:
errorstr = 'The {0} in enclosure {1} bay {2} does not ' \
'seem to be a defined node ({3})'.format(
handler.devname, nodename,
info['enclosure.bay'],
handler.ipaddr,
)
if manual:
raise exc.InvalidArgumentException(errorstr)
log.log({'error': errorstr})
unknown_info[info['hwaddr']] = info
info['discostatus'] = 'unidentified'
return
nodename = nl[0]
if not discover_node(cfg, handler, info, nodename, manual):
# store it as pending, assuming blocked on enclosure
# assurance...
pending_nodes[nodename] = info
else:
# we can and did accurately discover by switch or in enclosure
if not discover_node(cfg, handler, info, nodename, manual):
pending_nodes[nodename] = info
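# discover_node enforces discovery.policy: 'open' (or a manual request)
# discovers freely, 'permissive' refuses to replace an already-stored trusted
# fingerprint, and 'pxe' permits only the uuid-based pxe discovery path.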
def discover_node(cfg, handler, info, nodename, manual):
known_nodes[nodename][info['hwaddr']] = info
if info['hwaddr'] in unknown_info:
del unknown_info[info['hwaddr']]
info['discostatus'] = 'identified'
dp = cfg.get_node_attributes(
[nodename], ('discovery.policy',
'pubkeys.tls_hardwaremanager'))
policy = dp.get(nodename, {}).get('discovery.policy', {}).get(
'value', None)
if policy is None:
policy = ''
policies = set(policy.split(','))
lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
{}).get('value', None)
# TODO(jjohnson2): permissive requires we guarantee storage of
# the pubkeys, which is deferred for a little bit
# Also, 'secure', when we have the needed infrastructure done
# in some product or another.
if 'pxe' in policies and info['handler'] == pxeh:
return do_pxe_discovery(cfg, handler, info, manual, nodename, policies)
elif ('permissive' in policies and handler.https_supported and lastfp and
not util.cert_matches(lastfp, handler.https_cert) and not manual):
info['discofailure'] = 'fingerprint'
log.log({'info': 'Detected replacement of {0} with existing '
'fingerprint and permissive discovery policy, not '
'doing discovery unless discovery.policy=open or '
'pubkeys.tls_hardwaremanager attribute is cleared '
'first'.format(nodename)})
return False # With a permissive policy, do not discover new
elif policies & set(('open', 'permissive')) or manual:
info['nodename'] = nodename
if info['handler'] == pxeh:
return do_pxe_discovery(cfg, handler, info, manual, nodename, policies)
elif manual or not util.cert_matches(lastfp, handler.https_cert):
# only 'discover' if it is not the same as last time
try:
handler.config(nodename)
except Exception as e:
info['discofailure'] = 'bug'
log.log(
{'error':
'Error encountered trying to set up {0}, {1}'.format(
nodename, str(e))})
traceback.print_exc()
return False
newnodeattribs = {}
if 'uuid' in info:
newnodeattribs['id.uuid'] = info['uuid']
if 'serialnumber' in info:
newnodeattribs['id.serial'] = info['serialnumber']
if 'modelnumber' in info:
newnodeattribs['id.model'] = info['modelnumber']
if handler.https_cert:
newnodeattribs['pubkeys.tls_hardwaremanager'] = \
util.get_fingerprint(handler.https_cert)
if newnodeattribs:
cfg.set_node_attributes({nodename: newnodeattribs})
log.log({'info': 'Discovered {0} ({1})'.format(nodename,
handler.devname)})
info['discostatus'] = 'discovered'
return True
log.log({'info': 'Detected {0}, but discovery.policy is not set to a '
'value allowing discovery (open or permissive)'.format(
nodename)})
info['discofailure'] = 'policy'
return False
def do_pxe_discovery(cfg, handler, info, manual, nodename, policies):
# use uuid based scheme in lieu of tls cert, ideally only
# for stateless 'discovery' targets like pxe, where data does not
# change
uuidinfo = cfg.get_node_attributes(nodename, ['id.uuid', 'id.serial', 'id.model', 'net*.bootable'])
if manual or policies & set(('open', 'pxe')):
enrich_pxe_info(info)
attribs = {}
olduuid = uuidinfo.get(nodename, {}).get('id.uuid', None)
uuid = info.get('uuid', None)
if uuid and uuid != olduuid:
attribs['id.uuid'] = info['uuid']
sn = info.get('serialnumber', None)
mn = info.get('modelnumber', None)
if sn and sn != uuidinfo.get(nodename, {}).get('id.serial', None):
attribs['id.serial'] = sn
if mn and mn != uuidinfo.get(nodename, {}).get('id.model', None):
attribs['id.model'] = mn
for attrname in uuidinfo.get(nodename, {}):
if attrname.endswith('.bootable') and uuidinfo[nodename][attrname].get('value', None):
newattrname = attrname[:-8] + 'hwaddr'
attribs[newattrname] = info['hwaddr']
if attribs:
cfg.set_node_attributes({nodename: attribs})
if info['uuid'] in known_pxe_uuids:
return True
known_pxe_uuids[info['uuid']] = nodename
log.log({'info': 'Detected {0} ({1} with mac {2})'.format(
nodename, handler.devname, info['hwaddr'])})
return True
attribwatcher = None
nodeaddhandler = None
needaddhandled = False
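# Node additions are processed by a single greenthread; changes arriving while
# a pass is running set needaddhandled so they are coalesced into one more pass.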
def _handle_nodelist_change(configmanager):
global needaddhandled
global nodeaddhandler
_recheck_nodes((), configmanager)
if needaddhandled:
needaddhandled = False
nodeaddhandler = eventlet.spawn(_handle_nodelist_change, configmanager)
else:
nodeaddhandler = None
def newnodes(added, deleting, configmanager):
global attribwatcher
global needaddhandled
global nodeaddhandler
configmanager.remove_watcher(attribwatcher)
allnodes = configmanager.list_nodes()
attribwatcher = configmanager.watch_attributes(
allnodes, ('discovery.policy', 'net*.switch',
'hardwaremanagement.manager', 'net*.switchport', 'id.uuid',
'pubkeys.tls_hardwaremanager', 'net*.bootable'), _recheck_nodes)
if nodeaddhandler:
needaddhandled = True
else:
nodeaddhandler = eventlet.spawn(_handle_nodelist_change, configmanager)
rechecker = None
rechecktime = None
rechecklock = eventlet.semaphore.Semaphore()
def _periodic_recheck(configmanager):
global rechecker
global rechecktime
rechecker = None
try:
_recheck_nodes((), configmanager)
except Exception:
traceback.print_exc()
log.log({'error': 'Unexpected error during discovery, check debug '
'logs'})
# if rechecker is set, it means that an accelerated schedule
# for rechecker was requested in the course of recheck_nodes
if rechecker is None:
rechecktime = util.monotonic_time() + 900
rechecker = eventlet.spawn_after(900, _periodic_recheck,
configmanager)
def rescan():
_map_unique_ids()
eventlet.spawn_n(slp.active_scan, safe_detected)
def start_detection():
global attribwatcher
    global rechecker
    global rechecktime
_map_unique_ids()
cfg = cfm.ConfigManager(None)
allnodes = cfg.list_nodes()
attribwatcher = cfg.watch_attributes(
allnodes, ('discovery.policy', 'net*.switch',
'hardwaremanagement.manager', 'net*.switchport', 'id.uuid',
'pubkeys.tls_hardwaremanager'), _recheck_nodes)
cfg.watch_nodecollection(newnodes)
eventlet.spawn_n(slp.snoop, safe_detected)
eventlet.spawn_n(pxe.snoop, safe_detected)
if rechecker is None:
rechecktime = util.monotonic_time() + 900
rechecker = eventlet.spawn_after(900, _periodic_recheck, cfg)
# eventlet.spawn_n(ssdp.snoop, safe_detected)
nodes_by_fprint = {}
nodes_by_uuid = {}
known_pxe_uuids = {}
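# Rebuild the uuid and TLS fingerprint lookup maps from configuration;
# uuids learned via pxe that are not present in configuration are retained.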
def _map_unique_ids(nodes=None):
global nodes_by_uuid
global nodes_by_fprint
nodes_by_uuid = {}
nodes_by_fprint = {}
    # Map current known ids based on uuid and fingerprints for fast lookup
cfg = cfm.ConfigManager(None)
if nodes is None:
nodes = cfg.list_nodes()
bigmap = cfg.get_node_attributes(nodes,
('id.uuid',
'pubkeys.tls_hardwaremanager'))
uuid_by_nodes = {}
fprint_by_nodes = {}
for uuid in nodes_by_uuid:
node = nodes_by_uuid[uuid]
if node in bigmap:
uuid_by_nodes[node] = uuid
for fprint in nodes_by_fprint:
node = nodes_by_fprint[fprint]
if node in bigmap:
            fprint_by_nodes[node] = fprint
for node in bigmap:
if node in uuid_by_nodes:
del nodes_by_uuid[uuid_by_nodes[node]]
if node in fprint_by_nodes:
del nodes_by_fprint[fprint_by_nodes[node]]
uuid = bigmap[node].get('id.uuid', {}).get('value', None)
if uuid:
nodes_by_uuid[uuid] = node
fprint = bigmap[node].get(
'pubkeys.tls_hardwaremanager', {}).get('value', None)
if fprint:
nodes_by_fprint[fprint] = node
for uuid in known_pxe_uuids:
if uuid not in nodes_by_uuid:
nodes_by_uuid[uuid] = known_pxe_uuids[uuid]
if __name__ == '__main__':
start_detection()
while True:
eventlet.sleep(30)