2
0
mirror of https://github.com/xcat2/confluent.git synced 2024-11-29 13:00:03 +00:00

Try to put the brakes on too many mac discovery attempts

In various scenarios, too many macs on a port can be a sign of trouble.
For example, a chained SMM configuration with the head on a switch port, or
incorrectly pointing a node's net attributes at a switch uplink port, or
defining SMMs without any nodes, causing XCCs to think they are
rackmount.  This sets some sanity value for avoiding problems.  This is
of course a mitigation; invalid scenarios could still run afoul of the
limits, but it should catch a large chunk of offending scenarios.
This commit is contained in:
Jarrod Johnson 2017-11-14 14:28:31 -05:00
parent 0b0f325240
commit 76bfb29d60
4 changed files with 30 additions and 15 deletions

View File

@ -634,7 +634,17 @@ def get_nodename(cfg, handler, info):
_map_unique_ids()
nodename = nodes_by_uuid.get(curruuid, None)
if not nodename: # as a last resort, search switch for info
nodename = macmap.find_node_by_mac(info['hwaddr'], cfg)
nodename, macinfo = macmap.find_nodeinfo_by_mac(info['hwaddr'], cfg)
if (nodename and
not handler.discoverable_by_switch(macinfo['maccount'])):
if handler.devname == 'SMM':
errorstr = 'Attempt to discover SMM by switch, but chained ' \
'topology or incorrect net attributes detected, ' \
'which is not compatible with switch discovery ' \
'of SMM, nodename would have been ' \
'{0}'.format(nodename)
log.log({'error': errorstr})
return None
return nodename

View File

@ -19,6 +19,10 @@ webclient = eventlet.import_patched('pyghmi.util.webclient')
class NodeHandler(object):
https_supported = True
is_enclosure = False
devname = ''
maxmacs = 2 # reasonable default, allowing for common scenario of
# shared nic in theory, but blocking enclosure managers
# and uplink ports
def __init__(self, info, configmanager):
self._certfailreason = None
@ -50,9 +54,10 @@ class NodeHandler(object):
def preconfig(self):
return
@property
def discoverable_by_switch(self):
return True
def discoverable_by_switch(self, macs):
# Given the number of macs sharing the port, is this handler
# appropriate?
return macs <= self.maxmacs
def _savecert(self, certificate):
self._fp = certificate

View File

@ -26,6 +26,7 @@ def fixuuid(baduuid):
class NodeHandler(bmchandler.NodeHandler):
is_enclosure = True
devname = 'SMM'
maxmacs = 5 # support an enclosure, but try to avoid catching daisy chain
def scan(self):
# the UUID is in a weird order, fix it up to match

View File

@ -229,33 +229,32 @@ def _map_switch_backend(args):
_macsbyswitch[switch][ifname] = [mac]
nodename = _nodelookup(switch, ifname)
if nodename is not None:
if mac in _nodesbymac and _nodesbymac[mac] != nodename:
if mac in _nodesbymac and _nodesbymac[mac][0] != nodename:
# For example, listed on both a real edge port
# and by accident a trunk port
log.log({'error': '{0} and {1} described by ambiguous'
' switch topology values'.format(nodename,
_nodesbymac[mac]
)})
_nodesbymac[mac] = None
' switch topology values'.format(
nodename, _nodesbymac[mac][0])})
_nodesbymac[mac] = (None, None)
else:
_nodesbymac[mac] = nodename
_nodesbymac[mac] = (nodename, maccounts[ifname])
switchbackoff = 30
def find_node_by_mac(mac, configmanager):
def find_nodeinfo_by_mac(mac, configmanager):
now = util.monotonic_time()
if vintage and (now - vintage) < 90 and mac in _nodesbymac:
return _nodesbymac[mac]
return _nodesbymac[mac][0], {'maccount': _nodesbymac[mac][1]}
# do not actually sweep switches more than once every 30 seconds
# however, if there is an update in progress, wait on it
for _ in update_macmap(configmanager,
vintage and (now - vintage) < switchbackoff):
if mac in _nodesbymac:
return _nodesbymac[mac]
return _nodesbymac[mac][0], {'maccount': _nodesbymac[mac][1]}
# If update_mac bailed out, still check one last time
return _nodesbymac.get(mac, None)
return _nodesbymac.get(mac, (None, {'maccount': 0}))
mapupdating = eventlet.semaphore.Semaphore()
@ -456,7 +455,7 @@ def dump_macinfo(macaddr):
raise exc.NotFoundException(
'{0} not found in mac table of '
'any known switches'.format(macaddr))
return _dump_locations(info, macaddr, _nodesbymac.get(macaddr, None))
return _dump_locations(info, macaddr, _nodesbymac.get(macaddr, (None,))[0])
def rescan(cfg):