mirror of
https://github.com/xcat2/confluent.git
synced 2025-04-15 01:29:34 +00:00
500 lines
20 KiB
Python
500 lines
20 KiB
Python
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
|
|
|
# Copyright 2014 IBM Corporation
|
|
# Copyright 2015-2017 Lenovo
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# this will implement noderange grammar
|
|
|
|
# considered ast, but a number of things violate python grammar like [] in
|
|
# the middle of strings and use of @ for anything is not in their syntax
|
|
|
|
|
|
import copy
|
|
import itertools
|
|
import pyparsing as pp
|
|
import re
|
|
|
|
# Python 2 compatibility: when the lazy ``xrange`` builtin exists, let the
# name ``range`` refer to it throughout this module.  On Python 3 the lookup
# raises NameError and the builtin ``range`` is left untouched.
try:
    xrange
except NameError:
    pass
else:
    range = xrange
|
|
|
|
# construct custom grammar with pyparsing

# A bare word of a noderange: alphanumerics plus the punctuation that may
# appear in names, ranges, regular expressions and attribute tests.
_nodeword = pp.Word(pp.alphanums + '~^$/=-_:.*+!')
# A bracketed section such as "[1:4]"; the brackets are kept in the result so
# the evaluator can recognise and expand it later.
_nodebracket = pp.QuotedString(
    quoteChar='[', endQuoteChar=']', unquoteResults=False)
# One operand is any run of words and bracketed sections, grouped together.
_nodeatom = pp.Group(pp.OneOrMore(_nodeword | _nodebracket))
# Pagination markers: "<N" and ">N".
_paginationstart = pp.Group(pp.Word('<', pp.nums))
_paginationend = pp.Group(pp.Word('>', pp.nums))
# Operands, the ",-" / "," / "@" operators, and pagination markers.
_grammar = (
    _nodeatom | ',-' | ',' | '@' | _paginationstart | _paginationend
)
# Parenthesised sub-expressions may nest to any depth.
_parser = pp.nestedExpr(content=_grammar)

# Splits a name into alternating alphabetic (including '-') and numeric runs.
_numextractor = pp.OneOrMore(pp.Word(pp.alphas + '-') | pp.Word(pp.nums))
|
|
|
|
# Matches a run of ASCII digits.  The capture group makes re.split() keep the
# digit runs in its result, alternating with the text between them.
numregex = re.compile('([0-9]+)')

# Set by NodeRange to a one-entry dict mapping the most recently evaluated
# noderange string to a set of nodes; read by ReverseNodeRange.
lastnoderange = None
|
|
|
|
def humanify_nodename(nodename):
    """Build a natural-sort key for a node name

    The name is split into digit runs and the text between them.  Digit runs
    become integers and text is lower-cased, so that for example 'n2' sorts
    ahead of 'n10'.

    :param nodename: The node name to analyze
    :returns: A list usable as a ``key`` result for 'sorted'
    """
    sortkey = []
    for piece in numregex.split(nodename):
        if piece.isdigit():
            sortkey.append(int(piece))
        else:
            sortkey.append(piece.lower())
    return sortkey
|
|
|
|
def unnumber_nodename(nodename):
    """Reduce a node name to a template with its numbers stubbed out

    The name is split into digit runs and the text between them; every digit
    run is replaced by the ``'{}'`` placeholder while the text is returned
    unchanged.  Joining the pieces therefore yields a ``str.format`` template
    for names of the same shape, and two names with equal results differ only
    in their numbers.

    The text pieces keep their original case.  They used to be lower-cased,
    which made a name formatted from the template (as Bracketer does) come
    out as a different, lower-case name.

    :param nodename: The node name to analyze
    :returns: A list of text pieces and ``'{}'`` placeholders
    """
    # stub out numbers
    return ['{}' if text.isdigit() else text
            for text in re.split(numregex, nodename)]
|
|
|
|
def getnumbers_nodename(nodename):
    """Return the digit runs found in a node name, as strings, in order

    Leading zeroes are preserved because the pieces are not converted.

    :param nodename: The node name to analyze
    :returns: A list of the numeric pieces of the name
    """
    digits = []
    for piece in numregex.split(nodename):
        if piece.isdigit():
            digits.append(piece)
    return digits
|
|
|
|
|
|
class Bracketer(object):
    """Fold similarly shaped names into bracketed noderange tokens

    A Bracketer is created from one name and fed further names through
    extend().  Names that differ from their predecessor in exactly one
    numeric field are folded into a bracket expression for that field
    (for example 'r3u[4:6,9]'); anything else closes the pending token
    and starts a new one.  Read the ``range`` property for the result.

    Callers in this file pass names that share one template (see
    group_elements) and are naturally sorted.

    State:
      count     - number of numeric fields in the first name
      nametmpl  - unnumber_nodename() of the first name, used as the
                  format template for every token
      sequences - per numeric field either None (not started) or
                  [finished text pieces, current run start, latest number]
      numlens   - per numeric field [text width of the run start,
                  text width of the latest number], to keep zero padding
      diffn     - index of the numeric field currently varying, or None
      tokens    - completed noderange tokens

    :param nodename: The first name of the series
    """
    __slots__ = ['sequences', 'count', 'nametmpl', 'diffn', 'tokens', 'numlens']

    def __init__(self, nodename):
        self.sequences = []
        self.numlens = []
        realnodename = nodename
        if ':' in nodename:
            # only the part before the first ':' is analyzed
            realnodename = nodename.split(':', 1)[0]
        self.count = len(getnumbers_nodename(realnodename))
        self.nametmpl = unnumber_nodename(realnodename)
        for n in range(self.count):
            self.sequences.append(None)
            self.numlens.append([0, 0])
        self.diffn = None
        self.tokens = []
        self.extend(nodename)
        if self.count == 0:
            # nothing numeric to fold, the name is its own token
            self.tokens = [nodename]

    def _start_run(self, n, number, width):
        """Begin a fresh run for numeric field n at the given number"""
        self.sequences[n] = [[], number, number]
        self.numlens[n] = [width, width]

    def _breaks_run(self, n, padto, needpad):
        """Tell whether a changed field n cannot join the pending token

        That is the case when another field is already the varying one,
        when the new number is written narrower than the previous one, or
        when it is zero padded to a width other than the previous one.
        """
        return self.diffn is not None and (
            n != self.diffn or
            padto < self.numlens[n][1] or
            (needpad and padto != self.numlens[n][1]))

    def _format_run(self, n):
        """Render the current run of field n as 'start' or 'start:end'"""
        start = self.sequences[n][1]
        end = self.sequences[n][2]
        if start == end:
            fmtstr = '{{:0{}d}}'.format(self.numlens[n][0])
            return fmtstr.format(start)
        fmtstr = '{{:0{}d}}:{{:0{}d}}'.format(*self.numlens[n])
        return fmtstr.format(start, end)

    def extend(self, nodeorseq):
        """Add one more name to the series

        :param nodeorseq: The name to add; anything after the first ':' is
                          ignored
        """
        # can only differentiate a single number
        nodename = nodeorseq.split(':', 1)[0]
        txtnums = getnumbers_nodename(nodename)
        nums = [int(x) for x in txtnums]
        for n in range(self.count):
            # First pass to see if we have exactly one different number;
            # if this name cannot join the pending token, that token is
            # flushed before any run is modified
            padto = len(txtnums[n])
            needpad = (padto != len('{}'.format(nums[n])))
            if self.sequences[n] is None:
                # We initialize to text pieces, 'currstart', and 'prev' number
                self._start_run(n, nums[n], padto)
            elif (self.sequences[n][2] == nums[n] and
                    self.numlens[n][1] == padto):
                continue  # new nodename has no new number, keep going
            else:
                if self._breaks_run(n, padto, needpad):
                    self.flush_current()
                    self._start_run(n, nums[n], padto)
                self.diffn = n
        for n in range(self.count):
            # Second pass: actually grow or close the run of the field
            # that changed
            padto = len(txtnums[n])
            needpad = (padto != len('{}'.format(nums[n])))
            if self.sequences[n] is None:
                # a flush during the first pass cleared this field
                self._start_run(n, nums[n], padto)
                continue
            current = self.sequences[n]
            if current[2] == nums[n] and self.numlens[n][1] == padto:
                continue  # new nodename has no new number, keep going
            if self._breaks_run(n, padto, needpad):
                self.flush_current()
                self._start_run(n, nums[n], padto)
                self.diffn = None
                continue
            self.diffn = n
            if current[2] == (nums[n] - 1):
                # consecutive number, stretch the current run
                current[2] = nums[n]
                self.numlens[n][1] = padto
            else:
                # A gap, or a number lower than the previous one: close the
                # current run as a finished piece and start a new run here.
                # Lower numbers used to be dropped without any trace.
                current[0].append(self._format_run(n))
                current[1] = nums[n]
                self.numlens[n][0] = padto
                current[2] = nums[n]
                self.numlens[n][1] = padto

    def flush_current(self):
        """Turn the pending runs into a token and reset all runs

        Fields whose text is a plain number are substituted as is, anything
        else (ranges, comma lists) is wrapped in '[]'.
        """
        if self.sequences and self.sequences[0] is not None:
            txtfields = []
            for n in range(self.count):
                self.sequences[n][0].append(self._format_run(n))
                txtfield = ','.join(self.sequences[n][0])
                if txtfield.isdigit():
                    txtfields.append(txtfield)
                else:
                    txtfields.append('[{}]'.format(txtfield))
            self.tokens.append(''.join(self.nametmpl).format(*txtfields))
        self.sequences = [None] * self.count

    @property
    def range(self):
        """The comma separated tokens for every name fed in so far

        Reading this flushes the pending token first.
        """
        if self.sequences:
            self.flush_current()
        return ','.join(self.tokens)
|
|
|
|
|
|
def group_elements(elems):
    """Chunk the specified elements according to text similarity

    Consecutive elements whose unnumber_nodename() results are equal land
    in the same chunk; the incoming order is kept.

    :param elems: Iterable of names
    :returns: A list of lists of names; a list holding one empty list when
              there are no elements
    """
    chunked_elems = [
        list(members)
        for _, members in itertools.groupby(elems, key=unnumber_nodename)
    ]
    if not chunked_elems:
        return [[]]
    return chunked_elems
|
|
|
|
|
|
class ReverseNodeRange(object):
    """Abbreviate a set of nodes to a shorter noderange representation

    :param nodes: List of nodes as a list, tuple, etc.
    :param config: Config manager
    """

    def __init__(self, nodes, config):
        self.cfm = config
        self.nodes = set(nodes)

    @staticmethod
    def _abbreviate(names):
        """Collapse names into a list of bracketed noderange tokens

        Names are naturally sorted, chunked by shape and folded with
        Bracketer.  If anything in that process fails, the plainly sorted
        names are returned instead, never a mix of both.
        """
        try:
            ordered = sorted(names, key=humanify_nodename)
            ranges = []
            for chunk in group_elements(ordered):
                if not chunk:
                    continue
                bracketer = Bracketer(chunk[0])
                for name in chunk[1:]:
                    bracketer.extend(name)
                ranges.append(bracketer.range)
            return ranges
        except Exception:
            return sorted(names)

    @property
    def noderange(self):
        """A noderange string for the nodes given to the constructor

        If the most recently evaluated noderange covered exactly these
        nodes, that string is returned as is.  Otherwise groups (in the
        order given by get_groups(sizesort=True)) whose members are all
        still uncovered are used first, and the rest is abbreviated name
        by name.

        The computation works on a copy, so self.nodes is left alone and
        reading the property again gives the same answer.
        """
        remaining = set(self.nodes)
        # Only compare against the full set, before any group is peeled
        # off; comparing a reduced set could return a noderange that
        # leaves out the groups already selected.
        if lastnoderange:
            for nr in lastnoderange:
                if lastnoderange[nr] == remaining:
                    return nr
        subsetgroups = []
        for group in self.cfm.get_groups(sizesort=True):
            nl = set(
                self.cfm.get_nodegroup_attributes(group).get('nodes', []))
            if not nl or len(nl) > len(remaining):
                continue
            if not nl - remaining:
                # every member is wanted and not yet covered
                subsetgroups.append(group)
                remaining -= nl
                if not remaining:
                    break
        # then, analyze sequentially identifying matching alpha subsections
        # then try out noderange from beginning to end
        # we need to know discontinuities, which are either:
        # nodes that appear in the noderange that are not in the nodes
        # nodes that do not exist at all (we need a noderange modification
        # that returns non existing nodes)
        ranges = self._abbreviate(subsetgroups)
        ranges.extend(self._abbreviate(remaining))
        return ','.join(ranges)
|
|
|
|
|
|
|
|
# TODO: pagination operators <pp.nums and >pp.nums for begin and end respectively
|
|
class NodeRange(object):
    """Iterate over a noderange

    The expression is parsed and evaluated on construction; the result is
    available through the ``nodes`` property.  Evaluating a noderange also
    records it in the module level ``lastnoderange``.

    :param noderange: string representing a noderange to evaluate
    :param config: Config manager object to use to vet elements.  Without
                   it names are expanded but not verified, and regular
                   expression or attribute matches are refused.
    :param purenumeric: True while evaluating the inside of a "[]" section,
                        where only numeric sequences are accepted
    """

    def __init__(self, noderange, config=None, purenumeric=False):
        global lastnoderange
        self.beginpage = None
        self.endpage = None
        self.cfm = config
        self.purenumeric = purenumeric
        try:
            elements = _parser.parseString(
                "(" + noderange + ")", parseAll=True).asList()[0]
        except pp.ParseException:
            raise Exception("Invalid syntax")
        if noderange[0] in ('<', '>'):
            # pagination across all nodes
            self._evaluate(elements)
            self._noderange = set(self.cfm.list_nodes())
        else:
            self._noderange = self._evaluate(elements)
        # Remember what this expression really yields, pagination included,
        # so ReverseNodeRange never offers a paginating expression for the
        # unpaginated set.
        lastnoderange = {noderange: set(self.nodes)}

    @property
    def nodes(self):
        """The set of names the noderange evaluates to

        With pagination in effect the names are naturally sorted and only
        the slice from beginpage up to endpage entries is returned.
        """
        if self.beginpage is None and self.endpage is None:
            return self._noderange
        sortedlist = list(self._noderange)
        try:
            sortedlist.sort(key=humanify_nodename)
        except TypeError:
            # The natural sort attempt failed, fallback to ascii sort
            sortedlist.sort()
        if self.beginpage is not None:
            sortedlist = sortedlist[self.beginpage:]
        if self.endpage is not None:
            sortedlist = sortedlist[:self.endpage]
        return set(sortedlist)

    def _evaluate(self, parsetree, filternodes=None):
        """Evaluate a parse tree, or one operand of it, to a set of names

        :param parsetree: A list from the parser: either the pieces of one
                          operand (strings) or a sequence of operands and
                          operators
        :param filternodes: Names to restrict attribute and regular
                            expression matches to, passed on to the config
                            manager
        """
        current_op = 0  # enum, 0 union, 1 subtract, 2 intersect
        current_range = set([])
        if not isinstance(parsetree[0], list):  # down to a plain text thing
            return self._expandstring(parsetree, filternodes)
        for elem in parsetree:
            if elem == ',-':
                current_op = 1
            elif elem == ',':
                current_op = 0
            elif elem == '@':
                current_op = 2
            elif current_op == 0:
                current_range |= self._evaluate(elem)
            elif current_op == 1:
                current_range -= self._evaluate(elem, current_range)
            elif current_op == 2:
                current_range &= self._evaluate(elem, current_range)
        return current_range

    def failorreturn(self, atom):
        """Reject atom when a config manager is present, else accept it

        :raises Exception: if a config manager is set
        :returns: A set holding just the atom when there is no config manager
        """
        if self.cfm is not None:
            raise Exception(atom + " not valid")
        return set([atom])

    def expandrange(self, seqrange, delimiter):
        """Expand a 'start<delimiter>end' sequence into the names it covers

        In purenumeric mode the text is 'start:end' or 'start:end:increment'
        where '-' and '..' are accepted in place of ':'.  Otherwise the text
        is cut in the middle of its delimiter separated pieces, so the
        delimiter may also occur inside the names; both halves must have
        the same shape and may differ only in their numbers.  Every
        differing number is counted from the left value to the right one,
        zero padded to the width of the left value.

        :param seqrange: The text of the sequence
        :param delimiter: The delimiter between start and end
        :returns: A set of names; whatever failorreturn() gives when the
                  text is not a usable ascending sequence
        """
        increment = 1
        if self.purenumeric:
            pieces = seqrange.replace('-', ':').replace('..', ':').split(':')
            if len(pieces) > 3:
                raise Exception("Invalid numeric sequence")
            leftbits = [pieces[0]]
            rightbits = [pieces[1]]
            if len(pieces) == 3:
                increment = int(pieces[2])
        else:
            pieces = seqrange.split(delimiter)
            if len(pieces) % 2 != 0:
                return self.failorreturn(seqrange)
            halflen = len(pieces) // 2
            left = delimiter.join(pieces[:halflen])
            right = delimiter.join(pieces[halflen:])
            leftbits = _numextractor.parseString(left).asList()
            rightbits = _numextractor.parseString(right).asList()
        if len(leftbits) != len(rightbits):
            return self.failorreturn(seqrange)
        finalfmt = ''
        iterators = []
        for leftbit, rightbit in zip(leftbits, rightbits):
            if leftbit == rightbit:
                finalfmt += leftbit
                continue
            if leftbit[0] in pp.alphas or rightbit[0] in pp.alphas:
                # if string portion unequal, not going to work
                return self.failorreturn(seqrange)
            leftnum = int(leftbit)
            rightnum = int(rightbit)
            if leftnum >= rightnum:
                # descending, or differently padded but same number...
                return self.failorreturn(seqrange)
            numformat = '{0:0%d}' % len(leftbit)
            finalfmt += '{%d}' % len(iterators)
            # range goes to n-1...
            iterators.append([numformat.format(num) for num in
                              range(leftnum, rightnum + 1, increment)])
        results = set([])
        for combo in itertools.product(*iterators):
            entname = finalfmt.format(*combo)
            results |= self.expand_entity(entname)
        return results

    def _expand_group(self, groupname):
        """Return the members of a group, including its noderange if set"""
        grpcfg = self.cfm.get_nodegroup_attributes(groupname)
        nodes = copy.copy(grpcfg['nodes'])
        if 'noderange' in grpcfg and grpcfg['noderange']:
            nodes |= NodeRange(
                grpcfg['noderange']['value'], self.cfm).nodes
        return nodes

    def expand_entity(self, entname):
        """Resolve one name produced by a sequence expansion

        :returns: The name itself if it is a node or there is no config
                  manager, or the members of the group of that name
        :raises Exception: if the config manager knows no such node or group
        """
        if self.cfm is None or self.cfm.is_node(entname):
            return set([entname])
        if self.cfm.is_nodegroup(entname):
            return self._expand_group(entname)
        raise Exception('Unknown node ' + entname)

    def _expandstring(self, element, filternodes=None):
        """Expand the pieces of one operand into a set of names

        :param element: List of the text pieces of the operand, bracketed
                        sections still carrying their '[' and ']'
        :param filternodes: Names to restrict attribute and regular
                            expression matches to
        :raises Exception: for constructs not allowed in the current mode,
                           or a name the config manager does not recognize
        """
        prefix = ''
        if element[0][0] in ('/', '~'):
            # regular expression against node names
            if self.purenumeric:
                raise Exception('Regular expression not supported within "[]"')
            element = ''.join(element)
            nameexpression = element[1:]
            if self.cfm is None:
                raise Exception('Verification configmanager required')
            return set(self.cfm.filter_nodenames(nameexpression, filternodes))
        elif '=' in element[0] or '!~' in element[0]:
            # attribute test
            if self.purenumeric:
                raise Exception('Equality/Inequality operators (=, !=, =~, !~) are invalid within "[]"')
            element = ''.join(element)
            if self.cfm is None:
                raise Exception('Verification configmanager required')
            return set(self.cfm.filter_node_attributes(element, filternodes))
        for idx in range(len(element)):
            if element[idx][0] == '[':
                # expand the bracket contents as a numeric noderange, then
                # carry on with the rest for every value it yields
                nodes = set([])
                for numeric in NodeRange(element[idx][1:-1], purenumeric=True).nodes:
                    nodes |= self._expandstring(
                        [prefix + numeric] + element[idx + 1:])
                return nodes
            else:
                prefix += element[idx]
        element = prefix
        if self.cfm is not None:
            # this is where we would check for exactly this
            if self.cfm.is_node(element):
                return set([element])
            if self.cfm.is_nodegroup(element):
                return self._expand_group(element)
        if ':' in element:  # : range for less ambiguity
            return self.expandrange(element, ':')
        elif '..' in element:
            return self.expandrange(element, '..')
        elif '-' in element:
            return self.expandrange(element, '-')
        elif '+' in element:
            # name+count: rewrite as a ':' range ending count higher
            element, increment = element.split('+')
            try:
                nodename, domain = element.split('.')
            except ValueError:
                nodename = element
                domain = ''
            increment = int(increment)
            elembits = _numextractor.parseString(nodename).asList()
            endnum = str(int(elembits[-1]) + increment)
            left = ''.join(elembits)
            if domain:
                left += '.' + domain
            right = ''.join(elembits[:-1])
            right += endnum
            if domain:
                right += '.' + domain
            nrange = left + ':' + right
            return self.expandrange(nrange, ':')
        elif '<' in element:
            self.beginpage = int(element[1:])
            return set([])
        elif '>' in element:
            self.endpage = int(element[1:])
            return set([])
        if self.cfm is None:
            return set([element])
        raise Exception(element + ' not a recognized node, group, or alias')
|
|
|
|
if __name__ == '__main__':
    # Self check: abbreviate each list of names with Bracketer, expand the
    # result again with NodeRange and report any list that does not come
    # back as the same set of names.
    cases = [
        (['r3u4', 'r5u6'], 'r3u4,r5u6'),  # should not erroneously gather
        (['r3u4s1', 'r5u6s3'], 'r3u4s1,r5u6s3'),  # should not erroneously gather
        (['r3u4s1', 'r3u4s2', 'r5u4s3'], 'r3u4s[1:2],r5u4s3'),  # should not erroneously gather
        (['r3u4', 'r3u5', 'r3u6', 'r3u9', 'r4u1'], 'r3u[4:6,9],r4u1'),
        (['n01', 'n2', 'n03'], 'n01,n2,n03'),
        (['n7', 'n8', 'n09', 'n10', 'n11', 'n12', 'n13', 'n14', 'n15', 'n16',
          'n17', 'n18', 'n19', 'n20'], 'n[7:8],n[09:20]')
    ]
    for names, expected in cases:
        bracketer = Bracketer(names[0])
        for name in names[1:]:
            bracketer.extend(name)
        abbreviated = bracketer.range
        expanded = NodeRange(abbreviated).nodes
        if set(expanded) == set(names):
            continue
        print('FAILED: ' + repr(sorted(names)))
        print('RESULT: ' + repr(sorted(expanded)))
        print('EXPECTED: ' + repr(expected))
        print('ACTUAL: ' + abbreviated)
|
|
|
|
|
|
|