From b6068823271c96b4ab344dde03813aae8b1a72c3 Mon Sep 17 00:00:00 2001 From: Jarrod Johnson Date: Fri, 19 Apr 2024 08:22:32 -0400 Subject: [PATCH] Have collate preserve relative whitespace The change to tolerate either a space or no space after the node prefix ended up greedily consuming whitespace. Make a best effort in each of two cases: For log, use the first line as a clue, and consistently strip or keep one leading space according to that first line. This won't catch input that mixes pad strategies, nor handle the first line being indented while other lines are not. For the textgroup variant, allow subsequent lines to revise the detected pad downward, and accept any whitespace, not just space. --- confluent_client/bin/collate | 11 +++++++++-- confluent_client/confluent/textgroup.py | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/confluent_client/bin/collate b/confluent_client/bin/collate index 07095901..2a086303 100755 --- a/confluent_client/bin/collate +++ b/confluent_client/bin/collate @@ -21,6 +21,7 @@ import optparse import os +import re import select import sys @@ -84,6 +85,7 @@ fullline = sys.stdin.readline() printpending = True clearpending = False holdoff = 0 +padded = None while fullline: for line in fullline.split('\n'): if not line: @@ -92,13 +94,18 @@ while fullline: line = 'UNKNOWN: ' + line if options.log: node, output = line.split(':', 1) - output = output.lstrip() + if padded is None: + if output.startswith(' '): + padded = True + else: + padded = False + if padded: + output = re.sub(r'^ ', '', output) currlog = options.log.format(node=node, nodename=node) with open(currlog, mode='a') as log: log.write(output + '\n') continue node, output = line.split(':', 1) - output = output.lstrip() grouped.add_line(node, output) if options.watch: if not holdoff: diff --git a/confluent_client/confluent/textgroup.py b/confluent_client/confluent/textgroup.py index cd35b6fa..e2f0dc7f 100644 --- a/confluent_client/confluent/textgroup.py +++ b/confluent_client/confluent/textgroup.py @@ -98,17 
+98,24 @@ class GroupedData(object): self.byoutput = {} self.header = {} self.client = confluentconnection + self.detectedpad = None def generate_byoutput(self): self.byoutput = {} + thepad = self.detectedpad if self.detectedpad else '' for n in self.bynode: - output = '\n'.join(self.bynode[n]) + output = '' + for ln in self.bynode[n]: + output += ln.replace(thepad, '', 1) + '\n' if output not in self.byoutput: self.byoutput[output] = set([n]) else: self.byoutput[output].add(n) def add_line(self, node, line): + wspc = re.search(r'^\s*', line).group() + if self.detectedpad is None or len(wspc) < len(self.detectedpad): + self.detectedpad = wspc if node not in self.bynode: self.bynode[node] = [line] else: @@ -219,4 +226,4 @@ if __name__ == '__main__': if not line: continue groupoutput.add_line(*line.split(': ', 1)) - groupoutput.print_deviants() \ No newline at end of file + groupoutput.print_deviants()