Have collate preserve relative whitespace

The earlier change to tolerate either a space or no space ended up greedily consuming all leading whitespace. Do the best possible in two cases. For log, use the first line as a clue, and consistently treat every line as padded or not padded according to that first line. This won't catch differing pad strategies, nor will it handle the first line being indented while other lines are not. For the textgroup variant, allow subsequent lines to revise the detected pad downward, and accept any whitespace, not just a space.
2025-01-26 19:10:30 +00:00 · 2024-04-19 08:22:32 -04:00 · 2024-04-19 08:22:32 -04:00 · b606882327
commit b606882327
parent 10f0fabb8c
2 changed files with 18 additions and 4 deletions
--- a/confluent_client/bin/collate
+++ b/confluent_client/bin/collate
@@ -21,6 +21,7 @@

 import optparse
 import os
+import re
 import select
 import sys

@@ -84,6 +85,7 @@ fullline = sys.stdin.readline()
 printpending = True
 clearpending = False
 holdoff = 0
+padded = None
 while fullline:
    for line in fullline.split('\n'):
        if not line:
@@ -92,13 +94,18 @@ while fullline:
            line = 'UNKNOWN: ' + line
        if options.log:
            node, output = line.split(':', 1)
-            output = output.lstrip()
+            if padded is None:
+                if output.startswith(' '):
+                    padded = True
+                else:
+                    padded = False
+            if padded:
+                output = re.sub(r'^ ', '', output)
            currlog = options.log.format(node=node, nodename=node)
            with open(currlog, mode='a')  as log:
                log.write(output + '\n')
            continue
        node, output = line.split(':', 1)
-        output = output.lstrip()
        grouped.add_line(node, output)
        if options.watch:
            if not holdoff:
--- a/confluent_client/confluent/textgroup.py
+++ b/confluent_client/confluent/textgroup.py
@@ -98,17 +98,24 @@ class GroupedData(object):
        self.byoutput = {}
        self.header = {}
        self.client = confluentconnection
+        self.detectedpad = None

    def generate_byoutput(self):
        self.byoutput = {}
+        thepad = self.detectedpad if self.detectedpad else ''
        for n in self.bynode:
-            output = '\n'.join(self.bynode[n])
+            output = ''
+            for ln in self.bynode[n]:
+                output += ln.replace(thepad, '', 1) + '\n'
            if output not in self.byoutput:
                self.byoutput[output] = set([n])
            else:
                self.byoutput[output].add(n)

    def add_line(self, node, line):
+        wspc = re.search(r'^\s*', line).group()
+        if self.detectedpad is None or len(wspc) < len(self.detectedpad):
+            self.detectedpad = wspc
        if node not in self.bynode:
            self.bynode[node] = [line]
        else:
@@ -219,4 +226,4 @@ if __name__ == '__main__':
        if not line:
            continue
        groupoutput.add_line(*line.split(': ', 1))
-    groupoutput.print_deviants()
+    groupoutput.print_deviants()