Merge branch 'master' of github.com:jjohnson42/confluent

2025-01-13 03:08:14 +00:00 · 2019-05-10 14:56:48 -04:00 · 2019-05-10 14:56:48 -04:00 · 067e99d6ce
commit 067e99d6ce
parent ad828e609d c042583a64
1 changed file with 42 additions and 7 deletions
--- a/confluent_client/bin/stats
+++ b/confluent_client/bin/stats
@@ -16,6 +16,7 @@
 # limitations under the License.

 import argparse
+import csv
 import fcntl
 import io
 import numpy as np
@@ -73,7 +74,9 @@ def textplot(plotdata, bins):
 histogram = False
 aparser = argparse.ArgumentParser(description='Quick access to common statistics')
 aparser.add_argument('-c', type=int, default=0, help='Column number to analyze (default is last column)')
-aparser.add_argument('-s', default=False, action='store_true', help='Output histogram in sixel format')
+aparser.add_argument('-d', default=None, help='Value used to separate columns')
+aparser.add_argument('-x', default=False, action='store_true', help='Output histogram in sixel format')
+aparser.add_argument('-s', default=0, help='Number of header lines to skip before processing')
 aparser.add_argument('-g', default=False, action='store_true', help='Open histogram in separate graphical window')
 aparser.add_argument('-o', default=None, help='Output histogram to the specified filename in PNG format')
 aparser.add_argument('-t', default=False, action='store_true', help='Output a histogram in text format')
@@ -81,13 +84,44 @@ aparser.add_argument('-v', default=False, action='store_true', help='Attempt to
 aparser.add_argument('-b', type=int, default=10, help='Number of bins to use in histogram (default is 10)')
 args = aparser.parse_args(sys.argv[1:])
 plotdata = []
-data = sys.stdin.readline()
+headlines = int(args.s)
+while headlines >= 0:
+    data = sys.stdin.readline()
+    headlines -= 1
+if args.d:
+    delimiter = args.d
+else:
+    if '\t' in data:
+        delimiter = '\t'
+    elif ' ' in data:
+        delimiter = ' '
+    elif ',' in data:
+        delimiter = ','
+    else:
+        delimiter = ' '  # handle single column
+data = list(csv.reader([data], delimiter=delimiter))[0]
 nodebydatum = {}
+idx = args.c - 1
+autoidx = False
 while data:
    node = None
-    if ':' in data:
-        node, data = data.split(':', 1)
-    datum = float(data.split()[args.c - 1])
+    if ':' in data[0]:
+        node, data[0] = data[0].split(':', 1)
+    else:
+        node = data[0]
+    if idx == -1 and not autoidx:
+        while not autoidx:
+            try:
+                datum = float(data[idx])
+            except ValueError:
+                idx -= 1
+                continue
+            except IndexError:
+                sys.stderr.write('Unable to identify a numerical column\n')
+                sys.exit(1)
+            autoidx = True
+    else:
+        datum = float(data[idx])
    if node:
        if datum in nodebydatum:
            nodebydatum[datum].add(node)
@@ -95,12 +129,13 @@ while data:
            nodebydatum[datum] = set([node])
    plotdata.append(datum)
    data = sys.stdin.readline()
+    data = list(csv.reader([data], delimiter=delimiter))[0]
 n = None
-if args.g or args.o or args.s:
+if args.g or args.o or args.x:
    n, bins = plot(args.g, args.o, plotdata, bins=args.b)
 if args.t:
    n, bins = textplot(plotdata, bins=args.b)
-print('Samples: {5} Min: {3} Median: {0} Mean: {1} Max: {4} StandardDeviation: {2}'.format(np.median(plotdata), np.mean(plotdata), np.std(plotdata), np.min(plotdata), np.max(plotdata), len(plotdata)))
+print('Samples: {5} Min: {3} Median: {0} Mean: {1} Max: {4} StandardDeviation: {2} Sum: {6}'.format(np.median(plotdata), np.mean(plotdata), np.std(plotdata), np.min(plotdata), np.max(plotdata), len(plotdata), np.sum(plotdata)))
 if args.v and n is not None and nodebydatum:
    print('')
    currbin = bins[0]