From c042583a6449373f21d5a5a16be301bb263dd068 Mon Sep 17 00:00:00 2001
From: Jarrod Johnson
Date: Fri, 10 May 2019 11:08:12 -0400
Subject: [PATCH] Add support for CSV formatted data

Presume CSV semantics for input.
---
Reviewer notes (this region is ignored by `git am`; nothing here is applied):

* What the patch changes, as visible in the hunks below:
  - `csv` is imported and every input line is now parsed with
    csv.reader(...) into a list of fields instead of being handled as a raw
    string and split on whitespace.
  - New option `-d` sets the column delimiter.  When it is not given, the
    delimiter is picked from the first data line: tab if present, else
    space, else comma, else space (single-column input).
  - The sixel histogram flag moves from `-s` to `-x`; `-s` is reused for the
    number of header lines to skip.  The read loop runs int(args.s) + 1
    times and keeps the last line read as the first data line.
  - If the first field contains ':', the text before the first ':' becomes
    the node name and the remainder stays in the first field; otherwise the
    whole first field is used as the node name.
  - Column auto-detection (idx == -1) is now guarded by `not autoidx`.

* NOTE(review): `-s` changes meaning from a boolean flag to a value-taking
  option, so existing callers that pass `-s` for sixel output need `-x`
  after this patch -- confirm this is intended.
* NOTE(review): `-s` is declared without `type=int` and converted later with
  int(args.s); a non-numeric value would presumably raise ValueError rather
  than produce an argparse usage error -- confirm.
* NOTE(review): with a single-space delimiter, csv.reader presumably yields
  empty fields for runs of spaces, where the removed data.split() collapsed
  them -- verify against column-aligned input.
* NOTE(review): loop termination appears to rely on csv.reader returning an
  empty row for the empty string that readline() returns at EOF; a blank
  line in the middle of the input would presumably end processing too --
  confirm.
* NOTE(review): line breaks and Python indentation in this copy were
  reconstructed from a whitespace-collapsed original; hunk line counts match
  the hunk headers, but run `git apply --check` before relying on it.

 confluent_client/bin/stats | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/confluent_client/bin/stats b/confluent_client/bin/stats
index c6629c3a..39463f90 100755
--- a/confluent_client/bin/stats
+++ b/confluent_client/bin/stats
@@ -16,6 +16,7 @@
 # limitations under the License.
 
 import argparse
+import csv
 import fcntl
 import io
 import numpy as np
@@ -73,7 +74,9 @@ def textplot(plotdata, bins):
 histogram = False
 aparser = argparse.ArgumentParser(description='Quick access to common statistics')
 aparser.add_argument('-c', type=int, default=0, help='Column number to analyze (default is last column)')
-aparser.add_argument('-s', default=False, action='store_true', help='Output histogram in sixel format')
+aparser.add_argument('-d', default=None, help='Value used to separate columns')
+aparser.add_argument('-x', default=False, action='store_true', help='Output histogram in sixel format')
+aparser.add_argument('-s', default=0, help='Number of header lines to skip before processing')
 aparser.add_argument('-g', default=False, action='store_true', help='Open histogram in separate graphical window')
 aparser.add_argument('-o', default=None, help='Output histogram to the specified filename in PNG format')
 aparser.add_argument('-t', default=False, action='store_true', help='Output a histogram in text format')
@@ -81,18 +84,35 @@ aparser.add_argument('-v', default=False, action='store_true', help='Attempt to
 aparser.add_argument('-b', type=int, default=10, help='Number of bins to use in histogram (default is 10)')
 args = aparser.parse_args(sys.argv[1:])
 plotdata = []
-data = sys.stdin.readline()
+headlines = int(args.s)
+while headlines >= 0:
+    data = sys.stdin.readline()
+    headlines -= 1
+if args.d:
+    delimiter = args.d
+else:
+    if '\t' in data:
+        delimiter = '\t'
+    elif ' ' in data:
+        delimiter = ' '
+    elif ',' in data:
+        delimiter = ','
+    else:
+        delimiter = ' ' # handle single column
+data = list(csv.reader([data], delimiter=delimiter))[0]
 nodebydatum = {}
 idx = args.c - 1
 autoidx = False
 while data:
     node = None
-    if ':' in data:
-        node, data = data.split(':', 1)
-    if idx == -1:
+    if ':' in data[0]:
+        node, data[0] = data[0].split(':', 1)
+    else:
+        node = data[0]
+    if idx == -1 and not autoidx:
         while not autoidx:
             try:
-                datum = float(data.split()[idx])
+                datum = float(data[idx])
             except ValueError:
                 idx -= 1
                 continue
@@ -101,7 +121,7 @@ while data:
                 sys.exit(1)
             autoidx = True
     else:
-        datum = float(data.split()[idx])
+        datum = float(data[idx])
     if node:
         if datum in nodebydatum:
             nodebydatum[datum].add(node)
@@ -109,8 +129,9 @@ while data:
             nodebydatum[datum] = set([node])
     plotdata.append(datum)
     data = sys.stdin.readline()
+    data = list(csv.reader([data], delimiter=delimiter))[0]
 n = None
-if args.g or args.o or args.s:
+if args.g or args.o or args.x:
     n, bins = plot(args.g, args.o, plotdata, bins=args.b)
 if args.t:
     n, bins = textplot(plotdata, bins=args.b)