2
0
mirror of https://github.com/xcat2/confluent.git synced 2024-11-23 01:53:28 +00:00

Merge branch 'master' of github.com:jjohnson42/confluent

This commit is contained in:
Jarrod Johnson 2019-05-10 14:56:48 -04:00
commit 067e99d6ce

View File

@ -16,6 +16,7 @@
# limitations under the License.
import argparse
import csv
import fcntl
import io
import numpy as np
@ -73,7 +74,9 @@ def textplot(plotdata, bins):
histogram = False
aparser = argparse.ArgumentParser(description='Quick access to common statistics')
aparser.add_argument('-c', type=int, default=0, help='Column number to analyze (default is last column)')
aparser.add_argument('-s', default=False, action='store_true', help='Output histogram in sixel format')
aparser.add_argument('-d', default=None, help='Value used to separate columns')
aparser.add_argument('-x', default=False, action='store_true', help='Output histogram in sixel format')
aparser.add_argument('-s', default=0, help='Number of header lines to skip before processing')
aparser.add_argument('-g', default=False, action='store_true', help='Open histogram in separate graphical window')
aparser.add_argument('-o', default=None, help='Output histogram to the specified filename in PNG format')
aparser.add_argument('-t', default=False, action='store_true', help='Output a histogram in text format')
@ -81,13 +84,44 @@ aparser.add_argument('-v', default=False, action='store_true', help='Attempt to
aparser.add_argument('-b', type=int, default=10, help='Number of bins to use in histogram (default is 10)')
args = aparser.parse_args(sys.argv[1:])
plotdata = []
data = sys.stdin.readline()
# Skip the requested number of header lines.  One more line than the skip
# count is read so that `data` ends up holding the first line to analyze.
# The count is clamped at zero: a negative -s would otherwise read nothing
# and leave `data` unbound for the delimiter detection below.
headlines = max(int(args.s), 0)
for _ in range(headlines + 1):
    data = sys.stdin.readline()

# Choose the column separator: an explicit -d wins; otherwise sniff the
# first data line, preferring tab, then space, then comma.
if args.d:
    delimiter = args.d
    # csv.reader only accepts a one-character delimiter; report that the
    # same way other input errors in this script are reported instead of
    # letting csv raise a TypeError traceback.
    if len(delimiter) != 1:
        sys.stderr.write('Column separator must be a single character\n')
        sys.exit(1)
elif '\t' in data:
    delimiter = '\t'
elif ' ' in data:
    delimiter = ' '
elif ',' in data:
    delimiter = ','
else:
    delimiter = ' '  # handle single column

# Split the first data line into its columns.  An empty line (for example
# end of input) produces an empty list.
data = next(csv.reader([data], delimiter=delimiter), [])
nodebydatum = {}
idx = args.c - 1
autoidx = False
while data:
node = None
if ':' in data:
node, data = data.split(':', 1)
datum = float(data.split()[args.c - 1])
if ':' in data[0]:
node, data[0] = data[0].split(':', 1)
else:
node = data[0]
if idx == -1 and not autoidx:
while not autoidx:
try:
datum = float(data[idx])
except ValueError:
idx -= 1
continue
except IndexError:
sys.stderr.write('Unable to identify a numerical column\n')
sys.exit(1)
autoidx = True
else:
datum = float(data[idx])
if node:
if datum in nodebydatum:
nodebydatum[datum].add(node)
@ -95,12 +129,13 @@ while data:
nodebydatum[datum] = set([node])
plotdata.append(datum)
data = sys.stdin.readline()
data = list(csv.reader([data], delimiter=delimiter))[0]
n = None
if args.g or args.o or args.s:
if args.g or args.o or args.x:
n, bins = plot(args.g, args.o, plotdata, bins=args.b)
if args.t:
n, bins = textplot(plotdata, bins=args.b)
print('Samples: {5} Min: {3} Median: {0} Mean: {1} Max: {4} StandardDeviation: {2}'.format(np.median(plotdata), np.mean(plotdata), np.std(plotdata), np.min(plotdata), np.max(plotdata), len(plotdata)))
print('Samples: {5} Min: {3} Median: {0} Mean: {1} Max: {4} StandardDeviation: {2} Sum: {6}'.format(np.median(plotdata), np.mean(plotdata), np.std(plotdata), np.min(plotdata), np.max(plotdata), len(plotdata), np.sum(plotdata)))
if args.v and n is not None and nodebydatum:
print('')
currbin = bins[0]