2019-09-23 11:04:52 -04:00
|
|
|
#!/usr/bin/python2
|
2019-04-25 13:59:15 -04:00
|
|
|
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
|
|
|
|
|
|
|
# Copyright 2019 Lenovo
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
2019-04-25 14:45:47 -04:00
|
|
|
import argparse
|
2023-10-13 15:25:08 -04:00
|
|
|
import base64
|
2019-05-10 11:08:12 -04:00
|
|
|
import csv
|
2019-04-25 13:59:15 -04:00
|
|
|
import io
|
2019-04-25 13:51:50 -04:00
|
|
|
import numpy as np
|
|
|
|
import sys
|
|
|
|
|
2021-01-14 15:49:30 -05:00
|
|
|
# sixel support is optional: DumbWriter only exists when the third-party
# ``sixel`` module can be imported.
try:
    import sixel
except ImportError:
    pass
else:
    class DumbWriter(sixel.SixelWriter):
        """SixelWriter variant whose restore_position hook does nothing."""

        def restore_position(self, output):
            # Deliberate no-op override of the parent's hook.
            # NOTE(review): presumably this keeps the cursor below the drawn
            # image instead of jumping back -- confirm against the sixel
            # module's SixelWriter implementation.
            return None
|
2019-04-25 13:51:50 -04:00
|
|
|
|
|
|
|
|
2023-10-13 15:25:08 -04:00
|
|
|
def iterm_draw(data):
    """Write an in-memory image to stdout as an inline-file escape sequence.

    ``data`` must provide ``getbuffer()`` (for example ``io.BytesIO``).  The
    raw bytes are base64 encoded and wrapped in an OSC 1337
    ``File=inline=1;size=<byte count>:`` sequence terminated by BEL, followed
    by a newline; stdout is flushed afterwards.
    """
    raw = data.getbuffer()
    encoded = base64.b64encode(raw).decode('utf8')
    out = sys.stdout
    out.write(''.join((
        '\x1b]1337;File=inline=1;size={}:'.format(len(raw)),
        encoded,
        '\a',
        '\n',
    )))
    out.flush()
|
|
|
|
|
|
|
|
|
|
|
|
def kitty_draw(data):
    """Write an in-memory image to stdout using kitty graphics escape codes.

    ``data`` must provide ``getbuffer()`` (for example ``io.BytesIO``).  The
    bytes are base64 encoded and sent in chunks of at most 4096 characters,
    each wrapped in its own ``ESC _G ... ESC \\`` sequence.  ``m=1`` marks
    "more chunks follow" and ``m=0`` marks the final chunk.  A newline is
    written after the last chunk.  Empty input produces only the newline.
    """
    chunk_size = 4096
    encoded = base64.b64encode(data.getbuffer()).decode('ascii')
    total = len(encoded)
    # Only the first chunk carries the control keys (a=T: transmit and
    # display, f=100: format code 100); later chunks carry just the ``m``
    # continuation flag, as the kitty protocol specifies for chunked
    # transfers.
    control = 'a=T,f=100,'
    # Walk by offset rather than repeatedly re-slicing the remainder, which
    # copied the whole tail on every iteration.
    for start in range(0, total, chunk_size):
        end = start + chunk_size
        more = 1 if end < total else 0
        sys.stdout.write('\x1b_G{0}m={1};'.format(control, more))
        sys.stdout.write(encoded[start:end])
        sys.stdout.write('\x1b\\')
        sys.stdout.flush()
        control = ''
    sys.stdout.write('\n')
    sys.stdout.flush()
|
|
|
|
|
|
|
|
|
|
|
|
def plot(gui, output, plotdata, bins, fmt):
    """Draw a histogram of ``plotdata`` with matplotlib.

    Parameters:
        gui: when true, show the histogram with ``plt.show()``; nothing is
            saved or written to the terminal in that case.
        output: target passed to ``plt.savefig`` when not in GUI mode.  When
            falsy, the figure is rendered into an in-memory buffer and drawn
            on the terminal instead.
        plotdata: the values to histogram.
        bins: bin specification handed to ``plt.hist``.
        fmt: terminal image format used when neither ``gui`` nor ``output``
            is set: 'sixel', 'kitty' or 'iterm'.  Any other value draws
            nothing.

    Returns:
        ``(n, bins)`` -- the first two values returned by ``plt.hist``.
    """
    # matplotlib is imported lazily so text-only use does not need it.
    import matplotlib as mpl
    if gui and mpl.get_backend().lower() == 'agg':
        sys.stderr.write('Error: No GUI backend available and -g specified!\n')
    if not gui:
        # Select the non-interactive backend before pyplot is imported.
        mpl.use('Agg')
    import matplotlib.pyplot as plt
    n, bins, _ = plt.hist(plotdata, bins)
    if gui:
        # Only a GUI run has anything to show; calling show() on the Agg
        # backend is at best a no-op.
        plt.show()
        return n, bins
    tdata = output if output else io.BytesIO()
    plt.savefig(tdata)
    if not output:
        if fmt == 'sixel':
            # DumbWriter is only defined when the optional sixel module was
            # importable; report that cleanly instead of a NameError trace.
            try:
                writer_class = DumbWriter
            except NameError:
                sys.stderr.write(
                    'Error: sixel output requested, but the python sixel '
                    'module is not available\n')
                sys.exit(1)
            writer_class().draw(tdata)
        elif fmt == 'kitty':
            kitty_draw(tdata)
        elif fmt == 'iterm':
            iterm_draw(tdata)
    return n, bins
|
|
|
|
|
|
|
|
def textplot(plotdata, bins):
    """Print a plain-text horizontal histogram of ``plotdata``.

    One line is printed per bin: the bin's lower edge (one decimal place,
    right-aligned), a ``|`` separator, then a bar of ``=`` characters scaled
    so that the fullest bin spans the remaining width of an 80 column line.

    Parameters:
        plotdata: the values to histogram.
        bins: bin specification handed to ``np.histogram``.

    Returns:
        ``(n, bins)`` exactly as returned by ``np.histogram``.
    """
    n, bins = np.histogram(plotdata, bins)
    labels = ['{0:0.1f}'.format(edge) for edge in bins]
    # Since this will be primarily piped into, hard to get
    # terminal width
    width = 80
    # np.histogram always returns at least two edges, so labels is non-empty.
    labelwidth = max(len(lab) for lab in labels)
    width -= labelwidth + 1
    labelfmt = '{{0:>{0}s}}|'.format(labelwidth)
    maxn = float(np.max(n))
    # zip() stops at len(n), so the final (upper) edge gets no line.
    for label, count in zip(labels, n):
        # With no samples every count is zero; draw empty bars rather than
        # dividing by zero and failing on int(nan).
        barlen = int(np.round((count / maxn) * width)) if maxn else 0
        print(labelfmt.format(label) + '=' * barlen)
    return n, bins
|
2019-04-25 14:45:47 -04:00
|
|
|
|
|
|
|
histogram = False
# Command line definition.  Options that yield histogram bins are -x, -g,
# -o (all via plot()) and -t (via textplot()); -v can only list nodes when at
# least one of those was requested.
aparser = argparse.ArgumentParser(description='Quick access to common statistics')
aparser.add_argument('-c', type=int, default=0, help='Column number to analyze (default is last column)')
aparser.add_argument('-d', default=None, help='Value used to separate columns')
aparser.add_argument('-x', default=False, action='store_true', help='Output histogram in graphical format')
aparser.add_argument('-f', default='sixel', help='Format for histogram output (sixel/iterm/kitty)')
# type=int makes argparse reject a non-numeric count up front instead of the
# script failing later with a traceback.
aparser.add_argument('-s', type=int, default=0, help='Number of header lines to skip before processing')
aparser.add_argument('-g', default=False, action='store_true', help='Open histogram in separate graphical window')
aparser.add_argument('-o', default=None, help='Output histogram to the specified filename in PNG format')
aparser.add_argument('-t', default=False, action='store_true', help='Output a histogram in text format')
aparser.add_argument('-v', default=False, action='store_true', help='Attempt to list nodes relevant to each histogram bar (requires -x, -g, -o, or -t)')
aparser.add_argument('-b', type=int, default=10, help='Number of bins to use in histogram (default is 10)')
args = aparser.parse_args(sys.argv[1:])
|
2019-04-25 13:51:50 -04:00
|
|
|
# Read stdin into plotdata (the numeric samples) and nodebydatum (sample
# value -> set of labels taken from the first column).
plotdata = []
# A negative count is treated as zero so the first data line is always read.
headlines = max(int(args.s), 0)
# Discard the header lines; the final readline() here fetches the first data
# line, which is also used to guess the delimiter below.
while headlines >= 0:
    data = sys.stdin.readline()
    headlines -= 1
if args.d:
    delimiter = args.d
else:
    # Prefer tab, then space, then comma, based on the first data line.
    for delimiter in ('\t', ' ', ','):
        if delimiter in data:
            break
    else:
        delimiter = ' '  # handle single column
nodebydatum = {}
# -c is 1-based; the default of 0 becomes -1, which requests auto-detection
# of the last numeric column.
idx = args.c - 1
autoidx = False
line = data
# readline() returns '' only at end of input, so blank lines in the middle of
# the input are skipped rather than being mistaken for EOF.
while line:
    if not line.strip():
        line = sys.stdin.readline()
        continue
    data = list(csv.reader([line], delimiter=delimiter))[0]
    line = sys.stdin.readline()
    # A "name:" prefix on the first column is split off as the label;
    # otherwise the whole first column is used as the label.
    if ':' in data[0]:
        node, data[0] = data[0].split(':', 1)
    else:
        node = data[0]
    if idx == -1 and not autoidx:
        # Walk backwards from the last column until one parses as a float;
        # the column found is then reused for every following line.
        while not autoidx:
            try:
                datum = float(data[idx])
            except ValueError:
                idx -= 1
                continue
            except IndexError:
                sys.stderr.write('Unable to identify a numerical column\n')
                sys.exit(1)
            autoidx = True
    else:
        datum = float(data[idx])
    if node:
        nodebydatum.setdefault(datum, set()).add(node)
    plotdata.append(datum)
|
2019-04-26 16:04:01 -04:00
|
|
|
def _print_bin_members(low, high, members):
    """Print one value range followed by the sorted labels that fall in it."""
    print('Entries between {0} and {1}'.format(low, high))
    print('-' * 80)
    print(','.join(sorted(members)))
    print('')
    print('')


n = None
if not plotdata:
    # Without samples np.min/np.max below would raise; fail with a message.
    sys.stderr.write('No numerical data found on input\n')
    sys.exit(1)
if args.g or args.o or args.x:
    n, bins = plot(args.g, args.o, plotdata, bins=args.b, fmt=args.f)
if args.t:
    n, bins = textplot(plotdata, bins=args.b)
print('Samples: {5} Min: {3} Median: {0} Mean: {1} Max: {4} StandardDeviation: {2} Sum: {6}'.format(
    np.median(plotdata), np.mean(plotdata), np.std(plotdata),
    np.min(plotdata), np.max(plotdata), len(plotdata), np.sum(plotdata)))
# The per-bar listing needs bin edges, which only exist when a histogram was
# produced above (n is not None).
if args.v and n is not None and nodebydatum:
    print('')
    currbin = bins[0]
    bins = bins[1:]
    currbinmembers = []
    # Walk the distinct values in ascending order, flushing the collected
    # labels each time a value passes the next remaining bin edge.
    for datum in sorted(nodebydatum):
        if datum > bins[0]:
            nextbin = None
            endbin = bins[0]
            # Drop every edge strictly below this value; the last one dropped
            # becomes the lower bound reported for the next group.
            while len(bins) and bins[0] < datum:
                nextbin = bins[0]
                bins = bins[1:]
            # Compare against None explicitly: an edge of exactly 0.0 is a
            # valid lower bound and must not be replaced.
            if nextbin is None:
                nextbin = np.max(plotdata)
            _print_bin_members(currbin, endbin, currbinmembers)
            currbin = nextbin
            currbinmembers = []
        currbinmembers.extend(nodebydatum[datum])
    if currbinmembers:
        _print_bin_members(currbin, np.max(plotdata), currbinmembers)
|