#!/usr/bin/python
#
# fiologparser.py
#
# This tool lets you parse multiple fio log files and look at interval
# statistics even when samples are non-uniform. For instance:
#
# fiologparser.py -s *bw*
#
# to see per-interval sums for all bandwidth logs or:
#
# fiologparser.py -a *clat*
#
# to see per-interval average completion latency.
import argparse
import math
def parse_args():
    """Parse the command line (sys.argv) and return the argparse namespace.

    The returned namespace carries:
      interval -- int, width of each reporting interval (default 1000)
      divisor  -- int, every sample contribution is divided by it (default 1)
      full, allstats, average, sum -- output-mode flags, all False by default
      FILE     -- list of one or more log file paths
    """
    parser = argparse.ArgumentParser()
    # The interval is compared directly against the timestamps read from the
    # log files, so it is expressed in the same unit as those timestamps
    # (milliseconds for fio logs), not in seconds.
    parser.add_argument('-i', '--interval', required=False, type=int, default=1000,
                        help='interval of time in milliseconds.')
    parser.add_argument('-d', '--divisor', required=False, type=int, default=1,
                        help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False,
                        help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False,
                        help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False,
                        help='print the sum for each interval.')
    parser.add_argument("FILE", help="fio log files to parse", nargs="+")
    return parser.parse_args()
def get_ftime(series):
    """Return the earliest final timestamp (last.end) found across series.

    Returns 0 for an empty list. A running result of 0 is treated as "not
    set yet", so it is always replaced by the next series' final timestamp.
    """
    earliest = 0
    for entry in series:
        candidate = entry.last.end
        # Keep the current value only when it is set and not later than
        # this series' final timestamp.
        if earliest != 0 and earliest <= candidate:
            continue
        earliest = candidate
    return earliest
def print_full(ctx, series):
    """Print, for every interval, its end time and each series' value.

    Intervals are ctx.interval wide, start at 0 and stop at the time
    returned by get_ftime(series); the final interval is clipped to it.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    lo, hi = 0, step
    while lo < ftime:
        # Never let the interval run past the common end time.
        hi = min(hi, ftime)
        columns = ', '.join("%0.3f" % ts.get_value(lo, hi) for ts in series)
        print("%s, %s" % (hi, columns))
        lo += step
        hi += step
def print_sums(ctx, series):
    """Print, for every interval, its end time and the sum over all series.

    Intervals are ctx.interval wide, start at 0 and stop at the time
    returned by get_ftime(series); the final interval is clipped to it.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    lo, hi = 0, step
    while lo < ftime:
        # Never let the interval run past the common end time.
        hi = min(hi, ftime)
        per_series = [ts.get_value(lo, hi) for ts in series]
        total = sum(per_series)
        print("%s, %0.3f" % (hi, total))
        lo += step
        hi += step
def print_averages(ctx, series):
    """Print, for every interval, its end time and the mean over all series.

    The mean is the sum of the per-series values divided by the number of
    series. Intervals are ctx.interval wide, start at 0 and stop at the time
    returned by get_ftime(series); the final interval is clipped to it.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    lo, hi = 0, step
    while lo < ftime:
        # Never let the interval run past the common end time.
        hi = min(hi, ftime)
        per_series = [ts.get_value(lo, hi) for ts in series]
        mean = float(sum(per_series)) / len(per_series)
        print("%s, %0.3f" % (hi, mean))
        lo += step
        hi += step
# FIXME: this routine is computationally inefficient
# and has O(N^2) behavior
# it would be better to make one pass through samples
# to segment them into a series of time intervals, and
# then compute stats on each time interval instead.
# to debug this routine, use
# # sort -n -t ',' -k 2 small.log
# on your input.
def my_extend(vlist, val):
    """Append every element of val to vlist in place and return vlist.

    Returning the mutated list (unlike list.extend, which returns None)
    lets the function be used as a reducer step.
    """
    vlist.extend(val)
    return vlist
def array_collapser(vlist, val):
    """Reducer step: fold the list val into the accumulator vlist."""
    return my_extend(vlist, val)
def print_all_stats(ctx, series):
    """Print min/avg/median/percentile/max of the samples in each interval.

    A header line is printed first, then one row per interval holding the
    interval start time, the number of samples and the statistics. Only
    samples returned by get_samples(start, end) of each series are counted.
    Intervals are ctx.interval wide, start at 0 and stop at the time returned
    by get_ftime(series); the final interval is clipped to it.

    An interval without any sample is reported with a sample count of 0 and
    "nan" in every statistics column instead of aborting the whole report.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:  # for each time interval
        end = ftime if ftime < end else end
        # Flatten the per-series sample lists into one list of values.
        # A comprehension avoids the builtin reduce(), which no longer
        # exists on Python 3.
        samplevalues = [sample.value
                        for s in series
                        for sample in s.get_samples(start, end)]
        if samplevalues:
            stats = (
                min(samplevalues),
                sum(samplevalues) / float(len(samplevalues)),
                median(samplevalues),
                percentile(samplevalues, 0.90),
                percentile(samplevalues, 0.95),
                percentile(samplevalues, 0.99),
                max(samplevalues),
            )
        else:
            # min()/max() raise ValueError on an empty list; keep the row
            # so the output still has exactly one line per interval.
            stats = (float('nan'),) * 7
        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            (start, len(samplevalues)) + stats))
        # advance to next interval
        start += ctx.interval
        end += ctx.interval
def median(values):
    """Return the median of values as a float.

    For an even number of elements the two middle elements are averaged.
    Raises IndexError when values is empty.
    """
    s = sorted(values)
    n = len(s)
    # Floor division keeps the indices integral on Python 3, where "/"
    # yields a float that cannot be used as a list index.
    return float(s[(n - 1) // 2] + s[n // 2]) / 2
def percentile(values, p):
    """Return the p-th percentile of values, with p given as a fraction.

    The rank (len(values) - 1) * p is located in the sorted values. When it
    falls between two positions, the result is interpolated linearly between
    the two neighbouring elements.
    """
    ordered = sorted(values)
    rank = (len(ordered) - 1) * p
    lower = math.floor(rank)
    upper = math.ceil(rank)
    if lower != upper:
        # Weight each neighbour by its closeness to the fractional rank.
        below = ordered[int(lower)] * (upper - rank)
        above = ordered[int(upper)] * (rank - lower)
        return below + above
    return ordered[int(rank)]
def print_default(ctx, series):
    """Print one number: the time-weighted average of the per-interval sums.

    For each interval the values of all series are summed; each sum is then
    weighted by the width of its interval (the last interval may be shorter
    because it is clipped to the time returned by get_ftime(series)).

    Nothing is printed when no interval is processed, since the weights
    would sum to zero and the average is undefined.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    sums = []
    weights = []
    while start < ftime:
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        sums.append(sum(results))
        weights.append(end - start)
        start += ctx.interval
        end += ctx.interval

    if not weights:
        # No data to average; avoid dividing by zero below.
        return

    total = 0
    for interval_sum, weight in zip(sums, weights):
        total += interval_sum * weight
    print('%0.3f' % (total / sum(weights)))
class TimeSeries(object):
    """The samples read from one log file, kept in file order.

    Attributes:
      ctx     -- the parsed options, handed on to every Sample
      last    -- the sample with the greatest end time seen so far, or None
      samples -- list of all samples in the order they were read
    """

    def __init__(self, ctx, fn):
        """Create the series and load every sample from the file fn."""
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Read the log file fn and add one sample per non-blank line.

        The first two ', '-separated columns are the timestamp and the
        value; any further columns are ignored. A sample spans from the
        previous line's timestamp (0 for the first line) to its own.
        Raises ValueError for a line with fewer than two columns or with
        non-integer time/value.
        """
        p_time = 0
        # The context manager closes the file even if a line fails to parse.
        with open(fn, 'r') as f:
            for line in f:
                line = line.rstrip('\r\n')
                if not line.strip():
                    continue
                # Only time and value are needed; slicing tolerates logs
                # that carry a different number of trailing columns.
                time, value = line.split(', ')[:2]
                self.add_sample(p_time, int(time), int(value))
                p_time = int(time)

    def add_sample(self, start, end, value):
        """Append a sample for [start, end] and update self.last."""
        # Use the context stored on the instance rather than a module
        # global, so the class also works when imported.
        sample = Sample(self.ctx, start, end, value)
        if self.last is None or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples lying entirely within [start, end]."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the sum of every sample's contribution to [start, end]."""
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value
class Sample(object):
    """A single log value that applies to the time span [start, end].

    Attributes:
      ctx   -- the parsed options; only ctx.divisor is read
      start -- start of the span covered by the sample
      end   -- end of the span covered by the sample
      value -- the logged value
    """

    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the interval [start, end].

        The value is scaled by the fraction of the interval that the sample
        overlaps and then divided by ctx.divisor. Returns 0 when the sample
        lies completely outside the interval.
        """
        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0
        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        # Read the divisor from the context stored on the instance; a
        # module-level "ctx" only exists when the file runs as a script.
        return self.value * ratio / self.ctx.divisor
if __name__ == '__main__':
    # ctx and series stay module-level names; other code in this file may
    # look ctx up as a global.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]

    # Output modes in priority order: the first flag that is set wins.
    modes = (
        (ctx.sum, print_sums),
        (ctx.average, print_averages),
        (ctx.full, print_full),
        (ctx.allstats, print_all_stats),
    )
    for enabled, printer in modes:
        if enabled:
            printer(ctx, series)
            break
    else:
        # No mode flag given.
        print_default(ctx, series)