#!/usr/bin/env python # Copyright 2015 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Script to plot the results of a bisect run.""" import argparse import json import math import re import urllib2 from matplotlib import cm # pylint: disable=import-error from matplotlib import pyplot # pylint: disable=import-error import numpy _PLOT_WIDTH_INCHES = 8 _PLOT_HEIGHT_INCHES = 6 _PERCENTILES = (0, 0.05, 0.25, 0.5, 0.75, 0.95, 1) def main(): parser = argparse.ArgumentParser() parser.add_argument('bisect_url_or_debug_info_file', help='The Buildbot URL of a bisect run, or a file ' 'containing the output from the Debug Info step.') parser.add_argument('output', nargs='?', help='File path to save a PNG to.') args = parser.parse_args() url = (args.bisect_url_or_debug_info_file + '/steps/Debug%20Info/logs/Debug%20Info/text') try: f = urllib2.urlopen(url) except ValueError: # Not a valid URL. f = open(args.bisect_url_or_debug_info_file, 'r') results = [] for line in f.readlines(): regex = (r'(?:(?:[a-z0-9-]+@)?[a-z0-9]+,)*' r'(?:[a-z0-9-]+@)?(?P<commit>[a-z0-9]+)\s*' r'(?P<values>\[(?:-?[0-9.]+, )*-?[0-9.]*\])') match = re.match(regex, line) if not match: continue commit = match.group('commit') values = json.loads(match.group('values')) if not values: continue print commit, values results.append((commit, values)) _SavePlots(results, args.output) def _SavePlots(results, file_path=None): """Saves histograms and empirial distribution plots showing the diff. Args: file_path: The location to save the plots go. """ figsize = (_PLOT_WIDTH_INCHES * 2, _PLOT_HEIGHT_INCHES) _, (axis0, axis1) = pyplot.subplots(nrows=1, ncols=2, figsize=figsize) _DrawHistogram(axis0, results) _DrawEmpiricalCdf(axis1, results) if file_path: pyplot.savefig(file_path) pyplot.show() pyplot.close() def _DrawHistogram(axis, results): values_per_commit = [values for _, values in results] # Calculate bounds and bins. combined_values = sum(values_per_commit, []) lower_bound = min(combined_values) upper_bound = max(combined_values) if lower_bound == upper_bound: lower_bound -= 0.5 upper_bound += 0.5 bins = numpy.linspace(lower_bound, upper_bound, math.log(len(combined_values)) * 4) # Histograms. colors = cm.rainbow(numpy.linspace( # pylint: disable=no-member 1, 0, len(results) + 1)) for (commit, values), color in zip(results, colors): axis.hist(values, bins, alpha=0.5, normed=True, histtype='stepfilled', label='%s (n=%d)' % (commit, len(values)), color=color) # Vertical lines denoting the medians. medians = tuple(numpy.percentile(values, 50) for values in values_per_commit) axis.set_xticks(medians, minor=True) axis.grid(which='minor', axis='x', linestyle='--') # Axis labels and legend. #axis.set_xlabel(step.metric_name) axis.set_ylabel('Relative probability') axis.legend(loc='upper right') def _DrawEmpiricalCdf(axis, results): colors = cm.rainbow(numpy.linspace( # pylint: disable=no-member 1, 0, len(results) + 1)) for (commit, values), color in zip(results, colors): # Empirical distribution function. levels = numpy.linspace(0, 1, len(values) + 1) axis.step(sorted(values) + [max(values)], levels, label='%s (n=%d)' % (commit, len(values)), color=color) # Dots denoting the percentiles. axis.plot(numpy.percentile(values, tuple(p * 100 for p in _PERCENTILES)), _PERCENTILES, '.', color=color) axis.set_yticks(_PERCENTILES) # Vertical lines denoting the medians. values_per_commit = [values for _, values in results] medians = tuple(numpy.percentile(values, 50) for values in values_per_commit) axis.set_xticks(medians, minor=True) axis.grid(which='minor', axis='x', linestyle='--') # Axis labels and legend. #axis.set_xlabel(step.metric_name) axis.set_ylabel('Cumulative probability') axis.legend(loc='lower right') if __name__ == '__main__': main()