#!/usr/bin/env python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to plot the results of a bisect run."""

import argparse
import json
import math
import re
import urllib2

from matplotlib import cm  # pylint: disable=import-error
from matplotlib import pyplot  # pylint: disable=import-error
import numpy


# Size of a single subplot, in inches. The figure holds two subplots side by
# side, so its total width is twice _PLOT_WIDTH_INCHES.
_PLOT_WIDTH_INCHES = 8
_PLOT_HEIGHT_INCHES = 6
# Fractions (0 to 1) at which the empirical CDF plot draws percentile dots and
# places its y-axis ticks.
_PERCENTILES = (0, 0.05, 0.25, 0.5, 0.75, 0.95, 1)


def main():
  """Parses the command line, loads the bisect results and plots them.

  The positional argument is first tried as a Buildbot URL, from which the
  Debug Info step's log text is fetched. If it is not a valid URL, it is opened
  as a local file instead. Each commit with a non-empty list of values is
  printed to stdout and then handed to _SavePlots.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('bisect_url_or_debug_info_file',
                      help='The Buildbot URL of a bisect run, or a file '
                      'containing the output from the Debug Info step.')
  parser.add_argument('output', nargs='?', help='File path to save a PNG to.')
  args = parser.parse_args()

  url = (args.bisect_url_or_debug_info_file +
         '/steps/Debug%20Info/logs/Debug%20Info/text')
  try:
    f = urllib2.urlopen(url)
  except ValueError:  # Not a valid URL.
    f = open(args.bisect_url_or_debug_info_file, 'r')
  # Neither the HTTP response nor the file is closed automatically, so read
  # everything up front and release the handle even if reading fails.
  try:
    lines = f.readlines()
  finally:
    f.close()

  results = _ParseResults(lines)
  if not results:
    # Plotting an empty data set would fail later with an opaque error.
    parser.error('No commit values found in the Debug Info output.')

  for commit, values in results:
    # A single pre-formatted argument prints the same text whether print is a
    # statement or a function.
    print('%s %s' % (commit, values))

  _SavePlots(results, args.output)


def _ParseResults(lines):
  """Extracts (commit, values) pairs from lines of Debug Info output.

  Args:
    lines: An iterable of text lines.

  Returns:
    A list of (commit, values) tuples in input order. commit is the last
    revision named at the start of the line, with any "name@" prefix dropped,
    and values is the list parsed from the bracketed numbers that follow it.
    Lines that do not match, or whose list is empty, are skipped.
  """
  # Compiled once here rather than being rebuilt for every line.
  regex = re.compile(r'(?:(?:[a-z0-9-]+@)?[a-z0-9]+,)*'
                     r'(?:[a-z0-9-]+@)?(?P<commit>[a-z0-9]+)\s*'
                     r'(?P<values>\[(?:-?[0-9.]+, )*-?[0-9.]*\])')
  results = []
  for line in lines:
    match = regex.match(line)
    if not match:
      continue

    values = json.loads(match.group('values'))
    if not values:
      continue

    results.append((match.group('commit'), values))
  return results


def _SavePlots(results, file_path=None):
  """Draws the histogram and empirical CDF side by side and displays them.

  Args:
    results: A list of (commit, values) tuples to plot.
    file_path: Optional path to save the figure to. Nothing is saved when this
        is None or empty.
  """
  # One row holding two subplots, hence twice the width of a single plot.
  total_width = 2 * _PLOT_WIDTH_INCHES
  _, axes = pyplot.subplots(1, 2, figsize=(total_width, _PLOT_HEIGHT_INCHES))
  histogram_axis, cdf_axis = axes

  _DrawHistogram(histogram_axis, results)
  _DrawEmpiricalCdf(cdf_axis, results)

  # Save before showing, then release the figure.
  if file_path:
    pyplot.savefig(file_path)
  pyplot.show()
  pyplot.close()


def _DrawHistogram(axis, results):
  """Draws overlaid, normalized histograms of the values of each commit.

  All commits share one set of bins spanning the full range of the combined
  values. The median of each commit is marked with a dashed vertical grid line.

  Args:
    axis: The matplotlib axes to draw on.
    results: A list of (commit, values) tuples, where values is a list of
        numbers.

  Raises:
    ValueError: If results contains no values at all.
  """
  values_per_commit = [values for _, values in results]

  # Calculate bounds and bins.
  # Flatten in linear time; sum(lists, []) copies the accumulator on every step.
  combined_values = [value for values in values_per_commit for value in values]
  if not combined_values:
    raise ValueError('No values to plot.')
  lower_bound = min(combined_values)
  upper_bound = max(combined_values)
  if lower_bound == upper_bound:
    # Widen a zero-width range so the bins have a non-zero extent.
    lower_bound -= 0.5
    upper_bound += 0.5
  # numpy.linspace needs an integer number of bin edges, and at least two
  # edges are required to form a single bin (log(1) would give zero).
  num_bin_edges = max(2, int(math.log(len(combined_values)) * 4))
  bins = numpy.linspace(lower_bound, upper_bound, num_bin_edges)

  # Histograms.
  colors = cm.rainbow(numpy.linspace(  # pylint: disable=no-member
      1, 0, len(results) + 1))
  for (commit, values), color in zip(results, colors):
    # NOTE: newer matplotlib releases spell the `normed` argument `density`;
    # switch over when this script is moved to such a release.
    axis.hist(values, bins, alpha=0.5, normed=True, histtype='stepfilled',
              label='%s (n=%d)' % (commit, len(values)), color=color)

  # Vertical lines denoting the medians.
  medians = tuple(numpy.percentile(values, 50) for values in values_per_commit)
  axis.set_xticks(medians, minor=True)
  axis.grid(which='minor', axis='x', linestyle='--')

  # Axis labels and legend.
  # TODO: Label the x-axis with the metric name; it is not available here.
  axis.set_ylabel('Relative probability')
  axis.legend(loc='upper right')


def _DrawEmpiricalCdf(axis, results):
  """Draws the empirical distribution function of the values of each commit.

  Each commit gets a step curve plus dots at the levels listed in
  _PERCENTILES. The median of each commit is marked with a dashed vertical
  grid line.

  Args:
    axis: The matplotlib axes to draw on.
    results: A list of (commit, values) tuples, where values is a list of
        numbers.
  """
  palette = cm.rainbow(numpy.linspace(  # pylint: disable=no-member
      1, 0, len(results) + 1))
  percentile_ranks = tuple(p * 100 for p in _PERCENTILES)

  for index, (commit, values) in enumerate(results):
    color = palette[index]
    sample_count = len(values)

    # Step curve: the sorted samples, with the maximum repeated so the curve
    # has one x position for each of the sample_count + 1 levels from 0 to 1.
    x_positions = sorted(values)
    x_positions.append(max(values))
    levels = numpy.linspace(0, 1, sample_count + 1)
    axis.step(x_positions, levels,
              label='%s (n=%d)' % (commit, sample_count), color=color)

    # Dots marking where each percentile falls.
    percentile_values = numpy.percentile(values, percentile_ranks)
    axis.plot(percentile_values, _PERCENTILES, '.', color=color)

  axis.set_yticks(_PERCENTILES)

  # Medians become minor x ticks, drawn as dashed vertical grid lines.
  medians = tuple(numpy.percentile(values, 50) for _, values in results)
  axis.set_xticks(medians, minor=True)
  axis.grid(which='minor', axis='x', linestyle='--')

  # Axis label and legend. The x-axis is left unlabeled because the metric
  # name is not available in this function.
  axis.set_ylabel('Cumulative probability')
  axis.legend(loc='lower right')


# Run the plotter only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()