# Plain text  |  266 lines  |  11.9 KB

#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Tests for results_stats."""

import os
import sys

import unittest

# Numpy is an optional dependency: when it cannot be imported, `np` is set to
# None so that code further down can detect its absence.
try:
  import numpy as np
except ImportError:
  np = None

# Add the parent of this file's directory to sys.path (at index 1, i.e. right
# after the first entry) before importing `statistical_analysis` below.
# NOTE(review): presumably `statistical_analysis` lives in that parent
# directory -- confirm against the repository layout.
sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__),
                                                '..')))
from statistical_analysis import results_stats


class StatisticalBenchmarkResultsAnalysisTest(unittest.TestCase):
  """Unit testing of several functions in results_stats.

  P-values are compared with assertAlmostEqual instead of exact equality,
  because bit-identical floating-point results are not guaranteed across
  platforms and library versions. Tests that need pseudo-random samples are
  skipped when Numpy is not installed.
  """

  def _AssertResultsAlmostEqual(self, output, expected_output):
    """Compares statistical test outcomes, tolerating float rounding noise.

    Args:
      output: Either a (verdict, p_value) pair or a (possibly nested) dict
          whose leaves are such pairs.
      expected_output: The expected value, structured like |output|.

    Raises:
      AssertionError: If the dict keys or verdicts differ, or if a p-value
          differs from the expected one after rounding to 7 decimal places
          (the assertAlmostEqual default).
    """
    if isinstance(expected_output, dict):
      # Compare the key sets first so that a missing or unexpected metric or
      # page is reported by name instead of as a KeyError.
      self.assertEqual(set(output), set(expected_output))
      for key, expected_value in expected_output.items():
        self._AssertResultsAlmostEqual(output[key], expected_value)
      return

    verdict, p_value = output
    expected_verdict, expected_p_value = expected_output
    self.assertEqual(verdict, expected_verdict)
    self.assertAlmostEqual(p_value, expected_p_value)

  def testGetChartsFromBenchmarkResultJson(self):
    """Unit test for errors raised when getting the charts element.

    Also makes sure that the 'trace' element is deleted if it exists, and that
    only pages of type 'list_of_scalar_values' that have a 'values' entry are
    kept in the output.
    """
    input_json_wrong_format = {'charts_wrong': {}}
    input_json_empty = {'charts': {}}
    with self.assertRaises(ValueError):
      results_stats.GetChartsFromBenchmarkResultJson(input_json_wrong_format)
    with self.assertRaises(ValueError):
      results_stats.GetChartsFromBenchmarkResultJson(input_json_empty)

    input_json_with_trace = {'charts':
                             {'trace': {},
                              'Ex_metric_1':
                              {'Ex_page_1': {'type': 'list_of_scalar_values',
                                             'values': [1, 2]},
                               'Ex_page_2': {'type': 'histogram',
                                             'values': [1, 2]}},
                              'Ex_metric_2':
                              {'Ex_page_1': {'type': 'list_of_scalar_values'},
                               'Ex_page_2': {'type': 'list_of_scalar_values',
                                             'values': [1, 2]}}}}

    output = results_stats.GetChartsFromBenchmarkResultJson(
        input_json_with_trace)
    expected_output = {'Ex_metric_1':
                       {'Ex_page_1': {'type': 'list_of_scalar_values',
                                      'values': [1, 2]}},
                       'Ex_metric_2':
                       {'Ex_page_2': {'type': 'list_of_scalar_values',
                                      'values': [1, 2]}}}
    self.assertEqual(output, expected_output)

  def testCreateBenchmarkResultDict(self):
    """Unit test for benchmark result dict created from a benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark and then
    compares the output dict against an expected predefined output dict.
    """
    metric_names = ['messageloop_start_time',
                    'open_tabs_time',
                    'window_display_time']
    metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]]

    input_json = {'charts': {}}
    for metric, metric_vals in zip(metric_names, metric_values):
      input_json['charts'][metric] = {'summary':
                                      {'values': metric_vals,
                                       'type': 'list_of_scalar_values'}}

    output = results_stats.CreateBenchmarkResultDict(input_json)
    expected_output = {'messageloop_start_time': [55, 72, 60],
                       'open_tabs_time': [54, 42, 65],
                       'window_display_time': [44, 89]}

    self.assertEqual(output, expected_output)

  def testCreatePagesetBenchmarkResultDict(self):
    """Unit test for pageset benchmark result dict created from benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark when it
    includes a pageset and then compares the output dict against an expected
    predefined output dict.
    """
    metric_names = ['messageloop_start_time',
                    'open_tabs_time',
                    'window_display_time']
    metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]]
    page_names = ['Ex_page_1', 'Ex_page_2']

    input_json = {'charts': {}}
    for metric, metric_vals in zip(metric_names, metric_values):
      # The 'summary' values differ from the per-page values on purpose: the
      # expected output below contains only the per-page entries.
      input_json['charts'][metric] = {'summary':
                                      {'values': [0, 1, 2, 3],
                                       'type': 'list_of_scalar_values'}}
      for page in page_names:
        input_json['charts'][metric][page] = {'values': metric_vals,
                                              'type': 'list_of_scalar_values'}

    output = results_stats.CreatePagesetBenchmarkResultDict(input_json)
    expected_output = {'messageloop_start_time': {'Ex_page_1': [55, 72, 60],
                                                  'Ex_page_2': [55, 72, 60]},
                       'open_tabs_time': {'Ex_page_1': [54, 42, 65],
                                          'Ex_page_2': [54, 42, 65]},
                       'window_display_time': {'Ex_page_1': [44, 89],
                                               'Ex_page_2': [44, 89]}}

    self.assertEqual(output, expected_output)

  def testCombinePValues(self):
    """Unit test for Fisher's Method that combines multiple p-values."""
    test_p_values = [0.05, 0.04, 0.10, 0.07, 0.01]

    expected_output = 0.00047334256271885721
    output = results_stats.CombinePValues(test_p_values)

    # Floating-point result: compare with a tolerance, not bit-for-bit.
    self.assertAlmostEqual(output, expected_output)

  def CreateRandomNormalDistribution(self, mean=0, size=30):
    """Creates one pseudo-random normal sample for use in multiple tests.

    Numpy's global random generator is re-seeded with 0 on every call, so
    calls with the same arguments return the same sample.

    Args:
      mean: Mean (loc) of the normal distribution to draw from.
      size: Number of values to draw.

    Returns:
      The sample returned by np.random.normal, drawn with scale=1.

    Raises:
      ImportError: If Numpy is not installed.
    """
    if np is None:
      raise ImportError('This function requires Numpy.')

    np.random.seed(0)
    sample = np.random.normal(loc=mean, scale=1, size=size)

    return sample

  def testIsNormallyDistributed(self):
    """Unit test for values returned when testing for normality."""
    if np is None:
      self.skipTest("Numpy is not installed.")

    test_samples = [self.CreateRandomNormalDistribution(0),
                    self.CreateRandomNormalDistribution(1)]

    expected_outputs = [(True, 0.5253966450691223),
                        (True, 0.5253913402557373)]
    for sample, expected_output in zip(test_samples, expected_outputs):
      output = results_stats.IsNormallyDistributed(sample)

      self._AssertResultsAlmostEqual(output, expected_output)

  def testAreSamplesDifferent(self):
    """Unit test for values returned after running the statistical tests.

    First checks the errors raised for two fixed 15-element samples and the
    result for two identical constant samples; these checks do not need Numpy.
    Then creates two pseudo-random normally distributed samples to run the
    statistical tests and compares the resulting answer and p-value against
    their pre-calculated values.
    """
    test_samples = [3 * [0, 0, 2, 4, 4], 3 * [5, 5, 7, 9, 9]]
    with self.assertRaises(results_stats.SampleSizeError):
      results_stats.AreSamplesDifferent(test_samples[0], test_samples[1],
                                        test=results_stats.MANN)
    with self.assertRaises(results_stats.NonNormalSampleError):
      results_stats.AreSamplesDifferent(test_samples[0], test_samples[1],
                                        test=results_stats.WELCH)

    test_samples_equal = (20 * [1], 20 * [1])
    expected_output_equal = (False, 1.0)
    output_equal = results_stats.AreSamplesDifferent(test_samples_equal[0],
                                                     test_samples_equal[1],
                                                     test=results_stats.MANN)
    self._AssertResultsAlmostEqual(output_equal, expected_output_equal)

    # Everything below needs Numpy to generate the samples.
    if np is None:
      self.skipTest("Numpy is not installed.")

    test_samples = [self.CreateRandomNormalDistribution(0),
                    self.CreateRandomNormalDistribution(1)]
    test_options = results_stats.ALL_TEST_OPTIONS

    # One expected result per entry of ALL_TEST_OPTIONS.
    # NOTE(review): the order is presumably Mann, Kolmogorov, Welch, and the
    # factor of 2 presumably turns a one-sided p-value into a two-sided one --
    # confirm against results_stats.
    expected_outputs = [(True, 2 * 0.00068516628052438266),
                        (True, 0.0017459498829507842),
                        (True, 0.00084765230478226514)]

    for test, expected_output in zip(test_options, expected_outputs):
      output = results_stats.AreSamplesDifferent(test_samples[0],
                                                 test_samples[1],
                                                 test=test)
      self._AssertResultsAlmostEqual(output, expected_output)

  def testAssertThatKeysMatch(self):
    """Unit test for exception raised when input dicts' metrics don't match."""
    differing_input_dicts = [{'messageloop_start_time': [55, 72, 60],
                              'display_time': [44, 89]},
                             {'messageloop_start_time': [55, 72, 60]}]
    with self.assertRaises(results_stats.DictMismatchError):
      results_stats.AssertThatKeysMatch(differing_input_dicts[0],
                                        differing_input_dicts[1])

  def testAreBenchmarkResultsDifferent(self):
    """Unit test for statistical test outcome dict.

    Compares two result dicts with every test in ALL_TEST_OPTIONS:
    'open_tabs_time' holds the same sample in both dicts, 'display_time' holds
    samples drawn with different means.
    """
    # Skip, instead of failing with ImportError, when the samples cannot be
    # generated.
    if np is None:
      self.skipTest("Numpy is not installed.")

    test_input_dicts = [{'open_tabs_time':
                         self.CreateRandomNormalDistribution(0),
                         'display_time':
                         self.CreateRandomNormalDistribution(0)},
                        {'open_tabs_time':
                         self.CreateRandomNormalDistribution(0),
                         'display_time':
                         self.CreateRandomNormalDistribution(1)}]
    test_options = results_stats.ALL_TEST_OPTIONS

    # One expected dict per entry of ALL_TEST_OPTIONS.
    expected_outputs = [{'open_tabs_time': (False, 2 * 0.49704973080841425),
                         'display_time': (True, 2 * 0.00068516628052438266)},
                        {'open_tabs_time': (False, 1.0),
                         'display_time': (True, 0.0017459498829507842)},
                        {'open_tabs_time': (False, 1.0),
                         'display_time': (True, 0.00084765230478226514)}]

    for test, expected_output in zip(test_options, expected_outputs):
      output = results_stats.AreBenchmarkResultsDifferent(test_input_dicts[0],
                                                          test_input_dicts[1],
                                                          test=test)
      self._AssertResultsAlmostEqual(output, expected_output)

  def testArePagesetBenchmarkResultsDifferent(self):
    """Unit test for statistical test outcome dict of pageset results.

    For every metric, 'Ex_page_1' holds the same sample in both input dicts
    and 'Ex_page_2' holds samples drawn with different means.
    """
    # Skip, instead of failing with ImportError, when the samples cannot be
    # generated.
    if np is None:
      self.skipTest("Numpy is not installed.")

    distributions = (self.CreateRandomNormalDistribution(0),
                     self.CreateRandomNormalDistribution(1))
    test_input_dicts = ({'open_tabs_time': {'Ex_page_1': distributions[0],
                                            'Ex_page_2': distributions[0]},
                         'display_time': {'Ex_page_1': distributions[1],
                                          'Ex_page_2': distributions[1]}},
                        {'open_tabs_time': {'Ex_page_1': distributions[0],
                                            'Ex_page_2': distributions[1]},
                         'display_time': {'Ex_page_1': distributions[1],
                                          'Ex_page_2': distributions[0]}})
    test_options = results_stats.ALL_TEST_OPTIONS

    expected_outputs = ({'open_tabs_time':  # Mann.
                         {'Ex_page_1': (False, 2 * 0.49704973080841425),
                          'Ex_page_2': (True, 2 * 0.00068516628052438266)},
                         'display_time':
                         {'Ex_page_1': (False, 2 * 0.49704973080841425),
                          'Ex_page_2': (True, 2 * 0.00068516628052438266)}},
                        {'open_tabs_time':  # Kolmogorov.
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.0017459498829507842)},
                         'display_time':
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.0017459498829507842)}},
                        {'open_tabs_time':  # Welch.
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)},
                         'display_time':
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)}})

    for test, expected_output in zip(test_options, expected_outputs):
      output = results_stats.ArePagesetBenchmarkResultsDifferent(
          test_input_dicts[0], test_input_dicts[1], test=test)
      self._AssertResultsAlmostEqual(output, expected_output)


# Run the tests in this module when the file is executed as a script; the
# value returned by unittest.main() is passed to sys.exit().
if __name__ == '__main__':
  sys.exit(unittest.main())