# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import collections
import logging
import re
import time

from google.appengine.api import urlfetch
import webapp2

from base import bigquery
from base import constants
from common import buildbot


class Builds(webapp2.RequestHandler):
  """Handler that loads buildbot builds missing from BigQuery into it."""

  def get(self):
    """Fetches unrecorded builds of every master and inserts their events.

    Events of incomplete builds go to constants.CURRENT_BUILDS_TABLE, which is
    inserted with truncate=True; events of complete builds go to
    constants.BUILDS_TABLE. The handler then polls every insert job that was
    started.
    """
    urlfetch.set_default_fetch_deadline(300)

    bq = bigquery.BigQuery()

    running_rows = []
    finished_rows = []
    for master_name in constants.MASTER_NAMES:
      builders = buildbot.Builders(master_name)
      available = _AvailableBuilds(builders)
      recorded = _RecordedBuilds(bq, builders, available)
      for builder in builders:
        # Only builds that BigQuery does not know about yet need fetching.
        unrecorded = available[builder.name] - recorded[builder.name]
        running, finished = _TraceEventsForBuilder(builder, unrecorded)
        running_rows.extend(running)
        finished_rows.extend(finished)

    # Start both inserts before waiting on either of them.
    pending_jobs = []
    if running_rows:
      pending_jobs.extend(bq.InsertRowsAsync(
          constants.DATASET, constants.CURRENT_BUILDS_TABLE, running_rows,
          truncate=True))
    if finished_rows:
      pending_jobs.extend(bq.InsertRowsAsync(
          constants.DATASET, constants.BUILDS_TABLE, finished_rows))

    poll_timeout = 60 * 20  # 20 minutes.
    for job in pending_jobs:
      bq.PollJob(job, poll_timeout)


def _AvailableBuilds(builders):
  """Collects the most recent cached build numbers of every builder.

  Args:
    builders: Iterable of builder objects. Each must expose a name and
        cached_builds, a collection of build numbers.

  Returns:
    A dict mapping each builder name to a frozenset of build numbers. The set
    is empty when the builder has no cached builds; otherwise it holds the
    cached builds whose number lies within the 100 build numbers ending at the
    newest cached build.
  """
  # Buildbot on tryserver.chromium.perf is occasionally including build 0 in
  # its list of cached builds. That results in more builds than we want.
  # Limit the list to the last 100 builds, because the urlfetch URL limit is
  # 2048 bytes, and "&select=100000" * 100 is 1400 bytes.
  max_builds = 100

  available_builds = {}
  for builder in builders:
    if not builder.cached_builds:
      available_builds[builder.name] = frozenset()
      continue

    max_build = max(builder.cached_builds)
    # Strictly greater than: the window (max_build - 100, max_build] spans
    # exactly 100 build numbers, whereas ">=" would let 101 through.
    available_builds[builder.name] = frozenset(
        build for build in builder.cached_builds
        if build > max_build - max_builds)
  return available_builds


def _RecordedBuilds(bq, builders, available_builds):
  """Finds which of the available builds are already stored in BigQuery.

  Args:
    bq: BigQuery client. Only QuerySync(query, timeout) is called on it.
    builders: Iterable of builder objects exposing a name.
    available_builds: Dict mapping every builder name to a collection of build
        numbers, as produced by _AvailableBuilds.

  Returns:
    A collections.defaultdict(set) mapping builder names to the build numbers
    returned by the query. For each builder the query only covers builds
    between the smallest and largest available build number. The result is
    empty, and no query is sent, when no builder has any available build.
  """
  conditions = []
  for builder in builders:
    candidates = available_builds[builder.name]
    if not candidates:
      continue
    conditions.append('WHEN builder = "%s" THEN build >= %d AND build <= %d' %
                      (builder.name, min(candidates), max(candidates)))

  builds = collections.defaultdict(set)
  if not conditions:
    # Without any WHEN clause the query would read "WHERE CASE  END", which is
    # malformed. There is nothing to look up anyway.
    return builds

  # 105 days / 15 weeks. Must be some number greater than 100 days, because
  # we request up to 100 builds (see _AvailableBuilds), and the slowest cron
  # bots run one job every day.
  start_time_ms = -1000 * 60 * 60 * 24 * 105
  table = '%s.%s@%d-' % (constants.DATASET, constants.BUILDS_TABLE,
                         start_time_ms)

  query = (
      'SELECT builder, build '
      'FROM [%s] '
      'WHERE CASE %s END '
      'GROUP BY builder, build' % (table, ' '.join(conditions))
  )
  query_result = bq.QuerySync(query, 600)

  for row in query_result:
    # Each row carries its cells under 'f', in SELECT order: builder, build.
    builder_cell, build_cell = row['f'][0], row['f'][1]
    builds[builder_cell['v']].add(int(build_cell['v']))
  return builds


def _TraceEventsForBuilder(builder, build_numbers):
  """Fetches the given builds of a builder and turns them into trace events.

  Args:
    builder: Builder object. Its name is logged and builder.builds.Fetch() is
        called with build_numbers.
    build_numbers: Collection of build numbers to fetch.

  Returns:
    A pair (current_events, events): the trace events of builds that are not
    complete, and the trace events of complete builds. Both are empty tuples
    when build_numbers is empty, and lists otherwise.
  """
  if not build_numbers:
    return (), ()

  logging.info('Getting %s: %s', builder.name,
               ', '.join(str(number) for number in sorted(build_numbers)))

  fetched_builds = builder.builds.Fetch(build_numbers)
  # Taken right after the fetch; used as the end time of anything unfinished.
  query_time = time.time()

  running_events = []
  finished_events = []
  for build in fetched_builds:
    destination = finished_events if build.complete else running_events
    destination.extend(_TraceEventsFromBuild(builder, build, query_time))

  return running_events, finished_events


def _TraceEventsFromBuild(builder, build, query_time):
  """Generates the trace events of one build and of its started steps.

  Args:
    builder: Builder object; its name and master_name are read.
    build: Build object; its number, start_time, end_time, slave_name, status,
        url and steps are read.
    query_time: Value used as the end time of a build or step whose own
        end_time is falsy.

  Yields:
    One dict for the build itself, followed by one dict for every step that
    has a start time and is not named 'steps'.
  """
  # A builder name ending in " (N)" is split into configuration and shard N.
  shard_match = re.match(r'(.+) \(([0-9]+)\)', builder.name)
  if shard_match is None:
    configuration = builder.name
    host_shard = 0
  else:
    configuration = shard_match.group(1)
    host_shard = int(shard_match.group(2))

  os_name, os_version, role = _ParseBuilderName(builder.master_name,
                                                builder.name)

  # Fields that are identical for the build event and all of its step events.
  shared_fields = {
      'build': build.number,
      'builder': builder.name,
      'configuration': configuration,
      'host_shard': host_shard,
      'hostname': build.slave_name,
      'master': builder.master_name,
      'os': os_name,
      'os_version': os_version,
      'role': role,
  }

  # Build trace event.
  build_event = dict(shared_fields)
  build_event.update({
      'name': 'Build %d' % build.number,
      'start_time': build.start_time,
      'end_time': build.end_time or query_time,
      'status': build.status,
      'url': build.url,
  })
  yield build_event

  # Step trace events.
  for step in build.steps:
    if not step.start_time or step.name == 'steps':
      continue

    step_event = dict(shared_fields)
    step_event.update({
        'name': step.name,
        'start_time': step.start_time,
        'end_time': step.end_time or query_time,
        'benchmark': step.name,  # TODO(dtu): This isn't always right.
        'status': step.status,
        'url': step.url,
    })
    yield step_event


def _ParseBuilderName(master_name, builder_name):
  """Splits a builder name into its OS, OS version and role.

  Args:
    master_name: Name of the master the builder belongs to. Supported values
        are 'chromium.perf', 'client.catapult', 'tryserver.chromium.perf' and
        'tryserver.client.catapult'.
    builder_name: Name of the builder, in the naming scheme of that master.

  Returns:
    A tuple (os, os_version, role) of lowercased strings, with the OS
    'windows' shortened to 'win'. os is None when the name carries no OS
    (possible on tryserver.client.catapult). os_version is None unless a
    chromium.perf name includes a version.

  Raises:
    NotImplementedError: If master_name is not one of the supported masters.
    ValueError: If builder_name does not follow the master's naming scheme.
  """
  if master_name == 'chromium.perf':
    pattern = (r'^([A-Za-z]+)(?: ([0-9\.]+|XP))?([A-Za-z0-9-\. ]+)? '
               r'(Builder|Perf)(?: \([0-9]+\))?$')
  elif master_name == 'client.catapult':
    # The "+" must sit inside the group. A repeated one-letter group would
    # only keep the last letter of the role word.
    pattern = r'^Catapult(?: ([A-Za-z]+))? ([A-Za-z]+)$'
  elif master_name == 'tryserver.chromium.perf':
    pattern = r'^(android|linux|mac|win).*_([a-z]+)$'
  elif master_name == 'tryserver.client.catapult':
    pattern = r'^Catapult(?: (Android|Linux|Mac|Windows))? ([A-Za-z]+)$'
  else:
    raise NotImplementedError('Unsupported master "%s".' % master_name)

  match = re.match(pattern, builder_name)
  if not match:
    raise ValueError('Unrecognized builder name "%s" on master "%s".' %
                     (builder_name, master_name))
  groups = match.groups()

  os_version = None
  if master_name == 'chromium.perf':
    os = groups[0]
    os_version = groups[1]
    # The pattern only admits these two suffixes.
    role = {'Builder': 'builder', 'Perf': 'tester'}[groups[3]]
  elif master_name == 'client.catapult':
    role, os = groups
    if not role:
      role = 'tester'
  else:
    # Both tryserver masters put the OS first and the role last.
    os, role = groups

  if os:
    os = os.lower()
  if os == 'windows':
    os = 'win'
  if os_version:
    os_version = os_version.lower()
  role = role.lower()

  return (os, os_version, role)