/*
 * Copyright 2014 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

syntax = "proto3";

package kythe.proto;
option java_package = "com.google.devtools.kythe.proto";

import "google/protobuf/any.proto";
import "kythe/proto/storage.proto";

// CompilationAnalyzer services are exposed by any program that wants to plug
// into the Kythe pipeline to perform per-compilation analysis.
service CompilationAnalyzer {
  // Analyze is the main entry point for the analysis driver to send work to the
  // analyzer.  The request names the single CompilationUnit to analyze.  The
  // analysis may produce many outputs (or none), which are streamed back as
  // framed AnalysisOutput messages.  The stream may also carry empty
  // AnalysisOutput messages that exist only to keep the RPC channel alive.
  //
  // A driver may choose to retry analyses that return RPC errors.  It should
  // not retry analyses that are reported as finished unless it is necessary to
  // recover from an external production issue.
  rpc Analyze(AnalysisRequest) returns (stream AnalysisOutput) {}
}

// A FileDataService is used by a CompilationAnalyzer to retrieve the contents
// of input files required for analysis.
service FileDataService {
  // Get returns the contents of one or more files needed for analysis, as a
  // stream of FileData messages.  It is the server's responsibility to do any
  // caching necessary to make this perform well, so that an analyzer does not
  // need to implement its own caches unless it is doing something unusual.
  //
  // For each distinct path/digest pair in the request, the server must return
  // exactly one response.  The order of the responses is arbitrary; each
  // FileData carries a copy of the FileInfo it answers in its `info` field.
  //
  // For each requested file, one or both of the path and digest fields must be
  // nonempty, otherwise an error is returned.  It is not an error for there to
  // be no requested files, however.
  rpc Get(FilesRequest) returns (stream FileData) {}
}

// An AnalysisRequest instructs an analyzer to perform an analysis on a single
// CompilationUnit.  It is the request message of CompilationAnalyzer.Analyze.
message AnalysisRequest {
  // The compilation to analyze.
  CompilationUnit compilation = 1;

  // The address of a file data service to use.  If this is provided, it should
  // be used in preference to any other file data service the analyzer may know
  // about for this compilation.
  // NOTE(review): the address format is not specified in this file -- confirm
  // against the driver/analyzer implementations.
  string file_data_service = 2;
}

// AnalysisOutput contains an output artifact for the current analysis taking
// place.  A given analysis may not produce any outputs.  It is okay for an
// indexer to send an empty AnalysisOutput message if needed to keep the RPC
// channel alive; the driver must correctly handle this.
message AnalysisOutput {
  // The payload of this output artifact.  Left empty in keep-alive messages.
  // NOTE(review): the encoding of these bytes is not specified in this file --
  // confirm against the analyzer/driver contract.
  bytes value = 1;
}

// A CompilationUnit describes a single unit of compilation: the files it
// needs, the arguments used to compile it, and the environment it expects.
message CompilationUnit {
  // Nested message types are declared first; the fields of CompilationUnit
  // itself follow below.

  // One cell of the context-dependent version table.
  //
  // ContextDependentVersionColumn and ContextDependentVersionRow together
  // define a table that relates input contexts (keyed by a single source
  // context per row) to tuples of (byte offset * linked context).  When a
  // FileInput F being processed in context C refers to another FileInput F'
  // at offset O (perhaps because F has an #include directive at O), the
  // context in which F' should be processed is the linked context derived
  // from this table.
  message ContextDependentVersionColumn {
    // The byte offset into the file resource.
    int32 offset = 1;

    // The signature for the resulting context.
    string linked_context = 2;
  }

  // One row of the context-dependent version table; see
  // ContextDependentVersionColumn for details.
  //
  // A row with no columns is valid.  It means that the associated FileInput
  // was seen to exist in some context C, but did not refer to any other
  // FileInputs while in that context.
  message ContextDependentVersionRow {
    // The context to be applied to all columns.
    string source_context = 1;

    // A map from byte offsets to linked contexts.
    repeated ContextDependentVersionColumn column = 2;

    // If true, this version should always be processed regardless of any
    // claiming.
    bool always_process = 3;
  }

  // A FileInput describes one file required by the compilation.
  message FileInput {
    // If set, overrides the `v_name` in the `CompilationUnit` for deriving
    // VNames during analysis.
    VName v_name = 1;

    // The file's metadata. It is invalid to provide a FileInput without both
    // the file's path and digest.
    FileInfo info = 2;

    // The file's context-dependent versions.
    repeated ContextDependentVersionRow context = 3;
  }

  // An Env message represents the name and value of a single environment
  // variable in the build environment.
  message Env {
    // The name of the environment variable.
    string name = 1;

    // The value of the environment variable.
    string value = 2;
  }

  // The base VName for the compilation and any generated VNames from its
  // analysis. Generally, the `language` component designates the language of
  // the compilation's sources.
  VName v_name = 1;

  // The revision of the compilation.
  string revision = 2;

  // All files that might be touched in the course of this compilation.
  // Consumers of the CompilationUnit may not assume anything about the order
  // of the elements of this field.
  repeated FileInput required_input = 3;

  // Set by the extractor to indicate that the original input had compile
  // errors. This is used to check validity of the sharded analysis.
  bool has_compile_errors = 4;

  // The arguments to pass to a compiler tool for this compilation unit,
  // including the compiler executable, flags, and input files.
  repeated string argument = 5;

  // Of those files in `required_input`, the ones that this CompilationUnit
  // is intended to analyze. This is necessary to support languages like Go,
  // where a single translation unit may contain many source files that must
  // all be processed at once (while excluding source files that belong to
  // other CUs/packages, if any).
  repeated string source_file = 6;

  // The output key of the CompilationUnit; for example, the object file that
  // it writes.  The output key for a compilation should match the path in the
  // FileInfo message of a dependent compilation that consumes its output.
  string output_key = 7;

  // The absolute path of the current working directory where the build tool
  // was invoked.  During analysis, a file whose path has working_directory
  // plus a path separator as an exact prefix is considered accessible from
  // that same path without said prefix.  It is only necessary to set this
  // field if the build tool requires it.
  string working_directory = 8;

  // For languages that make use of resource contexts (like C++), the context
  // that should be initially entered.
  // TODO(zarko): What is a "resource context"? Needs a clear definition and/or
  // a link to one.
  string entry_context = 9;

  // A collection of environment variables that the build environment expects
  // to be set.  As a rule, we only record variables here that must be set to
  // specific values for the build to work.  Users of this field may not assume
  // anything about the order of values; in particular the pipeline is free to
  // sort by name in order to canonicalize the message.
  repeated Env environment = 10;

  // Per-language or per-tool details.
  repeated google.protobuf.Any details = 11;
}

// A FilesRequest specifies a collection of files to be fetched from a
// FileDataService.  It is the request message of FileDataService.Get.
message FilesRequest {
  // The files to fetch.  Each entry must have at least one of its path and
  // digest fields non-empty.  The list itself may be empty.
  repeated FileInfo files = 1;
}

// A FileInfo identifies a file used for analysis.
// At least one of the path and digest fields must be non-empty.
message FileInfo {
  // The path of the file relative to the working directory of the compilation
  // command, which is typically the root of the build.
  // For example:
  //  file/base/file.cc
  //  ../../base/atomic_ref_count.h
  // See also CompilationUnit.working_directory, which describes how paths
  // under that directory are resolved during analysis.
  string path = 1;

  // The lowercase ASCII hex SHA-256 digest of the file contents.
  string digest = 2;
}

// A FileData carries the content of a single file, as returned from the Get
// method of a FileDataService.
message FileData {
  // The content of the file, if known.  If `missing` is true, this field must
  // be empty.
  bytes content = 1;

  // A (possibly normalized) copy of the non-empty fields of the FileInfo
  // message from the Get request.  If either field from the original request
  // was empty, the server may optionally fill in that field in the reply if it
  // is known.  For example, if the client requested a file by path only and
  // the server found it, the reply MAY fill in the digest.
  FileInfo info = 2;

  // If true, no data are available for the requested file, and the content
  // field must be empty.  If false, the content field contains the complete
  // file content (which may be empty).  An empty `content` alone therefore
  // does not indicate a missing file; check this flag instead.
  bool missing = 3;
}