// C++ source  |  401 lines  |  16.14 KB

// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPEN_VCDIFF_HEADERPARSER_H_
#define OPEN_VCDIFF_HEADERPARSER_H_

#include <config.h>
#include <stddef.h>  // NULL
#include <stdint.h>  // int32_t, uint32_t
#include "checksum.h"  // VCDChecksum
#include "vcdiff_defs.h"  // VCDiffResult

namespace open_vcdiff {

// A parsing cursor over one contiguous block of bytes.  The chunk remembers
// where the block begins and ends, plus the current parsing position, so that
// callers can tell how much has been consumed and how much is still left.
// Only pointers into the block are stored; the bytes themselves are neither
// copied nor freed by this class.
//
// ParseableChunk declares no virtual destructor.  An object of a class derived
// from it must therefore be destroyed through its own concrete type, never
// through a ParseableChunk*.
class ParseableChunk {
 public:
  // Creates a chunk covering the data_size bytes that begin at data_start,
  // with the parsing position placed on the first byte.
  ParseableChunk(const char* data_start, size_t data_size)
      : start_(data_start),
        end_(data_start + data_size),
        position_(data_start) { }

  // One past the last byte of the chunk.
  const char* End() const { return end_; }

  // Count of bytes between the parsing position and the end of the chunk.
  // This shrinks as the position moves forward, so it generally differs from
  // the size the chunk was created with.
  size_t UnparsedSize() const {
    return static_cast<size_t>(end_ - position_);
  }

  // Count of bytes between the start of the chunk and the parsing position.
  size_t ParsedSize() const {
    return static_cast<size_t>(position_ - start_);
  }

  // True once the parsing position has reached the end of the chunk.
  bool Empty() const { return end_ == position_; }

  // The first byte that has not been parsed yet.
  const char* UnparsedData() const { return position_; }

  // Returns the address of the internal position pointer (not the position
  // itself).  A caller can move the parsing position by writing through this
  // address; such writes are not checked against the chunk boundaries here.
  const char** UnparsedDataAddr() { return &position_; }

  // Moves the parsing position forward by number_of_bytes.
  void Advance(size_t number_of_bytes);

  // Jumps the parsing position to a new location.
  void SetPosition(const char* position);

  // Marks the whole chunk as parsed by moving the position to the end.
  void Finish() {
    position_ = end_;
  }

  // Jumps the parsing position so that exactly number_of_bytes bytes remain
  // to be parsed.  That count should be smaller than the amount of unparsed
  // data present before the call.
  void FinishExcept(size_t number_of_bytes);

  // Re-targets the chunk at a different buffer of data_size bytes starting at
  // data_start, and rewinds the parsing position to the start of that buffer.
  void SetDataBuffer(const char* data_start, size_t data_size) {
    position_ = start_ = data_start;
    end_ = start_ + data_size;
  }

 private:
  // First byte of the chunk, and one past its last byte.
  const char* start_;
  const char* end_;

  // Where parsing currently stands inside the chunk.
  // Invariant: start_ <= position_ <= end_.
  const char* position_;

  // Declared private and left undefined so the chunk cannot be copied or
  // assigned.
  ParseableChunk(const ParseableChunk&);
  void operator=(const ParseableChunk&);
};

// Represents one of the three sections in the delta window, as described in
// RFC section 4.3:
//     * Data section for ADDs and RUNs
//     * Instructions and sizes section
//     * Addresses section for COPYs
// When using the interleaved format, data and addresses are pulled from the
// instructions and sizes section rather than being stored in separate sections.
// For that reason, this class allows one DeltaWindowSection to be based on
// another, such that the same position pointer is shared by both sections;
// i.e., UnparsedDataAddr() returns the same value for both objects.
// To achieve this end, one extra level of indirection (a pointer to a
// ParseableChunk object) is added.
class DeltaWindowSection {
 public:
  DeltaWindowSection() : parseable_chunk_(NULL), owned_(true) { }

  ~DeltaWindowSection() {
    FreeChunk();
  }

  void Init(const char* data_start, size_t data_size) {
    if (owned_ && parseable_chunk_) {
      // Reuse the already-allocated ParseableChunk object.
      parseable_chunk_->SetDataBuffer(data_start, data_size);
    } else {
      parseable_chunk_ = new ParseableChunk(data_start, data_size);
      owned_ = true;
    }
  }

  void Init(DeltaWindowSection* original) {
    FreeChunk();
    parseable_chunk_ = original->parseable_chunk_;
    owned_ = false;
  }

  void Invalidate() { FreeChunk(); }

  bool IsOwned() const { return owned_; }

  // The following functions just pass their arguments to the underlying
  // ParseableChunk object.

  const char* End() const {
    return parseable_chunk_->End();
  }

  size_t UnparsedSize() const {
    return parseable_chunk_->UnparsedSize();
  }

  size_t ParsedSize() const {
    return parseable_chunk_->ParsedSize();
  }

  bool Empty() const {
    return parseable_chunk_->Empty();
  }

  const char* UnparsedData() const {
    return parseable_chunk_->UnparsedData();
  }

  const char** UnparsedDataAddr() {
    return parseable_chunk_->UnparsedDataAddr();
  }

  void Advance(size_t number_of_bytes) {
    return parseable_chunk_->Advance(number_of_bytes);
  }
 private:
  void FreeChunk() {
    if (owned_) {
      delete parseable_chunk_;
    }
    parseable_chunk_ = NULL;
  }

  // Will be NULL until Init() has been called.  If owned_ is true, this will
  // point to a ParseableChunk object that has been allocated with "new" and
  // must be deleted by this DeltaWindowSection object.  If owned_ is false,
  // this points at the parseable_chunk_ owned by a different DeltaWindowSection
  // object.  In this case, it is important to free the DeltaWindowSection which
  // does not own the ParseableChunk before (or simultaneously to) freeing the
  // DeltaWindowSection that owns it, or else deleted memory may be accessed.
  ParseableChunk* parseable_chunk_;
  bool owned_;

  // Making these private avoids implicit copy constructor & assignment operator
  DeltaWindowSection(const DeltaWindowSection&);
  void operator=(const DeltaWindowSection&);
};

// Used to parse the bytes and Varints that make up the delta file header
// or delta window header.  The parser wraps a ParseableChunk spanning the
// available input and keeps a result code describing the most recent parse
// failure, which callers retrieve with GetResult().
class VCDiffHeaderParser {
 public:
  // header_start should be the start of the header to be parsed;
  // data_end is the position just after the last byte of available data
  // (which may extend far past the end of the header).
  VCDiffHeaderParser(const char* header_start, const char* data_end);

  // One of these functions should be called for each element of the header.
  // variable_description is a description of the value that we are attempting
  // to parse, and will only be used to create descriptive error messages.
  // (ParseByte() is the exception: it takes no description argument.)
  // If the function returns true, then the element was parsed successfully
  // and its value has been placed in *value.  If the function returns false,
  // then *value is unchanged, and GetResult() can be called to return the
  // reason that the element could not be parsed, which will be either
  // RESULT_ERROR (an error occurred), or RESULT_END_OF_DATA (the limit data_end
  // was reached before the end of the element to be parsed).  Once one of these
  // functions has returned false, further calls to any of the Parse...
  // functions will also return false without performing any additional actions.
  // Typical usage is as follows:
  //     int32_t segment_length = 0;
  //     if (!header_parser.ParseInt32("segment length", &segment_length)) {
  //       return header_parser.GetResult();
  //     }
  //
  // The following example takes advantage of the fact that calling a Parse...
  // function after an error or end-of-data condition is legal and does nothing.
  // It can thus parse more than one element in a row and check the status
  // afterwards.  If the first call to ParseInt32() fails, the second will have
  // no effect:
  //
  //     int32_t segment_length = 0, segment_position = 0;
  //     header_parser.ParseInt32("segment length", &segment_length);
  //     header_parser.ParseInt32("segment position", &segment_position);
  //     if (RESULT_SUCCESS != header_parser.GetResult()) {
  //       return header_parser.GetResult();
  //     }
  //
  bool ParseByte(unsigned char* value);
  bool ParseInt32(const char* variable_description, int32_t* value);
  bool ParseUInt32(const char* variable_description, uint32_t* value);
  bool ParseChecksum(const char* variable_description, VCDChecksum* value);
  bool ParseSize(const char* variable_description, size_t* value);

  // Parses the first three elements of the delta window header:
  //
  //     Win_Indicator                            - byte
  //     [Source segment size]                    - integer (VarintBE format)
  //     [Source segment position]                - integer (VarintBE format)
  //
  // Returns true if the values were parsed successfully and the values were
  // found to be acceptable.  Returns false otherwise, in which case
  // GetResult() can be called to return the reason that the two values
  // could not be validated.  This will be either RESULT_ERROR (an error
  // occurred and was logged), or RESULT_END_OF_DATA (the limit data_end was
  // reached before the end of the values to be parsed).  If the return value
  // is true, then *win_indicator, *source_segment_length, and
  // *source_segment_position are populated with the parsed values.  Otherwise,
  // the values of these output arguments are undefined.
  //
  // dictionary_size: The size of the dictionary (source) file.  Used to
  //     validate the limits of source_segment_length and
  //     source_segment_position if the source segment is taken from the
  //     dictionary (i.e., if the parsed *win_indicator equals VCD_SOURCE).
  // decoded_target_size: The size of the target data that has been decoded
  //     so far, including all target windows.  Used to validate the limits of
  //     source_segment_length and source_segment_position if the source segment
  //     is taken from the target (i.e., if the parsed *win_indicator equals
  //     VCD_TARGET).
  // allow_vcd_target: If this argument is false, and the parsed *win_indicator
  //     is VCD_TARGET, then an error is produced; if true, VCD_TARGET is
  //     allowed.
  // win_indicator (output): Points to a single unsigned char (not an array)
  //     that will receive the parsed value of Win_Indicator.
  // source_segment_length (output): The parsed length of the source segment.
  // source_segment_position (output): The parsed zero-based index in the
  //     source/target file from which the source segment is to be taken.
  bool ParseWinIndicatorAndSourceSegment(size_t dictionary_size,
                                         size_t decoded_target_size,
                                         bool allow_vcd_target,
                                         unsigned char* win_indicator,
                                         size_t* source_segment_length,
                                         size_t* source_segment_position);

  // Parses the following two elements of the delta window header:
  //
  //     Length of the delta encoding             - integer (VarintBE format)
  //     Size of the target window                - integer (VarintBE format)
  //
  // Only the size of the target window is returned through an output argument
  // (*target_window_length); the length of the delta encoding is retained
  // inside the parser (see delta_encoding_length_ below).
  //
  // Return conditions and values are the same as for
  // ParseWinIndicatorAndSourceSegment(), above.
  //
  bool ParseWindowLengths(size_t* target_window_length);

  // May only be called after ParseWindowLengths() has returned true.
  // Returns a pointer to the end of the delta window (which might not point to
  // a valid memory location if there is insufficient input data).
  //
  const char* EndOfDeltaWindow() const;

  // Parses the following element of the delta window header:
  //
  //     Delta_Indicator                          - byte
  //
  // Because none of the bits in Delta_Indicator are used by this implementation
  // of VCDIFF, this function does not have an output argument to return the
  // value of that field.  It returns a bool like the other Parse...()
  // functions; after a false return, GetResult() reports RESULT_ERROR or
  // RESULT_END_OF_DATA.
  //
  bool ParseDeltaIndicator();

  // Parses the following 3 elements of the delta window header:
  //
  //     Length of data for ADDs and RUNs - integer (VarintBE format)
  //     Length of instructions and sizes - integer (VarintBE format)
  //     Length of addresses for COPYs    - integer (VarintBE format)
  //
  // If has_checksum is true, it also looks for the following element:
  //
  //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
  //
  // Return conditions and values are the same as for
  // ParseWinIndicatorAndSourceSegment(), above.
  //
  bool ParseSectionLengths(bool has_checksum,
                           size_t* add_and_run_data_length,
                           size_t* instructions_and_sizes_length,
                           size_t* addresses_length,
                           VCDChecksum* checksum);

  // If one of the Parse... functions returned false, this function
  // can be used to find the result code (RESULT_ERROR or RESULT_END_OF_DATA)
  // describing the reason for the most recent parse failure.  If none of the
  // Parse... functions has returned false, returns RESULT_SUCCESS.
  VCDiffResult GetResult() const {
    return return_code_;
  }

  // The following functions just pass their arguments to the underlying
  // ParseableChunk object.

  // One past the last byte of the underlying chunk.
  const char* End() const {
    return parseable_chunk_.End();
  }

  // Number of bytes in the underlying chunk that have not been parsed yet.
  size_t UnparsedSize() const {
    return parseable_chunk_.UnparsedSize();
  }

  // Number of bytes in the underlying chunk that have already been parsed.
  size_t ParsedSize() const {
    return parseable_chunk_.ParsedSize();
  }

  // Start of the data in the underlying chunk that remains to be parsed.
  const char* UnparsedData() const {
    return parseable_chunk_.UnparsedData();
  }

 private:
  // Parses two variable-length integers representing the source segment length
  // and source segment position (== offset).  Checks whether the source segment
  // length and position would cause it to exceed the size of the source file or
  // target file.  Returns true if the values were parsed successfully and the
  // values were found to be acceptable.  Returns false otherwise, in which case
  // GetResult() can be called to return the reason that the two values could
  // not be validated, which will be either RESULT_ERROR (an error occurred and
  // was logged), or RESULT_END_OF_DATA (the limit data_end was reached before
  // the end of the integers to be parsed).
  // from_size: The requested size of the source segment.
  //     NOTE(review): given the bounds check described above, this is
  //     presumably the size of the source or target file that the segment is
  //     validated against -- confirm against the implementation.
  // from_boundary_name: A null-terminated string naming the end of the
  //     source or target file, used in error messages.
  // from_name: A null-terminated string naming the source or target file,
  //     also used in error messages.
  // source_segment_length (output): The parsed length of the source segment.
  // source_segment_position (output): The parsed zero-based index in the
  //     source/target file from which the source segment is to be taken.
  //
  bool ParseSourceSegmentLengthAndPosition(size_t from_size,
                                           const char* from_boundary_name,
                                           const char* from_name,
                                           size_t* source_segment_length,
                                           size_t* source_segment_position);

  // The input being parsed, together with the current parsing position.
  ParseableChunk parseable_chunk_;

  // Contains the result code of the last Parse...() operation that failed
  // (RESULT_ERROR or RESULT_END_OF_DATA).  If no Parse...() method has been
  // called, or if all calls to Parse...() were successful, then this contains
  // RESULT_SUCCESS.
  VCDiffResult return_code_;

  // Will be zero until ParseWindowLengths() has been called.  After
  // ParseWindowLengths() has been called successfully, this contains the
  // parsed length of the delta encoding.
  size_t delta_encoding_length_;

  // Will be NULL until ParseWindowLengths() has been called.  After
  // ParseWindowLengths() has been called successfully, this points to the
  // beginning of the section of the current window titled "The delta encoding"
  // in the RFC, i.e., to the position just after the length of the delta
  // encoding.
  const char* delta_encoding_start_;

  // Making these private avoids implicit copy constructor & assignment operator
  VCDiffHeaderParser(const VCDiffHeaderParser&);
  void operator=(const VCDiffHeaderParser&);
};

}  // namespace open_vcdiff

#endif  // OPEN_VCDIFF_HEADERPARSER_H_