// Copyright 2007 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Classes to implement an Encoder for the format described in
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.
#include <config.h>
#include "google/vcencoder.h"
#include <vector>
#include "checksum.h"
#include "encodetable.h"
#include "logging.h"
#include "google/output_string.h"
#include "vcdiffengine.h"

namespace open_vcdiff {

// Creates a HashedDictionary backed by a newly allocated VCDiffEngine that is
// constructed from the given dictionary bytes and size.
HashedDictionary::HashedDictionary(const char* dictionary_contents,
                                   size_t dictionary_size)
    : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }

// Frees the engine that was allocated by the constructor.
HashedDictionary::~HashedDictionary() { delete engine_; }

// Initializes the underlying engine and returns whatever
// VCDiffEngine::Init() returns.
// NOTE(review): engine_ is presumably declared as a pointer-to-const in
// vcencoder.h, which is why a const_cast is needed to call Init() -- confirm.
bool HashedDictionary::Init() {
  return const_cast<VCDiffEngine*>(engine_)->Init();
}

// Private implementation behind VCDiffStreamingEncoder.  It combines the
// engine obtained from a HashedDictionary with a VCDiffCodeTableWriter and
// enforces the StartEncoding() / EncodeChunk() / FinishEncoding() call order.
class VCDiffStreamingEncoderImpl {
 public:
  // dictionary must be non-NULL: its engine() is read immediately.
  VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
                             VCDiffFormatExtensionFlags format_extensions,
                             bool look_for_target_matches);

  // These functions are identical to their counterparts
  // in VCDiffStreamingEncoder.
  bool StartEncoding(OutputStringInterface* out);

  bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);

  bool FinishEncoding(OutputStringInterface* out);

  // Returns the match counts maintained by the code table writer.
  const std::vector<int>& match_counts() const {
    return coder_.match_counts();
  }

 private:
  // Write the header (as defined in section 4.1 of the RFC) to *output.
  // This includes information that can be gathered
  // before the first chunk of input is available.
  void WriteHeader(OutputStringInterface* output) const;

  const VCDiffEngine* engine_;

  // This implementation of the encoder uses the default
  // code table.  A VCDiffCodeTableWriter could also be constructed
  // using a custom code table.
  VCDiffCodeTableWriter coder_;

  const VCDiffFormatExtensionFlags format_extensions_;

  // Determines whether to look for matches within the previously encoded
  // target data, or just within the source (dictionary) data.  Please see
  // vcencoder.h for a full explanation of this parameter.
  const bool look_for_target_matches_;

  // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
  // and FinishEncoding() are called in the correct order.  It will be true
  // if StartEncoding() has been called, followed by zero or more calls to
  // EncodeChunk(), but FinishEncoding() has not yet been called.  It will
  // be false initially, after FinishEncoding() has been called, and after
  // a call to StartEncoding() that failed.
  bool encode_chunk_allowed_;

  // Making these private avoids implicit copy constructor & assignment operator
  VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
  void operator=(const VCDiffStreamingEncoderImpl&);
};

// Stores the dictionary's engine and the option flags.  The code table writer
// is told whether the VCD_FORMAT_INTERLEAVED extension bit is set.  Encoding
// is not allowed until StartEncoding() succeeds.
inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
    const HashedDictionary* dictionary,
    VCDiffFormatExtensionFlags format_extensions,
    bool look_for_target_matches)
    : engine_(dictionary->engine()),
      coder_((format_extensions & VCD_FORMAT_INTERLEAVED) != 0),
      format_extensions_(format_extensions),
      look_for_target_matches_(look_for_target_matches),
      encode_chunk_allowed_(false) { }

// Appends the fixed-size delta file header to *output.  The fourth header
// byte is 0x00 for the standard format and 'S' when any format extension
// flag is set.
inline void VCDiffStreamingEncoderImpl::WriteHeader(
    OutputStringInterface* output) const {
  DeltaFileHeader header_data = {
    0xD6,  // Header1: "V" | 0x80
    0xC3,  // Header2: "C" | 0x80
    0xC4,  // Header3: "D" | 0x80
    0x00,  // Header4: Draft standard format
    0x00 };  // Hdr_Indicator:
             // No compression, no custom code table
  if (format_extensions_ != VCD_STANDARD_FORMAT) {
    header_data.header4 = 'S';  // Header4: VCDIFF/SDCH, extensions used
  }
  output->append(reinterpret_cast<const char*>(&header_data),
                 sizeof(header_data));
  // If custom cache table sizes or a custom code table were used
  // for encoding, here is where they would be appended to *output.
  // This implementation of the encoder does not use those features,
  // although the decoder can understand and interpret them.
}

// Initializes the code table writer with the dictionary size, writes the
// file header to *out, and enables EncodeChunk().
// Returns false (and leaves chunk encoding disabled) if the writer could not
// be initialized; in that case nothing is written to *out.
inline bool VCDiffStreamingEncoderImpl::StartEncoding(
    OutputStringInterface* out) {
  if (!coder_.Init(engine_->dictionary_size())) {
    // A previous successful StartEncoding() that was never followed by
    // FinishEncoding() may have left the flag set.  Clear it so that
    // EncodeChunk() cannot run against a writer whose Init() just failed.
    encode_chunk_allowed_ = false;
    LOG(DFATAL) << "Internal error: "
                   "Initialization of code table writer failed" << LOG_ENDL;
    return false;
  }
  WriteHeader(out);
  encode_chunk_allowed_ = true;
  return true;
}

// Encodes one chunk of target data into *out.
// Returns false, after logging an error, if StartEncoding() has not enabled
// chunk encoding.  Otherwise, when the VCD_FORMAT_CHECKSUM extension is set,
// the result of ComputeAdler32(data, len) is handed to the writer before the
// engine encodes the chunk, and true is returned.
inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
    const char* data,
    size_t len,
    OutputStringInterface* out) {
  if (!encode_chunk_allowed_) {
    LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL;
    return false;
  }
  if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
    coder_.AddChecksum(ComputeAdler32(data, len));
  }
  engine_->Encode(data, len, look_for_target_matches_, out, &coder_);
  return true;
}

// Ends the current encoding session by disabling EncodeChunk().
// Returns false, after logging an error, if no session is in progress.
// The output argument is unused.
inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
    OutputStringInterface* /*out*/) {
  if (!encode_chunk_allowed_) {
    LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL;
    return false;
  }
  encode_chunk_allowed_ = false;
  // There should not be any need to output more data
  // since EncodeChunk() encodes a complete target window
  // and there is no end-of-delta-file marker.
  return true;
}

// Allocates the private implementation object with the given options.
VCDiffStreamingEncoder::VCDiffStreamingEncoder(
    const HashedDictionary* dictionary,
    VCDiffFormatExtensionFlags format_extensions,
    bool look_for_target_matches)
    : impl_(new VCDiffStreamingEncoderImpl(dictionary,
                                           format_extensions,
                                           look_for_target_matches)) { }

// Frees the private implementation object.
VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }

// Forwards to VCDiffStreamingEncoderImpl::StartEncoding().
bool VCDiffStreamingEncoder::StartEncodingToInterface(
    OutputStringInterface* out) {
  return impl_->StartEncoding(out);
}

// Forwards to VCDiffStreamingEncoderImpl::EncodeChunk().
bool VCDiffStreamingEncoder::EncodeChunkToInterface(
    const char* data,
    size_t len,
    OutputStringInterface* out) {
  return impl_->EncodeChunk(data, len, out);
}

// Forwards to VCDiffStreamingEncoderImpl::FinishEncoding().
bool VCDiffStreamingEncoder::FinishEncodingToInterface(
    OutputStringInterface* out) {
  return impl_->FinishEncoding(out);
}

// Copies the implementation's match counts into *match_counts.
// Logs an error and leaves the argument untouched if it is NULL.
void VCDiffStreamingEncoder::GetMatchCounts(
    std::vector<int>* match_counts) const {
  if (!match_counts) {
    LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL;
    return;
  }
  *match_counts = impl_->match_counts();
}

// Encodes the whole target buffer in a single start / chunk / finish
// sequence.  *out is cleared first.  On the first call the dictionary is
// initialized and the streaming encoder is created; later calls reuse that
// encoder.  Returns false if dictionary initialization or any encoding step
// fails.
bool VCDiffEncoder::EncodeToInterface(const char* target_data,
                                      size_t target_len,
                                      OutputStringInterface* out) {
  out->clear();
  if (!encoder_) {
    if (!dictionary_.Init()) {
      LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL;
      return false;
    }
    encoder_ = new VCDiffStreamingEncoder(&dictionary_,
                                          flags_,
                                          look_for_target_matches_);
  }
  if (!encoder_->StartEncodingToInterface(out)) {
    return false;
  }
  if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
    return false;
  }
  return encoder_->FinishEncodingToInterface(out);
}

}  // namespace open_vcdiff