// C++ source  |  75 lines  |  3.1 KB

/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
#define LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_

#include "common/memory_image/data-store.h"
#include "common/task-spec.pb.h"
#include "util/strings/stringpiece.h"

namespace libtextclassifier {
namespace nlp_core {

// Holds, entirely in memory, the data that describes a Saft model: the TaskSpec
// (the output of the "spec" stage of the Saft training model) together with the
// bytes of the TaskInputs that spec mentions.  No file I/O is required to reach
// any of it.
//
// Under the hood this is a layer over a DataStore holding two kinds of entries:
//   * the reserved key kTaskSpecDataStoreEntryName, whose value is the binary
//     serialization of the TaskSpec;
//   * one entry per TaskInput whose file_pattern begins with
//     kFilePatternPrefix (declared below), keyed by that file_pattern and
//     holding the content bytes.
//
// Because only prefixed file_patterns are looked up in the DataStore, a
// TaskSpec may keep every TaskInput in memory while classic, on-disk TaskInputs
// remain possible.
class InMemoryModelData {
 public:
  // DataStore key under which the serialized TaskSpec for the whole model is
  // stored.
  static const char kTaskSpecDataStoreEntryName[];

  // Prefix carried by TaskInput::Part::file_pattern values that name in-memory
  // "files"; it tells them apart from other files.
  static const char kFilePatternPrefix[];

  // Builds an InMemoryModelData on top of a chunk of bytes, which is expected
  // to be the output of a DataStoreBuilder.  The bytes are handed directly to
  // the underlying DataStore.
  //
  // NOTE(review): whether the DataStore copies |bytes| or only refers to them
  // is not visible from this header; presumably the caller must keep the
  // underlying memory alive for the lifetime of this object -- confirm.
  explicit InMemoryModelData(StringPiece bytes)
      : data_store_(bytes) {}

  // Writes into *task_spec a TaskSpec that resembles the one the
  // DataStoreBuilder used when it produced the bytes this object was built
  // from.  The difference: every file name (TaskInput::Part::file_pattern) is
  // swapped for a name that GetBytesForInputFile() accepts for fetching the
  // corresponding file content.
  //
  // The return value is true if the operation succeeded and false if it did
  // not.
  bool GetTaskSpec(TaskSpec* task_spec) const;

  // Looks up the content bytes of a file.  |file_name| should be the
  // file_pattern of a TaskInput taken from the TaskSpec obtained through
  // GetTaskSpec().  The result is a StringPiece designating the memory area
  // that holds those bytes; StringPiece(nullptr, 0) signals an error.
  StringPiece GetBytesForInputFile(const std::string& file_name) const;

 private:
  // Backing store for both the serialized TaskSpec and the per-file content
  // bytes; initialized once by the constructor and const thereafter.
  const memory_image::DataStore data_store_;
};

}  // namespace nlp_core
}  // namespace libtextclassifier

#endif  // LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_