C++程序  |  361行  |  9.36 KB

// far.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// Finite-State Transducer (FST) archive classes.
//

#ifndef FST_EXTENSIONS_FAR_FAR_H__
#define FST_EXTENSIONS_FAR_FAR_H__

#include <fst/extensions/far/stlist.h>
#include <fst/extensions/far/sttable.h>
#include <fst/fst.h>
#include <fst/vector-fst.h>

namespace fst {

enum FarEntryType { FET_LINE, FET_FILE };
enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 };

// FST archive header class
class FarHeader {
 public:
  const string &FarType() const { return fartype_; }
  const string &ArcType() const { return arctype_; }

  bool Read(const string &filename) {
    FstHeader fsthdr;
    if (filename.empty()) {  // Header reading unsupported on stdin.
      return false;
    } else if (IsSTTable(filename)) {  // Check if STTable
      ReadSTTableHeader(filename, &fsthdr);
      fartype_ = "sttable";
      arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
      return true;
    } else if (IsSTList(filename)) {  // Check if STList
      ReadSTListHeader(filename, &fsthdr);
      fartype_ = "sttable";
      arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
      return true;
    }
    return false;
  }

 private:
  string fartype_;
  string arctype_;
};

enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2,
               FAR_SSTABLE = 3 };

// This class creates an archive of FSTs.
template <class A>
class FarWriter {
 public:
  typedef A Arc;

  // Creates a new (empty) FST archive; returns NULL on error.
  static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);

  // Adds an FST to the end of an archive. Keys must be non-empty and
  // in lexicographic order. FSTs must have a suitable write method.
  virtual void Add(const string &key, const Fst<A> &fst) = 0;

  virtual FarType Type() const = 0;

  virtual bool Error() const = 0;

  virtual ~FarWriter() {}

 protected:
  FarWriter() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FarWriter);
};


// This class iterates through an existing archive of FSTs.
template <class A>
class FarReader {
 public:
 typedef A Arc;

  // Opens an existing FST archive in a single file; returns NULL on error.
  // Sets current position to the beginning of the achive.
  static FarReader *Open(const string &filename);

  // Opens an existing FST archive in multiple files; returns NULL on error.
  // Sets current position to the beginning of the achive.
  static FarReader *Open(const vector<string> &filenames);

  // Resets current posision to beginning of archive.
  virtual void Reset() = 0;

  // Sets current position to first entry >= key.  Returns true if a match.
  virtual bool Find(const string &key) = 0;

  // Current position at end of archive?
  virtual bool Done() const = 0;

  // Move current position to next FST.
  virtual void Next() = 0;

  // Returns key at the current position. This reference is invalidated if
  // the current position in the archive is changed.
  virtual const string &GetKey() const = 0;

  // Returns FST at the current position. This reference is invalidated if
  // the current position in the archive is changed.
  virtual const Fst<A> &GetFst() const = 0;

  virtual FarType Type() const = 0;

  virtual bool Error() const = 0;

  virtual ~FarReader() {}

 protected:
  FarReader() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FarReader);
};


template <class A>
class FstWriter {
 public:
  void operator()(ostream &strm, const Fst<A> &fst) const {
    fst.Write(strm, FstWriteOptions());
  }
};


template <class A>
class STTableFarWriter : public FarWriter<A> {
 public:
  typedef A Arc;

  static STTableFarWriter *Create(const string filename) {
    STTableWriter<Fst<A>, FstWriter<A> > *writer =
        STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
    return new STTableFarWriter(writer);
  }

  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }

  FarType Type() const { return FAR_STTABLE; }

  bool Error() const { return writer_->Error(); }

  ~STTableFarWriter() { delete writer_; }

 private:
  explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
      : writer_(writer) {}

 private:
  STTableWriter<Fst<A>, FstWriter<A> > *writer_;

  DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
};


template <class A>
class STListFarWriter : public FarWriter<A> {
 public:
  typedef A Arc;

  static STListFarWriter *Create(const string filename) {
    STListWriter<Fst<A>, FstWriter<A> > *writer =
        STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
    return new STListFarWriter(writer);
  }

  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }

  FarType Type() const { return FAR_STLIST; }

  bool Error() const { return writer_->Error(); }

  ~STListFarWriter() { delete writer_; }

 private:
  explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
      : writer_(writer) {}

 private:
  STListWriter<Fst<A>, FstWriter<A> > *writer_;

  DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
};


template <class A>
FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
  switch(type) {
    case FAR_DEFAULT:
      if (filename.empty())
        return STListFarWriter<A>::Create(filename);
    case FAR_STTABLE:
      return STTableFarWriter<A>::Create(filename);
      break;
    case FAR_STLIST:
      return STListFarWriter<A>::Create(filename);
      break;
    default:
      LOG(ERROR) << "FarWriter::Create: unknown far type";
      return 0;
  }
}


template <class A>
class FstReader {
 public:
  Fst<A> *operator()(istream &strm) const {
    return Fst<A>::Read(strm, FstReadOptions());
  }
};


template <class A>
class STTableFarReader : public FarReader<A> {
 public:
  typedef A Arc;

  static STTableFarReader *Open(const string &filename) {
    STTableReader<Fst<A>, FstReader<A> > *reader =
        STTableReader<Fst<A>, FstReader<A> >::Open(filename);
    // TODO: error check
    return new STTableFarReader(reader);
  }

  static STTableFarReader *Open(const vector<string> &filenames) {
    STTableReader<Fst<A>, FstReader<A> > *reader =
        STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
    // TODO: error check
    return new STTableFarReader(reader);
  }

  void Reset() { reader_->Reset(); }

  bool Find(const string &key) { return reader_->Find(key); }

  bool Done() const { return reader_->Done(); }

  void Next() { return reader_->Next(); }

  const string &GetKey() const { return reader_->GetKey(); }

  const Fst<A> &GetFst() const { return reader_->GetEntry(); }

  FarType Type() const { return FAR_STTABLE; }

  bool Error() const { return reader_->Error(); }

  ~STTableFarReader() { delete reader_; }

 private:
  explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
      : reader_(reader) {}

 private:
  STTableReader<Fst<A>, FstReader<A> > *reader_;

  DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
};


template <class A>
class STListFarReader : public FarReader<A> {
 public:
  typedef A Arc;

  static STListFarReader *Open(const string &filename) {
    STListReader<Fst<A>, FstReader<A> > *reader =
        STListReader<Fst<A>, FstReader<A> >::Open(filename);
    // TODO: error check
    return new STListFarReader(reader);
  }

  static STListFarReader *Open(const vector<string> &filenames) {
    STListReader<Fst<A>, FstReader<A> > *reader =
        STListReader<Fst<A>, FstReader<A> >::Open(filenames);
    // TODO: error check
    return new STListFarReader(reader);
  }

  void Reset() { reader_->Reset(); }

  bool Find(const string &key) { return reader_->Find(key); }

  bool Done() const { return reader_->Done(); }

  void Next() { return reader_->Next(); }

  const string &GetKey() const { return reader_->GetKey(); }

  const Fst<A> &GetFst() const { return reader_->GetEntry(); }

  FarType Type() const { return FAR_STLIST; }

  bool Error() const { return reader_->Error(); }

  ~STListFarReader() { delete reader_; }

 private:
  explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
      : reader_(reader) {}

 private:
  STListReader<Fst<A>, FstReader<A> > *reader_;

  DISALLOW_COPY_AND_ASSIGN(STListFarReader);
};


template <class A>
FarReader<A> *FarReader<A>::Open(const string &filename) {
  if (filename.empty())
    return STListFarReader<A>::Open(filename);
  else if (IsSTTable(filename))
    return STTableFarReader<A>::Open(filename);
  else if (IsSTList(filename))
    return STListFarReader<A>::Open(filename);
  return 0;
}


template <class A>
FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
  if (!filenames.empty() && filenames[0].empty())
    return STListFarReader<A>::Open(filenames);
  else if (!filenames.empty() && IsSTTable(filenames[0]))
    return STTableFarReader<A>::Open(filenames);
  else if (!filenames.empty() && IsSTList(filenames[0]))
    return STListFarReader<A>::Open(filenames);
  return 0;
}

}  // namespace fst

#endif  // FST_EXTENSIONS_FAR_FAR_H__