// far.h // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Copyright 2005-2010 Google, Inc. // Author: riley@google.com (Michael Riley) // // \file // Finite-State Transducer (FST) archive classes. // #ifndef FST_EXTENSIONS_FAR_FAR_H__ #define FST_EXTENSIONS_FAR_FAR_H__ #include <fst/extensions/far/stlist.h> #include <fst/extensions/far/sttable.h> #include <fst/fst.h> #include <fst/vector-fst.h> namespace fst { enum FarEntryType { FET_LINE, FET_FILE }; enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 }; // FST archive header class class FarHeader { public: const string &FarType() const { return fartype_; } const string &ArcType() const { return arctype_; } bool Read(const string &filename) { FstHeader fsthdr; if (filename.empty()) { // Header reading unsupported on stdin. return false; } else if (IsSTTable(filename)) { // Check if STTable ReadSTTableHeader(filename, &fsthdr); fartype_ = "sttable"; arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); return true; } else if (IsSTList(filename)) { // Check if STList ReadSTListHeader(filename, &fsthdr); fartype_ = "sttable"; arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); return true; } return false; } private: string fartype_; string arctype_; }; enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2, FAR_SSTABLE = 3 }; // This class creates an archive of FSTs. template <class A> class FarWriter { public: typedef A Arc; // Creates a new (empty) FST archive; returns NULL on error. static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT); // Adds an FST to the end of an archive. Keys must be non-empty and // in lexicographic order. FSTs must have a suitable write method. virtual void Add(const string &key, const Fst<A> &fst) = 0; virtual FarType Type() const = 0; virtual bool Error() const = 0; virtual ~FarWriter() {} protected: FarWriter() {} private: DISALLOW_COPY_AND_ASSIGN(FarWriter); }; // This class iterates through an existing archive of FSTs. template <class A> class FarReader { public: typedef A Arc; // Opens an existing FST archive in a single file; returns NULL on error. // Sets current position to the beginning of the achive. static FarReader *Open(const string &filename); // Opens an existing FST archive in multiple files; returns NULL on error. // Sets current position to the beginning of the achive. static FarReader *Open(const vector<string> &filenames); // Resets current posision to beginning of archive. virtual void Reset() = 0; // Sets current position to first entry >= key. Returns true if a match. virtual bool Find(const string &key) = 0; // Current position at end of archive? virtual bool Done() const = 0; // Move current position to next FST. virtual void Next() = 0; // Returns key at the current position. This reference is invalidated if // the current position in the archive is changed. virtual const string &GetKey() const = 0; // Returns FST at the current position. This reference is invalidated if // the current position in the archive is changed. virtual const Fst<A> &GetFst() const = 0; virtual FarType Type() const = 0; virtual bool Error() const = 0; virtual ~FarReader() {} protected: FarReader() {} private: DISALLOW_COPY_AND_ASSIGN(FarReader); }; template <class A> class FstWriter { public: void operator()(ostream &strm, const Fst<A> &fst) const { fst.Write(strm, FstWriteOptions()); } }; template <class A> class STTableFarWriter : public FarWriter<A> { public: typedef A Arc; static STTableFarWriter *Create(const string filename) { STTableWriter<Fst<A>, FstWriter<A> > *writer = STTableWriter<Fst<A>, FstWriter<A> >::Create(filename); return new STTableFarWriter(writer); } void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } FarType Type() const { return FAR_STTABLE; } bool Error() const { return writer_->Error(); } ~STTableFarWriter() { delete writer_; } private: explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer) : writer_(writer) {} private: STTableWriter<Fst<A>, FstWriter<A> > *writer_; DISALLOW_COPY_AND_ASSIGN(STTableFarWriter); }; template <class A> class STListFarWriter : public FarWriter<A> { public: typedef A Arc; static STListFarWriter *Create(const string filename) { STListWriter<Fst<A>, FstWriter<A> > *writer = STListWriter<Fst<A>, FstWriter<A> >::Create(filename); return new STListFarWriter(writer); } void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } FarType Type() const { return FAR_STLIST; } bool Error() const { return writer_->Error(); } ~STListFarWriter() { delete writer_; } private: explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer) : writer_(writer) {} private: STListWriter<Fst<A>, FstWriter<A> > *writer_; DISALLOW_COPY_AND_ASSIGN(STListFarWriter); }; template <class A> FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { switch(type) { case FAR_DEFAULT: if (filename.empty()) return STListFarWriter<A>::Create(filename); case FAR_STTABLE: return STTableFarWriter<A>::Create(filename); break; case FAR_STLIST: return STListFarWriter<A>::Create(filename); break; default: LOG(ERROR) << "FarWriter::Create: unknown far type"; return 0; } } template <class A> class FstReader { public: Fst<A> *operator()(istream &strm) const { return Fst<A>::Read(strm, FstReadOptions()); } }; template <class A> class STTableFarReader : public FarReader<A> { public: typedef A Arc; static STTableFarReader *Open(const string &filename) { STTableReader<Fst<A>, FstReader<A> > *reader = STTableReader<Fst<A>, FstReader<A> >::Open(filename); // TODO: error check return new STTableFarReader(reader); } static STTableFarReader *Open(const vector<string> &filenames) { STTableReader<Fst<A>, FstReader<A> > *reader = STTableReader<Fst<A>, FstReader<A> >::Open(filenames); // TODO: error check return new STTableFarReader(reader); } void Reset() { reader_->Reset(); } bool Find(const string &key) { return reader_->Find(key); } bool Done() const { return reader_->Done(); } void Next() { return reader_->Next(); } const string &GetKey() const { return reader_->GetKey(); } const Fst<A> &GetFst() const { return reader_->GetEntry(); } FarType Type() const { return FAR_STTABLE; } bool Error() const { return reader_->Error(); } ~STTableFarReader() { delete reader_; } private: explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader) : reader_(reader) {} private: STTableReader<Fst<A>, FstReader<A> > *reader_; DISALLOW_COPY_AND_ASSIGN(STTableFarReader); }; template <class A> class STListFarReader : public FarReader<A> { public: typedef A Arc; static STListFarReader *Open(const string &filename) { STListReader<Fst<A>, FstReader<A> > *reader = STListReader<Fst<A>, FstReader<A> >::Open(filename); // TODO: error check return new STListFarReader(reader); } static STListFarReader *Open(const vector<string> &filenames) { STListReader<Fst<A>, FstReader<A> > *reader = STListReader<Fst<A>, FstReader<A> >::Open(filenames); // TODO: error check return new STListFarReader(reader); } void Reset() { reader_->Reset(); } bool Find(const string &key) { return reader_->Find(key); } bool Done() const { return reader_->Done(); } void Next() { return reader_->Next(); } const string &GetKey() const { return reader_->GetKey(); } const Fst<A> &GetFst() const { return reader_->GetEntry(); } FarType Type() const { return FAR_STLIST; } bool Error() const { return reader_->Error(); } ~STListFarReader() { delete reader_; } private: explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader) : reader_(reader) {} private: STListReader<Fst<A>, FstReader<A> > *reader_; DISALLOW_COPY_AND_ASSIGN(STListFarReader); }; template <class A> FarReader<A> *FarReader<A>::Open(const string &filename) { if (filename.empty()) return STListFarReader<A>::Open(filename); else if (IsSTTable(filename)) return STTableFarReader<A>::Open(filename); else if (IsSTList(filename)) return STListFarReader<A>::Open(filename); return 0; } template <class A> FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) { if (!filenames.empty() && filenames[0].empty()) return STListFarReader<A>::Open(filenames); else if (!filenames.empty() && IsSTTable(filenames[0])) return STTableFarReader<A>::Open(filenames); else if (!filenames.empty() && IsSTList(filenames[0])) return STListFarReader<A>::Open(filenames); return 0; } } // namespace fst #endif // FST_EXTENSIONS_FAR_FAR_H__