// fst.cc
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// FST definitions.
#include <fst/fst.h>
// Include these so they are registered
#include <fst/compact-fst.h>
#include <fst/const-fst.h>
#include <fst/matcher-fst.h>
#include <fst/vector-fst.h>
#include <fst/edit-fst.h>
// FST flag definitions.
// Each DEFINE_* line below names a flag, gives its default value and its help
// text. The help strings are runtime text and are kept verbatim.

DEFINE_bool(fst_verify_properties, false,
            "Verify fst properties queried by TestProperties");

DEFINE_string(fst_weight_separator, ",",
              "Character separator between printed composite weights; "
              "must be a single character");

DEFINE_string(fst_weight_parentheses, "",
              "Characters enclosing the first weight of a printed composite "
              "weight (e.g. pair weight, tuple weight and derived classes) to "
              "ensure proper I/O of nested composite weights; "
              "must have size 0 (none) or 2 (open and close parenthesis)");

DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");

// Default limit is 2^20 bytes. The LL suffix belongs on the value being
// shifted, not on the shift count: the result of << has the type of its
// (promoted) left operand, so `1<<20LL` was still evaluated as a plain int.
DEFINE_int64(fst_default_cache_gc_limit, 1LL << 20,
             "Cache byte size that triggers garbage collection");

DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");

DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file");
DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file");

// Consumed by the FstReadOptions constructors in this file, which pass it to
// FstReadOptions::ReadMode ("read" and "map" are the recognized values).
DEFINE_string(fst_read_mode, "read",
              "Default file reading mode for mappable files");
namespace fst {
// Register VectorFst, ConstFst and EditFst for the common arc types
// (StdArc, LogArc and Log64Arc).
// NOTE(review): REGISTER_FST is defined outside this file; presumably it
// declares a registerer object per (FST class, arc type) pair, like the
// explicit FstRegisterer declarations used for CompactFst in this file --
// confirm against its definition.
REGISTER_FST(VectorFst, StdArc);
REGISTER_FST(VectorFst, LogArc);
REGISTER_FST(VectorFst, Log64Arc);
REGISTER_FST(ConstFst, StdArc);
REGISTER_FST(ConstFst, LogArc);
REGISTER_FST(ConstFst, Log64Arc);
REGISTER_FST(EditFst, StdArc);
REGISTER_FST(EditFst, LogArc);
REGISTER_FST(EditFst, Log64Arc);
// Register CompactFst for common arcs with the default (uint32) size type.
// One file-local FstRegisterer object is declared per (arc type, compactor)
// pair: StdArc and LogArc, each with the String, WeightedString, Acceptor,
// Unweighted and UnweightedAcceptor compactors. The objects are never
// referenced again in this file, so they exist only for the effect of being
// constructed.
// NOTE(review): presumably the FstRegisterer constructor performs the actual
// registration -- confirm against its definition.

// String compactor.
static FstRegisterer<
CompactFst<StdArc, StringCompactor<StdArc> > >
CompactFst_StdArc_StringCompactor_registerer;
static FstRegisterer<
CompactFst<LogArc, StringCompactor<LogArc> > >
CompactFst_LogArc_StringCompactor_registerer;
// Weighted string compactor.
static FstRegisterer<
CompactFst<StdArc, WeightedStringCompactor<StdArc> > >
CompactFst_StdArc_WeightedStringCompactor_registerer;
static FstRegisterer<
CompactFst<LogArc, WeightedStringCompactor<LogArc> > >
CompactFst_LogArc_WeightedStringCompactor_registerer;
// Acceptor compactor.
static FstRegisterer<
CompactFst<StdArc, AcceptorCompactor<StdArc> > >
CompactFst_StdArc_AcceptorCompactor_registerer;
static FstRegisterer<
CompactFst<LogArc, AcceptorCompactor<LogArc> > >
CompactFst_LogArc_AcceptorCompactor_registerer;
// Unweighted compactor.
static FstRegisterer<
CompactFst<StdArc, UnweightedCompactor<StdArc> > >
CompactFst_StdArc_UnweightedCompactor_registerer;
static FstRegisterer<
CompactFst<LogArc, UnweightedCompactor<LogArc> > >
CompactFst_LogArc_UnweightedCompactor_registerer;
// Unweighted acceptor compactor.
static FstRegisterer<
CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > >
CompactFst_StdArc_UnweightedAcceptorCompactor_registerer;
static FstRegisterer<
CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > >
CompactFst_LogArc_UnweightedAcceptorCompactor_registerer;
// Fst type definitions for lookahead Fsts.
// These are the definitions of the type-name strings. The explicit `extern`
// gives the const arrays external linkage (a namespace-scope const object
// would otherwise have internal linkage), so other translation units can
// refer to them.
// NOTE(review): presumably declared in and used by fst/matcher-fst.h --
// confirm.
extern const char arc_lookahead_fst_type[] = "arc_lookahead";
extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
extern const char olabel_lookahead_fst_type[] = "olabel_lookahead";
// Identifies stream data as an FST (and its endianness).
// This is the first value emitted by FstHeader::Write and the value that
// IsFstHeader and FstHeader::Read compare against.
static const int32 kFstMagicNumber = 2125659606;
// Probes the stream for the Fst magic number, telling the caller whether the
// upcoming stream content is an Fst header. The stream is put back at the
// position it had on entry whenever that position could be obtained.
//
//   strm:   stream to probe.
//   source: name of the stream; not used by this function.
//
// Returns true iff the value read from the stream equals kFstMagicNumber.
bool IsFstHeader(istream &strm, const string &source) {
  const int64 pos = strm.tellg();
  // 0 differs from kFstMagicNumber, so a read that stores nothing cannot
  // produce a match.
  int32 magic_number = 0;
  ReadType(strm, &magic_number);
  const bool match = (magic_number == kFstMagicNumber);
  // A "no" answer is a normal outcome of this probe, after which the caller
  // will want to read the stream as something else. If the probe itself left
  // the stream in a failed state (e.g. the input is shorter than the magic
  // number), seekg() would do nothing and the caller would get back a failed,
  // un-rewound stream. tellg() returns -1 for a stream that was already
  // failed on entry, so pos >= 0 means any failure seen here was caused by
  // the probe and is safe to clear before seeking back.
  if (pos >= 0 && !strm) strm.clear();
  strm.seekg(pos);
  return match;
}
// Reads an Fst header from the stream: first the magic number, then the
// header fields in the same order in which FstHeader::Write emits them.
//
//   strm:   stream to read from.
//   source: name of the stream, used only in error messages.
//   rewind: if true, the stream is repositioned to where it was before the
//           call, both on success and when the magic number does not match.
//           NOTE(review): when reading the fields fails, the stream is left
//           as is (not repositioned), even with rewind = true.
//
// Returns true on success; logs an error and returns false otherwise.
bool FstHeader::Read(istream &strm, const string &source, bool rewind) {
  // The starting position is only looked up when it will be needed.
  const int64 origin = rewind ? static_cast<int64>(strm.tellg()) : 0;

  int32 magic = 0;
  ReadType(strm, &magic);

  if (magic == kFstMagicNumber) {
    // Field order defines the on-stream layout; keep in sync with Write().
    ReadType(strm, &fsttype_);
    ReadType(strm, &arctype_);
    ReadType(strm, &version_);
    ReadType(strm, &flags_);
    ReadType(strm, &properties_);
    ReadType(strm, &start_);
    ReadType(strm, &numstates_);
    ReadType(strm, &numarcs_);

    if (strm) {
      if (rewind) strm.seekg(origin);
      return true;
    }

    LOG(ERROR) << "FstHeader::Read: read failed: " << source;
    return false;
  }

  // Not an Fst header: report it and optionally put the stream back.
  LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
  if (rewind) strm.seekg(origin);
  return false;
}
// Writes the Fst magic number followed by the header fields, in the same
// order in which FstHeader::Read consumes them.
//
//   strm:   stream to write to.
//   source: name of the stream, used only in the error message.
//
// Returns true if the stream is still in a good state after the writes;
// otherwise logs an error and returns false. (Previously this always
// returned true, so a failed write was reported as success.)
bool FstHeader::Write(ostream &strm, const string &source) const {
  WriteType(strm, kFstMagicNumber);
  // Field order defines the on-stream layout; keep in sync with Read().
  WriteType(strm, fsttype_);
  WriteType(strm, arctype_);
  WriteType(strm, version_);
  WriteType(strm, flags_);
  WriteType(strm, properties_);
  WriteType(strm, start_);
  WriteType(strm, numstates_);
  WriteType(strm, numarcs_);
  if (!strm) {
    LOG(ERROR) << "FstHeader::Write: write failed: " << source;
    return false;
  }
  return true;
}
// Builds read options for the input named `src` with a caller-supplied
// header. `hdr`, `isym` and `osym` are stored as given. The read mode is
// taken from the fst_read_mode flag; an unrecognized flag value is logged by
// ReadMode, which then falls back to READ.
FstReadOptions::FstReadOptions(const string& src,
                               const FstHeader *hdr,
                               const SymbolTable* isym,
                               const SymbolTable* osym)
    : source(src),
      header(hdr),
      isymbols(isym),
      osymbols(osym) {
  this->mode = ReadMode(FLAGS_fst_read_mode);
}
// Builds read options for the input named `src` without a header (the header
// pointer is set to null). `isym` and `osym` are stored as given. The read
// mode is taken from the fst_read_mode flag; an unrecognized flag value is
// logged by ReadMode, which then falls back to READ.
FstReadOptions::FstReadOptions(const string& src,
                               const SymbolTable* isym,
                               const SymbolTable* osym)
    : source(src),
      header(0),
      isymbols(isym),
      osymbols(osym) {
  this->mode = ReadMode(FLAGS_fst_read_mode);
}
// Converts a read-mode name into the corresponding FileReadMode value.
//
//   mode: "read" selects READ and "map" selects MAP.
//
// Any other string is reported with an error log and treated as READ.
FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) {
  if (mode == "map") {
    return MAP;
  }
  if (mode != "read") {
    // Unrecognized name: complain, then fall through to the READ default.
    LOG(ERROR) << "Unknown file read mode " << mode;
  }
  return READ;
}
} // namespace fst