// compact-fst.h
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: allauzen@google.com (Cyril Allauzen)
//
// \file
// FST Class for memory-efficient representation of common types of
// FSTs: linear automata, acceptors, unweighted FSTs, ...
#ifndef FST_LIB_COMPACT_FST_H__
#define FST_LIB_COMPACT_FST_H__
#include <iterator>
#include <utility>
using std::pair; using std::make_pair;
#include <vector>
using std::vector;
#include <fst/cache.h>
#include <fst/expanded-fst.h>
#include <fst/fst-decl.h> // For optional argument declarations
#include <fst/mapped-file.h>
#include <fst/matcher.h>
#include <fst/test-properties.h>
#include <fst/util.h>
namespace fst {
struct CompactFstOptions : public CacheOptions {
// CompactFst default caching behaviour is to do no caching. Most
// compactors are cheap and therefore we save memory by not doing
// caching.
CompactFstOptions() : CacheOptions(true, 0) {}
CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
};
// Compactor Interface - class determinies how arcs and final weights
// are compacted and expanded.
//
// Final weights are treated as transitions to the superfinal state,
// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
//
// There are two types of compactors:
//
// * Fixed out-degree compactors: 'compactor.Size()' returns a
// positive integer 's'. An FST can be compacted by this compactor
// only if each state has exactly 's' outgoing transitions (counting a
// non-Zero() final weight as a transition). A typical example is a
// compactor for string FSTs, i.e. 's == 1'.
//
// * Variable out-degree compactors: 'compactor.Size() == -1'. There
// are no out-degree restrictions for these compactors.
//
//
// class Compactor {
// public:
// // Element is the type of the compacted transitions.
// typedef ... Element;
// // Return the compacted representation of a transition 'arc'
// // at a state 's'.
// Element Compact(StateId s, const Arc &arc);
// // Return the transition at state 's' represented by the compacted
// // transition 'e'.
// Arc Expand(StateId s, const Element &e);
// // Return -1 for variable out-degree compactors, and the mandatory
// // out-degree otherwise.
// ssize_t Size();
// // Test whether 'fst' can be compacted by this compactor.
// bool Compatible(const Fst<A> &fst);
// // Return the properties that are always true for an fst
// // compacted using this compactor
// uint64 Properties();
// // Return a string identifying the type of compactor.
// static const string &Type();
// // Write a compactor to a file.
// bool Write(ostream &strm);
// // Read a compactor from a file.
// static Compactor *Read(istream &strm);
// // Default constructor (optional, see comment below).
// Compactor();
// };
//
// The default constructor is only required for FST_REGISTER to work
// (i.e. enabling Convert() and the command-line utilities to work
// with this new compactor). However, a default constructor always
// needs to be specify for this code to compile, but one can have it
// simply raised an error when called:
//
// Compactor::Compactor() {
// FSTERROR() << "Compactor: no default constructor";
// }
// Implementation data for Compact Fst, which can shared between otherwise
// independent copies.
//
// The implementation contains two arrays: 'states_' and 'compacts_'.
//
// For fixed out-degree compactors, the 'states_' array is unallocated.
// The 'compacts_' contains the compacted transitions. Its size is
// 'ncompacts_'. The outgoing transitions at a given state are stored
// consecutively. For a given state 's', its 'compactor.Size()' outgoing
// transitions (including superfinal transition when 's' is final), are
// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()').
//
// For variable out-degree compactors, the states_ array has size
// 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
// For a given state 's', the compacted transitions of 's' are
// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
// By convention, 'states_[nstates_] == ncompacts_'.
//
// In both cases, the superfinal transitons (when 's' is final, i.e.
// 'Final(s) != Weight::Zero()') is stored first.
//
// The unsigned type U is used to represent indices into the compacts_
// array.
template <class E, class U>
class CompactFstData {
public:
typedef E CompactElement;
typedef U Unsigned;
CompactFstData()
: states_region_(0),
compacts_region_(0),
states_(0),
compacts_(0),
nstates_(0),
ncompacts_(0),
narcs_(0),
start_(kNoStateId),
error_(false) {}
template <class A, class Compactor>
CompactFstData(const Fst<A> &fst, const Compactor &compactor);
template <class Iterator, class Compactor>
CompactFstData(const Iterator &begin, const Iterator &end,
const Compactor &compactor);
~CompactFstData() {
if (states_region_ == NULL) {
delete [] states_;
}
delete states_region_;
if (compacts_region_ == NULL) {
delete [] compacts_;
}
delete compacts_region_;
}
template <class Compactor>
static CompactFstData<E, U> *Read(istream &strm,
const FstReadOptions &opts,
const FstHeader &hdr,
const Compactor &compactor);
bool Write(ostream &strm, const FstWriteOptions &opts) const;
Unsigned States(ssize_t i) const { return states_[i]; }
const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
size_t NumStates() const { return nstates_; }
size_t NumCompacts() const { return ncompacts_; }
size_t NumArcs() const { return narcs_; }
ssize_t Start() const { return start_; }
int RefCount() const { return ref_count_.count(); }
int IncrRefCount() { return ref_count_.Incr(); }
int DecrRefCount() { return ref_count_.Decr(); }
bool Error() const { return error_; }
private:
MappedFile *states_region_;
MappedFile *compacts_region_;
Unsigned *states_;
CompactElement *compacts_;
size_t nstates_;
size_t ncompacts_;
size_t narcs_;
ssize_t start_;
RefCounter ref_count_;
bool error_;
};
template <class E, class U>
template <class A, class C>
CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)
: states_region_(0),
compacts_region_(0),
states_(0),
compacts_(0),
nstates_(0),
ncompacts_(0),
narcs_(0),
start_(kNoStateId),
error_(false) {
typedef typename A::StateId StateId;
typedef typename A::Weight Weight;
start_ = fst.Start();
// Count # of states and arcs.
StateId nfinals = 0;
for (StateIterator< Fst<A> > siter(fst);
!siter.Done();
siter.Next()) {
++nstates_;
StateId s = siter.Value();
for (ArcIterator< Fst<A> > aiter(fst, s);
!aiter.Done();
aiter.Next())
++narcs_;
if (fst.Final(s) != Weight::Zero()) ++nfinals;
}
if (compactor.Size() == -1) {
states_ = new Unsigned[nstates_ + 1];
ncompacts_ = narcs_ + nfinals;
compacts_ = new CompactElement[ncompacts_];
states_[nstates_] = ncompacts_;
} else {
states_ = 0;
ncompacts_ = nstates_ * compactor.Size();
if ((narcs_ + nfinals) != ncompacts_) {
FSTERROR() << "CompactFstData: compactor incompatible with fst";
error_ = true;
return;
}
compacts_ = new CompactElement[ncompacts_];
}
size_t pos = 0, fpos = 0;
for (StateId s = 0; s < nstates_; ++s) {
fpos = pos;
if (compactor.Size() == -1)
states_[s] = pos;
if (fst.Final(s) != Weight::Zero())
compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel,
fst.Final(s), kNoStateId));
for (ArcIterator< Fst<A> > aiter(fst, s);
!aiter.Done();
aiter.Next()) {
compacts_[pos++] = compactor.Compact(s, aiter.Value());
}
if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) {
FSTERROR() << "CompactFstData: compactor incompatible with fst";
error_ = true;
return;
}
}
if (pos != ncompacts_) {
FSTERROR() << "CompactFstData: compactor incompatible with fst";
error_ = true;
return;
}
}
template <class E, class U>
template <class Iterator, class C>
CompactFstData<E, U>::CompactFstData(const Iterator &begin,
const Iterator &end,
const C &compactor)
: states_region_(0),
compacts_region_(0),
states_(0),
compacts_(0),
nstates_(0),
ncompacts_(0),
narcs_(0),
start_(kNoStateId),
error_(false) {
typedef typename C::Arc Arc;
typedef typename Arc::Weight Weight;
if (compactor.Size() != -1) {
ncompacts_ = distance(begin, end);
if (compactor.Size() == 1) {
// For strings, allow implicit final weight.
// Empty input is the empty string.
if (ncompacts_ == 0) {
++ncompacts_;
} else {
Arc arc = compactor.Expand(ncompacts_ - 1,
*(begin + (ncompacts_ - 1)));
if (arc.ilabel != kNoLabel)
++ncompacts_;
}
}
if (ncompacts_ % compactor.Size()) {
FSTERROR() << "CompactFstData: size of input container incompatible"
<< " with compactor";
error_ = true;
return;
}
if (ncompacts_ == 0)
return;
start_ = 0;
nstates_ = ncompacts_ / compactor.Size();
compacts_ = new CompactElement[ncompacts_];
size_t i = 0;
Iterator it = begin;
for(; it != end; ++it, ++i){
compacts_[i] = *it;
if (compactor.Expand(i, *it).ilabel != kNoLabel)
++narcs_;
}
if (i < ncompacts_)
compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
Weight::One(), kNoStateId));
} else {
if (distance(begin, end) == 0)
return;
// Count # of states, arcs and compacts.
Iterator it = begin;
for(size_t i = 0; it != end; ++it, ++i) {
Arc arc = compactor.Expand(i, *it);
if (arc.ilabel != kNoLabel) {
++narcs_;
++ncompacts_;
} else {
++nstates_;
if (arc.weight != Weight::Zero())
++ncompacts_;
}
}
start_ = 0;
compacts_ = new CompactElement[ncompacts_];
states_ = new Unsigned[nstates_ + 1];
states_[nstates_] = ncompacts_;
size_t i = 0, s = 0;
for(it = begin; it != end; ++it) {
Arc arc = compactor.Expand(i, *it);
if (arc.ilabel != kNoLabel) {
compacts_[i++] = *it;
} else {
states_[s++] = i;
if (arc.weight != Weight::Zero())
compacts_[i++] = *it;
}
}
if ((s != nstates_) || (i != ncompacts_)) {
FSTERROR() << "CompactFstData: ill-formed input container";
error_ = true;
return;
}
}
}
template <class E, class U>
template <class C>
CompactFstData<E, U> *CompactFstData<E, U>::Read(
istream &strm,
const FstReadOptions &opts,
const FstHeader &hdr,
const C &compactor) {
CompactFstData<E, U> *data = new CompactFstData<E, U>();
data->start_ = hdr.Start();
data->nstates_ = hdr.NumStates();
data->narcs_ = hdr.NumArcs();
if (compactor.Size() == -1) {
if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
delete data;
return 0;
}
size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
data->states_region_ = MappedFile::Map(&strm, opts, b);
if (!strm || data->states_region_ == NULL) {
LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
delete data;
return 0;
}
data->states_ = static_cast<Unsigned *>(
data->states_region_->mutable_data());
} else {
data->states_ = 0;
}
data->ncompacts_ = compactor.Size() == -1
? data->states_[data->nstates_]
: data->nstates_ * compactor.Size();
if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
delete data;
return 0;
}
size_t b = data->ncompacts_ * sizeof(CompactElement);
data->compacts_region_ = MappedFile::Map(&strm, opts, b);
if (!strm || data->compacts_region_ == NULL) {
LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
delete data;
return 0;
}
data->compacts_ = static_cast<CompactElement *>(
data->compacts_region_->mutable_data());
return data;
}
template<class E, class U>
bool CompactFstData<E, U>::Write(ostream &strm,
const FstWriteOptions &opts) const {
if (states_) {
if (opts.align && !AlignOutput(strm)) {
LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
return false;
}
strm.write(reinterpret_cast<char *>(states_),
(nstates_ + 1) * sizeof(Unsigned));
}
if (opts.align && !AlignOutput(strm)) {
LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
return false;
}
strm.write(reinterpret_cast<char *>(compacts_),
ncompacts_ * sizeof(CompactElement));
strm.flush();
if (!strm) {
LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
return false;
}
return true;
}
template <class A, class C, class U> class CompactFst;
template <class F, class G> void Cast(const F &, G *);
// Implementation class for CompactFst, which contains CompactFstData
// and Fst cache.
template <class A, class C, class U>
class CompactFstImpl : public CacheImpl<A> {
public:
using FstImpl<A>::SetType;
using FstImpl<A>::SetProperties;
using FstImpl<A>::Properties;
using FstImpl<A>::SetInputSymbols;
using FstImpl<A>::SetOutputSymbols;
using FstImpl<A>::WriteHeader;
using CacheImpl<A>::PushArc;
using CacheImpl<A>::HasArcs;
using CacheImpl<A>::HasFinal;
using CacheImpl<A>::HasStart;
using CacheImpl<A>::SetArcs;
using CacheImpl<A>::SetFinal;
using CacheImpl<A>::SetStart;
typedef A Arc;
typedef typename A::Weight Weight;
typedef typename A::StateId StateId;
typedef C Compactor;
typedef typename C::Element CompactElement;
typedef U Unsigned;
CompactFstImpl()
: CacheImpl<A>(CompactFstOptions()),
compactor_(0),
own_compactor_(false),
data_(0) {
string type = "compact";
if (sizeof(U) != sizeof(uint32)) {
string size;
Int64ToStr(8 * sizeof(U), &size);
type += size;
}
type += "_";
type += C::Type();
SetType(type);
SetProperties(kNullProperties | kStaticProperties);
}
CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
const CompactFstOptions &opts)
: CacheImpl<A>(opts),
compactor_(new C(compactor)),
own_compactor_(true),
data_(0) {
Init(fst);
}
CompactFstImpl(const Fst<Arc> &fst, C *compactor,
const CompactFstOptions &opts)
: CacheImpl<A>(opts),
compactor_(compactor),
own_compactor_(false),
data_(0) {
Init(fst);
}
template <class Iterator>
CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
const CompactFstOptions &opts)
: CacheImpl<A>(opts),
compactor_(new C(compactor)),
own_compactor_(true),
data_(0) {
Init(b, e);
}
template <class Iterator>
CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
const CompactFstOptions &opts)
: CacheImpl<A>(opts),
compactor_(compactor),
own_compactor_(false),
data_(0) {
Init(b, e);
}
CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
: CacheImpl<A>(impl),
compactor_(new C(*impl.compactor_)),
own_compactor_(true),
data_(impl.data_) {
if (data_)
data_->IncrRefCount();
SetType(impl.Type());
SetProperties(impl.Properties());
SetInputSymbols(impl.InputSymbols());
SetOutputSymbols(impl.OutputSymbols());
}
~CompactFstImpl(){
if (own_compactor_)
delete compactor_;
if (data_ && !data_->DecrRefCount())
delete data_;
}
StateId Start() {
if (!HasStart()) {
SetStart(data_->Start());
}
return CacheImpl<A>::Start();
}
Weight Final(StateId s) {
if (HasFinal(s))
return CacheImpl<A>::Final(s);
Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
if ((compactor_->Size() != -1) ||
(data_->States(s) != data_->States(s + 1)))
arc = ComputeArc(s,
compactor_->Size() == -1
? data_->States(s)
: s * compactor_->Size());
return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero();
}
StateId NumStates() const {
if (Properties(kError)) return 0;
return data_->NumStates();
}
size_t NumArcs(StateId s) {
if (HasArcs(s))
return CacheImpl<A>::NumArcs(s);
Unsigned i, num_arcs;
if (compactor_->Size() == -1) {
i = data_->States(s);
num_arcs = data_->States(s + 1) - i;
} else {
i = s * compactor_->Size();
num_arcs = compactor_->Size();
}
if (num_arcs > 0) {
const A &arc = ComputeArc(s, i, kArcILabelValue);
if (arc.ilabel == kNoStateId) {
--num_arcs;
}
}
return num_arcs;
}
size_t NumInputEpsilons(StateId s) {
if (!HasArcs(s) && !Properties(kILabelSorted))
Expand(s);
if (HasArcs(s))
return CacheImpl<A>::NumInputEpsilons(s);
return CountEpsilons(s, false);
}
size_t NumOutputEpsilons(StateId s) {
if (!HasArcs(s) && !Properties(kOLabelSorted))
Expand(s);
if (HasArcs(s))
return CacheImpl<A>::NumOutputEpsilons(s);
return CountEpsilons(s, true);
}
size_t CountEpsilons(StateId s, bool output_epsilons) {
size_t begin = compactor_->Size() == -1 ?
data_->States(s) : s * compactor_->Size();
size_t end = compactor_->Size() == -1 ?
data_->States(s + 1) : (s + 1) * compactor_->Size();
size_t num_eps = 0;
for (size_t i = begin; i < end; ++i) {
const A &arc = ComputeArc(
s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
const typename A::Label &label =
(output_epsilons ? arc.olabel : arc.ilabel);
if (label == kNoLabel)
continue;
else if (label > 0)
break;
++num_eps;
}
return num_eps;
}
static CompactFstImpl<A, C, U> *Read(istream &strm,
const FstReadOptions &opts) {
CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
FstHeader hdr;
if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
delete impl;
return 0;
}
// Ensures compatibility
if (hdr.Version() == kAlignedFileVersion)
hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
impl->compactor_ = C::Read(strm);
if (!impl->compactor_) {
delete impl;
return 0;
}
impl->own_compactor_ = true;
impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
*impl->compactor_);
if (!impl->data_) {
delete impl;
return 0;
}
return impl;
}
bool Write(ostream &strm, const FstWriteOptions &opts) const {
FstHeader hdr;
hdr.SetStart(data_->Start());
hdr.SetNumStates(data_->NumStates());
hdr.SetNumArcs(data_->NumArcs());
// Ensures compatibility
int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
WriteHeader(strm, opts, file_version, &hdr);
compactor_->Write(strm);
return data_->Write(strm, opts);
}
// Provide information needed for generic state iterator
void InitStateIterator(StateIteratorData<A> *data) const {
data->base = 0;
data->nstates = data_->NumStates();
}
void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
if (!HasArcs(s))
Expand(s);
CacheImpl<A>::InitArcIterator(s, data);
}
Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
return compactor_->Expand(s, data_->Compacts(i), f);
}
void Expand(StateId s) {
size_t begin = compactor_->Size() == -1 ?
data_->States(s) : s * compactor_->Size();
size_t end = compactor_->Size() == -1 ?
data_->States(s + 1) : (s + 1) * compactor_->Size();
for (size_t i = begin; i < end; ++i) {
const Arc &arc = ComputeArc(s, i);
if (arc.ilabel == kNoLabel)
SetFinal(s, arc.weight);
else
PushArc(s, arc);
}
if (!HasFinal(s))
SetFinal(s, Weight::Zero());
SetArcs(s);
}
template <class Iterator>
void SetCompactElements(const Iterator &b, const Iterator &e) {
if (data_ && !data_->DecrRefCount())
delete data_;
data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
}
C *GetCompactor() const { return compactor_; }
CompactFstData<CompactElement, U> *Data() const { return data_; }
// Properties always true of this Fst class
static const uint64 kStaticProperties = kExpanded;
protected:
template <class B, class D>
explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
: CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
compactor_(new C(*impl.GetCompactor())),
own_compactor_(true),
data_(impl.Data()) {
if (data_)
data_->IncrRefCount();
SetType(impl.Type());
SetProperties(impl.Properties());
SetInputSymbols(impl.InputSymbols());
SetOutputSymbols(impl.OutputSymbols());
}
private:
friend class CompactFst<A, C, U>; // allow access during write.
void Init(const Fst<Arc> &fst) {
string type = "compact";
if (sizeof(U) != sizeof(uint32)) {
string size;
Int64ToStr(8 * sizeof(U), &size);
type += size;
}
type += "_";
type += compactor_->Type();
SetType(type);
SetInputSymbols(fst.InputSymbols());
SetOutputSymbols(fst.OutputSymbols());
data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
if (data_->Error())
SetProperties(kError, kError);
uint64 copy_properties = fst.Properties(kCopyProperties, true);
if ((copy_properties & kError) || !compactor_->Compatible(fst)) {
FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
SetProperties(kError, kError);
return;
}
SetProperties(copy_properties | kStaticProperties);
}
template <class Iterator>
void Init(const Iterator &b, const Iterator &e) {
string type = "compact";
if (sizeof(U) != sizeof(uint32)) {
string size;
Int64ToStr(8 * sizeof(U), &size);
type += size;
}
type += "_";
type += compactor_->Type();
SetType(type);
SetProperties(kStaticProperties | compactor_->Properties());
data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
if (data_->Error())
SetProperties(kError, kError);
}
// Current unaligned file format version
static const int kFileVersion = 2;
// Current aligned file format version
static const int kAlignedFileVersion = 1;
// Minimum file format version supported
static const int kMinFileVersion = 1;
C *compactor_;
bool own_compactor_;
CompactFstData<CompactElement, U> *data_;
};
template <class A, class C, class U>
const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
template <class A, class C, class U>
const int CompactFstImpl<A, C, U>::kFileVersion;
template <class A, class C, class U>
const int CompactFstImpl<A, C, U>::kAlignedFileVersion;
template <class A, class C, class U>
const int CompactFstImpl<A, C, U>::kMinFileVersion;
// CompactFst. This class attaches interface to implementation and
// handles reference counting, delegating most methods to
// ImplToExpandedFst. The unsigned type U is used to represent indices
// into the compact arc array (uint32 by default, declared in
// fst-decl.h).
template <class A, class C, class U>
class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
public:
friend class StateIterator< CompactFst<A, C, U> >;
friend class ArcIterator< CompactFst<A, C, U> >;
template <class F, class G> void friend Cast(const F &, G *);
typedef A Arc;
typedef typename A::StateId StateId;
typedef CompactFstImpl<A, C, U> Impl;
typedef CacheState<A> State;
typedef U Unsigned;
CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
const CompactFstOptions &opts = CompactFstOptions())
: ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
CompactFst(const Fst<A> &fst, C *compactor,
const CompactFstOptions &opts = CompactFstOptions())
: ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
// The following 2 constructors take as input two iterators delimiting
// a set of (already) compacted transitions, starting with the
// transitions out of the initial state. The format of the input
// differs for fixed out-degree and variable out-degree compactors.
//
// - For fixed out-degree compactors, the final weight (encoded as a
// compacted transition) needs to be given only for final
// states. All strings (compactor of size 1) will be assume to be
// terminated by a final state even when the final state is not
// implicitely given.
//
// - For variable out-degree compactors, the final weight (encoded
// as a compacted transition) needs to be given for all states and
// must appeared first in the list (for state s, final weight of s,
// followed by outgoing transitons in s).
//
// These 2 constructors allows the direct construction of a CompactFst
// without first creating a more memory hungry 'regular' FST. This
// is useful when memory usage is severely constrained.
template <class Iterator>
explicit CompactFst(const Iterator &begin, const Iterator &end,
const C &compactor = C(),
const CompactFstOptions &opts = CompactFstOptions())
: ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
template <class Iterator>
CompactFst(const Iterator &begin, const Iterator &end,
C *compactor, const CompactFstOptions &opts = CompactFstOptions())
: ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
// See Fst<>::Copy() for doc.
CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
: ImplToExpandedFst<Impl>(fst, safe) {}
// Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
return new CompactFst<A, C, U>(*this, safe);
}
// Read a CompactFst from an input stream; return NULL on error
static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
Impl* impl = Impl::Read(strm, opts);
return impl ? new CompactFst<A, C, U>(impl) : 0;
}
// Read a CompactFst from a file; return NULL on error
// Empty filename reads from standard input
static CompactFst<A, C, U> *Read(const string &filename) {
Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
return impl ? new CompactFst<A, C, U>(impl) : 0;
}
virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
return GetImpl()->Write(strm, opts);
}
virtual bool Write(const string &filename) const {
return Fst<A>::WriteFile(filename);
}
template <class F>
static bool WriteFst(const F &fst, const C &compactor, ostream &strm,
const FstWriteOptions &opts);
virtual void InitStateIterator(StateIteratorData<A> *data) const {
GetImpl()->InitStateIterator(data);
}
virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
GetImpl()->InitArcIterator(s, data);
}
virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
}
template <class Iterator>
void SetCompactElements(const Iterator &b, const Iterator &e) {
GetImpl()->SetCompactElements(b, e);
}
private:
CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
// Makes visible to friends.
Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
void SetImpl(Impl *impl, bool own_impl = false) {
ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
}
// Use overloading to extract the type of the argument.
static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) {
return compact_fst.GetImpl();
}
// This does not give privileged treatment to subclasses of CompactFst.
template<typename NonCompactFst>
static Impl* GetImplIfCompactFst(const NonCompactFst& fst) {
return NULL;
}
void operator=(const CompactFst<A, C, U> &fst); // disallow
};
// Writes Fst in Compact format, potentially with a pass over the machine
// before writing to compute the number of states and arcs.
//
template <class A, class C, class U>
template <class F>
bool CompactFst<A, C, U>::WriteFst(const F &fst,
const C &compactor,
ostream &strm,
const FstWriteOptions &opts) {
typedef U Unsigned;
typedef typename C::Element CompactElement;
typedef typename A::Weight Weight;
int file_version = opts.align ?
CompactFstImpl<A, C, U>::kAlignedFileVersion :
CompactFstImpl<A, C, U>::kFileVersion;
size_t num_arcs = -1, num_states = -1, num_compacts = -1;
C first_pass_compactor = compactor;
if (Impl* impl = GetImplIfCompactFst(fst)) {
num_arcs = impl->Data()->NumArcs();
num_states = impl->Data()->NumStates();
num_compacts = impl->Data()->NumCompacts();
first_pass_compactor = *impl->GetCompactor();
} else {
// A first pass is needed to compute the state of the compactor, which
// is saved ahead of the rest of the data structures. This unfortunately
// means forcing a complete double compaction when writing in this format.
// TODO(allauzen): eliminate mutable state from compactors.
num_arcs = 0;
num_states = 0;
for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
const StateId s = siter.Value();
++num_states;
if (fst.Final(s) != Weight::Zero()) {
first_pass_compactor.Compact(
s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
}
for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
++num_arcs;
first_pass_compactor.Compact(s, aiter.Value());
}
}
}
FstHeader hdr;
hdr.SetStart(fst.Start());
hdr.SetNumStates(num_states);
hdr.SetNumArcs(num_arcs);
string type = "compact";
if (sizeof(U) != sizeof(uint32)) {
string size;
Int64ToStr(8 * sizeof(U), &size);
type += size;
}
type += "_";
type += C::Type();
uint64 copy_properties = fst.Properties(kCopyProperties, true);
if ((copy_properties & kError) || !compactor.Compatible(fst)) {
LOG(ERROR) << "fst incompatible with compactor";
return false;
}
uint64 properties = copy_properties |
CompactFstImpl<A, C, U>::kStaticProperties;
FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
&hdr);
first_pass_compactor.Write(strm);
if (first_pass_compactor.Size() == -1) {
if (opts.align && !AlignOutput(strm)) {
LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
return false;
}
Unsigned compacts = 0;
for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
const StateId s = siter.Value();
strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
if (fst.Final(s) != Weight::Zero()) {
++compacts;
}
compacts += fst.NumArcs(s);
}
strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
}
if (opts.align && !AlignOutput(strm)) {
LOG(ERROR) << "Could not align file during write after writing states";
}
C second_pass_compactor = compactor;
CompactElement element;
for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
const StateId s = siter.Value();
if (fst.Final(s) != Weight::Zero()) {
element = second_pass_compactor.Compact(
s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
}
for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
element = second_pass_compactor.Compact(s, aiter.Value());
strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
}
}
strm.flush();
if (!strm) {
LOG(ERROR) << "CompactFst write failed: " << opts.source;
return false;
}
return true;
}
// Specialization for CompactFst; see generic version in fst.h
// for sample usage (but use the CompactFst type!). This version
// should inline.
template <class A, class C, class U>
class StateIterator< CompactFst<A, C, U> > {
public:
typedef typename A::StateId StateId;
explicit StateIterator(const CompactFst<A, C, U> &fst)
: nstates_(fst.GetImpl()->NumStates()), s_(0) {}
bool Done() const { return s_ >= nstates_; }
StateId Value() const { return s_; }
void Next() { ++s_; }
void Reset() { s_ = 0; }
private:
StateId nstates_;
StateId s_;
DISALLOW_COPY_AND_ASSIGN(StateIterator);
};
// Specialization for CompactFst.
// Never caches, always iterates over the underlying compact elements.
template <class A, class C, class U>
class ArcIterator< CompactFst<A, C, U> > {
public:
typedef typename A::StateId StateId;
typedef typename C::Element CompactElement;
ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
: compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0),
pos_(0), flags_(kArcValueFlags) {
const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data();
size_t offset;
if (compactor_->Size() == -1) { // Variable out-degree compactor
offset = data->States(s);
num_arcs_ = data->States(s + 1) - offset;
} else { // Fixed out-degree compactor
offset = s * compactor_->Size();
num_arcs_ = compactor_->Size();
}
if (num_arcs_ > 0) {
compacts_ = &(data->Compacts(offset));
arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue);
if (arc_.ilabel == kNoStateId) {
++compacts_;
--num_arcs_;
}
}
}
~ArcIterator() {}
bool Done() const { return pos_ >= num_arcs_; }
const A& Value() const {
arc_ = compactor_->Expand(state_, compacts_[pos_], flags_);
return arc_;
}
void Next() { ++pos_; }
size_t Position() const { return pos_; }
void Reset() { pos_ = 0; }
void Seek(size_t pos) { pos_ = pos; }
uint32 Flags() const { return flags_; }
void SetFlags(uint32 f, uint32 m) {
flags_ &= ~m;
flags_ |= (f & kArcValueFlags);
}
private:
C *compactor_;
StateId state_;
const CompactElement *compacts_;
size_t pos_;
size_t num_arcs_;
mutable A arc_;
uint32 flags_;
DISALLOW_COPY_AND_ASSIGN(ArcIterator);
};
// // Specialization for CompactFst.
// // This is an optionally caching arc iterator.
// // TODO(allauzen): implements the kArcValueFlags, the current
// // implementation only implements the kArcNoCache flag.
// template <class A, class C, class U>
// class ArcIterator< CompactFst<A, C, U> > {
// public:
// typedef typename A::StateId StateId;
// ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
// : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0),
// flags_(kArcValueFlags) {
// cache_data_.ref_count = 0;
// if (fst_.GetImpl()->HasArcs(state_)) {
// fst_.GetImpl()->InitArcIterator(s, &cache_data_);
// num_arcs_ = cache_data_.narcs;
// return;
// }
// const C *compactor = fst_.GetImpl()->GetCompactor();
// const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data();
// if (compactor->Size() == -1) { // Variable out-degree compactor
// offset_ = data->States(s);
// num_arcs_ = data->States(s + 1) - offset_;
// } else { // Fixed out-degree compactor
// offset_ = s * compactor->Size();
// num_arcs_ = compactor->Size();
// }
// if (num_arcs_ > 0) {
// const A &arc = fst_.GetImpl()->ComputeArc(s, offset_);
// if (arc.ilabel == kNoStateId) {
// ++offset_;
// --num_arcs_;
// }
// }
// }
// ~ArcIterator() {
// if (cache_data_.ref_count)
// --(*cache_data_.ref_count);
// }
// bool Done() const { return pos_ >= num_arcs_; }
// const A& Value() const {
// if (cache_data_.ref_count == 0) {
// if (flags_ & kArcNoCache) {
// arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_);
// return arc_;
// } else {
// fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
// }
// }
// return cache_data_.arcs[pos_];
// }
// void Next() { ++pos_; }
// size_t Position() const { return pos_; }
// void Reset() { pos_ = 0; }
// void Seek(size_t pos) { pos_ = pos; }
// uint32 Flags() const { return flags_; }
// void SetFlags(uint32 f, uint32 m) {
// flags_ &= ~m;
// flags_ |= f;
// if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0)
// fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
// }
// private:
// mutable const CompactFst<A, C, U> &fst_;
// StateId state_;
// size_t pos_;
// size_t num_arcs_;
// size_t offset_;
// uint32 flags_;
// mutable A arc_;
// mutable ArcIteratorData<A> cache_data_;
// DISALLOW_COPY_AND_ASSIGN(ArcIterator);
// };
//
// Utility Compactors
//
// Compactor for unweighted string FSTs
template <class A>
class StringCompactor {
public:
typedef A Arc;
typedef typename A::Label Element;
typedef typename A::Label Label;
typedef typename A::StateId StateId;
typedef typename A::Weight Weight;
Element Compact(StateId s, const A &arc) const { return arc.ilabel; }
Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
}
ssize_t Size() const { return 1; }
uint64 Properties() const {
return kString | kAcceptor | kUnweighted;
}
bool Compatible(const Fst<A> &fst) const {
uint64 props = Properties();
return fst.Properties(props, true) == props;
}
static const string &Type() {
static const string type = "string";
return type;
}
bool Write(ostream &strm) const { return true; }
static StringCompactor *Read(istream &strm) {
return new StringCompactor;
}
};
// Compactor for weighted string FSTs
template <class A>
class WeightedStringCompactor {
public:
typedef A Arc;
typedef typename A::Label Label;
typedef typename A::StateId StateId;
typedef typename A::Weight Weight;
typedef pair<Label, Weight> Element;
Element Compact(StateId s, const A &arc) const {
return make_pair(arc.ilabel, arc.weight);
}
Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
return Arc(p.first, p.first, p.second,
p.first != kNoLabel ? s + 1 : kNoStateId);
}
ssize_t Size() const { return 1;}
uint64 Properties() const {
return kString | kAcceptor;
}
bool Compatible(const Fst<A> &fst) const {
uint64 props = Properties();
return fst.Properties(props, true) == props;
}
static const string &Type() {
static const string type = "weighted_string";
return type;
}
bool Write(ostream &strm) const { return true; }
static WeightedStringCompactor *Read(istream &strm) {
return new WeightedStringCompactor;
}
};
// Compactor for unweighted acceptor FSTs
template <class A>
class UnweightedAcceptorCompactor {
public:
typedef A Arc;
typedef typename A::Label Label;
typedef typename A::StateId StateId;
typedef typename A::Weight Weight;
typedef pair<Label, StateId> Element;
Element Compact(StateId s, const A &arc) const {
return make_pair(arc.ilabel, arc.nextstate);
}
Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
return Arc(p.first, p.first, Weight::One(), p.second);
}
ssize_t Size() const { return -1;}
uint64 Properties() const {
return kAcceptor | kUnweighted;
}
bool Compatible(const Fst<A> &fst) const {
uint64 props = Properties();
return fst.Properties(props, true) == props;
}
static const string &Type() {
static const string type = "unweighted_acceptor";
return type;
}
bool Write(ostream &strm) const { return true; }
static UnweightedAcceptorCompactor *Read(istream &istrm) {
return new UnweightedAcceptorCompactor;
}
};
// Compactor for weighted acceptor FSTs
template <class A>
class AcceptorCompactor {
public:
typedef A Arc;
typedef typename A::Label Label;
typedef typename A::StateId StateId;
typedef typename A::Weight Weight;
typedef pair< pair<Label, Weight>, StateId > Element;
Element Compact(StateId s, const A &arc) const {
return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate);
}
Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
return Arc(p.first.first, p.first.first, p.first.second, p.second);
}
ssize_t Size() const { return -1;}
uint64 Properties() const {
return kAcceptor;
}
bool Compatible(const Fst<A> &fst) const {
uint64 props = Properties();
return fst.Properties(props, true) == props;
}
static const string &Type() {
static const string type = "acceptor";
return type;
}
bool Write(ostream &strm) const { return true; }
static AcceptorCompactor *Read(istream &strm) {
return new AcceptorCompactor;
}
};
// Compactor for unweighted FSTs
template <class A>
class UnweightedCompactor {
public:
typedef A Arc;
typedef typename A::Label Label;
typedef typename A::StateId StateId;
typedef typename A::Weight Weight;
typedef pair< pair<Label, Label>, StateId > Element;
Element Compact(StateId s, const A &arc) const {
return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate);
}
Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
return Arc(p.first.first, p.first.second, Weight::One(), p.second);
}
ssize_t Size() const { return -1; }
uint64 Properties() const {
return kUnweighted;
}
bool Compatible(const Fst<A> &fst) const {
uint64 props = Properties();
return fst.Properties(props, true) == props;
}
static const string &Type() {
static const string type = "unweighted";
return type;
}
bool Write(ostream &strm) const { return true; }
static UnweightedCompactor *Read(istream &strm) {
return new UnweightedCompactor;
}
};
// Uselful aliases when using StdArc
typedef CompactFst< StdArc, StringCompactor<StdArc> >
StdCompactStringFst;
typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
StdCompactWeightedStringFst;
typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
StdCompactAcceptorFst;
typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
StdCompactUnweightedFst;
typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
StdCompactUnweightedAcceptorFst;
} // namespace fst
#endif // FST_LIB_COMPACT_FST_H__