C++程序  |  320行  |  8.52 KB

// connect.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// Classes and functions to remove unsuccessful paths from an Fst.

#ifndef FST_LIB_CONNECT_H__
#define FST_LIB_CONNECT_H__

#include <vector>
using std::vector;

#include <fst/dfs-visit.h>
#include <fst/union-find.h>
#include <fst/mutable-fst.h>


namespace fst {

// Finds and returns connected components. Use with Visit().
template <class A>
class CcVisitor {
 public:
  typedef A Arc;
  typedef typename Arc::Weight Weight;
  typedef typename A::StateId StateId;

  // cc[i]: connected component number for state i.
  CcVisitor(vector<StateId> *cc)
      : comps_(new UnionFind<StateId>(0, kNoStateId)),
        cc_(cc),
        nstates_(0) { }

  // comps: connected components equiv classes.
  CcVisitor(UnionFind<StateId> *comps)
      : comps_(comps),
        cc_(0),
        nstates_(0) { }

  ~CcVisitor() {
    if (cc_)  // own comps_?
      delete comps_;
  }

  void InitVisit(const Fst<A> &fst) { }

  bool InitState(StateId s, StateId root) {
    ++nstates_;
    if (comps_->FindSet(s) == kNoStateId)
      comps_->MakeSet(s);
    return true;
  }

  bool WhiteArc(StateId s, const A &arc) {
    comps_->MakeSet(arc.nextstate);
    comps_->Union(s, arc.nextstate);
    return true;
  }

  bool GreyArc(StateId s, const A &arc) {
    comps_->Union(s, arc.nextstate);
    return true;
  }

  bool BlackArc(StateId s, const A &arc) {
    comps_->Union(s, arc.nextstate);
    return true;
  }

  void FinishState(StateId s) { }

  void FinishVisit() {
    if (cc_)
      GetCcVector(cc_);
  }

  // cc[i]: connected component number for state i.
  // Returns number of components.
  int GetCcVector(vector<StateId> *cc) {
    cc->clear();
    cc->resize(nstates_, kNoStateId);
    StateId ncomp = 0;
    for (StateId i = 0; i < nstates_; ++i) {
      StateId rep = comps_->FindSet(i);
      StateId &comp = (*cc)[rep];
      if (comp == kNoStateId) {
        comp = ncomp;
        ++ncomp;
      }
      (*cc)[i] = comp;
    }
    return ncomp;
  }

 private:
  UnionFind<StateId> *comps_;   // Components
  vector<StateId> *cc_;         // State's cc number
  StateId nstates_;             // State count
};


// Finds and returns strongly-connected components, accessible and
// coaccessible states and related properties. Uses Tarjan's single
// DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
// Algorithms", 189pp). Use with DfsVisit();
template <class A>
class SccVisitor {
 public:
  typedef A Arc;
  typedef typename A::Weight Weight;
  typedef typename A::StateId StateId;

  // scc[i]: strongly-connected component number for state i.
  //   SCC numbers will be in topological order for acyclic input.
  // access[i]: accessibility of state i.
  // coaccess[i]: coaccessibility of state i.
  // Any of above can be NULL.
  // props: related property bits (cyclicity, initial cyclicity,
  //   accessibility, coaccessibility) set/cleared (o.w. unchanged).
  SccVisitor(vector<StateId> *scc, vector<bool> *access,
             vector<bool> *coaccess, uint64 *props)
      : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {}
  SccVisitor(uint64 *props)
      : scc_(0), access_(0), coaccess_(0), props_(props) {}

  void InitVisit(const Fst<A> &fst);

  bool InitState(StateId s, StateId root);

  bool TreeArc(StateId s, const A &arc) { return true; }

  bool BackArc(StateId s, const A &arc) {
    StateId t = arc.nextstate;
    if ((*dfnumber_)[t] < (*lowlink_)[s])
      (*lowlink_)[s] = (*dfnumber_)[t];
    if ((*coaccess_)[t])
      (*coaccess_)[s] = true;
    *props_ |= kCyclic;
    *props_ &= ~kAcyclic;
    if (arc.nextstate == start_) {
      *props_ |= kInitialCyclic;
      *props_ &= ~kInitialAcyclic;
    }
    return true;
  }

  bool ForwardOrCrossArc(StateId s, const A &arc) {
    StateId t = arc.nextstate;
    if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ &&
        (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s])
      (*lowlink_)[s] = (*dfnumber_)[t];
    if ((*coaccess_)[t])
      (*coaccess_)[s] = true;
    return true;
  }

  void FinishState(StateId s, StateId p, const A *);

  void FinishVisit() {
    // Numbers SCC's in topological order when acyclic.
    if (scc_)
      for (StateId i = 0; i < scc_->size(); ++i)
        (*scc_)[i] = nscc_ - 1 - (*scc_)[i];
    if (coaccess_internal_)
      delete coaccess_;
    delete dfnumber_;
    delete lowlink_;
    delete onstack_;
    delete scc_stack_;
  }

 private:
  vector<StateId> *scc_;        // State's scc number
  vector<bool> *access_;        // State's accessibility
  vector<bool> *coaccess_;      // State's coaccessibility
  uint64 *props_;
  const Fst<A> *fst_;
  StateId start_;
  StateId nstates_;             // State count
  StateId nscc_;                // SCC count
  bool coaccess_internal_;
  vector<StateId> *dfnumber_;   // state discovery times
  vector<StateId> *lowlink_;    // lowlink[s] == dfnumber[s] => SCC root
  vector<bool> *onstack_;       // is a state on the SCC stack
  vector<StateId> *scc_stack_;  // SCC stack (w/ random access)
};

template <class A> inline
void SccVisitor<A>::InitVisit(const Fst<A> &fst) {
  if (scc_)
    scc_->clear();
  if (access_)
    access_->clear();
  if (coaccess_) {
    coaccess_->clear();
    coaccess_internal_ = false;
  } else {
    coaccess_ = new vector<bool>;
    coaccess_internal_ = true;
  }
  *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
  *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible);
  fst_ = &fst;
  start_ = fst.Start();
  nstates_ = 0;
  nscc_ = 0;
  dfnumber_ = new vector<StateId>;
  lowlink_ = new vector<StateId>;
  onstack_ = new vector<bool>;
  scc_stack_ = new vector<StateId>;
}

template <class A> inline
bool SccVisitor<A>::InitState(StateId s, StateId root) {
  scc_stack_->push_back(s);
  while (dfnumber_->size() <= s) {
    if (scc_)
      scc_->push_back(-1);
    if (access_)
      access_->push_back(false);
    coaccess_->push_back(false);
    dfnumber_->push_back(-1);
    lowlink_->push_back(-1);
    onstack_->push_back(false);
  }
  (*dfnumber_)[s] = nstates_;
  (*lowlink_)[s] = nstates_;
  (*onstack_)[s] = true;
  if (root == start_) {
    if (access_)
      (*access_)[s] = true;
  } else {
    if (access_)
      (*access_)[s] = false;
    *props_ |= kNotAccessible;
    *props_ &= ~kAccessible;
  }
  ++nstates_;
  return true;
}

template <class A> inline
void SccVisitor<A>::FinishState(StateId s, StateId p, const A *) {
  if (fst_->Final(s) != Weight::Zero())
    (*coaccess_)[s] = true;
  if ((*dfnumber_)[s] == (*lowlink_)[s]) {  // root of new SCC
    bool scc_coaccess = false;
    size_t i = scc_stack_->size();
    StateId t;
    do {
      t = (*scc_stack_)[--i];
      if ((*coaccess_)[t])
        scc_coaccess = true;
    } while (s != t);
    do {
      t = scc_stack_->back();
      if (scc_)
        (*scc_)[t] = nscc_;
      if (scc_coaccess)
        (*coaccess_)[t] = true;
      (*onstack_)[t] = false;
      scc_stack_->pop_back();
    } while (s != t);
    if (!scc_coaccess) {
      *props_ |= kNotCoAccessible;
      *props_ &= ~kCoAccessible;
    }
    ++nscc_;
  }
  if (p != kNoStateId) {
    if ((*coaccess_)[s])
      (*coaccess_)[p] = true;
    if ((*lowlink_)[s] < (*lowlink_)[p])
      (*lowlink_)[p] = (*lowlink_)[s];
  }
}


// Trims an FST, removing states and arcs that are not on successful
// paths. This version modifies its input.
//
// Complexity:
// - Time:  O(V + E)
// - Space: O(V + E)
// where V = # of states and E = # of arcs.
template<class Arc>
void Connect(MutableFst<Arc> *fst) {
  typedef typename Arc::StateId StateId;

  vector<bool> access;
  vector<bool> coaccess;
  uint64 props = 0;
  SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props);
  DfsVisit(*fst, &scc_visitor);
  vector<StateId> dstates;
  for (StateId s = 0; s < access.size(); ++s)
    if (!access[s] || !coaccess[s])
      dstates.push_back(s);
  fst->DeleteStates(dstates);
  fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible);
}

}  // namespace fst

#endif  // FST_LIB_CONNECT_H__