// pdt.h // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Copyright 2005-2010 Google, Inc. // Author: riley@google.com (Michael Riley) // // \file // Common classes for PDT expansion/traversal. #ifndef FST_EXTENSIONS_PDT_PDT_H__ #define FST_EXTENSIONS_PDT_PDT_H__ #include <unordered_map> using std::tr1::unordered_map; using std::tr1::unordered_multimap; #include <map> #include <set> #include <fst/state-table.h> #include <fst/fst.h> namespace fst { // Provides bijection between parenthesis stacks and signed integral // stack IDs. Each stack ID is unique to each distinct stack. The // open-close parenthesis label pairs are passed in 'parens'. template <typename K, typename L> class PdtStack { public: typedef K StackId; typedef L Label; // The stacks are stored in a tree. The nodes are stored in vector // 'nodes_'. Each node represents the top of some stack and is // ID'ed by its position in the vector. Its parent node represents // the stack with the top 'popped' and its children are stored in // 'child_map_' accessed by stack_id and label. The paren_id is // the position in 'parens' of the parenthesis for that node. struct StackNode { StackId parent_id; size_t paren_id; StackNode(StackId p, size_t i) : parent_id(p), paren_id(i) {} }; PdtStack(const vector<pair<Label, Label> > &parens) : parens_(parens), min_paren_(kNoLabel), max_paren_(kNoLabel) { for (size_t i = 0; i < parens.size(); ++i) { const pair<Label, Label> &p = parens[i]; paren_map_[p.first] = i; paren_map_[p.second] = i; if (min_paren_ == kNoLabel || p.first < min_paren_) min_paren_ = p.first; if (p.second < min_paren_) min_paren_ = p.second; if (max_paren_ == kNoLabel || p.first > max_paren_) max_paren_ = p.first; if (p.second > max_paren_) max_paren_ = p.second; } nodes_.push_back(StackNode(-1, -1)); // Tree root. } // Returns stack ID given the current stack ID (0 if empty) and // label read. 'Pushes' onto a stack if the label is an open // parenthesis, returning the new stack ID. 'Pops' the stack if the // label is a close parenthesis that matches the top of the stack, // returning the parent stack ID. Returns -1 if label is an // unmatched close parenthesis. Otherwise, returns the current stack // ID. StackId Find(StackId stack_id, Label label) { if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_) return stack_id; // Non-paren. typename unordered_map<Label, size_t>::const_iterator pit = paren_map_.find(label); if (pit == paren_map_.end()) // Non-paren. return stack_id; ssize_t paren_id = pit->second; if (label == parens_[paren_id].first) { // Open paren. StackId &child_id = child_map_[make_pair(stack_id, label)]; if (child_id == 0) { // Child not found, push label. child_id = nodes_.size(); nodes_.push_back(StackNode(stack_id, paren_id)); } return child_id; } const StackNode &node = nodes_[stack_id]; if (paren_id == node.paren_id) // Matching close paren. return node.parent_id; return -1; // Non-matching close paren. } // Returns the stack ID obtained by "popping" the label at the top // of the current stack ID. StackId Pop(StackId stack_id) const { return nodes_[stack_id].parent_id; } // Returns the paren ID at the top of the stack for 'stack_id' ssize_t Top(StackId stack_id) const { return nodes_[stack_id].paren_id; } ssize_t ParenId(Label label) const { typename unordered_map<Label, size_t>::const_iterator pit = paren_map_.find(label); if (pit == paren_map_.end()) // Non-paren. return -1; return pit->second; } private: struct ChildHash { size_t operator()(const pair<StackId, Label> &p) const { return p.first + p.second * kPrime; } }; static const size_t kPrime; vector<pair<Label, Label> > parens_; vector<StackNode> nodes_; unordered_map<Label, size_t> paren_map_; unordered_map<pair<StackId, Label>, StackId, ChildHash> child_map_; // Child of stack node wrt label Label min_paren_; // For faster paren. check Label max_paren_; // For faster paren. check }; template <typename T, typename L> const size_t PdtStack<T, L>::kPrime = 7853; // State tuple for PDT expansion template <typename S, typename K> struct PdtStateTuple { typedef S StateId; typedef K StackId; StateId state_id; StackId stack_id; PdtStateTuple() : state_id(kNoStateId), stack_id(-1) {} PdtStateTuple(StateId fs, StackId ss) : state_id(fs), stack_id(ss) {} }; // Equality of PDT state tuples. template <typename S, typename K> inline bool operator==(const PdtStateTuple<S, K>& x, const PdtStateTuple<S, K>& y) { if (&x == &y) return true; return x.state_id == y.state_id && x.stack_id == y.stack_id; } // Hash function object for PDT state tuples template <class T> class PdtStateHash { public: size_t operator()(const T &tuple) const { return tuple.state_id + tuple.stack_id * kPrime; } private: static const size_t kPrime; }; template <typename T> const size_t PdtStateHash<T>::kPrime = 7853; // Tuple to PDT state bijection. template <class S, class K> class PdtStateTable : public CompactHashStateTable<PdtStateTuple<S, K>, PdtStateHash<PdtStateTuple<S, K> > > { public: typedef S StateId; typedef K StackId; PdtStateTable() {} PdtStateTable(const PdtStateTable<S, K> &table) {} private: void operator=(const PdtStateTable<S, K> &table); // disallow }; } // namespace fst #endif // FST_EXTENSIONS_PDT_PDT_H__