// util.h // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Copyright 2005-2010 Google, Inc. // Author: riley@google.com (Michael Riley) // // \file // FST utility inline definitions. #ifndef FST_LIB_UTIL_H__ #define FST_LIB_UTIL_H__ #include <tr1/unordered_map> using std::tr1::unordered_map; using std::tr1::unordered_multimap; #include <tr1/unordered_set> using std::tr1::unordered_set; using std::tr1::unordered_multiset; #include <list> #include <map> #include <set> #include <sstream> #include <string> #include <vector> using std::vector; #include <fst/compat.h> #include <fst/types.h> #include <iostream> #include <fstream> #include <sstream> // // UTILITY FOR ERROR HANDLING // DECLARE_bool(fst_error_fatal); #define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) namespace fst { // // UTILITIES FOR TYPE I/O // // Read some types from an input stream. // Generic case. template <typename T> inline istream &ReadType(istream &strm, T *t) { return t->Read(strm); } // Fixed size, contiguous memory read. #define READ_POD_TYPE(T) \ inline istream &ReadType(istream &strm, T *t) { \ return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \ } READ_POD_TYPE(bool); READ_POD_TYPE(char); READ_POD_TYPE(signed char); READ_POD_TYPE(unsigned char); READ_POD_TYPE(short); READ_POD_TYPE(unsigned short); READ_POD_TYPE(int); READ_POD_TYPE(unsigned int); READ_POD_TYPE(long); READ_POD_TYPE(unsigned long); READ_POD_TYPE(long long); READ_POD_TYPE(unsigned long long); READ_POD_TYPE(float); READ_POD_TYPE(double); // String case. inline istream &ReadType(istream &strm, string *s) { s->clear(); int32 ns = 0; strm.read(reinterpret_cast<char *>(&ns), sizeof(ns)); for (int i = 0; i < ns; ++i) { char c; strm.read(&c, 1); *s += c; } return strm; } // Pair case. template <typename S, typename T> inline istream &ReadType(istream &strm, pair<S, T> *p) { ReadType(strm, &p->first); ReadType(strm, &p->second); return strm; } template <typename S, typename T> inline istream &ReadType(istream &strm, pair<const S, T> *p) { ReadType(strm, const_cast<S *>(&p->first)); ReadType(strm, &p->second); return strm; } // General case - no-op. template <typename C> void StlReserve(C *c, int64 n) {} // Specialization for vectors. template <typename S, typename T> void StlReserve(vector<S, T> *c, int64 n) { c->reserve(n); } // STL sequence container. #define READ_STL_SEQ_TYPE(C) \ template <typename S, typename T> \ inline istream &ReadType(istream &strm, C<S, T> *c) { \ c->clear(); \ int64 n = 0; \ strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ StlReserve(c, n); \ for (ssize_t i = 0; i < n; ++i) { \ typename C<S, T>::value_type value; \ ReadType(strm, &value); \ c->insert(c->end(), value); \ } \ return strm; \ } READ_STL_SEQ_TYPE(vector); READ_STL_SEQ_TYPE(list); // STL associative container. #define READ_STL_ASSOC_TYPE(C) \ template <typename S, typename T, typename U> \ inline istream &ReadType(istream &strm, C<S, T, U> *c) { \ c->clear(); \ int64 n = 0; \ strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ for (ssize_t i = 0; i < n; ++i) { \ typename C<S, T, U>::value_type value; \ ReadType(strm, &value); \ c->insert(value); \ } \ return strm; \ } READ_STL_ASSOC_TYPE(set); READ_STL_ASSOC_TYPE(unordered_set); READ_STL_ASSOC_TYPE(map); READ_STL_ASSOC_TYPE(unordered_map); // Write some types to an output stream. // Generic case. template <typename T> inline ostream &WriteType(ostream &strm, const T t) { t.Write(strm); return strm; } // Fixed size, contiguous memory write. #define WRITE_POD_TYPE(T) \ inline ostream &WriteType(ostream &strm, const T t) { \ return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \ } WRITE_POD_TYPE(bool); WRITE_POD_TYPE(char); WRITE_POD_TYPE(signed char); WRITE_POD_TYPE(unsigned char); WRITE_POD_TYPE(short); WRITE_POD_TYPE(unsigned short); WRITE_POD_TYPE(int); WRITE_POD_TYPE(unsigned int); WRITE_POD_TYPE(long); WRITE_POD_TYPE(unsigned long); WRITE_POD_TYPE(long long); WRITE_POD_TYPE(unsigned long long); WRITE_POD_TYPE(float); WRITE_POD_TYPE(double); // String case. inline ostream &WriteType(ostream &strm, const string &s) { int32 ns = s.size(); strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns)); return strm.write(s.data(), ns); } // Pair case. template <typename S, typename T> inline ostream &WriteType(ostream &strm, const pair<S, T> &p) { WriteType(strm, p.first); WriteType(strm, p.second); return strm; } // STL sequence container. #define WRITE_STL_SEQ_TYPE(C) \ template <typename S, typename T> \ inline ostream &WriteType(ostream &strm, const C<S, T> &c) { \ int64 n = c.size(); \ strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ for (typename C<S, T>::const_iterator it = c.begin(); \ it != c.end(); ++it) \ WriteType(strm, *it); \ return strm; \ } WRITE_STL_SEQ_TYPE(vector); WRITE_STL_SEQ_TYPE(list); // STL associative container. #define WRITE_STL_ASSOC_TYPE(C) \ template <typename S, typename T, typename U> \ inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) { \ int64 n = c.size(); \ strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ for (typename C<S, T, U>::const_iterator it = c.begin(); \ it != c.end(); ++it) \ WriteType(strm, *it); \ return strm; \ } WRITE_STL_ASSOC_TYPE(set); WRITE_STL_ASSOC_TYPE(unordered_set); WRITE_STL_ASSOC_TYPE(map); WRITE_STL_ASSOC_TYPE(unordered_map); // Utilities for converting between int64 or Weight and string. int64 StrToInt64(const string &s, const string &src, size_t nline, bool allow_negative, bool *error = 0); template <typename Weight> Weight StrToWeight(const string &s, const string &src, size_t nline) { Weight w; istringstream strm(s); strm >> w; if (!strm) { FSTERROR() << "StrToWeight: Bad weight = \"" << s << "\", source = " << src << ", line = " << nline; return Weight::NoWeight(); } return w; } void Int64ToStr(int64 n, string *s); template <typename Weight> void WeightToStr(Weight w, string *s) { ostringstream strm; strm.precision(9); strm << w; s->append(strm.str().data(), strm.str().size()); } // Utilities for reading/writing integer pairs (typically labels) // Returns true on success template <typename I> bool ReadIntPairs(const string& filename, vector<pair<I, I> >* pairs, bool allow_negative = false) { ifstream strm(filename.c_str()); if (!strm) { LOG(ERROR) << "ReadIntPairs: Can't open file: " << filename; return false; } const int kLineLen = 8096; char line[kLineLen]; size_t nline = 0; pairs->clear(); while (strm.getline(line, kLineLen)) { ++nline; vector<char *> col; SplitToVector(line, "\n\t ", &col, true); // empty line or comment? if (col.size() == 0 || col[0][0] == '\0' || col[0][0] == '#') continue; if (col.size() != 2) { LOG(ERROR) << "ReadIntPairs: Bad number of columns, " << "file = " << filename << ", line = " << nline; return false; } bool err; I i1 = StrToInt64(col[0], filename, nline, allow_negative, &err); if (err) return false; I i2 = StrToInt64(col[1], filename, nline, allow_negative, &err); if (err) return false; pairs->push_back(make_pair(i1, i2)); } return true; } // Returns true on success template <typename I> bool WriteIntPairs(const string& filename, const vector<pair<I, I> >& pairs) { ostream *strm = &cout; if (!filename.empty()) { strm = new ofstream(filename.c_str()); if (!*strm) { LOG(ERROR) << "WriteIntPairs: Can't open file: " << filename; return false; } } for (ssize_t n = 0; n < pairs.size(); ++n) *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; if (!*strm) { LOG(ERROR) << "WriteIntPairs: Write failed: " << (filename.empty() ? "standard output" : filename); return false; } if (strm != &cout) delete strm; return true; } // Utilities for reading/writing label pairs template <typename Label> bool ReadLabelPairs(const string& filename, vector<pair<Label, Label> >* pairs, bool allow_negative = false) { return ReadIntPairs(filename, pairs, allow_negative); } template <typename Label> bool WriteLabelPairs(const string& filename, vector<pair<Label, Label> >& pairs) { return WriteIntPairs(filename, pairs); } // Utilities for converting a type name to a legal C symbol. void ConvertToLegalCSymbol(string *s); // // UTILITIES FOR STREAM I/O // bool AlignInput(istream &strm); bool AlignOutput(ostream &strm); // // UTILITIES FOR PROTOCOL BUFFER I/O // // An associative container for which testing membership is // faster than an STL set if members are restricted to an interval // that excludes most non-members. A 'Key' must have ==, !=, and < defined. // Element 'NoKey' should be a key that marks an uninitialized key and // is otherwise unused. 'Find()' returns an STL const_iterator to the match // found, otherwise it equals 'End()'. template <class Key, Key NoKey> class CompactSet { public: typedef typename set<Key>::const_iterator const_iterator; CompactSet() : min_key_(NoKey), max_key_(NoKey) { } CompactSet(const CompactSet<Key, NoKey> &compact_set) : set_(compact_set.set_), min_key_(compact_set.min_key_), max_key_(compact_set.max_key_) { } void Insert(Key key) { set_.insert(key); if (min_key_ == NoKey || key < min_key_) min_key_ = key; if (max_key_ == NoKey || max_key_ < key) max_key_ = key; } void Erase(Key key) { set_.erase(key); if (set_.empty()) { min_key_ = max_key_ = NoKey; } else if (key == min_key_) { ++min_key_; } else if (key == max_key_) { --max_key_; } } void Clear() { set_.clear(); min_key_ = max_key_ = NoKey; } const_iterator Find(Key key) const { if (min_key_ == NoKey || key < min_key_ || max_key_ < key) return set_.end(); else return set_.find(key); } bool Member(Key key) const { if (min_key_ == NoKey || key < min_key_ || max_key_ < key) { return false; // out of range } else if (min_key_ != NoKey && max_key_ + 1 == min_key_ + set_.size()) { return true; // dense range } else { return set_.find(key) != set_.end(); } } const_iterator Begin() const { return set_.begin(); } const_iterator End() const { return set_.end(); } // All stored keys are greater than or equal to this value. Key LowerBound() const { return min_key_; } // All stored keys are less than or equal to this value. Key UpperBound() const { return max_key_; } private: set<Key> set_; Key min_key_; Key max_key_; void operator=(const CompactSet<Key, NoKey> &); //disallow }; } // namespace fst #endif // FST_LIB_UTIL_H__