C++程序  |  561行  |  15.91 KB

// string-weight.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// String weight set and associated semiring operation definitions.

#ifndef FST_LIB_STRING_WEIGHT_H__
#define FST_LIB_STRING_WEIGHT_H__

#include <list>
#include <string>

#include <fst/product-weight.h>
#include <fst/weight.h>

namespace fst {

const int kStringInfinity = -1;      // Label for the infinite string
const int kStringBad = -2;           // Label for a non-string
const char kStringSeparator = '_';   // Label separator in strings

// Determines whether to use left or right string semiring.  Includes
// restricted versions that signal an error if proper prefixes
// (suffixes) would otherwise be returned by Plus, useful with various
// algorithms that require functional transducer input with the
// string semirings.
enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 ,
                  STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT };

#define REVERSE_STRING_TYPE(S)                                  \
   ((S) == STRING_LEFT ? STRING_RIGHT :                         \
    ((S) == STRING_RIGHT ? STRING_LEFT :                        \
     ((S) == STRING_LEFT_RESTRICT ? STRING_RIGHT_RESTRICT :     \
      STRING_LEFT_RESTRICT)))

template <typename L, StringType S = STRING_LEFT>
class StringWeight;

template <typename L, StringType S = STRING_LEFT>
class StringWeightIterator;

template <typename L, StringType S = STRING_LEFT>
class StringWeightReverseIterator;

template <typename L, StringType S>
bool operator==(const StringWeight<L, S> &,  const StringWeight<L, S> &);


// String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon)
template <typename L, StringType S>
class StringWeight {
 public:
  typedef L Label;
  typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight;

  friend class StringWeightIterator<L, S>;
  friend class StringWeightReverseIterator<L, S>;
  friend bool operator==<>(const StringWeight<L, S> &,
                           const StringWeight<L, S> &);

  StringWeight() { Init(); }

  template <typename Iter>
  StringWeight(const Iter &begin, const Iter &end) {
    Init();
    for (Iter iter = begin; iter != end; ++iter)
      PushBack(*iter);
  }

  explicit StringWeight(L l) { Init(); PushBack(l); }

  static const StringWeight<L, S> &Zero() {
    static const StringWeight<L, S> zero(kStringInfinity);
    return zero;
  }

  static const StringWeight<L, S> &One() {
    static const StringWeight<L, S> one;
    return one;
  }

  static const StringWeight<L, S> &NoWeight() {
    static const StringWeight<L, S> no_weight(kStringBad);
    return no_weight;
  }

  static const string &Type() {
    static const string type =
        S == STRING_LEFT ? "string" :
        (S == STRING_RIGHT ? "right_string" :
         (S == STRING_LEFT_RESTRICT ? "restricted_string" :
          "right_restricted_string"));
    return type;
  }

  bool Member() const;

  istream &Read(istream &strm);

  ostream &Write(ostream &strm) const;

  size_t Hash() const;

  StringWeight<L, S> Quantize(float delta = kDelta) const {
    return *this;
  }

  ReverseWeight Reverse() const;

  static uint64 Properties() {
    return (S == STRING_LEFT || S == STRING_LEFT_RESTRICT ?
            kLeftSemiring : kRightSemiring) | kIdempotent;
  }

  // NB: This needs to be uncommented only if default fails for this impl.
  // StringWeight<L, S> &operator=(const StringWeight<L, S> &w);

  // These operations combined with the StringWeightIterator and
  // StringWeightReverseIterator provide the access and mutation of
  // the string internal elements.

  // Common initializer among constructors.
  void Init() { first_ = 0; }

  // Clear existing StringWeight.
  void Clear() { first_ = 0; rest_.clear(); }

  size_t Size() const { return first_ ? rest_.size() + 1 : 0; }

  void PushFront(L l) {
    if (first_)
      rest_.push_front(first_);
    first_ = l;
  }

  void PushBack(L l) {
    if (!first_)
      first_ = l;
    else
      rest_.push_back(l);
  }

 private:
  L first_;         // first label in string (0 if empty)
  list<L> rest_;    // remaining labels in string
};


// Traverses string in forward direction.
template <typename L, StringType S>
class StringWeightIterator {
 public:
  explicit StringWeightIterator(const StringWeight<L, S>& w)
      : first_(w.first_), rest_(w.rest_), init_(true),
        iter_(rest_.begin()) {}

  bool Done() const {
    if (init_) return first_ == 0;
    else return iter_ == rest_.end();
  }

  const L& Value() const { return init_ ? first_ : *iter_; }

  void Next() {
    if (init_) init_ = false;
    else  ++iter_;
  }

  void Reset() {
    init_ = true;
    iter_ = rest_.begin();
  }

 private:
  const L &first_;
  const list<L> &rest_;
  bool init_;   // in the initialized state?
  typename list<L>::const_iterator iter_;

  DISALLOW_COPY_AND_ASSIGN(StringWeightIterator);
};


// Traverses string in backward direction.
template <typename L, StringType S>
class StringWeightReverseIterator {
 public:
  explicit StringWeightReverseIterator(const StringWeight<L, S>& w)
      : first_(w.first_), rest_(w.rest_), fin_(first_ == 0),
        iter_(rest_.rbegin()) {}

  bool Done() const { return fin_; }

  const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; }

  void Next() {
    if (iter_ == rest_.rend()) fin_ = true;
    else  ++iter_;
  }

  void Reset() {
    fin_ = false;
    iter_ = rest_.rbegin();
  }

 private:
  const L &first_;
  const list<L> &rest_;
  bool fin_;   // in the final state?
  typename list<L>::const_reverse_iterator iter_;

  DISALLOW_COPY_AND_ASSIGN(StringWeightReverseIterator);
};


// StringWeight member functions follow that require
// StringWeightIterator or StringWeightReverseIterator.

template <typename L, StringType S>
inline istream &StringWeight<L, S>::Read(istream &strm) {
  Clear();
  int32 size;
  ReadType(strm, &size);
  for (int i = 0; i < size; ++i) {
    L label;
    ReadType(strm, &label);
    PushBack(label);
  }
  return strm;
}

template <typename L, StringType S>
inline ostream &StringWeight<L, S>::Write(ostream &strm) const {
  int32 size =  Size();
  WriteType(strm, size);
  for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) {
    L label = iter.Value();
    WriteType(strm, label);
  }
  return strm;
}

template <typename L, StringType S>
inline bool StringWeight<L, S>::Member() const {
  if (Size() != 1)
    return true;
  StringWeightIterator<L, S> iter(*this);
  return iter.Value() != kStringBad;
}

template <typename L, StringType S>
inline typename StringWeight<L, S>::ReverseWeight
StringWeight<L, S>::Reverse() const {
  ReverseWeight rw;
  for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
    rw.PushFront(iter.Value());
  return rw;
}

template <typename L, StringType S>
inline size_t StringWeight<L, S>::Hash() const {
  size_t h = 0;
  for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
    h ^= h<<1 ^ iter.Value();
  return h;
}

// NB: This needs to be uncommented only if default fails for this the impl.
//
// template <typename L, StringType S>
// inline StringWeight<L, S>
// &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) {
//   if (this != &w) {
//     Clear();
//     for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next())
//       PushBack(iter.Value());
//   }
//   return *this;
// }

template <typename L, StringType S>
inline bool operator==(const StringWeight<L, S> &w1,
                       const StringWeight<L, S> &w2) {
  if (w1.Size() != w2.Size())
    return false;

  StringWeightIterator<L, S> iter1(w1);
  StringWeightIterator<L, S> iter2(w2);

  for (; !iter1.Done() ; iter1.Next(), iter2.Next())
    if (iter1.Value() != iter2.Value())
      return false;

  return true;
}

template <typename L, StringType S>
inline bool operator!=(const StringWeight<L, S> &w1,
                       const StringWeight<L, S> &w2) {
  return !(w1 == w2);
}

template <typename L, StringType S>
inline bool ApproxEqual(const StringWeight<L, S> &w1,
                        const StringWeight<L, S> &w2,
                        float delta = kDelta) {
  return w1 == w2;
}

template <typename L, StringType S>
inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) {
  StringWeightIterator<L, S> iter(w);
  if (iter.Done())
    return strm << "Epsilon";
  else if (iter.Value() == kStringInfinity)
    return strm << "Infinity";
  else if (iter.Value() == kStringBad)
    return strm << "BadString";
  else
    for (size_t i = 0; !iter.Done(); ++i, iter.Next()) {
      if (i > 0)
        strm << kStringSeparator;
      strm << iter.Value();
    }
  return strm;
}

template <typename L, StringType S>
inline istream &operator>>(istream &strm, StringWeight<L, S> &w) {
  string s;
  strm >> s;
  if (s == "Infinity") {
    w = StringWeight<L, S>::Zero();
  } else if (s == "Epsilon") {
    w = StringWeight<L, S>::One();
  } else {
    w.Clear();
    char *p = 0;
    for (const char *cs = s.c_str(); !p || *p != '\0'; cs = p + 1) {
      int l = strtoll(cs, &p, 10);
      if (p == cs || (*p != 0 && *p != kStringSeparator)) {
        strm.clear(std::ios::badbit);
        break;
      }
      w.PushBack(l);
    }
  }
  return strm;
}


// Default is for the restricted left and right semirings.  String
// equality is required (for non-Zero() input. This restriction
// is used in e.g. Determinize to ensure functional input.
template <typename L, StringType S>  inline StringWeight<L, S>
Plus(const StringWeight<L, S> &w1,
     const StringWeight<L, S> &w2) {
  if (!w1.Member() || !w2.Member())
    return StringWeight<L, S>::NoWeight();
  if (w1 == StringWeight<L, S>::Zero())
    return w2;
  if (w2 == StringWeight<L, S>::Zero())
    return w1;

  if (w1 != w2) {
    FSTERROR() << "StringWeight::Plus: unequal arguments "
               << "(non-functional FST?)"
               << " w1 = " << w1
               << " w2 = " << w2;
    return StringWeight<L, S>::NoWeight();
  }

  return w1;
}


// Longest common prefix for left string semiring.
template <typename L>  inline StringWeight<L, STRING_LEFT>
Plus(const StringWeight<L, STRING_LEFT> &w1,
     const StringWeight<L, STRING_LEFT> &w2) {
  if (!w1.Member() || !w2.Member())
    return StringWeight<L, STRING_LEFT>::NoWeight();
  if (w1 == StringWeight<L, STRING_LEFT>::Zero())
    return w2;
  if (w2 == StringWeight<L, STRING_LEFT>::Zero())
    return w1;

  StringWeight<L, STRING_LEFT> sum;
  StringWeightIterator<L, STRING_LEFT> iter1(w1);
  StringWeightIterator<L, STRING_LEFT> iter2(w2);
  for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
       iter1.Next(), iter2.Next())
    sum.PushBack(iter1.Value());
  return sum;
}


// Longest common suffix for right string semiring.
template <typename L>  inline StringWeight<L, STRING_RIGHT>
Plus(const StringWeight<L, STRING_RIGHT> &w1,
     const StringWeight<L, STRING_RIGHT> &w2) {
  if (!w1.Member() || !w2.Member())
    return StringWeight<L, STRING_RIGHT>::NoWeight();
  if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
    return w2;
  if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
    return w1;

  StringWeight<L, STRING_RIGHT> sum;
  StringWeightReverseIterator<L, STRING_RIGHT> iter1(w1);
  StringWeightReverseIterator<L, STRING_RIGHT> iter2(w2);
  for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
       iter1.Next(), iter2.Next())
    sum.PushFront(iter1.Value());
  return sum;
}


template <typename L, StringType S>
inline StringWeight<L, S> Times(const StringWeight<L, S> &w1,
                             const StringWeight<L, S> &w2) {
  if (!w1.Member() || !w2.Member())
    return StringWeight<L, S>::NoWeight();
  if (w1 == StringWeight<L, S>::Zero() || w2 == StringWeight<L, S>::Zero())
    return StringWeight<L, S>::Zero();

  StringWeight<L, S> prod(w1);
  for (StringWeightIterator<L, S> iter(w2); !iter.Done(); iter.Next())
    prod.PushBack(iter.Value());

  return prod;
}


// Default is for left division in the left string and the
// left restricted string semirings.
template <typename L, StringType S> inline StringWeight<L, S>
Divide(const StringWeight<L, S> &w1,
       const StringWeight<L, S> &w2,
       DivideType typ) {

  if (typ != DIVIDE_LEFT) {
    FSTERROR() << "StringWeight::Divide: only left division is defined "
               << "for the " << StringWeight<L, S>::Type() << " semiring";
    return StringWeight<L, S>::NoWeight();
  }

  if (!w1.Member() || !w2.Member())
    return StringWeight<L, S>::NoWeight();

  if (w2 == StringWeight<L, S>::Zero())
    return StringWeight<L, S>(kStringBad);
  else if (w1 == StringWeight<L, S>::Zero())
    return StringWeight<L, S>::Zero();

  StringWeight<L, S> div;
  StringWeightIterator<L, S> iter(w1);
  for (int i = 0; !iter.Done(); iter.Next(), ++i) {
    if (i >= w2.Size())
      div.PushBack(iter.Value());
  }
  return div;
}


// Right division in the right string semiring.
template <typename L> inline StringWeight<L, STRING_RIGHT>
Divide(const StringWeight<L, STRING_RIGHT> &w1,
       const StringWeight<L, STRING_RIGHT> &w2,
       DivideType typ) {

  if (typ != DIVIDE_RIGHT) {
    FSTERROR() << "StringWeight::Divide: only right division is defined "
               << "for the right string semiring";
    return StringWeight<L, STRING_RIGHT>::NoWeight();
  }

  if (!w1.Member() || !w2.Member())
    return StringWeight<L, STRING_RIGHT>::NoWeight();

  if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
    return StringWeight<L, STRING_RIGHT>(kStringBad);
  else if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
    return StringWeight<L, STRING_RIGHT>::Zero();

  StringWeight<L, STRING_RIGHT> div;
  StringWeightReverseIterator<L, STRING_RIGHT> iter(w1);
  for (int i = 0; !iter.Done(); iter.Next(), ++i) {
    if (i >= w2.Size())
      div.PushFront(iter.Value());
  }
  return div;
}


// Right division in the right restricted string semiring.
template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT>
Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1,
       const StringWeight<L, STRING_RIGHT_RESTRICT> &w2,
       DivideType typ) {

  if (typ != DIVIDE_RIGHT) {
    FSTERROR() << "StringWeight::Divide: only right division is defined "
               << "for the right restricted string semiring";
    return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight();
  }

  if (!w1.Member() || !w2.Member())
    return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight();

  if (w2 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
    return StringWeight<L, STRING_RIGHT_RESTRICT>(kStringBad);
  else if (w1 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
    return StringWeight<L, STRING_RIGHT_RESTRICT>::Zero();

  StringWeight<L, STRING_RIGHT_RESTRICT> div;
  StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> iter(w1);
  for (int i = 0; !iter.Done(); iter.Next(), ++i) {
    if (i >= w2.Size())
      div.PushFront(iter.Value());
  }
  return div;
}


// Product of string weight and an arbitray weight.
template <class L, class W, StringType S = STRING_LEFT>
struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> {
  typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)>
  ReverseWeight;

  GallicWeight() {}

  GallicWeight(StringWeight<L, S> w1, W w2)
      : ProductWeight<StringWeight<L, S>, W>(w1, w2) {}

  explicit GallicWeight(const string &s, int *nread = 0)
      : ProductWeight<StringWeight<L, S>, W>(s, nread) {}

  GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w)
      : ProductWeight<StringWeight<L, S>, W>(w) {}
};

}  // namespace fst

#endif  // FST_LIB_STRING_WEIGHT_H__