C++程序  |  104行  |  2.99 KB

// icu.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: roubert@google.com (Fredrik Roubert)

// Wrapper class for UErrorCode, with conversion operators for direct use in
// ICU C and C++ APIs.
//
// Features:
// - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
//   removing one common source of errors.
// - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking
//   UErrorCode& (reference), via conversion operators.
// - Automatic checking for success when it goes out of scope. On failure,
//   the destructor will FSTERROR() an error message.
//
// Most of ICU will handle errors gracefully and provide sensible fallbacks.
// Using IcuErrorCode, it is therefore possible to write very compact code
// that does sensible things on failure and provides logging for debugging.
//
// Example:
//
// IcuErrorCode icuerrorcode;
// return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL;

#ifndef FST_LIB_ICU_H_
#define FST_LIB_ICU_H_

#include <unicode/errorcode.h>
#include <unicode/unistr.h>
#include <unicode/ustring.h>
#include <unicode/utf8.h>

class IcuErrorCode : public icu::ErrorCode {
 public:
  IcuErrorCode() {}
  virtual ~IcuErrorCode() { if (isFailure()) handleFailure(); }

  // Redefine 'errorName()' in order to be compatible with ICU version 4.2
  const char* errorName() const {
    return u_errorName(errorCode);
  }

 protected:
  virtual void handleFailure() const {
    FSTERROR() << errorName();
}

 private:
  DISALLOW_COPY_AND_ASSIGN(IcuErrorCode);
};

namespace fst {

template <class Label>
bool UTF8StringToLabels(const string &str, vector<Label> *labels) {
  const char *c_str = str.c_str();
  int32_t length = str.size();
  UChar32 c;
  for (int32_t i = 0; i < length; /* no update */) {
    U8_NEXT(c_str, i, length, c);
    if (c < 0) {
      LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c;
      return false;
    }
    labels->push_back(c);
  }
  return true;
}

template <class Label>
bool LabelsToUTF8String(const vector<Label> &labels, string *str) {
  icu::UnicodeString u_str;
  char c_str[5];
  for (size_t i = 0; i < labels.size(); ++i) {
    u_str.setTo(labels[i]);
    IcuErrorCode error;
    u_strToUTF8(c_str, 5, NULL, u_str.getTerminatedBuffer(), -1, error);
    if (error.isFailure()) {
      LOG(ERROR) << "LabelsToUTF8String: Bad encoding: "
                 << error.errorName();
      return false;
    }
    *str += c_str;
  }
  return true;
}

}  // namespace fst

#endif  // FST_LIB_ICU_H_