/* * Copyright (C) 2009 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__ #define PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__ #include <stdlib.h> #include "./spellingtrie.h" namespace ime_pinyin { // Type used to identify the size of a pool, such as id pool, etc. typedef uint16 PoolPosType; // Type used to identify a parsing mile stone in an atom dictionary. typedef uint16 MileStoneHandle; // Type used to express a lemma and its probability score. typedef struct { size_t id:(kLemmaIdSize * 8); size_t lma_len:4; uint16 psb; // The score, the lower psb, the higher possibility. // For single character items, we may also need Hanzi. // For multiple characer items, ignore it. char16 hanzi; } LmaPsbItem, *PLmaPsbItem; // LmaPsbItem extended with string. typedef struct { LmaPsbItem lpi; char16 str[kMaxLemmaSize + 1]; } LmaPsbStrItem, *PLmaPsbStrItem; typedef struct { float psb; char16 pre_hzs[kMaxPredictSize]; uint16 his_len; // The length of the history used to do the prediction. } NPredictItem, *PNPredictItem; // Parameter structure used to extend in a dictionary. All dictionaries // receives the same DictExtPara and a dictionary specific MileStoneHandle for // extending. // // When the user inputs a new character, AtomDictBase::extend_dict() will be // called at least once for each dictionary. // // For example, when the user inputs "wm", extend_dict() will be called twice, // and the DictExtPara parameter are as follows respectively: // 1. splids = {w, m}; splids_extended = 1; ext_len = 1; step_no = 1; // splid_end_split = false; id_start = wa(the first id start with 'w'); // id_num = number of ids starting with 'w'. // 2. splids = {m}; splids_extended = 0; ext_len = 1; step_no = 1; // splid_end_split = false; id_start = wa; id_num = number of ids starting with // 'w'. // // For string "women", one of the cases of the DictExtPara parameter is: // splids = {wo, men}, splids_extended = 1, ext_len = 3 (length of "men"), // step_no = 4; splid_end_split = false; id_start = men, id_num = 1. // typedef struct { // Spelling ids for extending, there are splids_extended + 1 ids in the // buffer. // For a normal lemma, there can only be kMaxLemmaSize spelling ids in max, // but for a composing phrase, there can kMaxSearchSteps spelling ids. uint16 splids[kMaxSearchSteps]; // Number of ids that have been used before. splids[splids_extended] is the // newly added id for the current extension. uint16 splids_extended; // The step span of the extension. It is also the size of the string for // the newly added spelling id. uint16 ext_len; // The step number for the current extension. It is also the ending position // in the input Pinyin string for the substring of spelling ids in splids[]. // For example, when the user inputs "women", step_no = 4. // This parameter may useful to manage the MileStoneHandle list for each // step. When the user deletes a character from the string, MileStoneHandle // objects for the the steps after that character should be reset; when the // user begins a new string, all MileStoneHandle objects should be reset. uint16 step_no; // Indicate whether the newly added spelling ends with a splitting character bool splid_end_split; // If the newly added id is a half id, id_start is the first id of the // corresponding full ids; if the newly added id is a full id, id_start is // that id. uint16 id_start; // If the newly added id is a half id, id_num is the number of corresponding // ids; if it is a full id, id_num == 1. uint16 id_num; }DictExtPara, *PDictExtPara; bool is_system_lemma(LemmaIdType lma_id); bool is_user_lemma(LemmaIdType lma_id); bool is_composing_lemma(LemmaIdType lma_id); int cmp_lpi_with_psb(const void *p1, const void *p2); int cmp_lpi_with_unified_psb(const void *p1, const void *p2); int cmp_lpi_with_id(const void *p1, const void *p2); int cmp_lpi_with_hanzi(const void *p1, const void *p2); int cmp_lpsi_with_str(const void *p1, const void *p2); int cmp_hanzis_1(const void *p1, const void *p2); int cmp_hanzis_2(const void *p1, const void *p2); int cmp_hanzis_3(const void *p1, const void *p2); int cmp_hanzis_4(const void *p1, const void *p2); int cmp_hanzis_5(const void *p1, const void *p2); int cmp_hanzis_6(const void *p1, const void *p2); int cmp_hanzis_7(const void *p1, const void *p2); int cmp_hanzis_8(const void *p1, const void *p2); int cmp_npre_by_score(const void *p1, const void *p2); int cmp_npre_by_hislen_score(const void *p1, const void *p2); int cmp_npre_by_hanzi_score(const void *p1, const void *p2); size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num); size_t align_to_size_t(size_t size); } // namespace #endif // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__