// C++ source  |  365 lines  |  15.74 KB

/**********************************************************************
 * File:        tessbox.cpp  (Formerly tessbox.c)
 * Description: Black boxed Tess for developing a resaljet.
 * Author:					Ray Smith
 * Created:					Thu Apr 23 11:03:36 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include          "tfacep.h"
#include          "tfacepp.h"
#include          "tessbox.h"
#include "mfoutline.h"
#include "tesseractclass.h"

#define EXTERN

/**********************************************************************
 * tess_segment_pass1
 *
 * Segment a word using the pass1 conditions of the tess segmenter.
 **********************************************************************/

namespace tesseract {
// Recognise one word with the pass1 settings (set_pass1) by delegating to
// recog_word with no tester functions.
//
// When the word carries W_DONT_CHOP, wordrec_enable_assoc and chop_enable
// are forced to 0 for the duration of the call, and permute_only_top is
// forced to 1 if the word also carries W_REP_CHAR.  All three settings are
// put back to the values they held on entry before returning.
//
// Returns whatever recog_word returns; raw_choice, blob_choices and outword
// are handed straight through to recog_word.
WERD_CHOICE *Tesseract::tess_segment_pass1(                 //recog one word
                                           WERD *word,      //bln word to do
                                           DENORM *denorm,  //de-normaliser
                                                            //matcher function
                                           POLY_MATCHER matcher,
                                                            //raw result
                                           WERD_CHOICE *&raw_choice,
                                                            //list of blob lists
                                           BLOB_CHOICE_LIST_CLIST *blob_choices,
                                           WERD *&outword   //bln word output
                                          ) {
  // Sample the flag once so that the restore below is always paired with
  // the save, even if the word's flags were to change during recog_word.
  const bool dont_chop = word->flag (W_DONT_CHOP);
  int saved_enable_assoc = 0;
  int saved_chop_enable = 0;
  int saved_permute_only_top = 0;

  if (dont_chop) {
    saved_enable_assoc = wordrec_enable_assoc;
    saved_chop_enable = chop_enable;
    // Remember the caller's setting instead of assuming it was 0.
    saved_permute_only_top = permute_only_top;
    wordrec_enable_assoc.set_value(0);
    chop_enable.set_value(0);
    if (word->flag (W_REP_CHAR))
      permute_only_top = 1;
  }
  set_pass1();
  //      tprintf("pass1 chop on=%d, seg=%d, onlytop=%d",chop_enable,enable_assoc,permute_only_top);
  WERD_CHOICE *result = recog_word (word, denorm, matcher, NULL, NULL, FALSE,
    raw_choice, blob_choices, outword);
  if (dont_chop) {
    wordrec_enable_assoc.set_value(saved_enable_assoc);
    chop_enable.set_value(saved_chop_enable);
    permute_only_top = saved_permute_only_top;
  }
  return result;
}


/**********************************************************************
 * tess_segment_pass2
 *
 * Segment a word using the pass2 conditions of the tess segmenter.
 *
 * Selects the pass2 settings (set_pass2) and delegates to recog_word
 * with no tester functions.  When the word carries W_DONT_CHOP,
 * wordrec_enable_assoc and chop_enable are forced to 0 for the duration
 * of the call, and permute_only_top is forced to 1 if the word also
 * carries W_REP_CHAR.  All three settings are put back to the values
 * they held on entry before returning.
 *
 * Returns whatever recog_word returns; raw_choice, blob_choices and
 * outword are handed straight through to recog_word.
 **********************************************************************/

WERD_CHOICE *Tesseract::tess_segment_pass2(                 //recog one word
                                           WERD *word,      //bln word to do
                                           DENORM *denorm,  //de-normaliser
                                                            //matcher function
                                           POLY_MATCHER matcher,
                                                            //raw result
                                           WERD_CHOICE *&raw_choice,
                                                            //list of blob lists
                                           BLOB_CHOICE_LIST_CLIST *blob_choices,
                                           WERD *&outword   //bln word output
                                          ) {
  // Sample the flag once so that the restore below is always paired with
  // the save, even if the word's flags were to change during recog_word.
  const bool dont_chop = word->flag (W_DONT_CHOP);
  int saved_enable_assoc = 0;
  int saved_chop_enable = 0;
  int saved_permute_only_top = 0;

  if (dont_chop) {
    saved_enable_assoc = wordrec_enable_assoc;
    saved_chop_enable = chop_enable;
    // Remember the caller's setting instead of assuming it was 0.
    saved_permute_only_top = permute_only_top;
    wordrec_enable_assoc.set_value(0);
    chop_enable.set_value(0);
    if (word->flag (W_REP_CHAR))
      permute_only_top = 1;
  }
  set_pass2();
  WERD_CHOICE *result = recog_word (word, denorm, matcher, NULL, NULL, FALSE,
    raw_choice, blob_choices, outword);
  if (dont_chop) {
    wordrec_enable_assoc.set_value(saved_enable_assoc);
    chop_enable.set_value(saved_chop_enable);
    permute_only_top = saved_permute_only_top;
  }
  return result;
}


/**********************************************************************
 * correct_segment_pass2
 *
 * Recognise a word under the pass2 settings with a tester attached.
 * The tester is handed to recog_word as the second of its two tester
 * arguments (the first is NULL) and the flag that follows them is TRUE.
 *
 * Per the original notes for this routine: the tester is called with
 * all the correctly segmented blobs; if the correct segmentation cannot
 * be found, it is called with the segmentation found by tess, with all
 * the correct flags set to false and all strings NULL.
 *
 * Returns whatever recog_word returns.
 **********************************************************************/

WERD_CHOICE *Tesseract::correct_segment_pass2(
    WERD *word,                            // bln word to do
    DENORM *denorm,                        // de-normaliser
    POLY_MATCHER matcher,                  // matcher function
    POLY_TESTER tester,                    // tester function
    WERD_CHOICE *&raw_choice,              // raw result
    BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
    WERD *&outword) {                      // bln word output
  // Pass2 settings must be in force before recognition starts.
  set_pass2();

  WERD_CHOICE *best_choice =
      recog_word (word, denorm, matcher,
                  NULL,    // first tester slot is left empty
                  tester,  // second tester slot takes the caller's tester
                  TRUE, raw_choice, blob_choices, outword);
  return best_choice;
}


/**********************************************************************
 * test_segment_pass2
 *
 * Recognise a word under the pass2 settings with a tester attached.
 * The tester is handed to recog_word as the first of its two tester
 * arguments (the second is NULL) and the flag that follows them is TRUE.
 *
 * Per the original notes for this routine: the tester is called on all
 * words used by tess in its search, and only for words where the
 * correct segmentation could be found.
 *
 * Returns whatever recog_word returns.
 **********************************************************************/
WERD_CHOICE *Tesseract::test_segment_pass2(
    WERD *word,                            // bln word to do
    DENORM *denorm,                        // de-normaliser
    POLY_MATCHER matcher,                  // matcher function
    POLY_TESTER tester,                    // tester function
    WERD_CHOICE *&raw_choice,              // raw result
    BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
    WERD *&outword) {                      // bln word output
  // Pass2 settings must be in force before recognition starts.
  set_pass2();

  WERD_CHOICE *best_choice =
      recog_word (word, denorm, matcher,
                  tester,  // first tester slot takes the caller's tester
                  NULL,    // second tester slot is left empty
                  TRUE, raw_choice, blob_choices, outword);
  return best_choice;
}


/**********************************************************************
 * tess_acceptable_word
 *
 * Return true if the word is regarded as "good enough", as judged by
 * the dictionary's AcceptableResult on the two choices.
 *
 * A missing (NULL) choice is reported as not acceptable rather than
 * being dereferenced; tess_adaptable_word tolerates NULL choices in the
 * same way.
 **********************************************************************/
BOOL8 Tesseract::tess_acceptable_word(
    WERD_CHOICE *word_choice,  // after context
    WERD_CHOICE *raw_choice) {  // before context
  // Nothing to judge without both choices.
  if (word_choice == NULL || raw_choice == NULL)
    return FALSE;
  return getDict().AcceptableResult(*word_choice, *raw_choice);
}


/**********************************************************************
 * tess_adaptable_word
 *
 * Return true if AdaptableWord accepts the word (converted with
 * make_tess_word) together with the two choices.
 *
 * Returns false if either choice is NULL or if the word could not be
 * converted.  The converted word is released before returning.
 **********************************************************************/
BOOL8 Tesseract::tess_adaptable_word(  // test adaptability
    WERD *word,                        // word to test
    WERD_CHOICE *best_choice,          // after context
    WERD_CHOICE *raw_choice            // before context
                                     ) {
  // Without both choices the answer is already "no"; skip the conversion.
  if (best_choice == NULL || raw_choice == NULL)
    return FALSE;

  TWERD *tessword = make_tess_word(word, NULL);
  // delete_word is not assumed to accept NULL, so bail out before it.
  if (tessword == NULL)
    return FALSE;

  const bool adaptable =
      AdaptableWord(tessword, *best_choice, *raw_choice) != 0;
  delete_word(tessword);
  return adaptable;
}


/**********************************************************************
 * tess_cn_matcher
 *
 * Match a blob using the Tess Char Normalized (non-adaptive) matcher
 * only.
 *
 * Switches tess_cn_matching on and tess_bn_matching off (neither is
 * restored afterwards), converts the blob, builds a dummy row and runs
 * AdaptiveClassifier, which receives ratings and cpresults.
 * pblob, nblob and word are not used here.
 **********************************************************************/

void Tesseract::tess_cn_matcher(
    PBLOB *pblob,                   // previous blob
    PBLOB *blob,                    // blob to match
    PBLOB *nblob,                   // next blob
    WERD *word,                     // word it came from
    DENORM *denorm,                 // de-normaliser
    BLOB_CHOICE_LIST *ratings,      // list of results
    CLASS_PRUNER_RESULTS cpresults  // may be null.
                               ) {
  TEXTROW dummy_row;  // row description handed to the classifier

  // Select char-normalised matching only.
  tess_cn_matching.set_value(true);
  tess_bn_matching.set_value(false);

  // Convert the blob, then fill in the dummy row.
  TBLOB *converted = make_rotated_tess_blob(denorm, blob, true);
  make_tess_row(denorm, &dummy_row);

  // Classify, then release the converted blob.
  AdaptiveClassifier(converted, NULL, &dummy_row, ratings, cpresults);
  free_blob(converted);
}


/**********************************************************************
 * tess_bn_matcher
 *
 * Match a blob using the Tess Baseline Normalized (adaptive) matcher
 * only.
 *
 * Switches tess_bn_matching on and tess_cn_matching off (neither is
 * restored afterwards), converts the blob, builds a dummy row and runs
 * AdaptiveClassifier, which receives ratings.
 * pblob, nblob and word are not used here.
 **********************************************************************/

void Tesseract::tess_bn_matcher(
    PBLOB *pblob,              // previous blob
    PBLOB *blob,               // blob to match
    PBLOB *nblob,              // next blob
    WERD *word,                // word it came from
    DENORM *denorm,            // de-normaliser
    BLOB_CHOICE_LIST *ratings  // list of results
                               ) {
  TEXTROW dummy_row;  // row description handed to the classifier

  // Select baseline-normalised matching only.
  tess_bn_matching.set_value(true);
  tess_cn_matching.set_value(false);

  // Convert the blob, then fill in the dummy row.
  TBLOB *converted = make_rotated_tess_blob(denorm, blob, true);
  make_tess_row(denorm, &dummy_row);

  // Classify, then release the converted blob.
  AdaptiveClassifier(converted, NULL, &dummy_row, ratings, NULL);
  free_blob(converted);
}


/**********************************************************************
 * tess_default_matcher
 *
 * Match a blob using the default functionality of the Tess matcher.
 *
 * Switches both tess_bn_matching and tess_cn_matching off (neither is
 * restored afterwards), converts the blob, builds a dummy row and runs
 * AdaptiveClassifier, which receives ratings.  ratings must not be
 * NULL (asserted).  pblob, nblob, word and script are not used here.
 **********************************************************************/

void Tesseract::tess_default_matcher(
    PBLOB *pblob,               // previous blob
    PBLOB *blob,                // blob to match
    PBLOB *nblob,               // next blob
    WERD *word,                 // word it came from
    DENORM *denorm,             // de-normaliser
    BLOB_CHOICE_LIST *ratings,  // list of results
    const char* script
                                    ) {
  assert(ratings != NULL);
  TEXTROW dummy_row;  // row description handed to the classifier

  // Neither of the "only" matcher modes is selected.
  tess_bn_matching.set_value(false);
  tess_cn_matching.set_value(false);

  // Convert the blob, then fill in the dummy row.
  TBLOB *converted = make_rotated_tess_blob(denorm, blob, true);
  make_tess_row(denorm, &dummy_row);

  // Classify, then release the converted blob.
  AdaptiveClassifier (converted, NULL, &dummy_row, ratings, NULL);
  free_blob(converted);
}
}  // namespace tesseract


/**********************************************************************
 * tess_training_tester
 *
 * Matcher tester function which actually trains tess.
 **********************************************************************/

void tess_training_tester(                           //call tess
                          const STRING& filename,    //filename to output
                          PBLOB *blob,               //blob to match
                          DENORM *denorm,            //de-normaliser
                          BOOL8 correct,             //ly segmented
                          char *text,                //correct text
                          inT32 count,               //chars in text
                          BLOB_CHOICE_LIST *ratings  //list of results
                         ) {
  TBLOB *tessblob;               //converted blob
  TEXTROW tessrow;               //dummy row

  if (correct) {
    classify_norm_method.set_value(character); // force char norm spc 30/11/93
    tess_bn_matching.set_value(false);    //turn it off
    tess_cn_matching.set_value(false);
                                 //convert blob
    tessblob = make_tess_blob (blob, TRUE);
                                 //make dummy row
    make_tess_row(denorm, &tessrow);
                                 //learn it
    LearnBlob(filename, tessblob, &tessrow, text);
    free_blob(tessblob);
  }
}


/**********************************************************************
 * tess_adapter
 *
 * Adapt to the word using the Tesseract mechanism.
 **********************************************************************/

namespace tesseract {
// Adapt to a word: builds a dummy row from denorm, converts the word with
// make_tess_word, passes the converted word, the row, both choices and the
// reject map to AdaptToWord, then frees the converted word.
void Tesseract::tess_adapter(
    WERD *word,                     // bln word
    DENORM *denorm,                 // de-normalise
    const WERD_CHOICE& choice,      // string for word
    const WERD_CHOICE& raw_choice,  // before context
    const char *rejmap              // reject map
                            ) {
  // The row has static storage, so one instance is shared by every call.
  // NOTE(review): the matcher functions in this file use an automatic
  // TEXTROW instead; whether static storage is required here cannot be
  // told from this file - confirm before changing it.
  static TEXTROW dummy_row;

  make_tess_row(denorm, &dummy_row);
  TWERD *converted_word = make_tess_word (word, &dummy_row);

  // Adapt to the word, then release the converted copy.
  AdaptToWord(converted_word, &dummy_row, choice, raw_choice, rejmap);
  delete_word(converted_word);
}


/**********************************************************************
 * tess_add_doc_word
 *
 * Add the given word to the document dictionary by forwarding it to
 * the dictionary's add_document_word.  word_choice is dereferenced
 * unconditionally.
 **********************************************************************/
void Tesseract::tess_add_doc_word(WERD_CHOICE *word_choice) {
  WERD_CHOICE &doc_word = *word_choice;
  getDict().add_document_word(doc_word);
}
}  // namespace tesseract