C++程序  |  285行  |  9.69 KB

/**********************************************************************
 * File:        rejctmap.h  (Formerly rejmap.h)
 * Description: REJ and REJMAP class functions.
 * Author:		Phil Cheatle
 * Created:		Thu Jun  9 13:46:38 BST 1994
 *
 * (C) Copyright 1994, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *

This module may look unneccessarily verbose, but here's the philosophy...

ALL processing of the reject map is done in this module. There are lots of
separate calls to set reject/accept flags. These have DELIBERATELY been kept
distinct so that this module can decide what to do.

Basically, there is a flag for each sort of rejection or acceptance. This
provides a history of what has happened to EACH character.

Determining whether a character is CURRENTLY rejected depends on implicit
understanding of the SEQUENCE of possible calls. The flags are defined and
grouped in the REJ_FLAGS enum. These groupings are used in determining a
characters CURRENT rejection status. Basically, a character is ACCEPTED if

    none of the permanent rej flags are set
  AND (    the character has never been rejected
      OR an accept flag is set which is LATER than the latest reject flag )

IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
**********************************************************************/

#ifndef           REJCTMAP_H
#define           REJCTMAP_H

#ifdef __UNIX__
#include          <assert.h>
#endif
#include          "memry.h"
#include          "bits16.h"
#include                   "varable.h"
#include          "notdll.h"

extern BOOL_VAR_H (rejword_only_set_if_accepted, TRUE,
"Mimic old reject_word");
extern BOOL_VAR_H (rejmap_allow_more_good_qual, FALSE,
"Use initial good qual setting");
extern BOOL_VAR_H (rej_use_1Il_rej, TRUE, "1Il rejection enabled");

enum REJ_FLAGS
{
  /* Reject modes which are NEVER overridden */
  R_TESS_FAILURE,                // PERM Tess didnt classify
  R_SMALL_XHT,                   // PERM Xht too small
  R_EDGE_CHAR,                   // PERM Too close to edge of image
  R_1IL_CONFLICT,                // PERM 1Il confusion
  R_POSTNN_1IL,                  // PERM 1Il unrejected by NN
  R_REJ_CBLOB,                   // PERM Odd blob
  R_MM_REJECT,                   // PERM Matrix match rejection (m's)
  R_BAD_REPETITION,              // TEMP Repeated char which doesn't match trend

  /* Initial reject modes (pre NN_ACCEPT) */
  R_POOR_MATCH,                  // TEMP Ray's original heuristic (Not used)
  R_NOT_TESS_ACCEPTED,           // TEMP Tess didnt accept WERD
  R_CONTAINS_BLANKS,             // TEMP Tess failed on other chs in WERD
  R_BAD_PERMUTER,                // POTENTIAL Bad permuter for WERD

  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
  R_HYPHEN,                      // TEMP Post NN dodgy hyphen or full stop
  R_DUBIOUS,                     // TEMP Post NN dodgy chars
  R_NO_ALPHANUMS,                // TEMP No alphanumerics in word after NN
  R_MOSTLY_REJ,                  // TEMP Most of word rejected so rej the rest
  R_XHT_FIXUP,                   // TEMP Xht tests unsure

  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
  R_BAD_QUALITY,                 // TEMP Quality metrics bad for WERD

  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accep*/
  R_DOC_REJ,                     // TEMP Document rejection
  R_BLOCK_REJ,                   // TEMP Block rejection
  R_ROW_REJ,                     // TEMP Row rejection
  R_UNLV_REJ,                    // TEMP ~ turned to - or ^ turned to space

  /* Accept modes which occur inbetween the above rejection groups */
  R_NN_ACCEPT,                   //NN acceptance
  R_HYPHEN_ACCEPT,               //Hyphen acceptance
  R_MM_ACCEPT,                   //Matrix match acceptance
  R_QUALITY_ACCEPT,              //Accept word in good quality doc
  R_MINIMAL_REJ_ACCEPT           //Accept EVERYTHING except tess failures
};

/* REJECT MAP VALUES */

#define           MAP_ACCEPT '1'
#define           MAP_REJECT_PERM '0'
#define           MAP_REJECT_TEMP '2'
#define           MAP_REJECT_POTENTIAL '3'

class REJ
{
  BITS16 flags1;
  BITS16 flags2;

  void set_flag(REJ_FLAGS rej_flag) {
    if (rej_flag < 16)
      flags1.turn_on_bit (rej_flag);
    else
      flags2.turn_on_bit (rej_flag - 16);
  }

  BOOL8 rej_before_nn_accept();
  BOOL8 rej_between_nn_and_mm();
  BOOL8 rej_between_mm_and_quality_accept();
  BOOL8 rej_between_quality_and_minimal_rej_accept();
  BOOL8 rej_before_mm_accept();
  BOOL8 rej_before_quality_accept();

  public:
    REJ() {  //constructor
    }

    REJ(  //classwise copy
        const REJ &source) {
      flags1 = source.flags1;
      flags2 = source.flags2;
    }

    REJ & operator= (            //assign REJ
    const REJ & source) {        //from this
      flags1 = source.flags1;
      flags2 = source.flags2;
      return *this;
    }

    BOOL8 flag(REJ_FLAGS rej_flag) {
      if (rej_flag < 16)
        return flags1.bit (rej_flag);
      else
        return flags2.bit (rej_flag - 16);
    }

    char display_char() {
      if (perm_rejected ())
        return MAP_REJECT_PERM;
      else if (accept_if_good_quality ())
        return MAP_REJECT_POTENTIAL;
      else if (rejected ())
        return MAP_REJECT_TEMP;
      else
        return MAP_ACCEPT;
    }

    BOOL8 perm_rejected();  //Is char perm reject?

    BOOL8 rejected();  //Is char rejected?

    BOOL8 accepted() {  //Is char accepted?
      return !rejected ();
    }

                                 //potential rej?
    BOOL8 accept_if_good_quality();

    BOOL8 recoverable() {
      return (rejected () && !perm_rejected ());
    }

    void setrej_tess_failure();  //Tess generated blank
    void setrej_small_xht();  //Small xht char/wd
    void setrej_edge_char();  //Close to image edge
    void setrej_1Il_conflict();  //Initial reject map
    void setrej_postNN_1Il();  //1Il after NN
    void setrej_rej_cblob();  //Insert duff blob
    void setrej_mm_reject();  //Matrix matcher
                                 //Odd repeated char
    void setrej_bad_repetition();
    void setrej_poor_match();  //Failed Rays heuristic
                                 //TEMP reject_word
    void setrej_not_tess_accepted();
                                 //TEMP reject_word
    void setrej_contains_blanks();
    void setrej_bad_permuter();  //POTENTIAL reject_word
    void setrej_hyphen();  //PostNN dubious hyph or .
    void setrej_dubious();  //PostNN dubious limit
    void setrej_no_alphanums();  //TEMP reject_word
    void setrej_mostly_rej();  //TEMP reject_word
    void setrej_xht_fixup();  //xht fixup
    void setrej_bad_quality();  //TEMP reject_word
    void setrej_doc_rej();  //TEMP reject_word
    void setrej_block_rej();  //TEMP reject_word
    void setrej_row_rej();  //TEMP reject_word
    void setrej_unlv_rej();  //TEMP reject_word
    void setrej_nn_accept();  //NN Flipped a char
    void setrej_hyphen_accept();  //Good aspect ratio
    void setrej_mm_accept();  //Matrix matcher
                                 //Quality flip a char
    void setrej_quality_accept();
                                 //Accept all except blank
    void setrej_minimal_rej_accept();

    void full_print(FILE *fp);
};

class REJMAP
{
  REJ *ptr;                      //ptr to the chars
  inT16 len;                     //Number of chars

  public:
    REJMAP() {  //constructor
      ptr = NULL;
      len = 0;
    }

    REJMAP(  //classwise copy
           const REJMAP &rejmap);

    REJMAP & operator= (         //assign REJMAP
      const REJMAP & source);    //from this

    ~REJMAP () {                 //destructor
      if (ptr != NULL)
        free_struct (ptr, len * sizeof (REJ), "REJ");
    }

    void initialise(  //Redefine map
                    inT16 length);

    REJ & operator[](            //access function
      inT16 index) const         //map index
    {
      ASSERT_HOST (index < len);
      return ptr[index];         //no bounds checks
    }

    inT32 length() const {  //map length
      return len;
    }

    inT16 accept_count();  //How many accepted?

    inT16 reject_count() {  //How many rejects?
      return len - accept_count ();
    }

    void remove_pos(             //Cut out an element
                    inT16 pos);  //element to remove

    void print(FILE *fp);

    void full_print(FILE *fp);

    BOOL8 recoverable_rejects();  //Any non perm rejs?

    BOOL8 quality_recoverable_rejects();
    //Any potential rejs?

    void rej_word_small_xht();  //Reject whole word
                                 //Reject whole word
    void rej_word_tess_failure();
    void rej_word_not_tess_accepted();
    //Reject whole word
                                 //Reject whole word
    void rej_word_contains_blanks();
                                 //Reject whole word
    void rej_word_bad_permuter();
    void rej_word_xht_fixup();  //Reject whole word
                                 //Reject whole word
    void rej_word_no_alphanums();
    void rej_word_mostly_rej();  //Reject whole word
    void rej_word_bad_quality();  //Reject whole word
    void rej_word_doc_rej();  //Reject whole word
    void rej_word_block_rej();  //Reject whole word
    void rej_word_row_rej();  //Reject whole word
};
#endif