/**********************************************************************
 * File:        edgblob.c  (Formerly edgeloop.c)
 * Description: Functions to clean up an outline before approximation.
 * Author:		Ray Smith
 * Created:		Tue Mar 26 16:56:25 GMT 1991
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include          "scanedg.h"
#include          "drawedg.h"
#include          "edgloop.h"
#include          "edgblob.h"

#define EXTERN

// Control parameters used in outline_complexity(), which rejects an outline
// if any one of the 3 conditions is satisfied:
//  - number of children exceeds edges_max_children_per_outline
//  - number of nested layers exceeds edges_max_children_layers
//  - joint complexity exceeds edges_children_count_limit(as in child_count())
EXTERN BOOL_VAR(edges_use_new_outline_complexity, FALSE,
                "Use the new outline complexity module");
EXTERN INT_VAR(edges_max_children_per_outline, 10,
               "Max number of children inside a character outline");
EXTERN INT_VAR(edges_max_children_layers, 5,
               "Max layers of nested children inside a character outline");
EXTERN BOOL_VAR(edges_debug, FALSE,
                "turn on debugging for this module");


EXTERN INT_VAR (edges_children_per_grandchild, 10,
"Importance ratio for chucking outlines");
EXTERN INT_VAR(edges_children_count_limit, 45,
               "Max holes allowed in blob");
EXTERN BOOL_VAR (edges_children_fix, FALSE,
"Remove boxy parents of char-like children");
EXTERN INT_VAR (edges_min_nonhole, 12,
"Min pixels for potential char in box");
EXTERN INT_VAR (edges_patharea_ratio, 40,
"Max lensq/area for acceptable child outline");
EXTERN double_VAR (edges_childarea, 0.5,
                  "Min area fraction of child outline");
EXTERN double_VAR (edges_boxarea, 0.875,
"Min area fraction of grandchild for box");

/**********************************************************************
 * OL_BUCKETS::OL_BUCKETS
 *
 * Construct an array of buckets for associating outlines into blobs.
 **********************************************************************/

OL_BUCKETS::OL_BUCKETS (
////constructor
ICOORD bleft,                    //corners
ICOORD tright):         bl (bleft), tr (tright) {
  bxdim = (tright.x () - bleft.x ()) / BUCKETSIZE + 1;
  bydim = (tright.y () - bleft.y ()) / BUCKETSIZE + 1;
                                 //make array
  buckets = new C_OUTLINE_LIST[bxdim * bydim];
  index = 0;
}


/**********************************************************************
 * OL_BUCKETS::operator(
 *
 * Return a pointer to a list of C_OUTLINEs corresponding to the
 * given pixel coordinates.
 **********************************************************************/

C_OUTLINE_LIST *
OL_BUCKETS::operator () (        //array access
inT16 x,                         //image coords
inT16 y) {
  return &buckets[(y-bl.y()) / BUCKETSIZE * bxdim + (x-bl.x()) / BUCKETSIZE];
}


/**********************************************************************
 * OL_BUCKETS::outline_complexity
 *
 * This is the new version of count_child.
 *
 * The goal of this function is to determine if an outline and its
 * interiors could be part of a character blob.  This is done by
 * computing a "complexity" index for the outline, which is the return
 * value of this function, and checking it against a threshold.
 * The max_count is used for short-circuiting the recursion and forcing
 * a rejection that guarantees to fail the threshold test.
 * The complexity F for outline X with N children X[i] is
 *   F(X) = N + sum_i F(X[i]) * edges_children_per_grandchild
 * so each layer of nesting increases complexity exponentially.
 * An outline can be rejected as a text blob candidate if its complexity
 * is too high, has too many children(likely a container), or has too
 * many layers of nested inner loops.  This has the side-effect of
 * flattening out boxed or reversed video text regions.
 **********************************************************************/

inT32 OL_BUCKETS::outline_complexity(
                                     C_OUTLINE *outline,   // parent outline
                                     inT32 max_count,      // max output
                                     inT16 depth           // recurion depth
                                    ) {
  inT16 xmin, xmax;              // coord limits
  inT16 ymin, ymax;
  inT16 xindex, yindex;          // current bucket
  C_OUTLINE *child;              // current child
  inT32 child_count;             // no of children
  inT32 grandchild_count;        // no of grandchildren
  C_OUTLINE_IT child_it;         // search iterator

  TBOX olbox = outline->bounding_box();
  xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
  xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
  ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
  ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
  child_count = 0;
  grandchild_count = 0;
  if (++depth > edges_max_children_layers)  // nested loops are too deep
    return max_count + depth;

  for (yindex = ymin; yindex <= ymax; yindex++) {
    for (xindex = xmin; xindex <= xmax; xindex++) {
      child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
      if (child_it.empty())
        continue;
      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
           child_it.forward()) {
        child = child_it.data();
        if (child == outline || !(*child < *outline))
          continue;
        child_count++;

        if (child_count > edges_max_children_per_outline) {   // too fragmented
          if (edges_debug)
            tprintf("Discard outline on child_count=%d > "
                    "max_children_per_outline=%d\n",
                    child_count,
                    static_cast<inT32>(edges_max_children_per_outline));
          return max_count + child_count;
        }

        // Compute the "complexity" of each child recursively
        inT32 remaining_count = max_count - child_count - grandchild_count;
        if (remaining_count > 0)
          grandchild_count += edges_children_per_grandchild *
                              outline_complexity(child, remaining_count, depth);
        if (child_count + grandchild_count > max_count) {  // too complex
          if (edges_debug)
            tprintf("Disgard outline on child_count=%d + grandchild_count=%d "
                    "> max_count=%d\n",
                    child_count, grandchild_count, max_count);
          return child_count + grandchild_count;
        }
      }
    }
  }
  return child_count + grandchild_count;
}


/**********************************************************************
 * OL_BUCKETS::count_children
 *
 * Find number of descendants of this outline.
 **********************************************************************/

inT32 OL_BUCKETS::count_children(                     //recursive count
                                 C_OUTLINE *outline,  //parent outline
                                 inT32 max_count      //max output
                                ) {
  BOOL8 parent_box;              //could it be boxy
  inT16 xmin, xmax;              //coord limits
  inT16 ymin, ymax;
  inT16 xindex, yindex;          //current bucket
  C_OUTLINE *child;              //current child
  inT32 child_count;             //no of children
  inT32 grandchild_count;        //no of grandchildren
  inT32 parent_area;             //potential box
  FLOAT32 max_parent_area;       //potential box
  inT32 child_area;              //current child
  inT32 child_length;            //current child
  TBOX olbox;
  C_OUTLINE_IT child_it;         //search iterator

  olbox = outline->bounding_box ();
  xmin = (olbox.left () - bl.x ()) / BUCKETSIZE;
  xmax = (olbox.right () - bl.x ()) / BUCKETSIZE;
  ymin = (olbox.bottom () - bl.y ()) / BUCKETSIZE;
  ymax = (olbox.top () - bl.y ()) / BUCKETSIZE;
  child_count = 0;
  grandchild_count = 0;
  parent_area = 0;
  max_parent_area = 0;
  parent_box = TRUE;
  for (yindex = ymin; yindex <= ymax; yindex++) {
    for (xindex = xmin; xindex <= xmax; xindex++) {
      child_it.set_to_list (&buckets[yindex * bxdim + xindex]);
      if (child_it.empty ())
        continue;
      for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
      child_it.forward ()) {
        child = child_it.data ();
        if (child != outline && *child < *outline) {
          child_count++;
          if (child_count <= max_count) {
            int max_grand = (max_count - child_count) /
                            edges_children_per_grandchild;
            if (max_grand > 0)
              grandchild_count += count_children (child, max_grand) *
                                  edges_children_per_grandchild;
            else
              grandchild_count += count_children(child, 1);
          }
          if (child_count + grandchild_count > max_count) {
            if (edges_debug)
              tprintf("Discarding parent with child count=%d, gc=%d\n",
                      child_count,grandchild_count);
            return child_count + grandchild_count;
          }
          if (parent_area == 0) {
            parent_area = outline->outer_area ();
            if (parent_area < 0)
              parent_area = -parent_area;
            max_parent_area = outline->bounding_box().area() * edges_boxarea;
            if (parent_area < max_parent_area)
              parent_box = FALSE;
          }
          if (parent_box &&
              (!edges_children_fix ||
               child->bounding_box().height() > edges_min_nonhole)) {
            child_area = child->outer_area ();
            if (child_area < 0)
              child_area = -child_area;
            if (edges_children_fix) {
              if (parent_area - child_area < max_parent_area) {
                parent_box = FALSE;
                continue;
              }
              if (grandchild_count > 0) {
                if (edges_debug)
                  tprintf("Discarding parent of area %d, child area=%d, max%g "
                          "with gc=%d\n",
                          parent_area, child_area, max_parent_area,
                          grandchild_count);
                return max_count + 1;
              }
              child_length = child->pathlength ();
              if (child_length * child_length >
              child_area * edges_patharea_ratio) {
                if (edges_debug)
                  tprintf("Discarding parent of area %d, child area=%d, max%g "
                          "with child length=%d\n",
                          parent_area, child_area, max_parent_area,
                          child_length);
                return max_count + 1;
              }
            }
            if (child_area < child->bounding_box().area() * edges_childarea) {
              if (edges_debug)
                tprintf("Discarding parent of area %d, child area=%d, max%g "
                        "with child rect=%d\n",
                        parent_area, child_area, max_parent_area,
                        child->bounding_box().area());
              return max_count + 1;
            }
          }
        }
      }
    }
  }
  return child_count + grandchild_count;
}




/**********************************************************************
 * OL_BUCKETS::extract_children
 *
 * Find number of descendants of this outline.
 **********************************************************************/

void OL_BUCKETS::extract_children(                     //recursive count
                                  C_OUTLINE *outline,  //parent outline
                                  C_OUTLINE_IT *it     //destination iterator
                                 ) {
  inT16 xmin, xmax;              //coord limits
  inT16 ymin, ymax;
  inT16 xindex, yindex;          //current bucket
  TBOX olbox;
  C_OUTLINE_IT child_it;         //search iterator

  olbox = outline->bounding_box ();
  xmin = (olbox.left () - bl.x ()) / BUCKETSIZE;
  xmax = (olbox.right () - bl.x ()) / BUCKETSIZE;
  ymin = (olbox.bottom () - bl.y ()) / BUCKETSIZE;
  ymax = (olbox.top () - bl.y ()) / BUCKETSIZE;
  for (yindex = ymin; yindex <= ymax; yindex++) {
    for (xindex = xmin; xindex <= xmax; xindex++) {
      child_it.set_to_list (&buckets[yindex * bxdim + xindex]);
      for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
      child_it.forward ()) {
        if (*child_it.data () < *outline) {
          it->add_after_then_move (child_it.extract ());
        }
      }
    }
  }
}


/**********************************************************************
 * extract_edges
 *
 * Run the edge detector over the block and return a list of blobs.
 **********************************************************************/

void extract_edges(                 //find blobs
#ifndef GRAPHICS_DISABLED
                   ScrollView* window,   //window for output
#endif
                   IMAGE *image,    //image to scan
                   IMAGE *t_image,  //thresholded image
                   ICOORD page_tr,  //corner of page
                   BLOCK *block     //block to scan
                  ) {
  ICOORD bleft;                  //block box
  ICOORD tright;
  C_OUTLINE_LIST outlines;       //outlines in block
                                 //iterator
  C_OUTLINE_IT out_it = &outlines;

#ifndef GRAPHICS_DISABLED
  get_outlines (window, image, t_image, page_tr, (PDBLK *) block, &out_it);
#else
  get_outlines (image, t_image, page_tr, (PDBLK *) block, &out_it);
#endif
                                 //block box
  block->bounding_box (bleft, tright);
                                 //make blobs
  outlines_to_blobs(block, bleft, tright, &outlines);
}


/**********************************************************************
 * outlines_to_blobs
 *
 * Gather together outlines into blobs using the usual bucket sort.
 **********************************************************************/

void outlines_to_blobs(               //find blobs
                       BLOCK *block,  //block to scan
                       ICOORD bleft,
                       ICOORD tright,
                       C_OUTLINE_LIST *outlines) {
                                 //make buckets
  OL_BUCKETS buckets(bleft, tright);

  fill_buckets(outlines, &buckets);
  empty_buckets(block, &buckets);
}


/**********************************************************************
 * fill_buckets
 *
 * Run the edge detector over the block and return a list of blobs.
 **********************************************************************/

void fill_buckets(                           //find blobs
                  C_OUTLINE_LIST *outlines,  //outlines in block
                  OL_BUCKETS *buckets        //output buckets
                 ) {
  TBOX ol_box;                    //outline box
  C_OUTLINE_IT out_it = outlines;//iterator
  C_OUTLINE_IT bucket_it;        //iterator in bucket
  C_OUTLINE *outline;            //current outline

  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
    outline = out_it.extract (); //take off list
                                 //get box
    ol_box = outline->bounding_box ();
    bucket_it.set_to_list ((*buckets) (ol_box.left (), ol_box.bottom ()));
    bucket_it.add_to_end (outline);
  }
}


/**********************************************************************
 * empty_buckets
 *
 * Run the edge detector over the block and return a list of blobs.
 **********************************************************************/

void empty_buckets(                     //find blobs
                   BLOCK *block,        //block to scan
                   OL_BUCKETS *buckets  //output buckets
                  ) {
  BOOL8 good_blob;               //healthy blob
  C_OUTLINE_LIST outlines;       //outlines in block
                                 //iterator
  C_OUTLINE_IT out_it = &outlines;
  C_OUTLINE_IT bucket_it = buckets->start_scan ();
  C_OUTLINE_IT parent_it;        //parent outline
  C_BLOB *blob;                  //new blob
  C_BLOB_IT good_blobs = block->blob_list ();
  C_BLOB_IT junk_blobs = block->reject_blobs ();

  while (!bucket_it.empty ()) {
    out_it.set_to_list (&outlines);
    do {
      parent_it = bucket_it;     //find outermost
      do
      bucket_it.forward ();
      while (!bucket_it.at_first ()
        && !(*parent_it.data () < *bucket_it.data ()));
    }
    while (!bucket_it.at_first ());

                                 //move to new list
    out_it.add_after_then_move (parent_it.extract ());
    good_blob = capture_children (buckets, &junk_blobs, &out_it);
    blob = new C_BLOB (&outlines);
    if (good_blob)
      good_blobs.add_after_then_move (blob);
    else
      junk_blobs.add_after_then_move (blob);

    bucket_it.set_to_list (buckets->scan_next ());
  }
}


/**********************************************************************
 * capture_children
 *
 * Find all neighbouring outlines that are children of this outline
 * and either move them to the output list or declare this outline
 * illegal and return FALSE.
 **********************************************************************/

BOOL8 capture_children(                       //find children
                       OL_BUCKETS *buckets,   //bucket sort clanss
                       C_BLOB_IT *reject_it,  //dead grandchildren
                       C_OUTLINE_IT *blob_it  //output outlines
                      ) {
  C_OUTLINE *outline;            //master outline
  inT32 child_count;             //no of children

  outline = blob_it->data ();
  if (edges_use_new_outline_complexity)
    child_count = buckets->outline_complexity(outline,
                                               edges_children_count_limit,
                                               0);
  else
    child_count = buckets->count_children(outline,
                                           edges_children_count_limit);
  if (child_count > edges_children_count_limit)
    return FALSE;

  if (child_count > 0)
  buckets->extract_children (outline, blob_it);
  return TRUE;
}