C++程序  |  378行  |  12.86 KB

///////////////////////////////////////////////////////////////////////
// File:        tabvector.h
// Description: Class to hold a near-vertical vector representing a tab-stop.
// Author:      Ray Smith
// Created:     Thu Apr 10 16:25:01 PST 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_TEXTORD_TABVECTOR_H__
#define TESSERACT_TEXTORD_TABVECTOR_H__

#include "clst.h"
#include "elst.h"
#include "elst2.h"
#include "rect.h"
#include "bbgrid.h"

#undef TA_CENTER

class BLOBNBOX;
class ScrollView;

CLISTIZEH(BLOBNBOX)

namespace tesseract {

// The alignment type that a tab vector represents.
// Keep this enum synced with kAlignmentNames in tabvector.cpp.
enum TabAlignment {
  TA_LEFT_ALIGNED,
  TA_LEFT_RAGGED,
  TA_CENTER,
  TA_RIGHT_ALIGNED,
  TA_RIGHT_RAGGED,
  TA_SEPARATOR,
  TA_COUNT
};

// Forward declarations. The classes use their own list types, so we
// need to make the list types first.
class TabFind;
class TabVector;
class TabConstraint;
typedef BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> BlobGrid;

ELIST2IZEH(TabVector)
CLISTIZEH(TabVector)
ELISTIZEH(TabConstraint)

// TabConstraint is a totally self-contained class to maintain
// a list of [min,max] constraints, each referring to a TabVector.
// The constraints are manipulated through static methods that act
// on a list of constraints. The list itself is cooperatively owned
// by the TabVectors of the constraints on the list and managed
// by implicit reference counting via the elements of the list.
class TabConstraint : public ELIST_LINK {
 public:
  TabConstraint() {
    // This empty constructor is here only so that the class can be ELISTIZED.
    // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
    // and eliminate CLASSNAME##_copier.
  }

  // Create a constraint for the top or bottom of this TabVector.
  static void CreateConstraint(TabVector* vector, bool is_top);

  // Test to see if the constraints are compatible enough to merge.
  static bool CompatibleConstraints(TabConstraint_LIST* list1,
                                    TabConstraint_LIST* list2);

  // Merge the lists of constraints and update the TabVector pointers.
  // The second list is deleted.
  static void MergeConstraints(TabConstraint_LIST* list1,
                               TabConstraint_LIST* list2);

  // Set all the tops and bottoms as appropriate to a mean of the
  // constrained range. Delete all the constraints and list.
  static void ApplyConstraints(TabConstraint_LIST* constraints);

 private:
  TabConstraint(TabVector* vector, bool is_top);

  // Get the max of the mins and the min of the maxes.
  static void GetConstraints(TabConstraint_LIST* constraints,
                             int* y_min, int* y_max);

  // The TabVector this constraint applies to.
  TabVector* vector_;
  // If true then we refer to the top of the vector_.
  bool is_top_;
  // The allowed range of this vector_.
  int y_min_;
  int y_max_;
};

// Class to hold information about a single vector
// that represents a tab stop or a rule line.
class TabVector : public ELIST2_LINK {
 public:
  TabVector() {
    // TODO(rays) fix this in elst.h line 1076, where it should use the
    // copy constructor instead of operator=.
  }
  ~TabVector();

  // Public factory to build a TabVector from a list of boxes.
  // The TabVector will be of the given alignment type.
  // The input vertical vector is used in fitting, and the output
  // vertical_x, vertical_y have the resulting line vector added to them
  // if the alignment is not ragged.
  // The extended_start_y and extended_end_y are the maximum possible
  // extension to the line segment that can be used to align with others.
  // The input CLIST of BLOBNBOX good_points is consumed and taken over.
  static TabVector* FitVector(TabAlignment alignment, ICOORD vertical,
                              int  extended_start_y, int extended_end_y,
                              BLOBNBOX_CLIST* good_points,
                              int* vertical_x, int* vertical_y);

  // Build a ragged TabVector by copying another's direction, shifting it
  // to match the given blob, and making its initial extent the height
  // of the blob, but its extended bounds from the bounds of the original.
  TabVector(const TabVector& src, TabAlignment alignment,
            const ICOORD& vertical_skew, BLOBNBOX* blob);

  // Simple accessors.
  const ICOORD& startpt() const {
    return startpt_;
  }
  const ICOORD& endpt() const {
    return endpt_;
  }
  int extended_ymax() const {
    return extended_ymax_;
  }
  int extended_ymin() const {
    return extended_ymin_;
  }
  int sort_key() const {
    return sort_key_;
  }
  void set_top_constraints(TabConstraint_LIST* constraints) {
    top_constraints_ = constraints;
  }
  void set_bottom_constraints(TabConstraint_LIST* constraints) {
    bottom_constraints_ = constraints;
  }
  TabVector_CLIST* partners() {
    return &partners_;
  }

  // Inline quasi-accessors that require some computation.

  // Compute the x coordinate at the given y coordinate.
  int XAtY(int y) const {
    int height = endpt_.y() - startpt_.y();
    if (height != 0)
      return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height +
             startpt_.x();
    else
      return startpt_.x();
  }

  // Compute the vertical overlap with the other TabVector.
  int VOverlap(const TabVector& other) const {
    return MIN(other.endpt_.y(), endpt_.y()) -
           MAX(other.startpt_.y(), startpt_.y());
  }
  // Compute the vertical overlap with the given y bounds.
  int VOverlap(int top_y, int bottom_y) const {
    return MIN(top_y, endpt_.y()) - MAX(bottom_y, startpt_.y());
  }
  // Compute the extended vertical overlap with the given y bounds.
  int ExtendedOverlap(int top_y, int bottom_y) const {
    return MIN(top_y, extended_ymax_) - MAX(bottom_y, extended_ymin_);
  }

  // Return true if this is a left tab stop, either aligned, or ragged.
  bool IsLeftTab() const {
    return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
  }
  // Return true if this is a right tab stop, either aligned, or ragged.
  bool IsRightTab() const {
    return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
  }
  // Return true if this is a separator.
  bool IsSeparator() const {
    return alignment_ == TA_SEPARATOR;
  }
  // Return true if this is a ragged tab top, either left or right.
  bool IsRagged() const {
    return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
  }

  // Return true if this vector is to the left of the other in terms
  // of sort_key_.
  bool IsLeftOf(const TabVector& other) const {
    return sort_key_ < other.sort_key_;
  }

  // Return true if the vector has no partners.
  bool Partnerless() {
    return partners_.empty();
  }

  // Return the number of tab boxes in this vector.
  int BoxCount() {
    return boxes_.length();
  }

  // Lock the vector from refits by clearing the boxes_ list.
  void Freeze() {
    boxes_.shallow_clear();
  }

  // Flip x and y on the ends so a vector can be created from flipped input.
  void XYFlip() {
    int x = startpt_.y();
    startpt_.set_y(startpt_.x());
    startpt_.set_x(x);
    x = endpt_.y();
    endpt_.set_y(endpt_.x());
    endpt_.set_x(x);
  }

  // Separate function to compute the sort key for a given coordinate pair.
  static int SortKey(const ICOORD& vertical, int x, int y) {
    ICOORD pt(x, y);
    return pt * vertical;
  }

  // Return the x at the given y for the given sort key.
  static int XAtY(const ICOORD& vertical, int sort_key, int y) {
    if (vertical.y() != 0)
      return (vertical.x() * y + sort_key) / vertical.y();
    else
      return sort_key;
  }

  // Sort function for E2LIST::sort to sort by sort_key_.
  static int SortVectorsByKey(const void* v1, const void* v2) {
    const TabVector* tv1 = *reinterpret_cast<const TabVector* const *>(v1);
    const TabVector* tv2 = *reinterpret_cast<const TabVector* const *>(v2);
    return tv1->sort_key_ - tv2->sort_key_;
  }

  // More complex members.

  // Extend this vector to include the supplied blob if it doesn't
  // already have it.
  void ExtendToBox(BLOBNBOX* blob);

  // Set the ycoord of the start and move the xcoord to match.
  void SetYStart(int start_y);
  // Set the ycoord of the end and move the xcoord to match.
  void SetYEnd(int end_y);

  // Rotate the ends by the given vector.
  void Rotate(const FCOORD& rotation);

  // Setup the initial constraints, being the limits of
  // the vector and the extended ends.
  void SetupConstraints();

  // Setup the constraints between the partners of this TabVector.
  void SetupPartnerConstraints();

  // Setup the constraints between this and its partner.
  void SetupPartnerConstraints(TabVector* partner);

  // Use the constraints to modify the top and bottom.
  void ApplyConstraints();

  // Merge close tab vectors of the same side that overlap.
  static void MergeSimilarTabVectors(const ICOORD& vertical,
                                     TabVector_LIST* vectors, BlobGrid* grid);

  // Return true if this vector is the same side, overlaps, and close
  // enough to the other to be merged.
  bool SimilarTo(const ICOORD& vertical,
                 const TabVector& other, BlobGrid* grid) const;

  // Eat the other TabVector into this and delete it.
  void MergeWith(const ICOORD& vertical, TabVector* other);

  // Add a new element to the list of partner TabVectors.
  // Partners must be added in order of increasing y coordinate of the text line
  // that makes them partners.
  // Groups of identical partners are merged into one.
  void AddPartner(TabVector* partner);

  // Return true if other is a partner of this.
  bool IsAPartner(const TabVector* other);

  // Print basic information about this tab vector.
  void Print(const char* prefix);

  // Print basic information about this tab vector and every box in it.
  void Debug(const char* prefix);

  // Draw this tabvector in place in the given window.
  void Display(ScrollView* tab_win);

  // Refit the line and/or re-evaluate the vector if the dirty flags are set.
  void FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder);

  // Evaluate the vector in terms of coverage of its length by good-looking
  // box edges. A good looking box is one where its nearest neighbour on the
  // inside is nearer than half the distance its nearest neighbour on the
  // outside of the putative column. Bad boxes are removed from the line.
  // A second pass then further filters boxes by requiring that the gutter
  // width be a minimum fraction of the mean gutter along the line.
  void Evaluate(const ICOORD& vertical, TabFind* finder);

  // (Re)Fit a line to the stored points. Returns false if the line
  // is degenerate.
  bool Fit(ICOORD vertical, bool force_parallel);

  // Return the partner of this TabVector if the vector qualifies as
  // being a vertical text line, otherwise NULL.
  TabVector* VerticalTextlinePartner();

 private:
  // Constructor is private as the static factory is the external way
  // to build a TabVector.
  TabVector(int extended_ymin, int extended_ymax,
            TabAlignment alignment, BLOBNBOX_CLIST* boxes);

  // Delete this, but first, repoint all the partners to point to
  // replacement. If replacement is NULL, then partner relationships
  // are removed.
  void Delete(TabVector* replacement);

 private:
  // The bottom of the tab line.
  ICOORD startpt_;
  // The top of the tab line.
  ICOORD endpt_;
  // The lowest y that the vector might extend to.
  int extended_ymin_;
  // The highest y that the vector might extend to.
  int extended_ymax_;
  // Perpendicular distance of vector from a given vertical for sorting.
  int sort_key_;
  // Result of Evaluate 0-100. Coverage of line with good boxes.
  int percent_score_;
  // True if the boxes_ list has been modified, so a refit is needed.
  bool needs_refit_;
  // True if a fit has been done, so re-evaluation is needed.
  bool needs_evaluation_;
  // The type of this TabVector.
  TabAlignment alignment_;
  // The list of boxes whose edges are aligned at this TabVector.
  BLOBNBOX_CLIST boxes_;
  // List of TabVectors that have a connection with this via a text line.
  TabVector_CLIST partners_;
  // Constraints used to resolve the exact location of the top and bottom
  // of the tab line.
  TabConstraint_LIST* top_constraints_;
  TabConstraint_LIST* bottom_constraints_;
};

}  // namespace tesseract.

#endif  // TESSERACT_TEXTORD_TABVECTOR_H__