/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - This software is distributed in the hope that it will be - useful, but with NO WARRANTY OF ANY KIND. - No author or distributor accepts responsibility to anyone for the - consequences of using this software, or for whether it serves any - particular purpose or works at all, unless he or she says so in - writing. Everyone is granted permission to copy, modify and - redistribute this source code, for commercial or non-commercial - purposes, with the following restrictions: (1) the origin of this - source code must not be misrepresented; (2) modified versions must - be plainly marked as such; and (3) this notice may not be removed - or altered from any source or modified source distribution. *====================================================================*/ /* * baseline.c * * Locate text baselines in an image * NUMA *pixFindBaselines() * * Projective transform to remove local skew * PIX *pixDeskewLocal() * * Determine local skew * l_int32 pixGetLocalSkewTransform() * NUMA *pixGetLocalSkewAngles() * * We have two apparently different functions here: * - finding baselines * - finding a projective transform to remove keystone warping * The function pixGetLocalSkewAngles() returns an array of angles, * one for each raster line, and the baselines of the text lines * should intersect the left edge of the image with that angle. */ #include <stdio.h> #include <stdlib.h> #include <math.h> #include "allheaders.h" #ifndef NO_CONSOLE_IO #define DEBUG_PLOT 0 #endif /* NO_CONSOLE_IO */ /* Min to travel after finding max before abandoning peak */ static const l_int32 MIN_DIST_IN_PEAK = 35; /* Thresholds for peaks and zeros, relative to the max peak */ static const l_int32 PEAK_THRESHOLD_RATIO = 20; static const l_int32 ZERO_THRESHOLD_RATIO = 100; /* Default values for determining local skew */ static const l_int32 DEFAULT_SLICES = 10; static const l_int32 DEFAULT_SWEEP_REDUCTION = 2; static const l_int32 DEFAULT_BS_REDUCTION = 1; static const l_float32 DEFAULT_SWEEP_RANGE = 5.; /* degrees */ static const l_float32 DEFAULT_SWEEP_DELTA = 1.; /* degrees */ static const l_float32 DEFAULT_MINBS_DELTA = 0.01; /* degrees */ /* Overlap slice fraction added to top and bottom of each slice */ static const l_float32 OVERLAP_FRACTION = 0.5; /* Minimum allowed confidence (ratio) for accepting a value */ static const l_float32 MIN_ALLOWED_CONFIDENCE = 3.0; /*---------------------------------------------------------------------* * Locate text baselines in an image * *---------------------------------------------------------------------*/ /*! * pixFindBaselines() * * Input: pixs (1 bpp) * &pta (<optional return> pairs of pts corresponding to * approx. ends of each text line) * debug (usually 0; set to 1 for debugging output) * Return: na (of baseline y values), or null on error * * Notes: * (1) Input binary image must have text lines already aligned * horizontally. This can be done by either rotating the * image with pixDeskew(), or, if a projective transform * is required, by doing pixDeskewLocal() first. * (2) Input null for &pta if you don't want this returned. * The pta will come in pairs of points (left and right end * of each baseline). * (3) Caution: this will not work properly on text with multiple * columns, where the lines are not aligned between columns. * If there are multiple columns, they should be extracted * separately before finding the baselines. * (4) This function constructs different types of output * for baselines; namely, a set of raster line values and * a set of end points of each baseline. * (5) This function was designed to handle short and long text lines * without using dangerous thresholds on the peak heights. It does * this by combining the differential signal with a morphological * analysis of the locations of the text lines. One can also * combine this data to normalize the peak heights, by weighting * the differential signal in the region of each baseline * by the inverse of the width of the text line found there. * (6) There are various debug sections that can be turned on * with the debug flag. */ NUMA * pixFindBaselines(PIX *pixs, PTA **ppta, l_int32 debug) { l_int32 w, h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh; l_int32 imaxloc, peakthresh, zerothresh, inpeak; l_int32 mintosearch, max, maxloc, nloc, locval; l_int32 *array; l_float32 maxval; BOXA *boxa1, *boxa2, *boxa3; GPLOT *gplot; NUMA *nasum, *nadiff, *naloc, *naval; PIX *pixt1, *pixt2; PTA *pta; PROCNAME("pixFindBaselines"); if (!pixs) return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); pta = NULL; if (ppta) { pta = ptaCreate(0); *ppta = pta; } /* Close up the text characters, removing noise */ pixt1 = pixMorphSequence(pixs, "c25.1 + e3.1", 0); /* Save the difference of adjacent row sums. * The high positive-going peaks are the baselines */ if ((nasum = pixCountPixelsByRow(pixt1, NULL)) == NULL) return (NUMA *)ERROR_PTR("nasum not made", procName, NULL); w = pixGetWidth(pixs); h = pixGetHeight(pixs); nadiff = numaCreate(h); numaGetIValue(nasum, 0, &val2); for (i = 0; i < h - 1; i++) { val1 = val2; numaGetIValue(nasum, i + 1, &val2); numaAddNumber(nadiff, val1 - val2); } if (debug) /* show the difference signal */ gplotSimple1(nadiff, GPLOT_X11, "junkdiff", "difference"); /* Use the zeroes of the profile to locate each baseline. */ array = numaGetIArray(nadiff); ndiff = numaGetCount(nadiff); numaGetMax(nadiff, &maxval, &imaxloc); /* Use this to begin locating a new peak: */ peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO; /* Use this to begin a region between peaks: */ zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO; naloc = numaCreate(0); naval = numaCreate(0); inpeak = FALSE; for (i = 0; i < ndiff; i++) { if (inpeak == FALSE) { if (array[i] > peakthresh) { /* transition to in-peak */ inpeak = TRUE; mintosearch = i + MIN_DIST_IN_PEAK; /* accept no zeros * between i and mintosearch */ max = array[i]; maxloc = i; } } else { /* inpeak == TRUE; look for max */ if (array[i] > max) { max = array[i]; maxloc = i; mintosearch = i + MIN_DIST_IN_PEAK; } else if (i > mintosearch && array[i] <= zerothresh) { /* leave */ inpeak = FALSE; numaAddNumber(naval, max); numaAddNumber(naloc, maxloc); } } } /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */ if (inpeak) { numaAddNumber(naval, max); numaAddNumber(naloc, maxloc); } FREE(array); if (debug) { /* show the raster locations for the peaks */ gplot = gplotCreate("junkloc", GPLOT_X11, "Peak locations", "rasterline", "height"); gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs"); gplotMakeOutput(gplot); gplotDestroy(&gplot); } /* Generate an approximate profile of text line width. * First, filter the boxes of text, where there may be * more than one box for a given textline. */ pixt2 = pixMorphSequence(pixt1, "r11 + c25.1 + o7.1 +c1.3", 0); boxa1 = pixConnComp(pixt2, NULL, 4); boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.); boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); /* Then find the baseline segments */ if (pta) { nloc = numaGetCount(naloc); nbox = boxaGetCount(boxa3); for (i = 0; i < nbox; i++) { boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh); for (j = 0; j < nloc; j++) { numaGetIValue(naloc, j, &locval); if (L_ABS(locval - (by + bh)) > 25) continue; ptaAddPt(pta, bx, locval); ptaAddPt(pta, bx + bw, locval); break; } } } if (debug) { /* display baselines */ PIX *pixd; l_int32 npts, x1, y1, x2, y2; if (pta) { pixd = pixConvertTo32(pixs); npts = ptaGetCount(pta); for (i = 0; i < npts; i += 2) { ptaGetIPt(pta, i, &x1, &y1); ptaGetIPt(pta, i + 1, &x2, &y2); pixRenderLineArb(pixd, x1, y1, x2, y2, 1, 255, 0, 0); } pixDisplay(pixd, 200, 200); pixWrite("junkbaselines", pixd, IFF_PNG); pixDestroy(&pixd); } } boxaDestroy(&boxa1); boxaDestroy(&boxa2); boxaDestroy(&boxa3); pixDestroy(&pixt1); pixDestroy(&pixt2); numaDestroy(&nasum); numaDestroy(&nadiff); numaDestroy(&naval); return naloc; } /*---------------------------------------------------------------------* * Projective transform to remove local skew * *---------------------------------------------------------------------*/ /*! * pixDeskewLocal() * * Input: pixs * nslices (the number of horizontal overlapping slices; must * be larger than 1 and not exceed 20; use 0 for default) * redsweep (sweep reduction factor: 1, 2, 4 or 8; * use 0 for default value) * redsearch (search reduction factor: 1, 2, 4 or 8, and * not larger than redsweep; use 0 for default value) * sweeprange (half the full range, assumed about 0; in degrees; * use 0.0 for default value) * sweepdelta (angle increment of sweep; in degrees; * use 0.0 for default value) * minbsdelta (min binary search increment angle; in degrees; * use 0.0 for default value) * Return: pixd, or null on error * * Notes: * (1) This function allows deskew of a page whose skew changes * approximately linearly with vertical position. It uses * a projective tranform that in effect does a differential * shear about the LHS of the page, and makes all text lines * horizontal. * (2) The origin of the keystoning can be either a cheap document * feeder that rotates the page as it is passed through, or a * camera image taken from either the left or right side * of the vertical. * (3) The image transformation is a projective warping, * not a rotation. Apart from this function, the text lines * must be properly aligned vertically with respect to each * other. This can be done by pre-processing the page; e.g., * by rotating or horizontally shearing it. * Typically, this can be achieved by vertically aligning * the page edge. */ PIX * pixDeskewLocal(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta) { l_int32 ret; PIX *pixd; PTA *ptas, *ptad; PROCNAME("pixDeskewLocal"); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); /* Skew array gives skew angle (deg) as fctn of raster line * where it intersects the LHS of the image */ ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch, sweeprange, sweepdelta, minbsdelta, &ptas, &ptad); if (ret != 0) return (PIX *)ERROR_PTR("transform pts not found", procName, NULL); /* Use a projective transform */ pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE); ptaDestroy(&ptas); ptaDestroy(&ptad); return pixd; } /*---------------------------------------------------------------------* * Determine the local skew * *---------------------------------------------------------------------*/ /*! * pixGetLocalSkewTransform() * * Input: pixs * nslices (the number of horizontal overlapping slices; must * be larger than 1 and not exceed 20; use 0 for default) * redsweep (sweep reduction factor: 1, 2, 4 or 8; * use 0 for default value) * redsearch (search reduction factor: 1, 2, 4 or 8, and * not larger than redsweep; use 0 for default value) * sweeprange (half the full range, assumed about 0; in degrees; * use 0.0 for default value) * sweepdelta (angle increment of sweep; in degrees; * use 0.0 for default value) * minbsdelta (min binary search increment angle; in degrees; * use 0.0 for default value) * &ptas (<return> 4 points in the source) * &ptad (<return> the corresponding 4 pts in the dest) * Return: 0 if OK, 1 on error * * Notes: * (1) This generates two pairs of points in the src, each pair * corresponding to a pair of points that would lie along * the same raster line in a transformed (dewarped) image. * (2) The sets of 4 src and 4 dest points returned by this function * can then be used, in a projective or bilinear transform, * to remove keystoning in the src. */ l_int32 pixGetLocalSkewTransform(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, PTA **pptas, PTA **pptad) { l_int32 w, h, i; l_float32 deg2rad, angr, angd, dely; NUMA *naskew; PTA *ptas, *ptad; PROCNAME("pixGetLocalSkewTransform"); if (!pptas || !pptad) return ERROR_INT("&ptas and &ptad not defined", procName, 1); *pptas = *pptad = NULL; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (nslices < 2 || nslices > 20) nslices = DEFAULT_SLICES; if (redsweep < 1 || redsweep > 8) redsweep = DEFAULT_SWEEP_REDUCTION; if (redsearch < 1 || redsearch > redsweep) redsearch = DEFAULT_BS_REDUCTION; if (sweeprange == 0.0) sweeprange = DEFAULT_SWEEP_RANGE; if (sweepdelta == 0.0) sweepdelta = DEFAULT_SWEEP_DELTA; if (minbsdelta == 0.0) minbsdelta = DEFAULT_MINBS_DELTA; naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch, sweeprange, sweepdelta, minbsdelta, NULL, NULL); if (!naskew) return ERROR_INT("naskew not made", procName, 1); deg2rad = 3.14159265 / 180.; w = pixGetWidth(pixs); h = pixGetHeight(pixs); ptas = ptaCreate(4); ptad = ptaCreate(4); *pptas = ptas; *pptad = ptad; /* Find i for skew line that intersects LHS at i and RHS at h / 20 */ for (i = 0; i < h; i++) { numaGetFValue(naskew, i, &angd); angr = angd * deg2rad; dely = w * tan(angr); if (i - dely > 0.05 * h) break; } ptaAddPt(ptas, 0, i); ptaAddPt(ptas, w - 1, i - dely); ptaAddPt(ptad, 0, i); ptaAddPt(ptad, w - 1, i); /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */ for (i = h - 1; i > 0; i--) { numaGetFValue(naskew, i, &angd); angr = angd * deg2rad; dely = w * tan(angr); if (i - dely < 0.95 * h) break; } ptaAddPt(ptas, 0, i); ptaAddPt(ptas, w - 1, i - dely); ptaAddPt(ptad, 0, i); ptaAddPt(ptad, w - 1, i); numaDestroy(&naskew); return 0; } /*! * pixGetLocalSkewAngles() * * Input: pixs * nslices (the number of horizontal overlapping slices; must * be larger than 1 and not exceed 20; use 0 for default) * redsweep (sweep reduction factor: 1, 2, 4 or 8; * use 0 for default value) * redsearch (search reduction factor: 1, 2, 4 or 8, and * not larger than redsweep; use 0 for default value) * sweeprange (half the full range, assumed about 0; in degrees; * use 0.0 for default value) * sweepdelta (angle increment of sweep; in degrees; * use 0.0 for default value) * minbsdelta (min binary search increment angle; in degrees; * use 0.0 for default value) * &a (<optional return> slope of skew as fctn of y) * &b (<optional return> intercept at y=0 of skew as fctn of y) * Return: naskew, or null on error * * Notes: * (1) The local skew is measured in a set of overlapping strips. * We then do a least square linear fit parameters to get * the slope and intercept parameters a and b in * skew-angle = a * y + b (degrees) * for the local skew as a function of raster line y. * This is then used to make naskew, which can be interpreted * as the computed skew angle (in degrees) at the left edge * of each raster line. * (2) naskew can then be used to find the baselines of text, because * each text line has a baseline that should intersect * the left edge of the image with the angle given by this * array, evaluated at the raster line of intersection. */ NUMA * pixGetLocalSkewAngles(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_float32 *pa, l_float32 *pb) { l_int32 w, h, hs, i, ystart, yend, ovlap, npts; l_float32 angle, conf, ycenter, a, b; BOX *box; NUMA *naskew; PIX *pix; PTA *pta; PROCNAME("pixGetLocalSkewAngles"); if (!pixs) return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL); if (nslices < 2 || nslices > 20) nslices = DEFAULT_SLICES; if (redsweep < 1 || redsweep > 8) redsweep = DEFAULT_SWEEP_REDUCTION; if (redsearch < 1 || redsearch > redsweep) redsearch = DEFAULT_BS_REDUCTION; if (sweeprange == 0.0) sweeprange = DEFAULT_SWEEP_RANGE; if (sweepdelta == 0.0) sweepdelta = DEFAULT_SWEEP_DELTA; if (minbsdelta == 0.0) minbsdelta = DEFAULT_MINBS_DELTA; h = pixGetHeight(pixs); w = pixGetWidth(pixs); hs = h / nslices; ovlap = (l_int32)(OVERLAP_FRACTION * hs); pta = ptaCreate(nslices); for (i = 0; i < nslices; i++) { ystart = L_MAX(0, hs * i - ovlap); yend = L_MIN(h - 1, hs * (i + 1) + ovlap); ycenter = (ystart + yend) / 2; box = boxCreate(0, ystart, w, yend - ystart + 1); pix = pixClipRectangle(pixs, box, NULL); pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch, sweeprange, sweepdelta, minbsdelta); if (conf > MIN_ALLOWED_CONFIDENCE) ptaAddPt(pta, ycenter, angle); pixDestroy(&pix); boxDestroy(&box); } /* ptaWriteStream(stderr, pta, 0); */ /* Do linear least squares fit */ if ((npts = ptaGetCount(pta)) < 2) { ptaDestroy(&pta); return (NUMA *)ERROR_PTR("can't fit skew", procName, NULL); } ptaGetLinearLSF(pta, &a, &b, NULL); if (pa) *pa = a; if (pb) *pb = b; /* Make skew angle array as function of raster line */ naskew = numaCreate(h); for (i = 0; i < h; i++) { angle = a * i + b; numaAddNumber(naskew, angle); } #if DEBUG_PLOT { NUMA *nax, *nay; GPLOT *gplot; ptaGetArrays(pta, &nax, &nay); gplot = gplotCreate("junkskew", GPLOT_X11, "skew as fctn of y", "y (in raster lines from top)", "angle (in degrees)"); gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf"); gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&nax); numaDestroy(&nay); } #endif /* DEBUG_PLOT */ ptaDestroy(&pta); return naskew; }