/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - This software is distributed in the hope that it will be - useful, but with NO WARRANTY OF ANY KIND. - No author or distributor accepts responsibility to anyone for the - consequences of using this software, or for whether it serves any - particular purpose or works at all, unless he or she says so in - writing. Everyone is granted permission to copy, modify and - redistribute this source code, for commercial or non-commercial - purposes, with the following restrictions: (1) the origin of this - source code must not be misrepresented; (2) modified versions must - be plainly marked as such; and (3) this notice may not be removed - or altered from any source or modified source distribution. *====================================================================*/ /* * jbclass.c * * These are functions for unsupervised classification of * collections of connected components -- either characters or * words -- in binary images. They can be used as image * processing steps in jbig2 compression. * * Initialization * * JBCLASSER *jbRankHausInit() [rank hausdorff encoder] * JBCLASSER *jbCorrelationInit() [correlation encoder] * JBCLASSER *jbCorrelationInitWithoutComponents() [ditto] * static JBCLASSER *jbCorrelationInitInternal() * * Classify the pages * * l_int32 jbAddPages() * l_int32 jbAddPage() * l_int32 jbAddPageComponents() * * Rank hausdorff classifier * * l_int32 jbClassifyRankHaus() * l_int32 pixHaustest() * l_int32 pixRankHaustest() * * Binary correlation classifier * * l_int32 jbClassifyCorrelation() * * Determine the image components we start with * * l_int32 jbGetComponents() * PIX *pixWordMaskByDilation() * * Build grayscale composites (templates) * * PIXA *jbAccumulateComposites * PIXA *jbTemplatesFromComposites * * Utility functions for Classer * * JBCLASSER *jbClasserCreate() * void jbClasserDestroy() * * Utility functions for Data * * JBDATA *jbDataSave() * void jbDataDestroy() * l_int32 jbDataWrite() * JBDATA *jbDataRead() * PIXA *jbDataRender() * l_int32 jbGetULCorners() * l_int32 jbGetLLCorners() * * Static helpers * * static JBFINDCTX *findSimilarSizedTemplatesInit() * static l_int32 findSimilarSizedTemplatesNext() * static void findSimilarSizedTemplatesDestroy() * static l_int32 finalPositioningForAlignment() * * Note: this is NOT an implementation of the JPEG jbig2 * proposed standard encoder, the specifications for which * can be found at http://www.jpeg.org/jbigpt2.html. * (See below for a full implementation.) * It is an implementation of the lower-level part of an encoder that: * * (1) identifies connected components that are going to be used * (2) puts them in similarity classes (this is an unsupervised * classifier), and * (3) stores the result in a simple file format (2 files, * one for templates and one for page/coordinate/template-index * quartets). * * An actual implementation of the official jbig2 encoder could * start with parts (1) and (2), and would then compress the quartets * according to the standards requirements (e.g., Huffman or * arithmetic coding of coordinate differences and image templates). * * The low-level part of the encoder provided here has the * following useful features: * * - It is accurate in the identification of templates * and classes because it uses a windowed hausdorff * distance metric. * - It is accurate in the placement of the connected * components, doing a two step process of first aligning * the the centroids of the template with those of each instance, * and then making a further correction of up to +- 1 pixel * in each direction to best align the templates. * - It is fast because it uses a morphologically based * matching algorithm to implement the hausdorff criterion, * and it selects the patterns that are possible matches * based on their size. * * We provide two different matching functions, one using Hausdorff * distance and one using a simple image correlation. * The Hausdorff method sometimes produces better results for the * same number of classes, because it gives a relatively small * effective weight to foreground pixels near the boundary, * and a relatively large weight to foreground pixels that are * not near the boundary. By effectively ignoring these boundary * pixels, Hausdorff weighting corresponds better to the expected * probabilities of the pixel values in a scanned image, where the * variations in instances of the same printed character are much * more likely to be in pixels near the boundary. By contrast, * the correlation method gives equal weight to all foreground pixels. * * For best results, use the correlation method. Correlation takes * the number of fg pixels in the AND of instance and template, * divided by the product of the number of fg pixels in instance * and template. It compares this with a threshold that, in * general, depends on the fractional coverage of the template. * For heavy text, the threshold is raised above that for light * text, By using both these parameters (basic threshold and * adjustment factor for text weight), one has more flexibility * and can arrive at the fewest substitution errors, although * this comes at the price of more templates. * * The strict Hausdorff scoring is not a rank weighting, because a * single pixel beyond the given distance will cause a match * failure. A rank Hausdorff is more robust to non-boundary noise, * but it is also more susceptible to confusing components that * should be in different classes. For implementing a jbig2 * application for visually lossless binary image compression, * you have two choices: * * (1) use a 3x3 structuring element (size = 3) and a strict * Hausdorff comparison (rank = 1.0 in the rank Hausdorff * function). This will result in a minimal number of classes, * but confusion of small characters, such as italic and * non-italic lower-case 'o', can still occur. * (2) use the correlation method with a threshold of 0.85 * and a weighting factor of about 0.7. This will result in * a larger number of classes, but should not be confused * either by similar small characters or by extremely * thick sans serif characters, such as in prog/cootoots.png. * * As mentioned above, if visual substitution errors must be * avoided, you should use the correlation method. * * We provide executables that show how to do the encoding: * prog/jbrankhaus.c * prog/jbcorrelation.c * * The basic flow for correlation classification goes as follows, * where specific choices have been made for parameters (Hausdorff * is the same except for initialization): * * // Initialize and save data in the classer * JBCLASSER *classer = * jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7); * SARRAY *safiles = getSortedPathnamesInDirectory(directory, * NULL, 0, 0); * jbAddPages(classer, safiles); * * // Save the data in a data structure for serialization, * // and write it into two files. * JBDATA *data = jbDataSave(classer); * jbDataWrite(rootname, data); * * // Reconstruct (render) the pages from the encoded data. * PIXA *pixa = jbDataRender(data, FALSE); * * Adam Langley has recently built a jbig2 standards-compliant * encoder, the first one to appear in open source. You can get * this encoder at: * * http://www.imperialviolet.org/jbig2.html * * It uses arithmetic encoding throughout. It encodes binary images * losslessly with a single arithmetic coding over the full image. * It also does both lossy and lossless encoding from connected * components, using leptonica to generate the templates representing * each cluster. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include "allheaders.h" /* MSVC can't handle arrays dimensioned by static const integers */ #define L_BUF_SIZE 512 /* For jbClassifyRankHaus(): size of border added around * pix of each c.c., to allow further processing. This * should be at least the sum of the MAX_DIFF_HEIGHT * (or MAX_DIFF_WIDTH) and one-half the size of the Sel */ static const l_int32 JB_ADDED_PIXELS = 6; /* For pixHaustest(), pixRankHaustest() and pixCorrelationScore(): * choose these to be 2 or greater */ static const l_int32 MAX_DIFF_WIDTH = 2; /* use at least 2 */ static const l_int32 MAX_DIFF_HEIGHT = 2; /* use at least 2 */ /* In initialization, you have the option to discard components * (cc, characters or words) that have either width or height larger * than a given size. This is convenient for jbDataSave(), because * the components are placed onto a regular lattice with cell * dimension equal to the maximum component size. The default * values are given here. If you want to save all components, * use a sufficiently large set of dimensions. */ static const l_int32 MAX_CONN_COMP_WIDTH = 350; /* default max cc width */ static const l_int32 MAX_CHAR_COMP_WIDTH = 350; /* default max char width */ static const l_int32 MAX_WORD_COMP_WIDTH = 1000; /* default max word width */ static const l_int32 MAX_COMP_HEIGHT = 120; /* default max component height */ /* Max allowed dilation to merge characters into words */ #define MAX_ALLOWED_DILATION 14 /* This stores the state of a state machine which fetches * similar sized templates */ struct JbFindTemplatesState { JBCLASSER *classer; /* classer */ l_int32 w; /* desired width */ l_int32 h; /* desired height */ l_int32 i; /* index into two_by_two step array */ NUMA *numa; /* current number array */ l_int32 n; /* current element of numa */ }; typedef struct JbFindTemplatesState JBFINDCTX; /* Static initialization function */ static JBCLASSER * jbCorrelationInitInternal(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor, l_int32 keep_components); /* Static helper functions */ static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs); static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context); static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext); static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y, l_int32 idelx, l_int32 idely, PIX *pixt, l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy); #ifndef NO_CONSOLE_IO #define DEBUG_PLOT_CC 0 #define DEBUG_CORRELATION_SCORE 0 #endif /* ~NO_CONSOLE_IO */ /*----------------------------------------------------------------------* * Initialization * *----------------------------------------------------------------------*/ /*! * jbRankHausInit() * * Input: components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) * maxwidth (of component; use 0 for default) * maxheight (of component; use 0 for default) * size (of square structuring element; 2, representing * 2x2 sel, is necessary for reasonable accuracy of * small components; combine this with rank ~ 0.97 * to avoid undue class expansion) * rank (rank val of match, each way; in [0.5 - 1.0]; * when using size = 2, 0.97 is a reasonable value) * Return: jbclasser if OK; NULL on error */ JBCLASSER * jbRankHausInit(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_int32 size, l_float32 rank) { JBCLASSER *classer; PROCNAME("jbRankHausInit"); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL); if (size < 1 || size > 10) return (JBCLASSER *)ERROR_PTR("size not reasonable", procName, NULL); if (rank < 0.5 || rank > 1.0) return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", procName, NULL); if (maxwidth == 0) { if (components == JB_CONN_COMPS) maxwidth = MAX_CONN_COMP_WIDTH; else if (components == JB_CHARACTERS) maxwidth = MAX_CHAR_COMP_WIDTH; else /* JB_WORDS */ maxwidth = MAX_WORD_COMP_WIDTH; } if (maxheight == 0) maxheight = MAX_COMP_HEIGHT; if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL) return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL); classer->maxwidth = maxwidth; classer->maxheight = maxheight; classer->sizehaus = size; classer->rankhaus = rank; classer->nahash = numaHashCreate(5507, 4); /* 5507 is prime */ return classer; } /*! * jbCorrelationInit() * * Input: components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) * maxwidth (of component; use 0 for default) * maxheight (of component; use 0 for default) * thresh (value for correlation score: in [0.4 - 0.9]) * weightfactor (corrects thresh for thick characters [0.0 - 1.0]) * Return: jbclasser if OK; NULL on error */ JBCLASSER * jbCorrelationInit(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor) { return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, weightfactor, 1); } /*! * jbCorrelationInitWithoutComponents() * * Input: same as jbCorrelationInit * Output: same as jbCorrelationInit * * Note: acts the same as jbCorrelationInit(), but the resulting * object doesn't keep a list of all the components. */ JBCLASSER * jbCorrelationInitWithoutComponents(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor) { return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, weightfactor, 0); } static JBCLASSER * jbCorrelationInitInternal(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor, l_int32 keep_components) { JBCLASSER *classer; PROCNAME("jbCorrelationInitInternal"); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL); if (thresh < 0.4 || thresh > 0.9) return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.9]", procName, NULL); if (weightfactor < 0.0 || weightfactor > 1.0) return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]", procName, NULL); if (maxwidth == 0) { if (components == JB_CONN_COMPS) maxwidth = MAX_CONN_COMP_WIDTH; else if (components == JB_CHARACTERS) maxwidth = MAX_CHAR_COMP_WIDTH; else /* JB_WORDS */ maxwidth = MAX_WORD_COMP_WIDTH; } if (maxheight == 0) maxheight = MAX_COMP_HEIGHT; if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL) return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL); classer->maxwidth = maxwidth; classer->maxheight = maxheight; classer->thresh = thresh; classer->weightfactor = weightfactor; classer->nahash = numaHashCreate(5507, 4); /* 5507 is prime */ classer->keep_pixaa = keep_components; return classer; } /*----------------------------------------------------------------------* * Classify the pages * *----------------------------------------------------------------------*/ /*! * jbAddPages() * * Input: jbclasser * safiles (of page image file names) * Return: 0 if OK; 1 on error * * Note: * (1) jbclasser makes a copy of the array of file names. * (2) The caller is still responsible for destroying the input array. */ l_int32 jbAddPages(JBCLASSER *classer, SARRAY *safiles) { l_int32 i, nfiles; char *fname; PIX *pix; PROCNAME("jbAddPages"); if (!classer) return ERROR_INT("classer not defined", procName, 1); if (!safiles) return ERROR_INT("safiles not defined", procName, 1); classer->safiles = sarrayCopy(safiles); nfiles = sarrayGetCount(safiles); for (i = 0; i < nfiles; i++) { fname = sarrayGetString(safiles, i, 0); if ((pix = pixRead(fname)) == NULL) { L_WARNING_INT("image file %d not read", procName, i); continue; } jbAddPage(classer, pix); pixDestroy(&pix); } return 0; } /*! * jbAddPage() * * Input: jbclasser * pixs (of input page) * Return: 0 if OK; 1 on error */ l_int32 jbAddPage(JBCLASSER *classer, PIX *pixs) { BOXA *boxas; PIXA *pixas; PROCNAME("jbAddPage"); if (!classer) return ERROR_INT("classer not defined", procName, 1); if (!pixs) return ERROR_INT("pix not defined", procName, 1); classer->w = pixGetWidth(pixs); classer->h = pixGetHeight(pixs); /* Get the appropriate components and their bounding boxes */ if (jbGetComponents(pixs, classer->components, classer->maxwidth, classer->maxheight, &boxas, &pixas)) { return ERROR_INT("components not made", procName, 1); } jbAddPageComponents(classer, pixs, boxas, pixas); boxaDestroy(&boxas); pixaDestroy(&pixas); return 0; } /*! * jbAddPageComponents() * * Input: jbclasser * pixs (of input page) * boxas (b.b. of components for this page) * pixas (components for this page) * Return: 0 if OK; 1 on error * * Notes: * (1) If there are no components on the page, we don't require input * of empty boxas or pixas, although that's the typical situation. */ l_int32 jbAddPageComponents(JBCLASSER *classer, PIX *pixs, BOXA *boxas, PIXA *pixas) { l_int32 n; PROCNAME("jbAddPageComponents"); if (!classer) return ERROR_INT("classer not defined", procName, 1); if (!pixs) return ERROR_INT("pix not defined", procName, 1); /* Test for no components on the current page. Always update the * number of pages processed, even if nothing is on it. */ if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) { classer->npages++; return 0; } /* Get classes. For hausdorff, it uses a specified size of * structuring element and specified rank. For correlation, * it uses a specified threshold. */ if (classer->method == JB_RANKHAUS) { if (jbClassifyRankHaus(classer, boxas, pixas)) return ERROR_INT("rankhaus classification failed", procName, 1); } else { /* classer->method == JB_CORRELATION */ if (jbClassifyCorrelation(classer, boxas, pixas)) return ERROR_INT("correlation classification failed", procName, 1); } /* Find the global UL corners, adjusted for each instance so * that the class template and instance will have their * centroids in the same place. Then the template can be * used to replace the instance. */ if (jbGetULCorners(classer, pixs, boxas)) return ERROR_INT("UL corners not found", procName, 1); /* Update total component counts and number of pages processed. */ n = boxaGetCount(boxas); classer->baseindex += n; numaAddNumber(classer->nacomps, n); classer->npages++; return 0; } /*----------------------------------------------------------------------* * Classification using windowed rank hausdorff metric * *----------------------------------------------------------------------*/ /*! * jbClassifyRankHaus() * * Input: jbclasser * boxa (of new components for classification) * pixas (of new components for classification) * Return: 0 if OK; 1 on error */ l_int32 jbClassifyRankHaus(JBCLASSER *classer, BOXA *boxa, PIXA *pixas) { l_int32 n, nt, i, wt, ht, iclass, size, found, testval; l_int32 *sumtab; l_int32 npages, area1, area3; l_int32 *tab8; l_float32 rank, x1, y1, x2, y2; BOX *box; NUMA *naclass, *napage; NUMA *nafg; /* fg area of all instances */ NUMA *nafgt; /* fg area of all templates */ JBFINDCTX *findcontext; NUMAHASH *nahash; PIX *pix, *pix1, *pix2, *pix3, *pix4; PIXA *pixa, *pixa1, *pixa2, *pixat, *pixatd; PIXAA *pixaa; PTA *pta, *ptac, *ptact; SEL *sel; PROCNAME("jbClassifyRankHaus"); if (!classer) return ERROR_INT("classer not found", procName, 1); if (!boxa) return ERROR_INT("boxa not found", procName, 1); if (!pixas) return ERROR_INT("pixas not found", procName, 1); npages = classer->npages; size = classer->sizehaus; sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT); /* Generate the bordered pixa, with and without dilation. * pixa1 and pixa2 contain all the input components. */ n = pixaGetCount(pixas); pixa1 = pixaCreate(n); pixa2 = pixaCreate(n); for (i = 0; i < n; i++) { pix = pixaGetPix(pixas, i, L_CLONE); pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); pix2 = pixDilate(NULL, pix1, sel); pixaAddPix(pixa1, pix1, L_INSERT); /* un-dilated */ pixaAddPix(pixa2, pix2, L_INSERT); /* dilated */ pixDestroy(&pix); } /* Get the centroids of all the bordered images. * These are relative to the UL corner of each (bordered) pix. */ pta = pixaCentroids(pixa1); /* centroids for this page; use here */ ptac = classer->ptac; /* holds centroids of components up to this page */ ptaJoin(ptac, pta, 0, 0); /* save centroids of all components */ ptact = classer->ptact; /* holds centroids of templates */ /* Use these to save the class and page of each component. */ naclass = classer->naclass; napage = classer->napage; sumtab = makePixelSumTab8(); /* Store the unbordered pix in a pixaa, in a hierarchical * set of arrays. There is one pixa for each class, * and the pix in each pixa are all the instances found * of that class. This is actually more than one would need * for a jbig2 encoder, but there are two reasons to keep * them around: (1) the set of instances for each class * can be used to make an improved binary (or, better, * a grayscale) template, rather than simply using the first * one in the set; (2) we can investigate the failures * of the classifier. This pixaa grows as we process * successive pages. */ pixaa = classer->pixaa; /* arrays to store class exemplars (templates) */ pixat = classer->pixat; /* un-dilated */ pixatd = classer->pixatd; /* dilated */ /* Fill up the pixaa tree with the template exemplars as * the first pix in each pixa. As we add each pix, * we also add the associated box to the pixa. * We also keep track of the centroid of each pix, * and use the difference between centroids (of the * pix with the exemplar we are checking it with) * to align the two when checking that the Hausdorff * distance does not exceed a threshold. * The threshold is set by the Sel used for dilating. * For example, a 3x3 brick, sel_3, corresponds to a * Hausdorff distance of 1. In general, for an NxN brick, * with N odd, corresponds to a Hausdorff distance of (N - 1)/2. * It turns out that we actually need to use a sel of size 2x2 * to avoid small bad components when there is a halftone image * from which components can be chosen. * The larger the Sel you use, the fewer the number of classes, * and the greater the likelihood of putting semantically * different objects in the same class. For simplicity, * we do this separately for the case of rank == 1.0 (exact * match within the Hausdorff distance) and rank < 1.0. */ rank = classer->rankhaus; nahash = classer->nahash; if (rank == 1.0) { for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixa1, i, L_CLONE); pix2 = pixaGetPix(pixa2, i, L_CLONE); ptaGetPt(pta, i, &x1, &y1); nt = pixaGetCount(pixat); /* number of templates */ found = FALSE; findcontext = findSimilarSizedTemplatesInit(classer, pix1); while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { /* Find score for this template */ pix3 = pixaGetPix(pixat, iclass, L_CLONE); pix4 = pixaGetPix(pixatd, iclass, L_CLONE); ptaGetPt(ptact, iclass, &x2, &y2); testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT); pixDestroy(&pix3); pixDestroy(&pix4); if (testval == 1) { found = TRUE; numaAddNumber(naclass, iclass); numaAddNumber(napage, npages); if (classer->keep_pixaa) { pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); pix = pixaGetPix(pixas, i, L_CLONE); pixaAddPix(pixa, pix, L_INSERT); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaDestroy(&pixa); } break; } } findSimilarSizedTemplatesDestroy(&findcontext); if (found == FALSE) { /* new class */ numaAddNumber(naclass, nt); numaAddNumber(napage, npages); pixa = pixaCreate(0); pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ pixaAddPix(pixa, pix, L_INSERT); wt = pixGetWidth(pix); ht = pixGetHeight(pix); numaHashAdd(nahash, ht * wt, nt); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ ptaAddPt(ptact, x1, y1); pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ pixaAddPix(pixatd, pix2, L_INSERT); /* bordered dil template */ } else { /* don't save them */ pixDestroy(&pix1); pixDestroy(&pix2); } } } else { /* rank < 1.0 */ if ((nafg = pixaCountPixels(pixas)) == NULL) /* areas for this page */ return ERROR_INT("nafg not made", procName, 1); nafgt = classer->nafgt; tab8 = makePixelSumTab8(); for (i = 0; i < n; i++) { /* all instances on this page */ pix1 = pixaGetPix(pixa1, i, L_CLONE); numaGetIValue(nafg, i, &area1); pix2 = pixaGetPix(pixa2, i, L_CLONE); ptaGetPt(pta, i, &x1, &y1); /* use pta for this page */ nt = pixaGetCount(pixat); /* number of templates */ found = FALSE; findcontext = findSimilarSizedTemplatesInit(classer, pix1); while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { /* Find score for this template */ pix3 = pixaGetPix(pixat, iclass, L_CLONE); numaGetIValue(nafgt, iclass, &area3); pix4 = pixaGetPix(pixatd, iclass, L_CLONE); ptaGetPt(ptact, iclass, &x2, &y2); testval = pixRankHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, area1, area3, rank, tab8); pixDestroy(&pix3); pixDestroy(&pix4); if (testval == 1) { /* greedy match; take the first */ found = TRUE; numaAddNumber(naclass, iclass); numaAddNumber(napage, npages); if (classer->keep_pixaa) { pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); pix = pixaGetPix(pixas, i, L_CLONE); pixaAddPix(pixa, pix, L_INSERT); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaDestroy(&pixa); } break; } } findSimilarSizedTemplatesDestroy(&findcontext); if (found == FALSE) { /* new class */ numaAddNumber(naclass, nt); numaAddNumber(napage, npages); pixa = pixaCreate(0); pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ pixaAddPix(pixa, pix, L_INSERT); wt = pixGetWidth(pix); ht = pixGetHeight(pix); numaHashAdd(nahash, ht * wt, nt); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ ptaAddPt(ptact, x1, y1); pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ pixaAddPix(pixatd, pix2, L_INSERT); /* ditto */ numaAddNumber(nafgt, area1); } else { /* don't save them */ pixDestroy(&pix1); pixDestroy(&pix2); } } FREE(tab8); numaDestroy(&nafg); } classer->nclass = pixaGetCount(pixat); FREE(sumtab); ptaDestroy(&pta); pixaDestroy(&pixa1); pixaDestroy(&pixa2); selDestroy(&sel); return 0; } /*! * pixHaustest() * * Input: pix1 (new pix, not dilated) * pix2 (new pix, dilated) * pix3 (exemplar pix, not dilated) * pix4 (exemplar pix, dilated) * delx (x comp of centroid difference) * dely (y comp of centroid difference) * maxdiffw (max width difference of pix1 and pix2) * maxdiffh (max height difference of pix1 and pix2) * Return: 0 (FALSE) if no match, 1 (TRUE) if the new * pix is in the same class as the exemplar. * * Note: we check first that the two pix are roughly * the same size. Only if they meet that criterion do * we compare the bitmaps. The Hausdorff is a 2-way * check. The centroid difference is used to align the two * images to the nearest integer for each of the checks. * These check that the dilated image of one contains * ALL the pixels of the undilated image of the other. * Checks are done in both direction. A single pixel not * contained in either direction results in failure of the test. */ l_int32 pixHaustest(PIX *pix1, PIX *pix2, PIX *pix3, PIX *pix4, l_float32 delx, /* x(1) - x(3) */ l_float32 dely, /* y(1) - y(3) */ l_int32 maxdiffw, l_int32 maxdiffh) { l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; PIX *pixt; /* Eliminate possible matches based on size difference */ wi = pixGetWidth(pix1); hi = pixGetHeight(pix1); wt = pixGetWidth(pix3); ht = pixGetHeight(pix3); delw = L_ABS(wi - wt); if (delw > maxdiffw) return FALSE; delh = L_ABS(hi - ht); if (delh > maxdiffh) return FALSE; /* Round difference in centroid location to nearest integer; * use this as a shift when doing the matching. */ if (delx >= 0) idelx = (l_int32)(delx + 0.5); else idelx = (l_int32)(delx - 0.5); if (dely >= 0) idely = (l_int32)(dely + 0.5); else idely = (l_int32)(dely - 0.5); /* Do 1-direction hausdorff, checking that every pixel in pix1 * is within a dilation distance of some pixel in pix3. Namely, * that pix4 entirely covers pix1: * pixt = pixSubtract(NULL, pix1, pix4), including shift * where pixt has no ON pixels. */ pixt = pixCreateTemplate(pix1); pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), pix4, 0, 0); pixZero(pixt, &boolmatch); if (boolmatch == 0) { pixDestroy(&pixt); return FALSE; } /* Do 1-direction hausdorff, checking that every pixel in pix3 * is within a dilation distance of some pixel in pix1. Namely, * that pix2 entirely covers pix3: * pixSubtract(pixt, pix3, pix2), including shift * where pixt has no ON pixels. */ pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); pixZero(pixt, &boolmatch); pixDestroy(&pixt); return boolmatch; } /*! * pixRankHaustest() * * Input: pix1 (new pix, not dilated) * pix2 (new pix, dilated) * pix3 (exemplar pix, not dilated) * pix4 (exemplar pix, dilated) * delx (x comp of centroid difference) * dely (y comp of centroid difference) * maxdiffw (max width difference of pix1 and pix2) * maxdiffh (max height difference of pix1 and pix2) * area1 (fg pixels in pix1) * area3 (fg pixels in pix3) * rank (rank value of test, each way) * tab8 (table of pixel sums for byte) * Return: 0 (FALSE) if no match, 1 (TRUE) if the new * pix is in the same class as the exemplar. * * Note: we check first that the two pix are roughly * the same size. Only if they meet that criterion do * we compare the bitmaps. We convert the rank value to * a number of pixels by multiplying the rank fraction by the number * of pixels in the undilated image. The Hausdorff is a 2-way * check. The centroid difference is used to align the two * images to the nearest integer for each of the checks. * The rank hausdorff checks that the dilated image of one * contains the rank fraction of the pixels of the undilated * image of the other. Checks are done in both direction. * Failure of the test in either direction results in failure * of the test. */ l_int32 pixRankHaustest(PIX *pix1, PIX *pix2, PIX *pix3, PIX *pix4, l_float32 delx, /* x(1) - x(3) */ l_float32 dely, /* y(1) - y(3) */ l_int32 maxdiffw, l_int32 maxdiffh, l_int32 area1, l_int32 area3, l_float32 rank, l_int32 *tab8) { l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; l_int32 thresh1, thresh3; PIX *pixt; /* Eliminate possible matches based on size difference */ wi = pixGetWidth(pix1); hi = pixGetHeight(pix1); wt = pixGetWidth(pix3); ht = pixGetHeight(pix3); delw = L_ABS(wi - wt); if (delw > maxdiffw) return FALSE; delh = L_ABS(hi - ht); if (delh > maxdiffh) return FALSE; /* Upper bounds in remaining pixels for allowable match */ thresh1 = (l_int32)(area1 * (1. - rank) + 0.5); thresh3 = (l_int32)(area3 * (1. - rank) + 0.5); /* Round difference in centroid location to nearest integer; * use this as a shift when doing the matching. */ if (delx >= 0) idelx = (l_int32)(delx + 0.5); else idelx = (l_int32)(delx - 0.5); if (dely >= 0) idely = (l_int32)(dely + 0.5); else idely = (l_int32)(dely - 0.5); /* Do 1-direction hausdorff, checking that every pixel in pix1 * is within a dilation distance of some pixel in pix3. Namely, * that pix4 entirely covers pix1: * pixt = pixSubtract(NULL, pix1, pix4), including shift * where pixt has no ON pixels. */ pixt = pixCreateTemplate(pix1); pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), pix4, 0, 0); pixThresholdPixels(pixt, thresh1, &boolmatch, tab8); if (boolmatch == 1) { /* above thresh1 */ pixDestroy(&pixt); return FALSE; } /* Do 1-direction hausdorff, checking that every pixel in pix3 * is within a dilation distance of some pixel in pix1. Namely, * that pix2 entirely covers pix3: * pixSubtract(pixt, pix3, pix2), including shift * where pixt has no ON pixels. */ pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); pixThresholdPixels(pixt, thresh3, &boolmatch, tab8); pixDestroy(&pixt); if (boolmatch == 1) /* above thresh3 */ return FALSE; else return TRUE; } /*----------------------------------------------------------------------* * Classification using windowed correlation score * *----------------------------------------------------------------------*/ /*! * jbClassifyCorrelation() * * Input: jbclasser * boxa (of new components for classification) * pixas (of new components for classification) * Return: 0 if OK; 1 on error */ l_int32 jbClassifyCorrelation(JBCLASSER *classer, BOXA *boxa, PIXA *pixas) { l_int32 n, nt, i, iclass, wt, ht, found, area, area1, area2, npages, overthreshold; l_int32 *sumtab, *centtab; l_uint32 *row, word; l_float32 x1, y1, x2, y2, xsum, ysum; l_float32 thresh, weight, threshold; BOX *box; NUMA *naclass, *napage; NUMA *nafgt; /* fg area of all templates */ NUMA *naarea; /* w * h area of all templates */ JBFINDCTX *findcontext; NUMAHASH *nahash; PIX *pix, *pix1, *pix2; PIXA *pixa, *pixa1, *pixat; PIXAA *pixaa; PTA *pta, *ptac, *ptact; l_int32 *pixcts; /* pixel counts of each pixa */ l_int32 **pixrowcts; /* row-by-row pixel counts of each pixa */ l_int32 x, y, rowcount, downcount, wpl; l_uint8 byte; PROCNAME("jbClassifyCorrelation"); if (!classer) return ERROR_INT("classer not found", procName, 1); if (!boxa) return ERROR_INT("boxa not found", procName, 1); if (!pixas) return ERROR_INT("pixas not found", procName, 1); npages = classer->npages; /* Generate the bordered pixa, which contains all the the * input components. This will not be saved. */ n = pixaGetCount(pixas); pixa1 = pixaCreate(n); for (i = 0; i < n; i++) { pix = pixaGetPix(pixas, i, L_CLONE); pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); pixaAddPix(pixa1, pix1, L_INSERT); pixDestroy(&pix); } /* Use these to save the class and page of each component. */ naclass = classer->naclass; napage = classer->napage; /* Get the number of fg pixels in each component. */ nafgt = classer->nafgt; /* holds fg areas of the templates */ sumtab = makePixelSumTab8(); pixcts = (l_int32 *)CALLOC(n, sizeof(*pixcts)); pixrowcts = (l_int32 **)CALLOC(n, sizeof(*pixrowcts)); centtab = makePixelCentroidTab8(); if (!pixcts || !pixrowcts || !centtab) return ERROR_INT("calloc fail in pix*cts or centtab", procName, 1); /* Count the "1" pixels in each row of the pix in pixa1; this * allows pixCorrelationScoreThresholded to abort early if a match * is impossible. This loop merges three calculations: the total * number of "1" pixels, the number of "1" pixels in each row, and * the centroid. The centroids are relative to the UL corner of * each (bordered) pix. The pixrowcts[i][y] are the total number * of fg pixels in pixa[i] below row y. */ pta = ptaCreate(n); for (i = 0; i < n; i++) { pix = pixaGetPix(pixa1, i, L_CLONE); pixrowcts[i] = (l_int32 *)CALLOC(pixGetHeight(pix), sizeof(**pixrowcts)); xsum = 0; ysum = 0; wpl = pixGetWpl(pix); row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl; downcount = 0; for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) { pixrowcts[i][y] = downcount; rowcount = 0; for (x = 0; x < wpl; x++) { word = row[x]; byte = word & 0xff; rowcount += sumtab[byte]; xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte]; byte = (word >> 8) & 0xff; rowcount += sumtab[byte]; xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte]; byte = (word >> 16) & 0xff; rowcount += sumtab[byte]; xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte]; byte = (word >> 24) & 0xff; rowcount += sumtab[byte]; xsum += centtab[byte] + x * 32 * sumtab[byte]; } downcount += rowcount; ysum += rowcount * y; } pixcts[i] = downcount; ptaAddPt(pta, xsum / (l_float32)downcount, ysum / (l_float32)downcount); pixDestroy(&pix); } ptac = classer->ptac; /* holds centroids of components up to this page */ ptaJoin(ptac, pta, 0, 0); /* save centroids of all components */ ptact = classer->ptact; /* holds centroids of templates */ /* Store the unbordered pix in a pixaa, in a hierarchical * set of arrays. There is one pixa for each class, * and the pix in each pixa are all the instances found * of that class. This is actually more than one would need * for a jbig2 encoder, but there are two reasons to keep * them around: (1) the set of instances for each class * can be used to make an improved binary (or, better, * a grayscale) template, rather than simply using the first * one in the set; (2) we can investigate the failures * of the classifier. This pixaa grows as we process * successive pages. */ pixaa = classer->pixaa; /* Array to store class exemplars */ pixat = classer->pixat; /* Fill up the pixaa tree with the template exemplars as * the first pix in each pixa. As we add each pix, * we also add the associated box to the pixa. * We also keep track of the centroid of each pix, * and use the difference between centroids (of the * pix with the exemplar we are checking it with) * to align the two when checking that the correlation * score exceeds a threshold. The correlation score * is given by the square of the area of the AND * between aligned instance and template, divided by * the product of areas of each image. For identical * template and instance, the score is 1.0. * If the threshold is too small, non-equivalent instances * will be placed in the same class; if too large, there will * be an unnecessary division of classes representing the * same character. The weightfactor adds in some of the * difference (1.0 - thresh), depending on the heaviness * of the template (measured as the fraction of fg pixels). */ thresh = classer->thresh; weight = classer->weightfactor; naarea = classer->naarea; nahash = classer->nahash; for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixa1, i, L_CLONE); area1 = pixcts[i]; ptaGetPt(pta, i, &x1, &y1); /* centroid for this instance */ nt = pixaGetCount(pixat); found = FALSE; findcontext = findSimilarSizedTemplatesInit(classer, pix1); while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { /* Get the template */ pix2 = pixaGetPix(pixat, iclass, L_CLONE); numaGetIValue(nafgt, iclass, &area2); ptaGetPt(ptact, iclass, &x2, &y2); /* template centroid */ /* Find threshold for this template */ if (weight > 0.0) { numaGetIValue(naarea, iclass, &area); threshold = thresh + (1. - thresh) * weight * area2 / area; } else threshold = thresh; /* Find score for this template */ overthreshold = pixCorrelationScoreThresholded(pix1, pix2, area1, area2, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, sumtab, pixrowcts[i], threshold); #if DEBUG_CORRELATION_SCORE { l_float32 score, testscore; l_int32 count, testcount; score = pixCorrelationScore(pix1, pix2, area1, area2, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, sumtab); testscore = pixCorrelationScoreSimple(pix1, pix2, area1, area2, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, sumtab); count = (l_int32)rint(sqrt(score * area1 * area2)); testcount = (l_int32)rint(sqrt(testscore * area1 * area2)); if ((score >= threshold) != (testscore >= threshold)) { fprintf(stderr, "Correlation score mismatch: %d(%g,%d) vs %d(%g,%d) (%g)\n", count, score, score >= threshold, testcount, testscore, testscore >= threshold, score - testscore); } if ((score >= threshold) != overthreshold) { fprintf(stderr, "Mismatch between correlation/threshold comparison: %g(%g,%d) >= %g(%g) vs %s\n", score, score*area1*area2, count, threshold, threshold*area1*area2, (overthreshold ? "true" : "false")); } } #endif /* DEBUG_CORRELATION_SCORE */ pixDestroy(&pix2); if (overthreshold) { /* greedy match */ found = TRUE; numaAddNumber(naclass, iclass); numaAddNumber(napage, npages); if (classer->keep_pixaa) { /* We are keeping a record of all components */ pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); pix = pixaGetPix(pixas, i, L_CLONE); pixaAddPix(pixa, pix, L_INSERT); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaDestroy(&pixa); } break; } } findSimilarSizedTemplatesDestroy(&findcontext); if (found == FALSE) { /* new class */ numaAddNumber(naclass, nt); numaAddNumber(napage, npages); pixa = pixaCreate(0); pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ pixaAddPix(pixa, pix, L_INSERT); wt = pixGetWidth(pix); ht = pixGetHeight(pix); numaHashAdd(nahash, ht * wt, nt); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ ptaAddPt(ptact, x1, y1); numaAddNumber(nafgt, area1); pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) * (pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS); numaAddNumber(naarea, area); } else { /* don't save it */ pixDestroy(&pix1); } } classer->nclass = pixaGetCount(pixat); FREE(pixcts); FREE(centtab); for (i = 0; i < n; i++) { FREE(pixrowcts[i]); } FREE(pixrowcts); FREE(sumtab); ptaDestroy(&pta); pixaDestroy(&pixa1); return 0; } /*----------------------------------------------------------------------* * Determine the image components we start with * *----------------------------------------------------------------------*/ /*! * jbGetComponents() * * Input: pixs (1 bpp) * components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) * maxwidth, maxheight (of saved components; larger are discarded) * &pboxa (<return> b.b. of component items) * &ppixa (<return> component items) * Return: 0 if OK, 1 on error */ l_int32 jbGetComponents(PIX *pixs, l_int32 components, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad) { l_int32 empty, res, redfactor; BOXA *boxa; PIX *pixt1, *pixt2, *pixt3; PIXA *pixa, *pixat; PROCNAME("jbGetComponents"); if (!pboxad) return ERROR_INT("&boxad not defined", procName, 1); *pboxad = NULL; if (!ppixad) return ERROR_INT("&pixad not defined", procName, 1); *ppixad = NULL; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return ERROR_INT("invalid components", procName, 1); pixZero(pixs, &empty); if (empty) { *pboxad = boxaCreate(0); *ppixad = pixaCreate(0); return 0; } /* If required, preprocess input pixs. The method for both * characters and words is to generate a connected component * mask over the units that we want to aggregrate, which are, * in general, sets of related connected components in pixs. * For characters, we want to include the dots with * 'i', 'j' and '!', so we do a small vertical closing to * generate the mask. For words, we make a mask over all * characters in each word. This is a bit more tricky, because * the spacing between words is difficult to predict a priori, * and words can be typeset with variable spacing that can * in some cases be barely larger than the space between * characters. The first step is to generate the mask and * identify each of its connected components. */ if (components == JB_CONN_COMPS) { /* no preprocessing */ boxa = pixConnComp(pixs, &pixa, 8); } else if (components == JB_CHARACTERS) { pixt1 = pixMorphSequence(pixs, "c1.6", 0); boxa = pixConnComp(pixt1, &pixat, 8); pixa = pixaClipToPix(pixat, pixs); pixDestroy(&pixt1); pixaDestroy(&pixat); } else { /* components == JB_WORDS */ /* Do the operations at about 150 ppi resolution. * It is much faster at 75 ppi, but the results are * more accurate at 150 ppi. This will segment the * words in body text. It can be expected that relatively * infrequent words in a larger font will be split. */ res = pixGetXRes(pixs); if (res <= 200) { redfactor = 1; pixt1 = pixClone(pixs); } else if (res <= 400) { redfactor = 2; pixt1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); } else { redfactor = 4; pixt1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); } pixt2 = pixWordMaskByDilation(pixt1, 0, NULL); /* Expand the optimally dilated word mask to full res. */ pixt3 = pixExpandReplicate(pixt2, redfactor); /* Pull out the pixels in pixs corresponding to the mask * components in pixt3. Note that above we used threshold * levels in the reduction of 1 to insure that the resulting * mask fully covers the input pixs. The downside of using * a threshold of 1 is that very close characters from adjacent * lines can be joined. But with a level of 2 or greater, * it is necessary to use a seedfill, followed by a pixOr(): * pixt4 = pixSeedfillBinary(NULL, pixt3, pixs, 8); * pixOr(pixt3, pixt3, pixt4); * to insure that the mask coverage is complete over pixs. */ boxa = pixConnComp(pixt3, &pixat, 4); pixa = pixaClipToPix(pixat, pixs); pixaDestroy(&pixat); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixt3); } /* Remove large components, and save the results. */ *ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH, L_SELECT_IF_LTE, NULL); *pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH, L_SELECT_IF_LTE, NULL); pixaDestroy(&pixa); boxaDestroy(&boxa); return 0; } /*! * pixWordMaskByDilation() * * Input: pixs (1 bpp; typ. at 75 to 150 ppi) * maxsize (use 0 for default; not to exceed 14) * &size (<optional return> size of optimal horiz Sel) * Return: pixd (dilated word mask), or null on error * * Notes: * (1) For 75 to 150 ppi, the optimal dilation should not exceed 7. * This is the default size chosen if maxsize <= 0. * (2) To run this on images at resolution between 200 and 300, it * is advisable to use a larger maxsize, say between 10 and 14. * (3) The best size for dilating to get word masks is optionally returned. */ PIX * pixWordMaskByDilation(PIX *pixs, l_int32 maxsize, l_int32 *psize) { l_int32 i, diffmin, ndiff, imin; l_int32 ncc[MAX_ALLOWED_DILATION + 1]; BOXA *boxa; NUMA *nacc; PIX *pixt1, *pixt2, *pixt3; PIXA *pixa; SEL *sel; PROCNAME("pixWordMaskbyDilation"); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); /* Find the optimal dilation to create the word mask. * Look for successively increasing dilations where the * number of connected components doesn't decrease. * This is the situation where the components in the * word mask should properly cover each word. Use of * 4 cc slightly reduces the likelihood that words from * different lines are joined. */ diffmin = 1000000; pixa = pixaCreate(8); pixt1 = pixCopy(NULL, pixs); pixaAddPix(pixa, pixt1, L_COPY); if (maxsize <= 0) maxsize = 7; /* default */ if (maxsize > MAX_ALLOWED_DILATION) maxsize = MAX_ALLOWED_DILATION; nacc = numaCreate(maxsize); for (i = 0; i <= maxsize; i++) { if (i == 0) /* first one not dilated */ pixt2 = pixCopy(NULL, pixt1); else /* successive dilation by sel_2h */ pixt2 = pixMorphSequence(pixt1, "d2.1", 0); boxa = pixConnCompBB(pixt2, 4); ncc[i] = boxaGetCount(boxa); numaAddNumber(nacc, ncc[i]); if (i > 0) { ndiff = ncc[i - 1] - ncc[i]; #if DEBUG_PLOT_CC fprintf(stderr, "ndiff[%d] = %d\n", i - 1, ndiff); #endif /* DEBUG_PLOT_CC */ if (ndiff < diffmin) { imin = i; diffmin = ndiff; } } pixaAddPix(pixa, pixt2, L_COPY); pixDestroy(&pixt1); pixt1 = pixt2; boxaDestroy(&boxa); } pixDestroy(&pixt1); #if DEBUG_PLOT_CC {GPLOT *gplot; NUMA *naseq; naseq = numaMakeSequence(1, 1, numaGetCount(nacc)); gplot = gplotCreate("junk_cc_output", GPLOT_PNG, "Number of cc vs. horizontal dilation", "Sel horiz", "Number of cc"); gplotAddPlot(gplot, naseq, nacc, GPLOT_LINES, ""); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&naseq); } #endif /* DEBUG_PLOT_CC */ /* Save the result of the optimal dilation */ pixt2 = pixaGetPix(pixa, imin, L_CLONE); sel = selCreateBrick(1, imin, 0, imin - 1, SEL_HIT); pixt3 = pixErode(NULL, pixt2, sel); /* remove effect of dilation */ selDestroy(&sel); pixDestroy(&pixt2); pixaDestroy(&pixa); if (psize) *psize = imin + 1; numaDestroy(&nacc); return pixt3; } /*----------------------------------------------------------------------* * Build grayscale composites (templates) * *----------------------------------------------------------------------*/ /*! * jbAccumulateComposites() * * Input: pixaa (one pixa for each class) * &pna (<return> number of samples used to build each composite) * &ptat (<return> centroids of bordered composites) * Return: pixad (accumulated sum of samples in each class), * or null on error * */ PIXA * jbAccumulateComposites(PIXAA *pixaa, NUMA **pna, PTA **pptat) { l_int32 n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff; l_float32 x, y, xave, yave; NUMA *na; PIX *pix, *pixt1, *pixt2, *pixsum; PIXA *pixa, *pixad; PTA *ptat, *pta; PROCNAME("jbAccumulateComposites"); if (!pptat) return (PIXA *)ERROR_PTR("&ptat not defined", procName, NULL); *pptat = NULL; if (!pna) return (PIXA *)ERROR_PTR("&na not defined", procName, NULL); *pna = NULL; if (!pixaa) return (PIXA *)ERROR_PTR("pixaa not defined", procName, NULL); n = pixaaGetCount(pixaa); if ((ptat = ptaCreate(n)) == NULL) return (PIXA *)ERROR_PTR("ptat not made", procName, NULL); *pptat = ptat; pixad = pixaCreate(n); na = numaCreate(n); *pna = na; for (i = 0; i < n; i++) { pixa = pixaaGetPixa(pixaa, i, L_CLONE); nt = pixaGetCount(pixa); numaAddNumber(na, nt); if (nt == 0) { L_WARNING("empty pixa found!", procName); pixaDestroy(&pixa); continue; } pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh); pix = pixaGetPix(pixa, 0, L_CLONE); d = pixGetDepth(pix); pixDestroy(&pix); pixt1 = pixCreate(maxw, maxh, d); pixsum = pixInitAccumulate(maxw, maxh, 0); pta = pixaCentroids(pixa); /* Find the average value of the centroids ... */ xave = yave = 0; for (j = 0; j < nt; j++) { ptaGetPt(pta, j, &x, &y); xave += x; yave += y; } xave = xave / (l_float32)nt; yave = yave / (l_float32)nt; /* and place all centroids at their average value */ for (j = 0; j < nt; j++) { pixt2 = pixaGetPix(pixa, j, L_CLONE); ptaGetPt(pta, j, &x, &y); xdiff = (l_int32)(x - xave); ydiff = (l_int32)(y - yave); pixClearAll(pixt1); pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC, pixt2, 0, 0); pixAccumulate(pixsum, pixt1, L_ARITH_ADD); pixDestroy(&pixt2); } pixaAddPix(pixad, pixsum, L_INSERT); ptaAddPt(ptat, xave, yave); pixaDestroy(&pixa); pixDestroy(&pixt1); ptaDestroy(&pta); } return pixad; } /*! * jbTemplatesFromComposites() * * Input: pixac (one pix of composites for each class) * na (number of samples used for each class composite) * Return: pixad (8 bpp templates for each class), or null on error * */ PIXA * jbTemplatesFromComposites(PIXA *pixac, NUMA *na) { l_int32 n, i; l_float32 nt; /* number of samples in the composite; always an integer */ l_float32 factor; PIX *pixsum; /* accumulated composite */ PIX *pixd; PIXA *pixad; PROCNAME("jbTemplatesFromComposites"); if (!pixac) return (PIXA *)ERROR_PTR("pixac not defined", procName, NULL); if (!na) return (PIXA *)ERROR_PTR("na not defined", procName, NULL); n = pixaGetCount(pixac); pixad = pixaCreate(n); for (i = 0; i < n; i++) { pixsum = pixaGetPix(pixac, i, L_COPY); /* changed internally */ numaGetFValue(na, i, &nt); factor = 255. / nt; pixMultConstAccumulate(pixsum, factor, 0); /* changes pixsum */ pixd = pixFinalAccumulate(pixsum, 0, 8); pixaAddPix(pixad, pixd, L_INSERT); pixDestroy(&pixsum); } return pixad; } /*----------------------------------------------------------------------* * jbig2 utility routines * *----------------------------------------------------------------------*/ /*! * jbClasserCreate() * * Input: method (JB_RANKHAUS, JB_CORRELATION) * components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS) * Return: jbclasser, or null on error */ JBCLASSER * jbClasserCreate(l_int32 method, l_int32 components) { JBCLASSER *classer; PROCNAME("jbClasserCreate"); if ((classer = (JBCLASSER *)CALLOC(1, sizeof(JBCLASSER))) == NULL) return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL); if (method != JB_RANKHAUS && method != JB_CORRELATION) return (JBCLASSER *)ERROR_PTR("invalid type", procName, NULL); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return (JBCLASSER *)ERROR_PTR("invalid type", procName, NULL); classer->method = method; classer->components = components; classer->nacomps = numaCreate(0); classer->pixaa = pixaaCreate(0); classer->pixat = pixaCreate(0); classer->pixatd = pixaCreate(0); classer->nafgt = numaCreate(0); classer->naarea = numaCreate(0); classer->ptac = ptaCreate(0); classer->ptact = ptaCreate(0); classer->naclass = numaCreate(0); classer->napage = numaCreate(0); classer->ptaul = ptaCreate(0); return classer; } /* * jbClasserDestroy() * * Input: &classer (<to be nulled>) * Return: void */ void jbClasserDestroy(JBCLASSER **pclasser) { JBCLASSER *classer; if (!pclasser) return; if ((classer = *pclasser) == NULL) return; sarrayDestroy(&classer->safiles); numaDestroy(&classer->nacomps); pixaaDestroy(&classer->pixaa); pixaDestroy(&classer->pixat); pixaDestroy(&classer->pixatd); numaHashDestroy(&classer->nahash); numaDestroy(&classer->nafgt); numaDestroy(&classer->naarea); ptaDestroy(&classer->ptac); ptaDestroy(&classer->ptact); numaDestroy(&classer->naclass); numaDestroy(&classer->napage); ptaDestroy(&classer->ptaul); ptaDestroy(&classer->ptall); FREE(classer); *pclasser = NULL; return; } /*! * jbDataSave() * * Input: jbclasser * latticew, latticeh (cell size used to store each * connected component in the composite) * Return: jbdata, or null on error * * Notes: * (1) This routine stores the jbig2-type data required for * generating a lossy jbig2 version of the image. * It can be losslessly written to (and read from) two files. * (2) It generates and stores the mosaic of templates. * (3) It clones the Numa and Pta arrays, so these must all * be destroyed by the caller. * (4) Input 0 to use the default values for latticew and/or latticeh, */ JBDATA * jbDataSave(JBCLASSER *classer) { l_int32 maxw, maxh; JBDATA *data; PIX *pix; PROCNAME("jbDataSave"); if (!classer) return (JBDATA *)ERROR_PTR("classer not defined", procName, NULL); /* Write the templates into an array. */ pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh); if ((pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1)) == NULL) return (JBDATA *)ERROR_PTR("data not made", procName, NULL); if ((data = (JBDATA *)CALLOC(1, sizeof(JBDATA))) == NULL) return (JBDATA *)ERROR_PTR("data not made", procName, NULL); data->pix = pix; data->npages = classer->npages; data->w = classer->w; data->h = classer->h; data->nclass = classer->nclass; data->latticew = maxw + 1; data->latticeh = maxh + 1; data->naclass = numaClone(classer->naclass); data->napage = numaClone(classer->napage); data->ptaul = ptaClone(classer->ptaul); return data; } /* * jbDataDestroy() * * Input: &data (<to be nulled>) * Return: void */ void jbDataDestroy(JBDATA **pdata) { JBDATA *data; if (!pdata) return; if ((data = *pdata) == NULL) return; pixDestroy(&data->pix); numaDestroy(&data->naclass); numaDestroy(&data->napage); ptaDestroy(&data->ptaul); FREE(data); *pdata = NULL; return; } /*! * jbDataWrite() * * Input: rootname (for output files; everything but the extension) * jbdata * Return: 0 if OK, 1 on error * * Notes: * (1) Serialization function that writes data in jbdata to file. */ l_int32 jbDataWrite(const char *rootout, JBDATA *jbdata) { char buf[L_BUF_SIZE]; l_int32 w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage; NUMA *naclass, *napage; PTA *ptaul; PIX *pixt; FILE *fp; PROCNAME("jbDataWrite"); if (!rootout) return ERROR_INT("no rootout", procName, 1); if (!jbdata) return ERROR_INT("no jbdata", procName, 1); npages = jbdata->npages; w = jbdata->w; h = jbdata->h; pixt = jbdata->pix; nclass = jbdata->nclass; cellw = jbdata->latticew; cellh = jbdata->latticeh; naclass = jbdata->naclass; napage = jbdata->napage; ptaul = jbdata->ptaul; snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT); pixWrite(buf, pixt, IFF_PNG); snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT); if ((fp = fopen(buf, "w")) == NULL) return ERROR_INT("stream not opened", procName, 1); ncomp = ptaGetCount(ptaul); fprintf(fp, "jb data file\n"); fprintf(fp, "num pages = %d\n", npages); fprintf(fp, "page size: w = %d, h = %d\n", w, h); fprintf(fp, "num components = %d\n", ncomp); fprintf(fp, "num classes = %d\n", nclass); fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh); for (i = 0; i < ncomp; i++) { numaGetIValue(napage, i, &ipage); numaGetIValue(naclass, i, &iclass); ptaGetIPt(ptaul, i, &x, &y); fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y); } fclose(fp); return 0; } /*! * jbDataRead() * * Input: rootname (for template and data files) * Return: jbdata, or NULL on error */ JBDATA * jbDataRead(const char *rootname) { char fname[L_BUF_SIZE]; char *linestr; l_uint8 *data; l_int32 nsa, i, w, h, cellw, cellh, x, y, iclass, ipage; l_int32 nbytes, npages, nclass, ncomp; JBDATA *jbdata; NUMA *naclass, *napage; PIX *pixs; PTA *ptaul; SARRAY *sa; PROCNAME("jbDataRead"); if (!rootname) return (JBDATA *)ERROR_PTR("rootname not defined", procName, NULL); snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT); if ((pixs = pixRead(fname)) == NULL) return (JBDATA *)ERROR_PTR("pix not read", procName, NULL); snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT); if ((data = arrayRead(fname, &nbytes)) == NULL) return (JBDATA *)ERROR_PTR("data not read", procName, NULL); if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL) return (JBDATA *)ERROR_PTR("sa not made", procName, NULL); nsa = sarrayGetCount(sa); /* number of cc + 6 */ linestr = sarrayGetString(sa, 0, 0); if (strcmp(linestr, "jb data file")) return (JBDATA *)ERROR_PTR("invalid jb data file", procName, NULL); linestr = sarrayGetString(sa, 1, 0); sscanf(linestr, "num pages = %d", &npages); linestr = sarrayGetString(sa, 2, 0); sscanf(linestr, "page size: w = %d, h = %d", &w, &h); linestr = sarrayGetString(sa, 3, 0); sscanf(linestr, "num components = %d", &ncomp); linestr = sarrayGetString(sa, 4, 0); sscanf(linestr, "num classes = %d\n", &nclass); linestr = sarrayGetString(sa, 5, 0); sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh); #if 1 fprintf(stderr, "num pages = %d\n", npages); fprintf(stderr, "page size: w = %d, h = %d\n", w, h); fprintf(stderr, "num components = %d\n", ncomp); fprintf(stderr, "num classes = %d\n", nclass); fprintf(stderr, "template lattice size: w = %d, h = %d\n", cellw, cellh); #endif if ((naclass = numaCreate(ncomp)) == NULL) return (JBDATA *)ERROR_PTR("naclass not made", procName, NULL); if ((napage = numaCreate(ncomp)) == NULL) return (JBDATA *)ERROR_PTR("napage not made", procName, NULL); if ((ptaul = ptaCreate(ncomp)) == NULL) return (JBDATA *)ERROR_PTR("pta not made", procName, NULL); for (i = 6; i < nsa; i++) { linestr = sarrayGetString(sa, i, 0); sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y); numaAddNumber(napage, ipage); numaAddNumber(naclass, iclass); ptaAddPt(ptaul, x, y); } if ((jbdata = (JBDATA *)CALLOC(1, sizeof(JBDATA))) == NULL) return (JBDATA *)ERROR_PTR("data not made", procName, NULL); jbdata->pix = pixs; jbdata->npages = npages; jbdata->w = w; jbdata->h = h; jbdata->nclass = nclass; jbdata->latticew = cellw; jbdata->latticeh = cellh; jbdata->naclass = naclass; jbdata->napage = napage; jbdata->ptaul = ptaul; FREE(data); sarrayDestroy(&sa); return jbdata; } /*! * jbDataRender() * * Input: jbdata * debugflag (if TRUE, writes into 2 bpp pix and adds * component outlines in color) * Return: pixa (reconstruction of original images, using templates) or * null on error */ PIXA * jbDataRender(JBDATA *data, l_int32 debugflag) { l_int32 i, w, h, cellw, cellh, x, y, iclass, ipage; l_int32 npages, nclass, ncomp, wp, hp; BOX *box; NUMA *naclass, *napage; PIX *pixt, *pixt2, *pix, *pixd; PIXA *pixat; /* pixa of templates */ PIXA *pixad; /* pixa of output images */ PIXCMAP *cmap; PTA *ptaul; PROCNAME("jbDataRender"); if (!data) return (PIXA *)ERROR_PTR("data not defined", procName, NULL); npages = data->npages; w = data->w; h = data->h; pixt = data->pix; nclass = data->nclass; cellw = data->latticew; cellh = data->latticeh; naclass = data->naclass; napage = data->napage; ptaul = data->ptaul; ncomp = numaGetCount(naclass); /* Reconstruct the original set of images from the templates * and the data associated with each component. First, * generate the output pixa as a set of empty pix. */ if ((pixad = pixaCreate(npages)) == NULL) return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); for (i = 0; i < npages; i++) { if (debugflag == FALSE) pix = pixCreate(w, h, 1); else { pix = pixCreate(w, h, 2); cmap = pixcmapCreate(2); pixcmapAddColor(cmap, 255, 255, 255); pixcmapAddColor(cmap, 0, 0, 0); pixcmapAddColor(cmap, 255, 0, 0); /* for box outlines */ pixSetColormap(pix, cmap); } pixaAddPix(pixad, pix, L_INSERT); } /* Put the class templates into a pixa. */ if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL) return (PIXA *)ERROR_PTR("pixat not made", procName, NULL); /* Place each component in the right location on its page. */ for (i = 0; i < ncomp; i++) { numaGetIValue(napage, i, &ipage); numaGetIValue(naclass, i, &iclass); pix = pixaGetPix(pixat, iclass, L_CLONE); /* the template */ wp = pixGetWidth(pix); hp = pixGetHeight(pix); ptaGetIPt(ptaul, i, &x, &y); pixd = pixaGetPix(pixad, ipage, L_CLONE); /* the output page */ if (debugflag == FALSE) pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix, 0, 0); else { pixt2 = pixConvert1To2Cmap(pix); pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pixt2, 0, 0); box = boxCreate(x, y, wp, hp); pixRenderBoxArb(pixd, box, 1, 255, 0, 0); pixDestroy(&pixt2); boxDestroy(&box); } pixDestroy(&pix); /* the clone only */ pixDestroy(&pixd); /* the clone only */ } pixaDestroy(&pixat); return pixad; } /*! * jbGetULCorners() * * Input: jbclasser * pixs (full res image) * boxa (of c.c. bounding rectangles for this page) * Return: 0 if OK, 1 on error * * Notes: * (1) This computes the ptaul field, which has the global UL corners, * adjusted for each specific component, so that each component * can be replaced by the template for its class and have the * centroid in the template in the same position as the * centroid of the original connected component. It is important * that this be done properly to avoid a wavy baseline in the * result. * (2) The array fields ptac and ptact give the centroids of * those components relative to the UL corner of each component. * Here, we compute the difference in each component, round to * nearest integer, and correct the box->x and box->y by * the appropriate integral difference. * (3) The templates and stored instances are all bordered. */ l_int32 jbGetULCorners(JBCLASSER *classer, PIX *pixs, BOXA *boxa) { l_int32 i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy; l_int32 *sumtab; l_float32 x1, x2, y1, y2, delx, dely; BOX *box; NUMA *naclass; PIX *pixt; PTA *ptac, *ptact, *ptaul; PROCNAME("jbGetULCorners"); if (!classer) return ERROR_INT("classer not defined", procName, 1); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (!boxa) return ERROR_INT("boxa not defined", procName, 1); n = boxaGetCount(boxa); ptaul = classer->ptaul; naclass = classer->naclass; ptac = classer->ptac; ptact = classer->ptact; baseindex = classer->baseindex; /* num components before this page */ sumtab = makePixelSumTab8(); for (i = 0; i < n; i++) { index = baseindex + i; ptaGetPt(ptac, index, &x1, &y1); numaGetIValue(naclass, index, &iclass); ptaGetPt(ptact, iclass, &x2, &y2); delx = x2 - x1; dely = y2 - y1; if (delx >= 0) idelx = (l_int32)(delx + 0.5); else idelx = (l_int32)(delx - 0.5); if (dely >= 0) idely = (l_int32)(dely + 0.5); else idely = (l_int32)(dely - 0.5); if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) return ERROR_INT("box not found", procName, 1); boxGetGeometry(box, &x, &y, NULL, NULL); /* Get final increments dx and dy for best alignment */ pixt = pixaGetPix(classer->pixat, iclass, L_CLONE); finalPositioningForAlignment(pixs, x, y, idelx, idely, pixt, sumtab, &dx, &dy); /* if (i % 20 == 0) fprintf(stderr, "dx = %d, dy = %d\n", dx, dy); */ ptaAddPt(ptaul, x - idelx + dx, y - idely + dy); boxDestroy(&box); pixDestroy(&pixt); } FREE(sumtab); return 0; } /*! * jbGetLLCorners() * * Input: jbclasser * Return: 0 if OK, 1 on error * * Notes: * (1) This computes the ptall field, which has the global LL corners, * adjusted for each specific component, so that each component * can be replaced by the template for its class and have the * centroid in the template in the same position as the * centroid of the original connected component. It is important * that this be done properly to avoid a wavy baseline in the result. * (2) It is computed here from the corresponding UL corners, where * the input templates and stored instances are all bordered. * This should be done after all pages have been processed. * (3) For proper substitution, the templates whose LL corners are * placed in these locations must be UN-bordered. * This is available for a realistic jbig2 encoder, which would * (1) encode each template without a border, and (2) encode * the position using the LL corner (rather than the UL * corner) because the difference between y-values * of successive instances is typically close to zero. */ l_int32 jbGetLLCorners(JBCLASSER *classer) { l_int32 i, iclass, n, x1, y1, h; NUMA *naclass; PIX *pix; PIXA *pixat; PTA *ptaul, *ptall; PROCNAME("jbGetLLCorners"); if (!classer) return ERROR_INT("classer not defined", procName, 1); ptaul = classer->ptaul; naclass = classer->naclass; pixat = classer->pixat; ptaDestroy(&classer->ptall); n = ptaGetCount(ptaul); ptall = ptaCreate(n); classer->ptall = ptall; /* If the templates were bordered, we would add h - 1 to the UL * corner y-value. However, because the templates to be used * here have their borders removed, and the borders are * JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS * to the UL corner y-value. */ for (i = 0; i < n; i++) { ptaGetIPt(ptaul, i, &x1, &y1); numaGetIValue(naclass, i, &iclass); pix = pixaGetPix(pixat, iclass, L_CLONE); h = pixGetHeight(pix); ptaAddPt(ptall, x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS); pixDestroy(&pix); } return 0; } /*----------------------------------------------------------------------* * Static helpers * *----------------------------------------------------------------------*/ /* When looking for similar matches we check templates whose size is +/- 2 in * each direction. This involves 25 possible sizes. This array contains the * offsets for each of those positions in a spiral pattern. There are 25 pairs * of numbers in this array: even positions are x values. */ static int two_by_two_walk[50] = { 0, 0, 0, 1, -1, 0, 0, -1, 1, 0, -1, 1, 1, 1, -1, -1, 1, -1, 0, -2, 2, 0, 0, 2, -2, 0, -1, -2, 1, -2, 2, -1, 2, 1, 1, 2, -1, 2, -2, 1, -2, -1, -2, -2, 2, -2, 2, 2, -2, 2}; /*! * findSimilarSizedTemplatesInit() * * Input: classer * pixs (instance to be matched) * Return: Allocated context to be used with findSimilar* */ static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs) { JBFINDCTX *state; state = (JBFINDCTX *)CALLOC(1, sizeof(JBFINDCTX)); state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS; state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS; state->classer = classer; return state; } static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pstate) { JBFINDCTX *state; PROCNAME("findSimilarSizedTemplatesDestroy"); if (pstate == NULL) { L_WARNING("ptr address is null", procName); return; } if ((state = *pstate) == NULL) return; numaDestroy(&state->numa); FREE(state); *pstate = NULL; return; } /*! * findSimilarSizedTemplatesNext() * * Input: state (from findSimilarSizedTemplatesInit) * Return: Next template number, or -1 when finished * * We have a hash table mapping template area to a list of template * numbers with that area. We wish to find similar sized templates, * so we first look for templates with the same width and height, and * then with width + 1, etc. This walk is guided by the * two_by_two_walk array, above. * * We don't want to have to collect the whole list of templates first because * (we hope) to find it quickly. So we keep the context for this walk in an * explictit state structure and this function acts like a generator. */ static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *state) { l_int32 desiredh, desiredw, size, templ; PIX *pixt; while(1) { /* Continue the walk over step 'i' */ if (state->i >= 25) { /* all done */ return -1; } desiredw = state->w + two_by_two_walk[2 * state->i]; desiredh = state->h + two_by_two_walk[2 * state->i + 1]; if (desiredh < 1 || desiredw < 1) { /* invalid size */ state->i++; continue; } if (!state->numa) { /* We have yet to start walking the array for the step 'i' */ state->numa = numaHashGetNuma(state->classer->nahash, desiredh * desiredw); if (!state->numa) { /* nothing there */ state->i++; continue; } state->n = 0; /* OK, we got a numa. */ } /* Continue working on this numa */ size = numaGetCount(state->numa); for ( ; state->n < size; ) { templ = (l_int32)(state->numa->array[state->n++] + 0.5); pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE); if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw && pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) { pixDestroy(&pixt); return templ; } pixDestroy(&pixt); } /* Exhausted the numa; take another step and try again */ state->i++; numaDestroy(&state->numa); continue; } } /*! * finalPositioningForAlignment() * * Input: pixs (input page image) * x, y (location of UL corner of bb of component in pixs) * idelx, idely (compensation to match centroids of component * and template) * pixt (template, with JB_ADDED_PIXELS of padding on all sides) * sumtab (for summing fg pixels in an image) * &dx, &dy (return delta on position for best match; each * one is in the set {-1, 0, 1}) * Return: 0 if OK, 1 on error * */ static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y, l_int32 idelx, l_int32 idely, PIX *pixt, l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy) { l_int32 w, h, i, j, minx, miny, count, mincount; PIX *pixi; /* clipped from source pixs */ PIX *pixr; /* temporary storage */ BOX *box; PROCNAME("finalPositioningForAlignment"); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (!pixt) return ERROR_INT("pixt not defined", procName, 1); if (!pdx || !pdy) return ERROR_INT("&dx and &dy not both defined", procName, 1); if (!sumtab) return ERROR_INT("sumtab not defined", procName, 1); *pdx = *pdy = 0; /* Use JB_ADDED_PIXELS pixels padding on each side */ w = pixGetWidth(pixt); h = pixGetHeight(pixt); box = boxCreate(x - idelx - JB_ADDED_PIXELS, y - idely - JB_ADDED_PIXELS, w, h); pixi = pixClipRectangle(pixs, box, NULL); boxDestroy(&box); if (!pixi) return ERROR_INT("pixi not made", procName, 1); pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1); mincount = 0x7fffffff; for (i = -1; i <= 1; i++) { for (j = -1; j <= 1; j++) { pixCopy(pixr, pixi); pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0); pixCountPixels(pixr, &count, sumtab); if (count < mincount) { minx = j; miny = i; mincount = count; } } } pixDestroy(&pixi); pixDestroy(&pixr); *pdx = minx; *pdy = miny; return 0; }