C++程序  |  1833行  |  48.25 KB

/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -  This software is distributed in the hope that it will be
 -  useful, but with NO WARRANTY OF ANY KIND.
 -  No author or distributor accepts responsibility to anyone for the
 -  consequences of using this software, or for whether it serves any
 -  particular purpose or works at all, unless he or she says so in
 -  writing.  Everyone is granted permission to copy, modify and
 -  redistribute this source code, for commercial or non-commercial
 -  purposes, with the following restrictions: (1) the origin of this
 -  source code must not be misrepresented; (2) modified versions must
 -  be plainly marked as such; and (3) this notice may not be removed
 -  or altered from any source or modified source distribution.
 *====================================================================*/


/*
 *  utils.c
 *
 *       error, warning and info procs; all invoked by macros
 *           l_int32    returnErrorInt()
 *           l_float32  returnErrorFloat()
 *           void      *returnErrorPtr()
 *           void       returnErrorVoid()
 *           void       l_error()
 *           void       l_errorString()
 *           void       l_errorInt()
 *           void       l_errorFloat()
 *           void       l_warning()
 *           void       l_warningString()
 *           void       l_warningInt()
 *           void       l_warningFloat()
 *           void       l_info()
 *           void       l_infoString()
 *           void       l_infoInt()
 *           void       l_infoInt2()
 *           void       l_infoFloat()
 *           void       l_infoFloat2()
 *
 *       safe string procs
 *           char      *stringNew()
 *           l_int32    stringReplace()
 *           char      *stringJoin()
 *           char      *stringReverse()
 *           char      *strtokSafe()
 *           l_int32    stringSplitOnToken()
 *
 *       find and replace procs
 *           char      *stringRemoveChars()
 *           l_int32    stringFindSubstr()
 *           char      *stringReplaceSubstr()
 *           char      *stringReplaceEachSubstr()
 *           l_int32    arrayFindSequence()
 *
 *       safe realloc
 *           void      *reallocNew()
 *
 *       read file to memory
 *           l_uint8   *arrayRead()
 *           l_uint8   *arrayReadStream()
 *           l_int32    nbytesInFile()
 *           l_int32    fnbytesInFile()
 *
 *       write memory to file
 *           l_int32    arrayWrite()
 *
 *       byte-swapping data conversion
 *           l_uint16   convertOnBigEnd16()
 *           l_uint32   convertOnBigEnd32()
 *           l_uint16   convertOnLittleEnd16()
 *           l_uint32   convertOnLittleEnd32()
 *
 *       file opening
 *           FILE      *fopenReadStream()
 *
 *       file name operations
 *           l_int32    splitPathAtDirectory()
 *           l_int32    splitPathAtExtension()
 *           char      *genPathname()
 *           char      *genTempFilename()
 *           l_int32    extractNumberFromFilename()
 *
 *       timing
 *           void       startTimer()
 *           l_float32  stopTimer()
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "allheaders.h"

#if COMPILER_MSVC
static const char sepchar = '\\';
#else
static const char sepchar = '/';
#endif


/*----------------------------------------------------------------------*
 *                 Error, warning and info message procs                *
 *                                                                      *
 *            ---------------------  N.B. ---------------------         *
 *                                                                      *
 *    (1) These functions all print messages to stderr.                 *
 *                                                                      *
 *    (2) They must be invoked only by macros, which are in             *
 *        environ.h, so that the print output can be disabled           *
 *        at compile time, using -DNO_CONSOLE_IO.                       *
 *                                                                      *
 *----------------------------------------------------------------------*/
/*!
 *  returnErrorInt()
 *
 *      Input:  msg (error message)
 *              procname
 *              ival (return val)
 *      Return: ival (typically 1)
 */
l_int32
returnErrorInt(const char  *msg, 
               const char  *procname, 
               l_int32      ival)
{
    fprintf(stderr, "Error in %s: %s\n", procname, msg);
    return ival;
}


/*!
 *  returnErrorFloat()
 *
 *      Input:  msg (error message)
 *              procname
 *              fval (return val)
 *      Return: fval
 */
l_float32
returnErrorFloat(const char  *msg, 
                 const char  *procname, 
                 l_float32    fval)
{
    fprintf(stderr, "Error in %s: %s\n", procname, msg);
    return fval;
}


/*!
 *  returnErrorPtr()
 *
 *      Input:  msg (error message)
 *              procname
 *              pval  (return val)
 *      Return: pval (typically null)
 */
void *
returnErrorPtr(const char  *msg,
               const char  *procname, 
               void        *pval)
{
    fprintf(stderr, "Error in %s: %s\n", procname, msg);
    return pval;
}


/*!
 *  returnErrorVoid()
 *
 *      Input: msg (error message)
 *             procname
 */
void
returnErrorVoid(const char  *msg, 
                const char  *procname)
{
    fprintf(stderr, "Error in %s: %s\n", procname, msg);
    return;
}


/*!
 *  l_error()
 *
 *      Input: msg (error message)
 *             procname
 */
void
l_error(const char  *msg,
        const char  *procname)
{
    fprintf(stderr, "Error in %s: %s\n", procname, msg);
    return;
}


/*!
 *  l_errorString()
 *
 *      Input: msg (error message; must include '%s')
 *             procname
 *             str (embedded in error message via %s)
 */
void
l_errorString(const char  *msg,
              const char  *procname,
              const char  *str)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname || !str) {
        ERROR_VOID("msg, procname or str not defined in l_errorString()",
                   procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_errorString()", procname);
        return;
    }

    sprintf(charbuf, "Error in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, str);

    FREE(charbuf);
    return;
}


/*!
 *  l_errorInt()
 *
 *      Input: msg (error message; must include '%d')
 *             procname
 *             ival (embedded in error message via %d)
 */
void
l_errorInt(const char  *msg,
           const char  *procname,
           l_int32      ival)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname) {
        ERROR_VOID("msg or procname not defined in l_errorInt()", procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_errorInt()", procname);
        return;
    }

    sprintf(charbuf, "Error in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, ival);

    FREE(charbuf);
    return;
}


/*!
 *  l_errorFloat()
 *
 *      Input: msg (error message; must include '%f')
 *             procname
 *             fval (embedded in error message via %f)
 */
void
l_errorFloat(const char  *msg,
             const char  *procname,
             l_float32    fval)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname) {
        ERROR_VOID("msg or procname not defined in l_errorFloat()", procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_errorFloat()", procname);
        return;
    }

    sprintf(charbuf, "Error in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, fval);

    FREE(charbuf);
    return;
}


/*!
 *  l_warning()
 *
 *      Input: msg (warning message)
 *             procname
 */
void
l_warning(const char  *msg,
          const char  *procname)
{
    fprintf(stderr, "Warning in %s: %s\n", procname, msg);
    return;
}


/*!
 *  l_warningString()
 *
 *      Input: msg (warning message; must include '%s')
 *             procname
 *             str (embedded in warning message via %s)
 */
void
l_warningString(const char  *msg,
                const char  *procname,
                const char  *str)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname || !str) {
        ERROR_VOID("msg, procname or str not defined in l_warningString()",
                   procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_warningString()", procname);
        return;
    }

    sprintf(charbuf, "Warning in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, str);

    FREE(charbuf);
    return;
}


/*!
 *  l_warningInt()
 *
 *      Input: msg (warning message; must include '%d')
 *             procname
 *             ival (embedded in warning message via %d)
 */
void
l_warningInt(const char  *msg,
             const char  *procname,
             l_int32      ival)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname) {
        ERROR_VOID("msg or procname not defined in l_warningInt()", procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_warningInt()", procname);
        return;
    }

    sprintf(charbuf, "Warning in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, ival);

    FREE(charbuf);
    return;
}


/*!
 *  l_warningFloat()
 *
 *      Input: msg (warning message; must include '%f')
 *             procname
 *             fval (embedded in warning message via %f)
 */
void
l_warningFloat(const char  *msg,
               const char  *procname,
               l_float32    fval)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname) {
        ERROR_VOID("msg or procname not defined in l_warningFloat()", procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_warningFloat()", procname);
        return;
    }

    sprintf(charbuf, "Warning in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, fval);

    FREE(charbuf);
    return;
}


/*!
 *  l_info()
 *
 *      Input: msg (info message)
 *             procname
 */
void
l_info(const char  *msg,
       const char  *procname)
{
    fprintf(stderr, "Info in %s: %s\n", procname, msg);
    return;
}


/*!
 *  l_infoString()
 *
 *      Input: msg (info message; must include '%s')
 *             procname
 *             str (embedded in warning message via %s)
 */
void
l_infoString(const char  *msg,
             const char  *procname,
             const char  *str)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname || !str) {
        ERROR_VOID("msg, procname or str not defined in l_infoString()",
                   procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_infoString()", procname);
        return;
    }

    sprintf(charbuf, "Info in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, str);

    FREE(charbuf);
    return;
}


/*!
 *  l_infoInt()
 *
 *      Input: msg (info message; must include '%d')
 *             procname
 *             ival (embedded in info message via %d)
 */
void
l_infoInt(const char  *msg,
          const char  *procname, 
          l_int32      ival)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname) {
        ERROR_VOID("msg or procname not defined in l_infoInt()", procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_infoInt()", procname);
        return;
    }

    sprintf(charbuf, "Info in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, ival);

    FREE(charbuf);
    return;
}


/*!
 *  l_infoInt2()
 *
 *      Input: msg (info message; must include two '%d')
 *             procname
 *             ival1, ival2 (two args, embedded in info message via %d)
 */
void
l_infoInt2(const char  *msg,
           const char  *procname, 
           l_int32      ival1,
           l_int32      ival2)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname) {
        ERROR_VOID("msg or procname not defined in l_infoInt2()", procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_infoInt2()", procname);
        return;
    }

    sprintf(charbuf, "Info in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, ival1, ival2);

    FREE(charbuf);
    return;
}


/*!
 *  l_infoFloat()
 *
 *      Input: msg (info message; must include '%f')
 *             procname
 *             fval (embedded in info message via %f)
 */
void
l_infoFloat(const char  *msg,
            const char  *procname, 
            l_float32    fval)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname) {
        ERROR_VOID("msg or procname not defined in l_infoFloat()", procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_infoFloat()", procname);
        return;
    }

    sprintf(charbuf, "Info in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, fval);

    FREE(charbuf);
    return;
}


/*!
 *  l_infoFloat2()
 *
 *      Input: msg (info message; must include two '%f')
 *             procname
 *             fval1, fval2 (two args, embedded in info message via %f)
 */
void
l_infoFloat2(const char  *msg,
             const char  *procname, 
             l_float32    fval1,
             l_float32    fval2)
{
l_int32  bufsize;
char    *charbuf;

    if (!msg || !procname) {
        ERROR_VOID("msg or procname not defined in l_infoFloat2()", procname);
        return;
    }

    bufsize = strlen(msg) + strlen(procname) + 128;
    if ((charbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) {
        ERROR_VOID("charbuf not made in l_infoFloat()", procname);
        return;
    }

    sprintf(charbuf, "Info in %s: %s\n", procname, msg);
    fprintf(stderr, charbuf, fval1, fval2);

    FREE(charbuf);
    return;
}



/*--------------------------------------------------------------------*
 *                       Safe string operations                       *
 *--------------------------------------------------------------------*/
/*!
 *  stringNew()
 *
 *      Input:  src string
 *      Return: dest copy of src string, or null on error
 */
char *
stringNew(const char  *src) 
{
char  *dest;

    PROCNAME("stringNew");

    if (!src)
        return (char *)ERROR_PTR("src not defined", procName, NULL);
    
    if ((dest = (char *)CALLOC(strlen(src) + 2, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("dest not made", procName, NULL);
    strcpy(dest, src);

    return dest;
}
    

/*!
 *  stringReplace()
 *
 *      Input:  &dest string (<return> copy)
 *              src string
 *      Return: 0 if OK; 1 on error
 *
 *  Notes:
 *      (1) Frees any existing dest string
 *      (2) Puts a copy of src string in the dest
 *      (3) If either or both strings are null, does the reasonable thing.
 */
l_int32
stringReplace(char       **pdest,
              const char  *src)
{
char  *scopy;

    PROCNAME("stringReplace");

    if (!pdest)
        return ERROR_INT("pdest not defined", procName, 1);

    if (*pdest)
        FREE(*pdest);
    
    if (src) {
        if ((scopy = (char *)CALLOC(strlen(src) + 2, sizeof(char))) == NULL)
            return ERROR_INT("scopy not made", procName, 1);
        strcpy(scopy, src);
        *pdest = scopy;
    }
    else
        *pdest = NULL;

    return 0;
}
    

/*!
 *  stringJoin()
 *
 *      Input:  src1 string (<optional>)
 *              src2 string (<optional>)
 *      Return: concatenated string, or null on error
 *
 *  Notes:
 *      (1) This is the safe version of strcat; it makes a new string.
 *      (2) It is not an error if either or both of the strings
 *          are empty, or if either or both the pointers are null.
 */
char *
stringJoin(const char  *src1, 
           const char  *src2)
{
char    *dest;
l_int32  srclen1, srclen2, destlen;

    PROCNAME("stringJoin");

    srclen1 = srclen2 = 0;
    if (src1)
        srclen1 = strlen(src1);
    if (src2)
        srclen2 = strlen(src2);
    destlen = srclen1 + srclen2 + 3;

    if ((dest = (char *)CALLOC(destlen, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("calloc fail for dest", procName, NULL);

    if (src1)
        strcpy(dest, src1);
    if (src2)
        strcat(dest, src2);
    return dest;
}


/*!
 *  stringReverse()
 *
 *      Input:  src (string)
 *      Return: dest (newly-allocated reversed string)
 */
char *
stringReverse(const char  *src)
{
char    *dest;
l_int32  i, len;

    PROCNAME("stringReverse");

    if (!src)
        return (char *)ERROR_PTR("src not defined", procName, NULL);
    len = strlen(src);
    if ((dest = (char *)CALLOC(len + 1, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("calloc fail for dest", procName, NULL);
    for (i = 0; i < len; i++)
        dest[i] = src[len - 1 - i];

    return dest;
}


/*!
 *  strtokSafe()
 *
 *      Input:  cstr (input string to be sequentially parsed;
 *                    use NULL after the first call)
 *              seps (a string of character separators)
 *              &saveptr (<return> ptr to the next char after
 *                        the last encountered separator)
 *      Return: substr (a new string that is copied from the previous
 *                      saveptr up to but not including the next
 *                      separator character), or NULL if end of cstr.
 *
 *  Notes:
 *      (1) This is a thread-safe implementation of strtok.
 *      (2) It has the same interface as strtok_r.
 *      (3) It differs from strtok_r in usage in two respects:
 *          (a) the input string is not altered
 *          (b) each returned substring is newly allocated and must
 *              be freed after use.
 *      (4) Let me repeat that.  This is "safe" because the input
 *          string is not altered and because each returned string
 *          is newly allocated on the heap.
 *      (5) It is here because, surprisingly, some C libraries don't
 *          include strtok_r.
 *      (6) Important usage points:
 *          - Input the string to be parsed on the first invocation.
 *          - Then input NULL after that; the value returned in saveptr
 *            is used in all subsequent calls.
 *      (7) This is only slightly slower than strtok_k.
 */
char *
strtokSafe(char        *cstr,
           const char  *seps,
           char       **psaveptr)
{
char     nextc;
char    *start, *substr;
l_int32  istart, i, j, nchars;

    PROCNAME("strtokSafe");

    if (!seps)
        return (char *)ERROR_PTR("seps not defined", procName, NULL);
    if (!psaveptr)
        return (char *)ERROR_PTR("&saveptr not defined", procName, NULL);

    if (!cstr)
        start = *psaveptr;
    else
        start = cstr;
    if (!start)  /* nothing to do */
        return NULL;

        /* First time, scan for the first non-sep character */
    istart = 0;
    if (cstr) {
        for (istart = 0;; istart++) {
            if ((nextc = start[istart]) == '\0') {
                *psaveptr = NULL;  /* in case caller doesn't check ret value */
                return NULL;
            }
            if (!strchr(seps, nextc))
                break;
        }
    }

        /* Scan through, looking for a sep character; if none is
         * found, 'i' will be at the end of the string. */
    for (i = istart;; i++) {
        if ((nextc = start[i]) == '\0')
            break;
        if (strchr(seps, nextc))
            break;
    }

        /* Save the substring */
    nchars = i - istart;
    substr = (char *)CALLOC(nchars + 1, sizeof(char));
    strncpy(substr, start + istart, nchars);

        /* Look for the next non-sep character.
         * If this is the last substring, return a null saveptr. */
    for (j = i;; j++) {
        if ((nextc = start[j]) == '\0') {
            *psaveptr = NULL;  /* no more non-sep characters */
            break;
        }
        if (!strchr(seps, nextc)) {
            *psaveptr = start + j;  /* start here on next call */
                break;
        }
    }

    return substr;
}


/*!
 *  stringSplitOnToken()
 *
 *      Input:  cstr (input string to be split; not altered)
 *              seps (a string of character separators)
 *              &head (<return> ptr to copy of the input string, up to
 *                     the first separator token encountered)
 *              &tail (<return> ptr to copy of the part of the input string
 *                     starting with the first non-separator character
 *                     that occurs after the first separator is found)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The input string is not altered; all split parts are new strings.
 *      (2) The split occurs around the first consecutive sequence of
 *          tokens encountered.
 *      (3) The head goes from the beginning of the string up to
 *          but not including the first token found.
 *      (4) The tail contains the second part of the string, starting
 *          with the first char in that part that is NOT a token.
 *      (5) If no separator token is found, 'head' contains a copy
 *          of the input string and 'tail' is null.
 */
l_int32
stringSplitOnToken(char        *cstr,
                   const char  *seps,
                   char       **phead,
                   char       **ptail)
{
char  *saveptr;

    PROCNAME("stringSplitOnToken");

    if (!phead)
        return ERROR_INT("&head not defined", procName, 1);
    if (!ptail)
        return ERROR_INT("&tail not defined", procName, 1);
    *phead = *ptail = NULL;
    if (!cstr)
        return ERROR_INT("cstr not defined", procName, 1);
    if (!seps)
        return ERROR_INT("seps not defined", procName, 1);

    *phead = strtokSafe(cstr, seps, &saveptr);
    if (saveptr)
        *ptail = stringNew(saveptr);
    return 0;
}


/*--------------------------------------------------------------------*
 *                       Find and replace procs                       *
 *--------------------------------------------------------------------*/
/*!
 *  stringRemoveChars()
 *
 *      Input:  src (input string; can be of zero length)
 *              remchars  (string of chars to be removed from src)
 *      Return: dest (string with specified chars removed), or null on error
 */
char *
stringRemoveChars(const char  *src,
                  const char  *remchars) 
{
char     ch;
char    *dest;
l_int32  nsrc, i, k;

    PROCNAME("stringRemoveChars");

    if (!src)
        return (char *)ERROR_PTR("src not defined", procName, NULL);
    if (!remchars)
        return stringNew(src);
    
    if ((dest = (char *)CALLOC(strlen(src) + 1, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("dest not made", procName, NULL);
    nsrc = strlen(src);
    for (i = 0, k = 0; i < nsrc; i++) {
        ch = src[i];
        if (!strchr(remchars, ch))
            dest[k++] = ch;
    }

    return dest;
}


/*!
 *  stringFindSubstr()
 *
 *      Input:  src (input string; can be of zero length)
 *              sub (substring to be searched for)
 *              &loc (<return optional> location of substring in src)
 *      Return: 1 if found; 0 if not found or on error
 *
 *  Notes:
 *      (1) This is a wrapper around strstr().
 *      (2) Both @src and @sub must be defined, and @sub must have
 *          length of at least 1.
 *      (3) If the substring is not found and loc is returned, it has
 *          the value -1.
 */
l_int32
stringFindSubstr(const char  *src,
                 const char  *sub,
                 l_int32     *ploc)
{
char  *ptr;

    PROCNAME("stringFindSubstr");

    if (!src)
        return ERROR_INT("src not defined", procName, 0);
    if (!sub)
        return ERROR_INT("sub not defined", procName, 0);
    if (ploc) *ploc = -1;
    if (strlen(sub) == 0)
        return ERROR_INT("substring length 0", procName, 0);
    if (strlen(src) == 0)
        return 0;

    if ((ptr = (char *)strstr(src, sub)) == NULL)  /* not found */
        return 0;

    if (*ploc)
        *ploc = ptr - src;
    return 1;
}


/*!
 *  stringReplaceSubstr()
 *
 *      Input:  src (input string; can be of zero length)
 *              sub1 (substring to be replaced)
 *              sub2 (substring to put in; can be "")
 *              &found (<return optional> 1 if sub1 is found; 0 otherwise)
 *              &loc (<return optional> location of ptr after replacement)
 *      Return: dest (string with substring replaced), or null if the
 *              substring not found or on error.
 *
 *  Notes:
 *      (1) Replaces the first instance.
 *      (2) To only remove sub1, use "" for sub2
 *      (3) Returns a new string if sub1 and sub2 are the same.
 *      (4) The optional loc is input as the byte offset within the src
 *          from which the search starts, and after the search it is the
 *          char position in the string of the next character after
 *          the substituted string.
 *      (5) N.B. If ploc is not null, loc must always be initialized.
 *          To search the string from the beginning, set loc = 0.
 */
char *
stringReplaceSubstr(const char  *src,
                    const char  *sub1,
                    const char  *sub2,
                    l_int32     *pfound,
                    l_int32     *ploc)
{
char    *ptr, *dest;
l_int32  nsrc, nsub1, nsub2, len, npre, loc;

    PROCNAME("stringReplaceSubstr");

    if (!src)
        return (char *)ERROR_PTR("src not defined", procName, NULL);
    if (!sub1)
        return (char *)ERROR_PTR("sub1 not defined", procName, NULL);
    if (!sub2)
        return (char *)ERROR_PTR("sub2 not defined", procName, NULL);
    
    if (pfound)
        *pfound = 0;
    if (ploc)
        loc = *ploc;
    else
        loc = 0;
    if ((ptr = (char *)strstr(src + loc, sub1)) == NULL) {
        return NULL;
    }

    if (pfound)
        *pfound = 1;
    nsrc = strlen(src);
    nsub1 = strlen(sub1);
    nsub2 = strlen(sub2);
    len = nsrc + nsub2 - nsub1;
    if ((dest = (char *)CALLOC(len + 1, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("dest not made", procName, NULL);
    npre = ptr - src;
    memcpy(dest, src, npre);
    strcpy(dest + npre, sub2);
    strcpy(dest + npre + nsub2, ptr + nsub1);
    if (ploc)
        *ploc = npre + nsub2;

    return dest;
}


/*!
 *  stringReplaceEachSubstr()
 *
 *      Input:  src (input string; can be of zero length)
 *              sub1 (substring to be replaced)
 *              sub2 (substring to put in; can be "")
 *              &count (<optional return > the number of times that sub1
 *                      is found in src; 0 if not found)
 *      Return: dest (string with substring replaced), or null if the
 *              substring not found or on error.
 *
 *  Notes:
 *      (1) Replaces every instance.
 *      (2) To only remove each instance of sub1, use "" for sub2
 *      (3) Returns NULL if sub1 and sub2 are the same.
 */
char *
stringReplaceEachSubstr(const char  *src,
                        const char  *sub1,
                        const char  *sub2,
                        l_int32     *pcount)
{
char    *currstr, *newstr;
l_int32  loc;

    PROCNAME("stringReplaceEachSubstr");

    if (!src)
        return (char *)ERROR_PTR("src not defined", procName, NULL);
    if (!sub1)
        return (char *)ERROR_PTR("sub1 not defined", procName, NULL);
    if (!sub2)
        return (char *)ERROR_PTR("sub2 not defined", procName, NULL);

    if (pcount)
        *pcount = 0;
    loc = 0;
    if ((newstr = stringReplaceSubstr(src, sub1, sub2, NULL, &loc)) == NULL)
        return NULL;

    if (pcount)
        (*pcount)++;
    while (1) {
        currstr = newstr;
        newstr = stringReplaceSubstr(currstr, sub1, sub2, NULL, &loc);
        if (!newstr)
            return currstr;
        FREE(currstr);
        if (pcount)
            (*pcount)++;
    }
}
            

/*!
 *  arrayFindSequence()
 *
 *      Input:  data (byte array)
 *              datalen (length of data, in bytes)
 *              sequence (subarray of bytes to find in data)
 *              seqlen (length of sequence, in bytes)
 *              &offset (return> offset from beginning of
 *                       data where the sequence begins)
 *              &found (<return> 1 if sequence is found; 0 otherwise)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The byte arrays 'data' and 'sequence' are not C strings,
 *          as they can contain null bytes.  Therefore, for each
 *          we must give the length of the array.
 *      (2) This searches for the first occurrence in 'data' of
 *          the first 'seqlen' bytes of 'sequence'.  The parameter 'seqlen'
 *          must not exceed the actual length of the 'sequence' byte array.
 *      (3) If the sequence is not found, the offset will be set to -1.
 */
l_int32
arrayFindSequence(const l_uint8  *data,
                  l_int32         datalen,
                  const l_uint8  *sequence,
                  l_int32         seqlen,
                  l_int32        *poffset,
                  l_int32        *pfound)
{
l_int32  i, j, found, lastpos;

    PROCNAME("arrayFindSequence");

    if (!data || !sequence)
        return ERROR_INT("data & sequence not both defined", procName, 1);
    if (!poffset || !pfound)
        return ERROR_INT("&offset and &found not both defined", procName, 1);

    *pfound = 0;
    *poffset = -1;
    lastpos = datalen - seqlen + 1;
    found = 0;
    for (i = 0; i < lastpos; i++) {
        for (j = 0; j < seqlen; j++) {
            if (data[i + j] != sequence[j])
                 break;
            if (j == seqlen - 1)
                 found = 1;
        }
        if (found)
            break;
    }

    if (found) {
        *pfound = 1;
        *poffset = i;
    }

    return 0;
}


/*--------------------------------------------------------------------*
 *                             Safe realloc                           *
 *--------------------------------------------------------------------*/
/*!
 *  reallocNew()
 *
 *      Input:  &indata (<optional>; nulls indata)
 *              size of input data to be copied (bytes)
 *              size of data to be reallocated (bytes)
 *      Return: ptr to new data, or null on error
 *
 *  Action: !N.B. (3) and (4)!
 *      (1) Allocates memory, initialized to 0
 *      (2) Copies as much of the input data as possible
 *          to the new block, truncating the copy if necessary
 *      (3) Frees the input data
 *      (4) Zeroes the input data ptr
 *
 *  Notes:
 *      (1) If newsize <=0, just frees input data and nulls ptr
 *      (2) If input ptr is null, just callocs new memory
 *      (3) This differs from realloc in that it always allocates
 *          new memory (if newsize > 0) and initializes it to 0,
 *          it requires the amount of old data to be copied,
 *          and it takes the address of the input ptr and
 *          nulls the handle.
 */
void *
reallocNew(void   **pindata,
           l_int32  oldsize,
           l_int32  newsize)
{
l_int32  minsize;
void    *indata; 
void    *newdata;

    PROCNAME("reallocNew");

    if (!pindata)
        return ERROR_PTR("input data not defined", procName, NULL);
    indata = *pindata;

    if (newsize <= 0) {   /* nonstandard usage */
        if (indata) {
            FREE(indata);
            *pindata = NULL;
        }
        return NULL;
    }

    if (!indata)   /* nonstandard usage */
    {
        if ((newdata = (void *)CALLOC(1, newsize)) == NULL)
            return ERROR_PTR("newdata not made", procName, NULL);
        return newdata;
    }

        /* Standard usage */
    if ((newdata = (void *)CALLOC(1, newsize)) == NULL)
        return ERROR_PTR("newdata not made", procName, NULL);
    minsize = L_MIN(oldsize, newsize);
    memcpy((char *)newdata, (char *)indata, minsize);

    FREE(indata);
    *pindata = NULL;

    return newdata;
}
    


/*--------------------------------------------------------------------*
 *                       Reading bytes from file                      *
 *--------------------------------------------------------------------*/
/*!
 *  arrayRead()
 *
 *      Input:  filename
 *              &nbytes (<return> number of bytes read)
 *      Return: array, or null on error
 */
l_uint8 *
arrayRead(const char  *fname, 
           l_int32     *pnbytes)
{
l_uint8  *data;
FILE     *fp;

    PROCNAME("arrayRead");

    if (!fname)
        return (l_uint8 *)ERROR_PTR("fname not defined", procName, NULL);
    if (!pnbytes)
        return (l_uint8 *)ERROR_PTR("pnbytes not defined", procName, NULL);
    *pnbytes = 0;

    if ((fp = fopen(fname, "r")) == NULL)
        return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL);

    data = arrayReadStream(fp, pnbytes);
    fclose(fp);

    return data;
}


/*!
 *  arrayReadStream()
 *
 *      Input:  stream
 *              &nbytes (<return> number of bytes read)
 *      Return: null-terminated array, or null on error
 *              (reading 0 bytes is not an error)
 *
 *  Notes:
 *      (1) N.B.: as a side effect, this always re-positions the
 *          stream ptr to the beginning of the file.
 */
l_uint8 *
arrayReadStream(FILE     *fp, 
                l_int32  *pnbytes)
{
l_uint8  *data;

    PROCNAME("arrayReadStream");

    if (!fp)
        return (l_uint8 *)ERROR_PTR("stream not defined", procName, NULL);
    if (!pnbytes)
        return (l_uint8 *)ERROR_PTR("ptr to nbytes not defined", procName, NULL);

    *pnbytes = fnbytesInFile(fp);

    if ((data = (l_uint8 *)CALLOC(1, *pnbytes + 1)) == NULL)
        return (l_uint8 *)ERROR_PTR("CALLOC fail for data", procName, NULL);
    fread(data, *pnbytes, 1, fp);

    return data;
}


/*!
 *  nbytesInFile()
 *
 *      Input:  filename
 *      Return: nbytes in file; 0 on error
 */
l_int32
nbytesInFile(const char  *filename)
{
l_int32  nbytes;
FILE    *fp;

    PROCNAME("nbytesInFile");

    if (!filename)
        return ERROR_INT("filename not defined", procName, 0);
    fp = fopen(filename, "r");
    nbytes = fnbytesInFile(fp);
    fclose(fp);
    return nbytes;
}


/*!
 *  fnbytesInFile()
 *
 *      Input:  file stream
 *      Return: nbytes in file; 0 on error
 */
l_int32
fnbytesInFile(FILE  *fp)
{
l_int32  nbytes, pos;

    PROCNAME("fnbytesInFile");

    if (!fp)
        return ERROR_INT("stream not open", procName, 0);

    pos = ftell(fp);          /* initial position */
    fseek(fp, 0, SEEK_END);   /* EOF */
    nbytes = ftell(fp);
    fseek(fp, 0, pos);        /* back to initial position */
    return nbytes;
}



/*--------------------------------------------------------------------*
 *                          Writing bytes to file                     *
 *--------------------------------------------------------------------*/
/*!
 *  arrayWrite()
 *
 *      Input:  filename (output)
 *              operation  ("w" for write; "a" for append)
 *              data  (binary data to be written)
 *              nbytes  (size of data array)
 *      Return: 0 if OK; 1 on error
 */
l_int32
arrayWrite(const char  *filename,
           const char  *operation,
           void        *data,
           l_int32      nbytes)
{
FILE  *fp;

    PROCNAME("arrayWrite");

    if (!filename)
        return ERROR_INT("filename not defined", procName, 1);
    if (!operation)
        return ERROR_INT("operation not defined", procName, 1);
    if (!data)
        return ERROR_INT("data not defined", procName, 1);
    if (nbytes <= 0)
        return ERROR_INT("nbytes must be > 0", procName, 1);

    if (!strcmp(operation, "w") && !strcmp(operation, "a"))
        return ERROR_INT("operation not one of {'w','a'}", procName, 1);

    if ((fp = fopen(filename, operation)) == NULL)
        return ERROR_INT("stream not opened", procName, 1);
    fwrite(data, 1, nbytes, fp);
    fclose(fp);

    return 0;
}


/*--------------------------------------------------------------------------*
 *   16 and 32 bit byte-swapping on big endian and little  endian machines  *
 *                                                                          * 
 *   These are typically used for I/O conversions:                          *
 *      (1) endian conversion for data that was read from a file            *
 *      (2) endian conversion on data before it is written to a file        *
 *--------------------------------------------------------------------------*/

/*--------------------------------------------------------------------*
 *                        16-bit byte swapping                        *
 *--------------------------------------------------------------------*/
#ifdef L_BIG_ENDIAN

l_uint16
convertOnBigEnd16(l_uint16  shortin)
{
    return ((shortin << 8) | (shortin >> 8));
}

l_uint16
convertOnLittleEnd16(l_uint16  shortin)
{
    return  shortin;
}

#else     /* L_LITTLE_ENDIAN */

l_uint16
convertOnLittleEnd16(l_uint16  shortin)
{
    return ((shortin << 8) | (shortin >> 8));
}

l_uint16
convertOnBigEnd16(l_uint16  shortin)
{
    return  shortin;
}

#endif  /* L_BIG_ENDIAN */


/*--------------------------------------------------------------------*
 *                        32-bit byte swapping                        *
 *--------------------------------------------------------------------*/
#ifdef L_BIG_ENDIAN

l_uint32
convertOnBigEnd32(l_uint32  wordin)
{
    return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) |
            ((wordin >> 8) & 0x0000ff00) | (wordin >> 24));
}

l_uint32
convertOnLittleEnd32(l_uint32  wordin)
{
    return wordin;
}

#else  /*  L_LITTLE_ENDIAN */

l_uint32
convertOnLittleEnd32(l_uint32  wordin)
{
    return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) |
            ((wordin >> 8) & 0x0000ff00) | (wordin >> 24));
}

l_uint32
convertOnBigEnd32(l_uint32  wordin)
{
    return wordin;
}

#endif  /* L_BIG_ENDIAN */



/*--------------------------------------------------------------------*
 *                         Opening read stream                        *
 *--------------------------------------------------------------------*/
/*!
 *  fopenReadStream()
 *
 *      Input:  filename 
 *      Return: stream or null on error
 */
FILE *
fopenReadStream(const char  *filename)
{
char  *tail;
FILE  *fp;

    PROCNAME("fopenReadStream");

    if (!filename)
        return (FILE *)ERROR_PTR("filename not defined", procName, NULL);

        /* Try input filename */
    if ((fp = fopen(filename, "rb")))
        return fp;

        /* Else, strip directory and try locally */
    splitPathAtDirectory(filename, NULL, &tail);
    if ((fp = fopen(tail, "rb"))) {
        FREE(tail);
        return fp;
    }
    FREE(tail);

    return (FILE *)ERROR_PTR("file not found", procName, NULL);
}


/*--------------------------------------------------------------------*
 *                         File name operations                       *
 *--------------------------------------------------------------------*/
/*!
 *  splitPathAtDirectory()
 *
 *      Input:  pathname  (full path; can be a directory)
 *              &dir  (<optional return> root directory name of
 *                     input path, including trailing '/')
 *              &tail (<optional return> path tail, which is either
 *                     the file name within the root directory or
 *                     the last sub-directory in the path)
 *      Return: 0 if OK, 1 on error
 *       
 *  Note: (1) if you only want the tail, input null for
 *            the root directory ptr.
 *        (2) if you only want the root directory name,
 *            input null for the tail ptr.
 *        (3) This function makes decisions based only on the lexical
 *            structure of the input.  Examples:
 *              /usr/tmp/abc  -->  dir: /usr/tmp/   tail: abc
 *              /usr/tmp/  -->  dir: /usr/tmp/   tail: [empty string]
 *              /usr/tmp  -->  dir: /usr/   tail: tmp
 */
l_int32
splitPathAtDirectory(const char  *pathname,
                     char       **pdir,
                     char       **ptail)
{
char  *cpathname, *lastslash;

    PROCNAME("splitPathAtDirectory");

    if (!pdir && !ptail)
        return ERROR_INT("null input for both strings", procName, 1);
    if (pdir) *pdir = NULL;
    if (ptail) *ptail = NULL;
    if (!pathname)
        return ERROR_INT("pathname not defined", procName, 1);

    cpathname = stringNew(pathname);
    if ((lastslash = strrchr(cpathname, '/'))) {
        if (ptail)
            *ptail = stringNew(lastslash + 1);
        if (pdir) {
            *(lastslash + 1) = '\0';
            *pdir = cpathname;
        }
        else
            FREE(cpathname);
    }
    else {  /* no directory */
        if (pdir)
            *pdir = stringNew("");
        if (ptail)
            *ptail = cpathname;
        else
            FREE(cpathname);
    }

    return 0;
}


/*!
 *  splitPathAtExtension()
 *
 *      Input:  pathname (full path; can be a directory)
 *              &basename (<optional return> pathname not including the
 *                        last dot and characters after that)
 *              &extension (<optional return> path extension, which is
 *                        the last dot and the characters after it.  If
 *                        there is no extension, it returns the empty string)
 *      Return: 0 if OK, 1 on error
 *       
 *  Notes:
 *      (1) If you only want the extension, input null for the basename ptr.
 *      (2) If you only want the basename without extension, input null
 *          for the extension ptr.
 *      (3) This function makes decisions based only on the lexical
 *          structure of the input.  Examples:
 *            /usr/tmp/abc.jpg  -->  basename: /usr/tmp/abc   ext: .jpg
 *            /usr/tmp/.jpg  -->  basename: /usr/tmp/   tail: .jpg
 *            /usr/tmp.jpg/  -->  basename: /usr/tmp.jpg/   tail: [empty str]
 *            ./.jpg  -->  basename: ./   tail: .jpg
 */
l_int32
splitPathAtExtension(const char  *pathname,
                     char       **pbasename,
                     char       **pextension)
{
char  *tail, *dir, *lastdot;
char   empty[4] = "";

    PROCNAME("splitPathExtension");

    if (!pbasename && !pextension)
        return ERROR_INT("null input for both strings", procName, 1);
    if (pbasename) *pbasename = NULL;
    if (pextension) *pextension = NULL;
    if (!pathname)
        return ERROR_INT("pathname not defined", procName, 1);

        /* Split out the directory first */
    splitPathAtDirectory(pathname, &dir, &tail);

        /* Then look for a "." in the tail part.
         * This way we ignore all "." in the directory. */
    if ((lastdot = strrchr(tail, '.'))) {
        if (pextension)
            *pextension = stringNew(lastdot);
        if (pbasename) {
            *lastdot = '\0';
            *pbasename = stringJoin(dir, tail);
        }
    }
    else {
        if (pextension)
            *pextension = stringNew(empty);
        if (pbasename)
            *pbasename = stringNew(pathname);
    }
    FREE(dir);
    FREE(tail);
    return 0;
}


/*! 
 *  genPathname()
 *
 *      Input:  dir (directory name, with or without trailing '/')
 *              fname (file name within the directory)
 *      Return: full pathname, or null on error
 */
char *
genPathname(const char  *dir,
            const char  *fname)
{
char    *charbuf;
l_int32  dirlen, namelen;
    
    PROCNAME("genPathname");

    if (!dir)
        return (char *)ERROR_PTR("dir not defined", procName, NULL);
    if (!fname)
        return (char *)ERROR_PTR("fname not defined", procName, NULL);

    dirlen = strlen(dir);
    namelen = strlen(fname);
    if ((charbuf = (char *)CALLOC(dirlen + namelen + 10, sizeof(char)))
            == NULL)
        return (char *)ERROR_PTR("charbuf not made", procName, NULL);

    if (dir[dirlen - 1] != sepchar)
#if COMPILER_MSVC
        sprintf(charbuf, "%s\\", dir);
#else
        sprintf(charbuf, "%s/", dir);
#endif
    else
        strcpy(charbuf, dir);
    strcat(charbuf, fname);
    return charbuf;
}


/*! 
 *  genTempFilename()
 *
 *      Input:  dir (directory name; use '.' for local dir; no trailing '/')
 *              extension (<optional> filename extention with '.'; can be null)
 *      Return: tempname (with pid embedded in file name), or null on error
 *
 *  Notes:
 *      (1) This function is useful when there can be more than one
 *          process writing and reading temporary files.  It will not
 *          work properly when multiple threads from a single process call
 *          this function.  Furthermore, as with any function that
 *          provides easily guessed temporary filenames, it is not designed
 *          to be safe from an attack where the intruder is logged onto
 *          the server.
 */
char *
genTempFilename(const char  *dir,
                const char  *extension)
{
char     buf[256];
char    *tempname;
l_int32  pid, nchars;
    
    PROCNAME("genTempFilename");

    if (!dir)
        return (char *)ERROR_PTR("dir not defined", procName, NULL);
    pid = getpid();
    if (extension)
        nchars = strlen(extension);
    else
        nchars = 0;

#if COMPILER_MSVC
    snprintf(buf, 255 - nchars, "%s\\%d", dir, pid);
#else
    snprintf(buf, 25 - nchars, "%s/%d", dir, pid);
#endif

    tempname = stringJoin(buf, extension);
    return tempname;
}


/*! 
 *  extractNumberFromFilename()
 *
 *      Input:  fname
 *              numpre (number of characters before the digits to be found)
 *              numpost (number of characters after the digits to be found)
 *      Return: num (number embedded in the filename); -1 on error or if
 *                   not found
 */
l_int32
extractNumberFromFilename(const char  *fname,
                          l_int32      numpre,
                          l_int32      numpost)
{
char    *tail, *basename;
l_int32  len, nret, num;
    
    PROCNAME("extractNumberFromFilename");

    if (!fname)
        return ERROR_INT("fname not defined", procName, -1);

    splitPathAtDirectory(fname, NULL, &tail);
    splitPathAtExtension(tail, &basename, NULL);
    FREE(tail);

    len = strlen(basename);
    if (numpre + numpost > len - 1) {
        FREE(basename);
        return ERROR_INT("numpre + numpost too big", procName, -1);
    }

    basename[len - numpost] = '\n';
    nret = sscanf(basename + numpre, "%d", &num);
    FREE(basename);

    if (nret == 1)
        return num;
    else
        return ERROR_INT("no number found", procName, -1);
}



/*---------------------------------------------------------------------*
 *                           Timing procs                              *
 *---------------------------------------------------------------------*/
/*
 *  Example of use:
 *
 *      startTimer();
 *      ....
 *      fprintf(stderr, "Elapsed time = %7.3f sec\n", stopTimer());
 */
#if !defined(__MINGW32__) && !defined(_WIN32)

#include <sys/time.h>
#include <sys/resource.h>

static struct rusage rusage_before;
static struct rusage rusage_after;

void
startTimer(void)
{
    getrusage(RUSAGE_SELF, &rusage_before);
}


l_float32
stopTimer(void)
{
l_int32  tsec, tusec;

    getrusage(RUSAGE_SELF, &rusage_after);

    tsec = rusage_after.ru_utime.tv_sec - rusage_before.ru_utime.tv_sec;
    tusec = rusage_after.ru_utime.tv_usec - rusage_before.ru_utime.tv_usec;

    return (tsec + ((l_float32)tusec) / 1000000.0);
}

#else   /* __MINGW32__ : resource.h not implemented under MINGW */

#include <windows.h>

static ULARGE_INTEGER utime_before;
static ULARGE_INTEGER utime_after;

void
startTimer(void)
{
HANDLE    this_process;
FILETIME  start, stop, kernel, user;
    
    this_process = GetCurrentProcess ();

    GetProcessTimes (this_process, &start, &stop, &kernel, &user);

    utime_before.LowPart  = user.dwLowDateTime;
    utime_before.HighPart = user.dwHighDateTime;
}

l_float32
stopTimer(void)
{
HANDLE    this_process;
FILETIME  start, stop, kernel, user;
    
    this_process = GetCurrentProcess ();

    GetProcessTimes (this_process, &start, &stop, &kernel, &user);

    utime_after.LowPart  = user.dwLowDateTime;
    utime_after.HighPart = user.dwHighDateTime;

    return ((l_float32)(signed)(utime_after.QuadPart - utime_before.QuadPart) /
             10000000.0);
}

#endif