C++程序  |  1312行  |  39.55 KB

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1999-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  package.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2005aug25
*   created by: Markus W. Scherer
*
*   Read, modify, and write ICU .dat data package files.
*   This is an integral part of the icupkg tool, moved to the toolutil library
*   because parts of tool implementations tend to be later shared by
*   other tools.
*   Subsumes functionality and implementation code from
*   gencmn, decmn, and icuswap tools.
*/

#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/udata.h"
#include "cstring.h"
#include "uarrsort.h"
#include "ucmndata.h"
#include "udataswp.h"
#include "swapimpl.h"
#include "toolutil.h"
#include "package.h"
#include "cmemory.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */

// general definitions ----------------------------------------------------- ***

/* UDataInfo cf. udata.h */
static const UDataInfo dataInfo={
    (uint16_t)sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    (uint8_t)sizeof(UChar),
    0,

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};

U_CDECL_BEGIN
static void U_CALLCONV
printPackageError(void *context, const char *fmt, va_list args) {
    vfprintf((FILE *)context, fmt, args);
}
U_CDECL_END

static uint16_t
readSwapUInt16(uint16_t x) {
    return (uint16_t)((x<<8)|(x>>8));
}

// platform types ---------------------------------------------------------- ***

static const char *types="lb?e";

enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };

static inline int32_t
makeTypeEnum(uint8_t charset, UBool isBigEndian) {
    return 2*(int32_t)charset+isBigEndian;
}

static inline int32_t
makeTypeEnum(char type) {
    return
        type == 'l' ? TYPE_L :
        type == 'b' ? TYPE_B :
        type == 'e' ? TYPE_E :
               -1;
}

static inline char
makeTypeLetter(uint8_t charset, UBool isBigEndian) {
    return types[makeTypeEnum(charset, isBigEndian)];
}

static inline char
makeTypeLetter(int32_t typeEnum) {
    return types[typeEnum];
}

static void
makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
    int32_t typeEnum=makeTypeEnum(type);
    charset=(uint8_t)(typeEnum>>1);
    isBigEndian=(UBool)(typeEnum&1);
}

U_CFUNC const UDataInfo *
getDataInfo(const uint8_t *data, int32_t length,
            int32_t &infoLength, int32_t &headerLength,
            UErrorCode *pErrorCode) {
    const DataHeader *pHeader;
    const UDataInfo *pInfo;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if( data==NULL ||
        (length>=0 && length<(int32_t)sizeof(DataHeader))
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    pHeader=(const DataHeader *)data;
    pInfo=&pHeader->info;
    if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
        pHeader->dataHeader.magic1!=0xda ||
        pHeader->dataHeader.magic2!=0x27 ||
        pInfo->sizeofUChar!=2
    ) {
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return NULL;
    }

    if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
        headerLength=pHeader->dataHeader.headerSize;
        infoLength=pInfo->size;
    } else {
        headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
        infoLength=readSwapUInt16(pInfo->size);
    }

    if( headerLength<(int32_t)sizeof(DataHeader) ||
        infoLength<(int32_t)sizeof(UDataInfo) ||
        headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
        (length>=0 && length<headerLength)
    ) {
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return NULL;
    }

    return pInfo;
}

static int32_t
getTypeEnumForInputData(const uint8_t *data, int32_t length,
                        UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t infoLength, headerLength;

    /* getDataInfo() checks for illegal arguments */
    pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
    if(pInfo==NULL) {
        return -1;
    }

    return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
}

// file handling ----------------------------------------------------------- ***

static void
extractPackageName(const char *filename,
                   char pkg[], int32_t capacity) {
    const char *basename;
    int32_t len;

    basename=findBasename(filename);
    len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */

    if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
        fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
                         basename);
        exit(U_ILLEGAL_ARGUMENT_ERROR);
    }

    if(len>=capacity) {
        fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
                         basename, (long)capacity);
        exit(U_ILLEGAL_ARGUMENT_ERROR);
    }

    memcpy(pkg, basename, len);
    pkg[len]=0;
}

static int32_t
getFileLength(FILE *f) {
    int32_t length;

    fseek(f, 0, SEEK_END);
    length=(int32_t)ftell(f);
    fseek(f, 0, SEEK_SET);
    return length;
}

/*
 * Turn tree separators and alternate file separators into normal file separators.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    char *t;

    for(t=s; *t!=0; ++t) {
        if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) {
            *t=U_FILE_SEP_CHAR;
        }
    }
}
#endif

/*
 * Turn file separators into tree separators.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    char *t;

    for(t=s; *t!=0; ++t) {
        if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) {
            *t=U_TREE_ENTRY_SEP_CHAR;
        }
    }
}
#endif

/*
 * Prepend the path (if any) to the name and run the name through treeToName().
 */
static void
makeFullFilename(const char *path, const char *name,
                 char *filename, int32_t capacity) {
    char *s;

    // prepend the path unless NULL or empty
    if(path!=NULL && path[0]!=0) {
        if((int32_t)(strlen(path)+1)>=capacity) {
            fprintf(stderr, "pathname too long: \"%s\"\n", path);
            exit(U_BUFFER_OVERFLOW_ERROR);
        }
        strcpy(filename, path);

        // make sure the path ends with a file separator
        s=strchr(filename, 0);
        if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
            *s++=U_FILE_SEP_CHAR;
        }
    } else {
        s=filename;
    }

    // turn the name into a filename, turn tree separators into file separators
    if((int32_t)((s-filename)+strlen(name))>=capacity) {
        fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
        exit(U_BUFFER_OVERFLOW_ERROR);
    }
    strcpy(s, name);
    treeToPath(s);
}

static void
makeFullFilenameAndDirs(const char *path, const char *name,
                        char *filename, int32_t capacity) {
    char *sep;
    UErrorCode errorCode;

    makeFullFilename(path, name, filename, capacity);

    // make tree directories
    errorCode=U_ZERO_ERROR;
    sep=strchr(filename, 0)-strlen(name);
    while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
        if(sep!=filename) {
            *sep=0;                 // truncate temporarily
            uprv_mkdir(filename, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
                exit(U_FILE_ACCESS_ERROR);
            }
        }
        *sep++=U_FILE_SEP_CHAR; // restore file separator character
    }
}

static uint8_t *
readFile(const char *path, const char *name, int32_t &length, char &type) {
    char filename[1024];
    FILE *file;
    UErrorCode errorCode;
    int32_t fileLength, typeEnum;

    makeFullFilename(path, name, filename, (int32_t)sizeof(filename));

    /* open the input file, get its length, allocate memory for it, read the file */
    file=fopen(filename, "rb");
    if(file==NULL) {
        fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    /* get the file length */
    fileLength=getFileLength(file);
    if(ferror(file) || fileLength<=0) {
        fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
        fclose(file);
        exit(U_FILE_ACCESS_ERROR);
    }

    /* allocate the buffer, pad to multiple of 16 */
    length=(fileLength+0xf)&~0xf;
    icu::LocalMemory<uint8_t> data((uint8_t *)uprv_malloc(length));
    if(data.isNull()) {
        fclose(file);
        fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length);
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    /* read the file */
    if(fileLength!=(int32_t)fread(data.getAlias(), 1, fileLength, file)) {
        fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
        fclose(file);
        exit(U_FILE_ACCESS_ERROR);
    }

    /* pad the file to a multiple of 16 using the usual padding byte */
    if(fileLength<length) {
        memset(data.getAlias()+fileLength, 0xaa, length-fileLength);
    }

    fclose(file);

    // minimum check for ICU-format data
    errorCode=U_ZERO_ERROR;
    typeEnum=getTypeEnumForInputData(data.getAlias(), length, &errorCode);
    if(typeEnum<0 || U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
#if !UCONFIG_NO_LEGACY_CONVERSION
        exit(U_INVALID_FORMAT_ERROR);
#else
        fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
        exit(0);
#endif
    }
    type=makeTypeLetter(typeEnum);

    return data.orphan();
}

// .dat package file representation ---------------------------------------- ***

U_CDECL_BEGIN

static int32_t U_CALLCONV
compareItems(const void * /*context*/, const void *left, const void *right) {
    U_NAMESPACE_USE

    return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
}

U_CDECL_END

U_NAMESPACE_BEGIN

Package::Package()
        : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) {
    inPkgName[0]=0;
    pkgPrefix[0]=0;
    inData=NULL;
    inLength=0;
    inCharset=U_CHARSET_FAMILY;
    inIsBigEndian=U_IS_BIG_ENDIAN;

    itemCount=0;
    itemMax=0;
    items=NULL;

    inStringTop=outStringTop=0;

    matchMode=0;
    findPrefix=findSuffix=NULL;
    findPrefixLength=findSuffixLength=0;
    findNextIndex=-1;

    // create a header for an empty package
    DataHeader *pHeader;
    pHeader=(DataHeader *)header;
    pHeader->dataHeader.magic1=0xda;
    pHeader->dataHeader.magic2=0x27;
    memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
    headerLength=(int32_t)(4+sizeof(dataInfo));
    if(headerLength&0xf) {
        /* NUL-pad the header to a multiple of 16 */
        int32_t length=(headerLength+0xf)&~0xf;
        memset(header+headerLength, 0, length-headerLength);
        headerLength=length;
    }
    pHeader->dataHeader.headerSize=(uint16_t)headerLength;
}

Package::~Package() {
    int32_t idx;

    uprv_free(inData);

    for(idx=0; idx<itemCount; ++idx) {
        if(items[idx].isDataOwned) {
            uprv_free(items[idx].data);
        }
    }

    uprv_free((void*)items);
}

void
Package::setPrefix(const char *p) {
    if(strlen(p)>=sizeof(pkgPrefix)) {
        fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p);
        exit(U_ILLEGAL_ARGUMENT_ERROR);
    }
    strcpy(pkgPrefix, p);
}

void
Package::readPackage(const char *filename) {
    UDataSwapper *ds;
    const UDataInfo *pInfo;
    UErrorCode errorCode;

    const uint8_t *inBytes;

    int32_t length, offset, i;
    int32_t itemLength, typeEnum;
    char type;

    const UDataOffsetTOCEntry *inEntries;

    extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));

    /* read the file */
    inData=readFile(NULL, filename, inLength, type);
    length=inLength;

    /*
     * swap the header - even if the swapping itself is a no-op
     * because it tells us the header length
     */
    errorCode=U_ZERO_ERROR;
    makeTypeProps(type, inCharset, inIsBigEndian);
    ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
                filename, u_errorName(errorCode));
        exit(errorCode);
    }

    ds->printError=printPackageError;
    ds->printErrorContext=stderr;

    headerLength=sizeof(header);
    if(length<headerLength) {
        headerLength=length;
    }
    headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
    if(U_FAILURE(errorCode)) {
        exit(errorCode);
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
        pInfo->dataFormat[1]==0x6d &&
        pInfo->dataFormat[2]==0x6e &&
        pInfo->dataFormat[3]==0x44 &&
        pInfo->formatVersion[0]==1
    )) {
        fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
                pInfo->dataFormat[0], pInfo->dataFormat[1],
                pInfo->dataFormat[2], pInfo->dataFormat[3],
                pInfo->formatVersion[0]);
        exit(U_UNSUPPORTED_ERROR);
    }
    inIsBigEndian=(UBool)pInfo->isBigEndian;
    inCharset=pInfo->charsetFamily;

    inBytes=(const uint8_t *)inData+headerLength;
    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);

    /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
    length-=headerLength;
    if(length<4) {
        /* itemCount does not fit */
        offset=0x7fffffff;
    } else {
        itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
        setItemCapacity(itemCount); /* resize so there's space */
        if(itemCount==0) {
            offset=4;
        } else if(length<(4+8*itemCount)) {
            /* ToC table does not fit */
            offset=0x7fffffff;
        } else {
            /* offset of the last item plus at least 20 bytes for its header */
            offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
        }
    }
    if(length<offset) {
        fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
                        (long)length);
        exit(U_INDEX_OUTOFBOUNDS_ERROR);
    }
    /* do not modify the package length variable until the last item's length is set */

    if(itemCount<=0) {
        if(doAutoPrefix) {
            fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n");
            exit(U_INVALID_FORMAT_ERROR);
        }
    } else {
        char prefix[MAX_PKG_NAME_LENGTH+4];
        char *s, *inItemStrings;

        if(itemCount>itemMax) {
            fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax);
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        /* swap the item name strings */
        int32_t stringsOffset=4+8*itemCount;
        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;

        // don't include padding bytes at the end of the item names
        while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
            --itemLength;
        }

        if((inStringTop+itemLength)>STRING_STORE_SIZE) {
            fprintf(stderr, "icupkg: total length of item name strings too long\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        inItemStrings=inStrings+inStringTop;
        ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
            exit(U_INVALID_FORMAT_ERROR);
        }
        inStringTop+=itemLength;

        // reset the Item entries
        memset(items, 0, itemCount*sizeof(Item));

        /*
         * Get the common prefix of the items.
         * New-style ICU .dat packages use tree separators ('/') between package names,
         * tree names, and item names,
         * while old-style ICU .dat packages (before multi-tree support)
         * use an underscore ('_') between package and item names.
         */
        offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
        s=inItemStrings+offset;  // name of the first entry
        int32_t prefixLength;
        if(doAutoPrefix) {
            // Use the first entry's prefix. Must be a new-style package.
            const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR);
            if(prefixLimit==NULL) {
                fprintf(stderr,
                        "icupkg: --auto_toc_prefix[_with_type] but "
                        "the first entry \"%s\" does not contain a '%c'\n",
                        s, U_TREE_ENTRY_SEP_CHAR);
                exit(U_INVALID_FORMAT_ERROR);
            }
            prefixLength=(int32_t)(prefixLimit-s);
            if(prefixLength==0 || prefixLength>=UPRV_LENGTHOF(pkgPrefix)) {
                fprintf(stderr,
                        "icupkg: --auto_toc_prefix[_with_type] but "
                        "the prefix of the first entry \"%s\" is empty or too long\n",
                        s);
                exit(U_INVALID_FORMAT_ERROR);
            }
            if(prefixEndsWithType && s[prefixLength-1]!=type) {
                fprintf(stderr,
                        "icupkg: --auto_toc_prefix_with_type but "
                        "the prefix of the first entry \"%s\" does not end with '%c'\n",
                        s, type);
                exit(U_INVALID_FORMAT_ERROR);
            }
            memcpy(pkgPrefix, s, prefixLength);
            pkgPrefix[prefixLength]=0;
            memcpy(prefix, s, ++prefixLength);  // include the /
        } else {
            // Use the package basename as prefix.
            int32_t inPkgNameLength= static_cast<int32_t>(strlen(inPkgName));
            memcpy(prefix, inPkgName, inPkgNameLength);
            prefixLength=inPkgNameLength;

            if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
                0==memcmp(s, inPkgName, inPkgNameLength) &&
                s[inPkgNameLength]=='_'
            ) {
                // old-style .dat package
                prefix[prefixLength++]='_';
            } else {
                // new-style .dat package
                prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
                // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
                // then the test in the loop below will fail
            }
        }
        prefix[prefixLength]=0;

        /* read the ToC table */
        for(i=0; i<itemCount; ++i) {
            // skip the package part of the item name, error if it does not match the actual package name
            // or if nothing follows the package name
            offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
            s=inItemStrings+offset;
            if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
                fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
                        s, prefix);
                exit(U_INVALID_FORMAT_ERROR);
            }
            items[i].name=s+prefixLength;

            // set the item's data
            items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
            if(i>0) {
                items[i-1].length=(int32_t)(items[i].data-items[i-1].data);

                // set the previous item's platform type
                typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
                if(typeEnum<0 || U_FAILURE(errorCode)) {
                    fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
                    exit(U_INVALID_FORMAT_ERROR);
                }
                items[i-1].type=makeTypeLetter(typeEnum);
            }
            items[i].isDataOwned=FALSE;
        }
        // set the last item's length
        items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);

        // set the last item's platform type
        typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
        if(typeEnum<0 || U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[itemCount-1].name, filename);
            exit(U_INVALID_FORMAT_ERROR);
        }
        items[itemCount-1].type=makeTypeLetter(typeEnum);

        if(type!=U_ICUDATA_TYPE_LETTER[0]) {
            // sort the item names for the local charset
            sortItems();
        }
    }

    udata_closeSwapper(ds);
}

char
Package::getInType() {
    return makeTypeLetter(inCharset, inIsBigEndian);
}

void
Package::writePackage(const char *filename, char outType, const char *comment) {
    char prefix[MAX_PKG_NAME_LENGTH+4];
    UDataOffsetTOCEntry entry;
    UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
    FILE *file;
    Item *pItem;
    char *name;
    UErrorCode errorCode;
    int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
    uint8_t outCharset;
    UBool outIsBigEndian;

    extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);

    // if there is an explicit comment, then use it, else use what's in the current header
    if(comment!=NULL) {
        /* get the header size minus the current comment */
        DataHeader *pHeader;
        int32_t length;

        pHeader=(DataHeader *)header;
        headerLength=4+pHeader->info.size;
        length=(int32_t)strlen(comment);
        if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
            fprintf(stderr, "icupkg: comment too long\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }
        memcpy(header+headerLength, comment, length+1);
        headerLength+=length;
        if(headerLength&0xf) {
            /* NUL-pad the header to a multiple of 16 */
            length=(headerLength+0xf)&~0xf;
            memset(header+headerLength, 0, length-headerLength);
            headerLength=length;
        }
        pHeader->dataHeader.headerSize=(uint16_t)headerLength;
    }

    makeTypeProps(outType, outCharset, outIsBigEndian);

    // open (TYPE_COUNT-2) swappers
    // one is a no-op for local type==outType
    // one type (TYPE_LE) is bogus
    errorCode=U_ZERO_ERROR;
    i=makeTypeEnum(outType);
    ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    ds[TYPE_LE]=NULL;
    ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
    for(i=0; i<TYPE_COUNT; ++i) {
        if(ds[i]!=NULL) {
            ds[i]->printError=printPackageError;
            ds[i]->printErrorContext=stderr;
        }
    }

    dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];

    // create the file and write its contents
    file=fopen(filename, "wb");
    if(file==NULL) {
        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // swap and write the header
    if(dsLocalToOut!=NULL) {
        udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
    }
    length=(int32_t)fwrite(header, 1, headerLength, file);
    if(length!=headerLength) {
        fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // prepare and swap the package name with a tree separator
    // for prepending to item names
    if(pkgPrefix[0]==0) {
        prefixLength=(int32_t)strlen(prefix);
    } else {
        prefixLength=(int32_t)strlen(pkgPrefix);
        memcpy(prefix, pkgPrefix, prefixLength);
        if(prefixEndsWithType) {
            prefix[prefixLength-1]=outType;
        }
    }
    prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
    prefix[prefixLength]=0;
    if(dsLocalToOut!=NULL) {
        dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }

        // swap and sort the item names (sorting needs to be done in the output charset)
        dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
        sortItems();
    }

    // create the output item names in sorted order, with the package name prepended to each
    for(i=0; i<itemCount; ++i) {
        length=(int32_t)strlen(items[i].name);
        name=allocString(FALSE, length+prefixLength);
        memcpy(name, prefix, prefixLength);
        memcpy(name+prefixLength, items[i].name, length+1);
        items[i].name=name;
    }

    // calculate offsets for item names and items, pad to 16-align items
    // align only the first item; each item's length is a multiple of 16
    basenameOffset=4+8*itemCount;
    offset=basenameOffset+outStringTop;
    if((length=(offset&15))!=0) {
        length=16-length;
        memset(allocString(FALSE, length-1), 0xaa, length);
        offset+=length;
    }

    // write the table of contents
    // first the itemCount
    outInt32=itemCount;
    if(dsLocalToOut!=NULL) {
        dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
    }
    length=(int32_t)fwrite(&outInt32, 1, 4, file);
    if(length!=4) {
        fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // then write the item entries (and collect the maxItemLength)
    maxItemLength=0;
    for(i=0; i<itemCount; ++i) {
        entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
        entry.dataOffset=(uint32_t)offset;
        if(dsLocalToOut!=NULL) {
            dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
                exit(errorCode);
            }
        }
        length=(int32_t)fwrite(&entry, 1, 8, file);
        if(length!=8) {
            fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
            exit(U_FILE_ACCESS_ERROR);
        }

        length=items[i].length;
        if(length>maxItemLength) {
            maxItemLength=length;
        }
        offset+=length;
    }

    // write the item names
    length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
    if(length!=outStringTop) {
        fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    // write the items
    for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
        int32_t type=makeTypeEnum(pItem->type);
        if(ds[type]!=NULL) {
            // swap each item from its platform properties to the desired ones
            udata_swap(
                ds[type],
                pItem->data, pItem->length, pItem->data,
                &errorCode);
            if(U_FAILURE(errorCode)) {
                fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
                exit(errorCode);
            }
        }
        length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
        if(length!=pItem->length) {
            fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
            exit(U_FILE_ACCESS_ERROR);
        }
    }

    if(ferror(file)) {
        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }

    fclose(file);
    for(i=0; i<TYPE_COUNT; ++i) {
        udata_closeSwapper(ds[i]);
    }
}

int32_t
Package::findItem(const char *name, int32_t length) const {
    int32_t i, start, limit;
    int result;

    /* do a binary search for the string */
    start=0;
    limit=itemCount;
    while(start<limit) {
        i=(start+limit)/2;
        if(length>=0) {
            result=strncmp(name, items[i].name, length);
        } else {
            result=strcmp(name, items[i].name);
        }

        if(result==0) {
            /* found */
            if(length>=0) {
                /*
                 * if we compared just prefixes, then we may need to back up
                 * to the first item with this prefix
                 */
                while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
                    --i;
                }
            }
            return i;
        } else if(result<0) {
            limit=i;
        } else /* result>0 */ {
            start=i+1;
        }
    }

    return ~start; /* not found, return binary-not of the insertion point */
}

void
Package::findItems(const char *pattern) {
    const char *wild;

    if(pattern==NULL || *pattern==0) {
        findNextIndex=-1;
        return;
    }

    findPrefix=pattern;
    findSuffix=NULL;
    findSuffixLength=0;

    wild=strchr(pattern, '*');
    if(wild==NULL) {
        // no wildcard
        findPrefixLength=(int32_t)strlen(pattern);
    } else {
        // one wildcard
        findPrefixLength=(int32_t)(wild-pattern);
        findSuffix=wild+1;
        findSuffixLength=(int32_t)strlen(findSuffix);
        if(NULL!=strchr(findSuffix, '*')) {
            // two or more wildcards
            fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
            exit(U_PARSE_ERROR);
        }
    }

    if(findPrefixLength==0) {
        findNextIndex=0;
    } else {
        findNextIndex=findItem(findPrefix, findPrefixLength);
    }
}

int32_t
Package::findNextItem() {
    const char *name, *middle, *treeSep;
    int32_t idx, nameLength, middleLength;

    if(findNextIndex<0) {
        return -1;
    }

    while(findNextIndex<itemCount) {
        idx=findNextIndex++;
        name=items[idx].name;
        nameLength=(int32_t)strlen(name);
        if(nameLength<(findPrefixLength+findSuffixLength)) {
            // item name too short for prefix & suffix
            continue;
        }
        if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
            // left the range of names with this prefix
            break;
        }
        middle=name+findPrefixLength;
        middleLength=nameLength-findPrefixLength-findSuffixLength;
        if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
            // suffix does not match
            continue;
        }
        // prefix & suffix match

        if(matchMode&MATCH_NOSLASH) {
            treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
            if(treeSep!=NULL && (treeSep-middle)<middleLength) {
                // the middle (matching the * wildcard) contains a tree separator /
                continue;
            }
        }

        // found a matching item
        return idx;
    }

    // no more items
    findNextIndex=-1;
    return -1;
}

void
Package::setMatchMode(uint32_t mode) {
    matchMode=mode;
}

void
Package::addItem(const char *name) {
    addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
}

void
Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
    int32_t idx;

    idx=findItem(name);
    if(idx<0) {
        // new item, make space at the insertion point
        ensureItemCapacity();
        // move the following items down
        idx=~idx;
        if(idx<itemCount) {
            memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
        }
        ++itemCount;

        // reset this Item entry
        memset(items+idx, 0, sizeof(Item));

        // copy the item's name
        items[idx].name=allocString(TRUE, static_cast<int32_t>(strlen(name)));
        strcpy(items[idx].name, name);
        pathToTree(items[idx].name);
    } else {
        // same-name item found, replace it
        if(items[idx].isDataOwned) {
            uprv_free(items[idx].data);
        }

        // keep the item's name since it is the same
    }

    // set the item's data
    items[idx].data=data;
    items[idx].length=length;
    items[idx].isDataOwned=isDataOwned;
    items[idx].type=type;
}

void
Package::addFile(const char *filesPath, const char *name) {
    uint8_t *data;
    int32_t length;
    char type;

    data=readFile(filesPath, name, length, type);
    // readFile() exits the tool if it fails
    addItem(name, data, length, TRUE, type);
}

void
Package::addItems(const Package &listPkg) {
    const Item *pItem;
    int32_t i;

    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
        addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
    }
}

void
Package::removeItem(int32_t idx) {
    if(idx>=0) {
        // remove the item
        if(items[idx].isDataOwned) {
            uprv_free(items[idx].data);
        }

        // move the following items up
        if((idx+1)<itemCount) {
            memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
        }
        --itemCount;

        if(idx<=findNextIndex) {
            --findNextIndex;
        }
    }
}

void
Package::removeItems(const char *pattern) {
    int32_t idx;

    findItems(pattern);
    while((idx=findNextItem())>=0) {
        removeItem(idx);
    }
}

void
Package::removeItems(const Package &listPkg) {
    const Item *pItem;
    int32_t i;

    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
        removeItems(pItem->name);
    }
}

void
Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
    char filename[1024];
    UDataSwapper *ds;
    FILE *file;
    Item *pItem;
    int32_t fileLength;
    uint8_t itemCharset, outCharset;
    UBool itemIsBigEndian, outIsBigEndian;

    if(idx<0 || itemCount<=idx) {
        return;
    }
    pItem=items+idx;

    // swap the data to the outType
    // outType==0: don't swap
    if(outType!=0 && pItem->type!=outType) {
        // open the swapper
        UErrorCode errorCode=U_ZERO_ERROR;
        makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
        makeTypeProps(outType, outCharset, outIsBigEndian);
        ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
                    (long)idx, u_errorName(errorCode));
            exit(errorCode);
        }

        ds->printError=printPackageError;
        ds->printErrorContext=stderr;

        // swap the item from its platform properties to the desired ones
        udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
            exit(errorCode);
        }
        udata_closeSwapper(ds);
        pItem->type=outType;
    }

    // create the file and write its contents
    makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
    file=fopen(filename, "wb");
    if(file==NULL) {
        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }
    fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);

    if(ferror(file) || fileLength!=pItem->length) {
        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
        exit(U_FILE_ACCESS_ERROR);
    }
    fclose(file);
}

void
Package::extractItem(const char *filesPath, int32_t idx, char outType) {
    extractItem(filesPath, items[idx].name, idx, outType);
}

void
Package::extractItems(const char *filesPath, const char *pattern, char outType) {
    int32_t idx;

    findItems(pattern);
    while((idx=findNextItem())>=0) {
        extractItem(filesPath, idx, outType);
    }
}

void
Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
    const Item *pItem;
    int32_t i;

    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
        extractItems(filesPath, pItem->name, outType);
    }
}

int32_t
Package::getItemCount() const {
    return itemCount;
}

const Item *
Package::getItem(int32_t idx) const {
    if (0 <= idx && idx < itemCount) {
        return &items[idx];
    }
    return NULL;
}

void
Package::checkDependency(void *context, const char *itemName, const char *targetName) {
    // check dependency: make sure the target item is in the package
    Package *me=(Package *)context;
    if(me->findItem(targetName)<0) {
        me->isMissingItems=TRUE;
        fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
    }
}

UBool
Package::checkDependencies() {
    isMissingItems=FALSE;
    enumDependencies(this, checkDependency);
    return (UBool)!isMissingItems;
}

void
Package::enumDependencies(void *context, CheckDependency check) {
    int32_t i;

    for(i=0; i<itemCount; ++i) {
        enumDependencies(items+i, context, check);
    }
}

char *
Package::allocString(UBool in, int32_t length) {
    char *p;
    int32_t top;

    if(in) {
        top=inStringTop;
        p=inStrings+top;
    } else {
        top=outStringTop;
        p=outStrings+top;
    }
    top+=length+1;

    if(top>STRING_STORE_SIZE) {
        fprintf(stderr, "icupkg: string storage overflow\n");
        exit(U_BUFFER_OVERFLOW_ERROR);
    }
    if(in) {
        inStringTop=top;
    } else {
        outStringTop=top;
    }
    return p;
}

void
Package::sortItems() {
    UErrorCode errorCode=U_ZERO_ERROR;
    uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
}

void Package::setItemCapacity(int32_t max)
{
  if(max<=itemMax) {
    return;
  }
  Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0]));
  Item *oldItems = items;
  if(newItems == NULL) {
    fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n",
        (unsigned long)(max*sizeof(items[0])), max);
    exit(U_MEMORY_ALLOCATION_ERROR);
  }
  if(items && itemCount>0) {
    uprv_memcpy(newItems, items, (size_t)itemCount*sizeof(items[0]));
  }
  itemMax = max;
  items = newItems;
  uprv_free(oldItems);
}

void Package::ensureItemCapacity()
{
  if((itemCount+1)>itemMax) {
    setItemCapacity(itemCount+kItemsChunk);
  }
}

U_NAMESPACE_END