// Copyright (C) 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 1999-2015, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: package.cpp * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2005aug25 * created by: Markus W. Scherer * * Read, modify, and write ICU .dat data package files. * This is an integral part of the icupkg tool, moved to the toolutil library * because parts of tool implementations tend to be later shared by * other tools. * Subsumes functionality and implementation code from * gencmn, decmn, and icuswap tools. */ #include "unicode/utypes.h" #include "unicode/putil.h" #include "unicode/udata.h" #include "cstring.h" #include "uarrsort.h" #include "ucmndata.h" #include "udataswp.h" #include "swapimpl.h" #include "toolutil.h" #include "package.h" #include "cmemory.h" #include <stdio.h> #include <stdlib.h> #include <string.h> static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ // general definitions ----------------------------------------------------- *** /* UDataInfo cf. udata.h */ static const UDataInfo dataInfo={ (uint16_t)sizeof(UDataInfo), 0, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, (uint8_t)sizeof(UChar), 0, {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ {1, 0, 0, 0}, /* formatVersion */ {3, 0, 0, 0} /* dataVersion */ }; U_CDECL_BEGIN static void U_CALLCONV printPackageError(void *context, const char *fmt, va_list args) { vfprintf((FILE *)context, fmt, args); } U_CDECL_END static uint16_t readSwapUInt16(uint16_t x) { return (uint16_t)((x<<8)|(x>>8)); } // platform types ---------------------------------------------------------- *** static const char *types="lb?e"; enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; static inline int32_t makeTypeEnum(uint8_t charset, UBool isBigEndian) { return 2*(int32_t)charset+isBigEndian; } static inline int32_t makeTypeEnum(char type) { return type == 'l' ? TYPE_L : type == 'b' ? TYPE_B : type == 'e' ? TYPE_E : -1; } static inline char makeTypeLetter(uint8_t charset, UBool isBigEndian) { return types[makeTypeEnum(charset, isBigEndian)]; } static inline char makeTypeLetter(int32_t typeEnum) { return types[typeEnum]; } static void makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { int32_t typeEnum=makeTypeEnum(type); charset=(uint8_t)(typeEnum>>1); isBigEndian=(UBool)(typeEnum&1); } U_CFUNC const UDataInfo * getDataInfo(const uint8_t *data, int32_t length, int32_t &infoLength, int32_t &headerLength, UErrorCode *pErrorCode) { const DataHeader *pHeader; const UDataInfo *pInfo; if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return NULL; } if( data==NULL || (length>=0 && length<(int32_t)sizeof(DataHeader)) ) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return NULL; } pHeader=(const DataHeader *)data; pInfo=&pHeader->info; if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || pHeader->dataHeader.magic1!=0xda || pHeader->dataHeader.magic2!=0x27 || pInfo->sizeofUChar!=2 ) { *pErrorCode=U_UNSUPPORTED_ERROR; return NULL; } if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { headerLength=pHeader->dataHeader.headerSize; infoLength=pInfo->size; } else { headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); infoLength=readSwapUInt16(pInfo->size); } if( headerLength<(int32_t)sizeof(DataHeader) || infoLength<(int32_t)sizeof(UDataInfo) || headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || (length>=0 && length<headerLength) ) { *pErrorCode=U_UNSUPPORTED_ERROR; return NULL; } return pInfo; } static int32_t getTypeEnumForInputData(const uint8_t *data, int32_t length, UErrorCode *pErrorCode) { const UDataInfo *pInfo; int32_t infoLength, headerLength; /* getDataInfo() checks for illegal arguments */ pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); if(pInfo==NULL) { return -1; } return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); } // file handling ----------------------------------------------------------- *** static void extractPackageName(const char *filename, char pkg[], int32_t capacity) { const char *basename; int32_t len; basename=findBasename(filename); len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ if(len<=0 || 0!=strcmp(basename+len, ".dat")) { fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", basename); exit(U_ILLEGAL_ARGUMENT_ERROR); } if(len>=capacity) { fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", basename, (long)capacity); exit(U_ILLEGAL_ARGUMENT_ERROR); } memcpy(pkg, basename, len); pkg[len]=0; } static int32_t getFileLength(FILE *f) { int32_t length; fseek(f, 0, SEEK_END); length=(int32_t)ftell(f); fseek(f, 0, SEEK_SET); return length; } /* * Turn tree separators and alternate file separators into normal file separators. */ #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR #define treeToPath(s) #else static void treeToPath(char *s) { char *t; for(t=s; *t!=0; ++t) { if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { *t=U_FILE_SEP_CHAR; } } } #endif /* * Turn file separators into tree separators. */ #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR #define pathToTree(s) #else static void pathToTree(char *s) { char *t; for(t=s; *t!=0; ++t) { if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { *t=U_TREE_ENTRY_SEP_CHAR; } } } #endif /* * Prepend the path (if any) to the name and run the name through treeToName(). */ static void makeFullFilename(const char *path, const char *name, char *filename, int32_t capacity) { char *s; // prepend the path unless NULL or empty if(path!=NULL && path[0]!=0) { if((int32_t)(strlen(path)+1)>=capacity) { fprintf(stderr, "pathname too long: \"%s\"\n", path); exit(U_BUFFER_OVERFLOW_ERROR); } strcpy(filename, path); // make sure the path ends with a file separator s=strchr(filename, 0); if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { *s++=U_FILE_SEP_CHAR; } } else { s=filename; } // turn the name into a filename, turn tree separators into file separators if((int32_t)((s-filename)+strlen(name))>=capacity) { fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); exit(U_BUFFER_OVERFLOW_ERROR); } strcpy(s, name); treeToPath(s); } static void makeFullFilenameAndDirs(const char *path, const char *name, char *filename, int32_t capacity) { char *sep; UErrorCode errorCode; makeFullFilename(path, name, filename, capacity); // make tree directories errorCode=U_ZERO_ERROR; sep=strchr(filename, 0)-strlen(name); while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { if(sep!=filename) { *sep=0; // truncate temporarily uprv_mkdir(filename, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } } *sep++=U_FILE_SEP_CHAR; // restore file separator character } } static uint8_t * readFile(const char *path, const char *name, int32_t &length, char &type) { char filename[1024]; FILE *file; UErrorCode errorCode; int32_t fileLength, typeEnum; makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); /* open the input file, get its length, allocate memory for it, read the file */ file=fopen(filename, "rb"); if(file==NULL) { fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } /* get the file length */ fileLength=getFileLength(file); if(ferror(file) || fileLength<=0) { fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); fclose(file); exit(U_FILE_ACCESS_ERROR); } /* allocate the buffer, pad to multiple of 16 */ length=(fileLength+0xf)&~0xf; icu::LocalMemory<uint8_t> data((uint8_t *)uprv_malloc(length)); if(data.isNull()) { fclose(file); fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length); exit(U_MEMORY_ALLOCATION_ERROR); } /* read the file */ if(fileLength!=(int32_t)fread(data.getAlias(), 1, fileLength, file)) { fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); fclose(file); exit(U_FILE_ACCESS_ERROR); } /* pad the file to a multiple of 16 using the usual padding byte */ if(fileLength<length) { memset(data.getAlias()+fileLength, 0xaa, length-fileLength); } fclose(file); // minimum check for ICU-format data errorCode=U_ZERO_ERROR; typeEnum=getTypeEnumForInputData(data.getAlias(), length, &errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); #if !UCONFIG_NO_LEGACY_CONVERSION exit(U_INVALID_FORMAT_ERROR); #else fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); exit(0); #endif } type=makeTypeLetter(typeEnum); return data.orphan(); } // .dat package file representation ---------------------------------------- *** U_CDECL_BEGIN static int32_t U_CALLCONV compareItems(const void * /*context*/, const void *left, const void *right) { U_NAMESPACE_USE return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); } U_CDECL_END U_NAMESPACE_BEGIN Package::Package() : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) { inPkgName[0]=0; pkgPrefix[0]=0; inData=NULL; inLength=0; inCharset=U_CHARSET_FAMILY; inIsBigEndian=U_IS_BIG_ENDIAN; itemCount=0; itemMax=0; items=NULL; inStringTop=outStringTop=0; matchMode=0; findPrefix=findSuffix=NULL; findPrefixLength=findSuffixLength=0; findNextIndex=-1; // create a header for an empty package DataHeader *pHeader; pHeader=(DataHeader *)header; pHeader->dataHeader.magic1=0xda; pHeader->dataHeader.magic2=0x27; memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); headerLength=(int32_t)(4+sizeof(dataInfo)); if(headerLength&0xf) { /* NUL-pad the header to a multiple of 16 */ int32_t length=(headerLength+0xf)&~0xf; memset(header+headerLength, 0, length-headerLength); headerLength=length; } pHeader->dataHeader.headerSize=(uint16_t)headerLength; } Package::~Package() { int32_t idx; uprv_free(inData); for(idx=0; idx<itemCount; ++idx) { if(items[idx].isDataOwned) { uprv_free(items[idx].data); } } uprv_free((void*)items); } void Package::setPrefix(const char *p) { if(strlen(p)>=sizeof(pkgPrefix)) { fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p); exit(U_ILLEGAL_ARGUMENT_ERROR); } strcpy(pkgPrefix, p); } void Package::readPackage(const char *filename) { UDataSwapper *ds; const UDataInfo *pInfo; UErrorCode errorCode; const uint8_t *inBytes; int32_t length, offset, i; int32_t itemLength, typeEnum; char type; const UDataOffsetTOCEntry *inEntries; extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); /* read the file */ inData=readFile(NULL, filename, inLength, type); length=inLength; /* * swap the header - even if the swapping itself is a no-op * because it tells us the header length */ errorCode=U_ZERO_ERROR; makeTypeProps(type, inCharset, inIsBigEndian); ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", filename, u_errorName(errorCode)); exit(errorCode); } ds->printError=printPackageError; ds->printErrorContext=stderr; headerLength=sizeof(header); if(length<headerLength) { headerLength=length; } headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); if(U_FAILURE(errorCode)) { exit(errorCode); } /* check data format and format version */ pInfo=(const UDataInfo *)((const char *)inData+4); if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ pInfo->dataFormat[1]==0x6d && pInfo->dataFormat[2]==0x6e && pInfo->dataFormat[3]==0x44 && pInfo->formatVersion[0]==1 )) { fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); exit(U_UNSUPPORTED_ERROR); } inIsBigEndian=(UBool)pInfo->isBigEndian; inCharset=pInfo->charsetFamily; inBytes=(const uint8_t *)inData+headerLength; inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ length-=headerLength; if(length<4) { /* itemCount does not fit */ offset=0x7fffffff; } else { itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); setItemCapacity(itemCount); /* resize so there's space */ if(itemCount==0) { offset=4; } else if(length<(4+8*itemCount)) { /* ToC table does not fit */ offset=0x7fffffff; } else { /* offset of the last item plus at least 20 bytes for its header */ offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); } } if(length<offset) { fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", (long)length); exit(U_INDEX_OUTOFBOUNDS_ERROR); } /* do not modify the package length variable until the last item's length is set */ if(itemCount<=0) { if(doAutoPrefix) { fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n"); exit(U_INVALID_FORMAT_ERROR); } } else { char prefix[MAX_PKG_NAME_LENGTH+4]; char *s, *inItemStrings; if(itemCount>itemMax) { fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); exit(U_BUFFER_OVERFLOW_ERROR); } /* swap the item name strings */ int32_t stringsOffset=4+8*itemCount; itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; // don't include padding bytes at the end of the item names while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { --itemLength; } if((inStringTop+itemLength)>STRING_STORE_SIZE) { fprintf(stderr, "icupkg: total length of item name strings too long\n"); exit(U_BUFFER_OVERFLOW_ERROR); } inItemStrings=inStrings+inStringTop; ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); exit(U_INVALID_FORMAT_ERROR); } inStringTop+=itemLength; // reset the Item entries memset(items, 0, itemCount*sizeof(Item)); /* * Get the common prefix of the items. * New-style ICU .dat packages use tree separators ('/') between package names, * tree names, and item names, * while old-style ICU .dat packages (before multi-tree support) * use an underscore ('_') between package and item names. */ offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; s=inItemStrings+offset; // name of the first entry int32_t prefixLength; if(doAutoPrefix) { // Use the first entry's prefix. Must be a new-style package. const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR); if(prefixLimit==NULL) { fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but " "the first entry \"%s\" does not contain a '%c'\n", s, U_TREE_ENTRY_SEP_CHAR); exit(U_INVALID_FORMAT_ERROR); } prefixLength=(int32_t)(prefixLimit-s); if(prefixLength==0 || prefixLength>=UPRV_LENGTHOF(pkgPrefix)) { fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but " "the prefix of the first entry \"%s\" is empty or too long\n", s); exit(U_INVALID_FORMAT_ERROR); } if(prefixEndsWithType && s[prefixLength-1]!=type) { fprintf(stderr, "icupkg: --auto_toc_prefix_with_type but " "the prefix of the first entry \"%s\" does not end with '%c'\n", s, type); exit(U_INVALID_FORMAT_ERROR); } memcpy(pkgPrefix, s, prefixLength); pkgPrefix[prefixLength]=0; memcpy(prefix, s, ++prefixLength); // include the / } else { // Use the package basename as prefix. int32_t inPkgNameLength=strlen(inPkgName); memcpy(prefix, inPkgName, inPkgNameLength); prefixLength=inPkgNameLength; if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 0==memcmp(s, inPkgName, inPkgNameLength) && s[inPkgNameLength]=='_' ) { // old-style .dat package prefix[prefixLength++]='_'; } else { // new-style .dat package prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR // then the test in the loop below will fail } } prefix[prefixLength]=0; /* read the ToC table */ for(i=0; i<itemCount; ++i) { // skip the package part of the item name, error if it does not match the actual package name // or if nothing follows the package name offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; s=inItemStrings+offset; if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", s, prefix); exit(U_INVALID_FORMAT_ERROR); } items[i].name=s+prefixLength; // set the item's data items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); if(i>0) { items[i-1].length=(int32_t)(items[i].data-items[i-1].data); // set the previous item's platform type typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); exit(U_INVALID_FORMAT_ERROR); } items[i-1].type=makeTypeLetter(typeEnum); } items[i].isDataOwned=FALSE; } // set the last item's length items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); // set the last item's platform type typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); if(typeEnum<0 || U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); exit(U_INVALID_FORMAT_ERROR); } items[itemCount-1].type=makeTypeLetter(typeEnum); if(type!=U_ICUDATA_TYPE_LETTER[0]) { // sort the item names for the local charset sortItems(); } } udata_closeSwapper(ds); } char Package::getInType() { return makeTypeLetter(inCharset, inIsBigEndian); } void Package::writePackage(const char *filename, char outType, const char *comment) { char prefix[MAX_PKG_NAME_LENGTH+4]; UDataOffsetTOCEntry entry; UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; FILE *file; Item *pItem; char *name; UErrorCode errorCode; int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; uint8_t outCharset; UBool outIsBigEndian; extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); // if there is an explicit comment, then use it, else use what's in the current header if(comment!=NULL) { /* get the header size minus the current comment */ DataHeader *pHeader; int32_t length; pHeader=(DataHeader *)header; headerLength=4+pHeader->info.size; length=(int32_t)strlen(comment); if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { fprintf(stderr, "icupkg: comment too long\n"); exit(U_BUFFER_OVERFLOW_ERROR); } memcpy(header+headerLength, comment, length+1); headerLength+=length; if(headerLength&0xf) { /* NUL-pad the header to a multiple of 16 */ length=(headerLength+0xf)&~0xf; memset(header+headerLength, 0, length-headerLength); headerLength=length; } pHeader->dataHeader.headerSize=(uint16_t)headerLength; } makeTypeProps(outType, outCharset, outIsBigEndian); // open (TYPE_COUNT-2) swappers // one is a no-op for local type==outType // one type (TYPE_LE) is bogus errorCode=U_ZERO_ERROR; i=makeTypeEnum(outType); ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); ds[TYPE_LE]=NULL; ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); exit(errorCode); } for(i=0; i<TYPE_COUNT; ++i) { if(ds[i]!=NULL) { ds[i]->printError=printPackageError; ds[i]->printErrorContext=stderr; } } dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; // create the file and write its contents file=fopen(filename, "wb"); if(file==NULL) { fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } // swap and write the header if(dsLocalToOut!=NULL) { udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); exit(errorCode); } } length=(int32_t)fwrite(header, 1, headerLength, file); if(length!=headerLength) { fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } // prepare and swap the package name with a tree separator // for prepending to item names if(pkgPrefix[0]==0) { prefixLength=(int32_t)strlen(prefix); } else { prefixLength=(int32_t)strlen(pkgPrefix); memcpy(prefix, pkgPrefix, prefixLength); if(prefixEndsWithType) { prefix[prefixLength-1]=outType; } } prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; prefix[prefixLength]=0; if(dsLocalToOut!=NULL) { dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); exit(errorCode); } // swap and sort the item names (sorting needs to be done in the output charset) dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); exit(errorCode); } sortItems(); } // create the output item names in sorted order, with the package name prepended to each for(i=0; i<itemCount; ++i) { length=(int32_t)strlen(items[i].name); name=allocString(FALSE, length+prefixLength); memcpy(name, prefix, prefixLength); memcpy(name+prefixLength, items[i].name, length+1); items[i].name=name; } // calculate offsets for item names and items, pad to 16-align items // align only the first item; each item's length is a multiple of 16 basenameOffset=4+8*itemCount; offset=basenameOffset+outStringTop; if((length=(offset&15))!=0) { length=16-length; memset(allocString(FALSE, length-1), 0xaa, length); offset+=length; } // write the table of contents // first the itemCount outInt32=itemCount; if(dsLocalToOut!=NULL) { dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); exit(errorCode); } } length=(int32_t)fwrite(&outInt32, 1, 4, file); if(length!=4) { fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } // then write the item entries (and collect the maxItemLength) maxItemLength=0; for(i=0; i<itemCount; ++i) { entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); entry.dataOffset=(uint32_t)offset; if(dsLocalToOut!=NULL) { dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); exit(errorCode); } } length=(int32_t)fwrite(&entry, 1, 8, file); if(length!=8) { fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); exit(U_FILE_ACCESS_ERROR); } length=items[i].length; if(length>maxItemLength) { maxItemLength=length; } offset+=length; } // write the item names length=(int32_t)fwrite(outStrings, 1, outStringTop, file); if(length!=outStringTop) { fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } // write the items for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { int32_t type=makeTypeEnum(pItem->type); if(ds[type]!=NULL) { // swap each item from its platform properties to the desired ones udata_swap( ds[type], pItem->data, pItem->length, pItem->data, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); exit(errorCode); } } length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); if(length!=pItem->length) { fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); exit(U_FILE_ACCESS_ERROR); } } if(ferror(file)) { fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } fclose(file); for(i=0; i<TYPE_COUNT; ++i) { udata_closeSwapper(ds[i]); } } int32_t Package::findItem(const char *name, int32_t length) const { int32_t i, start, limit; int result; /* do a binary search for the string */ start=0; limit=itemCount; while(start<limit) { i=(start+limit)/2; if(length>=0) { result=strncmp(name, items[i].name, length); } else { result=strcmp(name, items[i].name); } if(result==0) { /* found */ if(length>=0) { /* * if we compared just prefixes, then we may need to back up * to the first item with this prefix */ while(i>0 && 0==strncmp(name, items[i-1].name, length)) { --i; } } return i; } else if(result<0) { limit=i; } else /* result>0 */ { start=i+1; } } return ~start; /* not found, return binary-not of the insertion point */ } void Package::findItems(const char *pattern) { const char *wild; if(pattern==NULL || *pattern==0) { findNextIndex=-1; return; } findPrefix=pattern; findSuffix=NULL; findSuffixLength=0; wild=strchr(pattern, '*'); if(wild==NULL) { // no wildcard findPrefixLength=(int32_t)strlen(pattern); } else { // one wildcard findPrefixLength=(int32_t)(wild-pattern); findSuffix=wild+1; findSuffixLength=(int32_t)strlen(findSuffix); if(NULL!=strchr(findSuffix, '*')) { // two or more wildcards fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); exit(U_PARSE_ERROR); } } if(findPrefixLength==0) { findNextIndex=0; } else { findNextIndex=findItem(findPrefix, findPrefixLength); } } int32_t Package::findNextItem() { const char *name, *middle, *treeSep; int32_t idx, nameLength, middleLength; if(findNextIndex<0) { return -1; } while(findNextIndex<itemCount) { idx=findNextIndex++; name=items[idx].name; nameLength=(int32_t)strlen(name); if(nameLength<(findPrefixLength+findSuffixLength)) { // item name too short for prefix & suffix continue; } if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { // left the range of names with this prefix break; } middle=name+findPrefixLength; middleLength=nameLength-findPrefixLength-findSuffixLength; if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { // suffix does not match continue; } // prefix & suffix match if(matchMode&MATCH_NOSLASH) { treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); if(treeSep!=NULL && (treeSep-middle)<middleLength) { // the middle (matching the * wildcard) contains a tree separator / continue; } } // found a matching item return idx; } // no more items findNextIndex=-1; return -1; } void Package::setMatchMode(uint32_t mode) { matchMode=mode; } void Package::addItem(const char *name) { addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); } void Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { int32_t idx; idx=findItem(name); if(idx<0) { // new item, make space at the insertion point ensureItemCapacity(); // move the following items down idx=~idx; if(idx<itemCount) { memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); } ++itemCount; // reset this Item entry memset(items+idx, 0, sizeof(Item)); // copy the item's name items[idx].name=allocString(TRUE, strlen(name)); strcpy(items[idx].name, name); pathToTree(items[idx].name); } else { // same-name item found, replace it if(items[idx].isDataOwned) { uprv_free(items[idx].data); } // keep the item's name since it is the same } // set the item's data items[idx].data=data; items[idx].length=length; items[idx].isDataOwned=isDataOwned; items[idx].type=type; } void Package::addFile(const char *filesPath, const char *name) { uint8_t *data; int32_t length; char type; data=readFile(filesPath, name, length, type); // readFile() exits the tool if it fails addItem(name, data, length, TRUE, type); } void Package::addItems(const Package &listPkg) { const Item *pItem; int32_t i; for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); } } void Package::removeItem(int32_t idx) { if(idx>=0) { // remove the item if(items[idx].isDataOwned) { uprv_free(items[idx].data); } // move the following items up if((idx+1)<itemCount) { memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); } --itemCount; if(idx<=findNextIndex) { --findNextIndex; } } } void Package::removeItems(const char *pattern) { int32_t idx; findItems(pattern); while((idx=findNextItem())>=0) { removeItem(idx); } } void Package::removeItems(const Package &listPkg) { const Item *pItem; int32_t i; for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { removeItems(pItem->name); } } void Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { char filename[1024]; UDataSwapper *ds; FILE *file; Item *pItem; int32_t fileLength; uint8_t itemCharset, outCharset; UBool itemIsBigEndian, outIsBigEndian; if(idx<0 || itemCount<=idx) { return; } pItem=items+idx; // swap the data to the outType // outType==0: don't swap if(outType!=0 && pItem->type!=outType) { // open the swapper UErrorCode errorCode=U_ZERO_ERROR; makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); makeTypeProps(outType, outCharset, outIsBigEndian); ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); exit(errorCode); } ds->printError=printPackageError; ds->printErrorContext=stderr; // swap the item from its platform properties to the desired ones udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); exit(errorCode); } udata_closeSwapper(ds); pItem->type=outType; } // create the file and write its contents makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); file=fopen(filename, "wb"); if(file==NULL) { fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); if(ferror(file) || fileLength!=pItem->length) { fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); exit(U_FILE_ACCESS_ERROR); } fclose(file); } void Package::extractItem(const char *filesPath, int32_t idx, char outType) { extractItem(filesPath, items[idx].name, idx, outType); } void Package::extractItems(const char *filesPath, const char *pattern, char outType) { int32_t idx; findItems(pattern); while((idx=findNextItem())>=0) { extractItem(filesPath, idx, outType); } } void Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { const Item *pItem; int32_t i; for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { extractItems(filesPath, pItem->name, outType); } } int32_t Package::getItemCount() const { return itemCount; } const Item * Package::getItem(int32_t idx) const { if (0 <= idx && idx < itemCount) { return &items[idx]; } return NULL; } void Package::checkDependency(void *context, const char *itemName, const char *targetName) { // check dependency: make sure the target item is in the package Package *me=(Package *)context; if(me->findItem(targetName)<0) { me->isMissingItems=TRUE; fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); } } UBool Package::checkDependencies() { isMissingItems=FALSE; enumDependencies(this, checkDependency); return (UBool)!isMissingItems; } void Package::enumDependencies(void *context, CheckDependency check) { int32_t i; for(i=0; i<itemCount; ++i) { enumDependencies(items+i, context, check); } } char * Package::allocString(UBool in, int32_t length) { char *p; int32_t top; if(in) { top=inStringTop; p=inStrings+top; } else { top=outStringTop; p=outStrings+top; } top+=length+1; if(top>STRING_STORE_SIZE) { fprintf(stderr, "icupkg: string storage overflow\n"); exit(U_BUFFER_OVERFLOW_ERROR); } if(in) { inStringTop=top; } else { outStringTop=top; } return p; } void Package::sortItems() { UErrorCode errorCode=U_ZERO_ERROR; uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); exit(errorCode); } } void Package::setItemCapacity(int32_t max) { if(max<=itemMax) { return; } Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0])); Item *oldItems = items; if(newItems == NULL) { fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", (unsigned long)max*sizeof(items[0]), max); exit(U_MEMORY_ALLOCATION_ERROR); } if(items && itemCount>0) { uprv_memcpy(newItems, items, (size_t)itemCount*sizeof(items[0])); } itemMax = max; items = newItems; uprv_free(oldItems); } void Package::ensureItemCapacity() { if((itemCount+1)>itemMax) { setItemCapacity(itemCount+kItemsChunk); } } U_NAMESPACE_END