// © 2018 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // utrie_swap.cpp // created: 2018aug08 Markus W. Scherer #include "unicode/utypes.h" #include "cmemory.h" #include "ucptrie_impl.h" #include "udataswp.h" #include "utrie.h" #include "utrie2_impl.h" // These functions for swapping different generations of ICU code point tries are here // so that their implementation files need not depend on swapper code, // need not depend on each other, and so that other swapper code // need not depend on other trie code. namespace { constexpr int32_t ASCII_LIMIT = 0x80; } // namespace U_CAPI int32_t U_EXPORT2 utrie_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UTrieHeader *inTrie; UTrieHeader trie; int32_t size; UBool dataIs32; if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; } if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } /* setup and swapping */ if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } inTrie=(const UTrieHeader *)inData; trie.signature=ds->readUInt32(inTrie->signature); trie.options=ds->readUInt32(inTrie->options); trie.indexLength=udata_readInt32(ds, inTrie->indexLength); trie.dataLength=udata_readInt32(ds, inTrie->dataLength); if( trie.signature!=0x54726965 || (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || trie.indexLength<UTRIE_BMP_INDEX_LENGTH || (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 || trie.dataLength<UTRIE_DATA_BLOCK_LENGTH || (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 || ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100)) ) { *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ return 0; } dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0); size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2); if(length>=0) { UTrieHeader *outTrie; if(length<size) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } outTrie=(UTrieHeader *)outData; /* swap the header */ ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); /* swap the index and the data */ if(dataIs32) { ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); } else { ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); } } return size; } U_CAPI int32_t U_EXPORT2 utrie2_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UTrie2Header *inTrie; UTrie2Header trie; int32_t dataLength, size; UTrie2ValueBits valueBits; if(U_FAILURE(*pErrorCode)) { return 0; } if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } /* setup and swapping */ if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } inTrie=(const UTrie2Header *)inData; trie.signature=ds->readUInt32(inTrie->signature); trie.options=ds->readUInt16(inTrie->options); trie.indexLength=ds->readUInt16(inTrie->indexLength); trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; if( trie.signature!=UTRIE2_SIG || valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || trie.indexLength<UTRIE2_INDEX_1_OFFSET || dataLength<UTRIE2_DATA_START_OFFSET ) { *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ return 0; } size=sizeof(UTrie2Header)+trie.indexLength*2; switch(valueBits) { case UTRIE2_16_VALUE_BITS: size+=dataLength*2; break; case UTRIE2_32_VALUE_BITS: size+=dataLength*4; break; default: *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } if(length>=0) { UTrie2Header *outTrie; if(length<size) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } outTrie=(UTrie2Header *)outData; /* swap the header */ ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); /* swap the index and the data */ switch(valueBits) { case UTRIE2_16_VALUE_BITS: ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); break; case UTRIE2_32_VALUE_BITS: ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); break; default: *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } } return size; } U_CAPI int32_t U_EXPORT2 ucptrie_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { const UCPTrieHeader *inTrie; UCPTrieHeader trie; int32_t dataLength, size; UCPTrieValueWidth valueWidth; if(U_FAILURE(*pErrorCode)) { return 0; } if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } /* setup and swapping */ if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } inTrie=(const UCPTrieHeader *)inData; trie.signature=ds->readUInt32(inTrie->signature); trie.options=ds->readUInt16(inTrie->options); trie.indexLength=ds->readUInt16(inTrie->indexLength); trie.dataLength = ds->readUInt16(inTrie->dataLength); UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; if( trie.signature!=UCPTRIE_SIG || type > UCPTRIE_TYPE_SMALL || (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || valueWidth > UCPTRIE_VALUE_BITS_8 || trie.indexLength < minIndexLength || dataLength < ASCII_LIMIT ) { *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ return 0; } size=sizeof(UCPTrieHeader)+trie.indexLength*2; switch(valueWidth) { case UCPTRIE_VALUE_BITS_16: size+=dataLength*2; break; case UCPTRIE_VALUE_BITS_32: size+=dataLength*4; break; case UCPTRIE_VALUE_BITS_8: size+=dataLength; break; default: *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } if(length>=0) { UCPTrieHeader *outTrie; if(length<size) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } outTrie=(UCPTrieHeader *)outData; /* swap the header */ ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); /* swap the index and the data */ switch(valueWidth) { case UCPTRIE_VALUE_BITS_16: ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); break; case UCPTRIE_VALUE_BITS_32: ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); break; case UCPTRIE_VALUE_BITS_8: ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); if(inTrie!=outTrie) { uprv_memmove((outTrie+1)+trie.indexLength, (inTrie+1)+trie.indexLength, dataLength); } break; default: *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } } return size; } namespace { /** * Gets the trie version from 32-bit-aligned memory containing the serialized form * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). * * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie * @param length the number of bytes available at data; * can be more than necessary (see return value) * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. * If TRUE, opposite-endian serialized forms are recognized as well. * @return the trie version of the serialized form, or 0 if it is not * recognized as a serialized trie */ int32_t getVersion(const void *data, int32_t length, UBool anyEndianOk) { uint32_t signature; if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { return 0; } signature=*(const uint32_t *)data; if(signature==UCPTRIE_SIG) { return 3; } if(anyEndianOk && signature==UCPTRIE_OE_SIG) { return 3; } if(signature==UTRIE2_SIG) { return 2; } if(anyEndianOk && signature==UTRIE2_OE_SIG) { return 2; } if(signature==UTRIE_SIG) { return 1; } if(anyEndianOk && signature==UTRIE_OE_SIG) { return 1; } return 0; } } // namespace U_CAPI int32_t U_EXPORT2 utrie_swapAnyVersion(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return 0; } switch(getVersion(inData, length, TRUE)) { case 1: return utrie_swap(ds, inData, length, outData, pErrorCode); case 2: return utrie2_swap(ds, inData, length, outData, pErrorCode); case 3: return ucptrie_swap(ds, inData, length, outData, pErrorCode); default: *pErrorCode=U_INVALID_FORMAT_ERROR; return 0; } }