/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
/*******************************************************************************
*
* File CALLCOLL.C
*
* Modification History:
*        Name                     Description
*     Madhu Katragadda              Ported for C API
********************************************************************************
*/

/*
 * Important: This file is included into intltest/allcoll.cpp so that the
 * test data is shared. This makes it easier to maintain the test data,
 * especially since the Unicode data must be portable and quoted character
 * literals will not work.
 * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
 * that must prevent the actual code in here from being part of the
 * allcoll.cpp compilation.
 */

/**
 * CollationDummyTest is a third level test class. This tests creation of
 * a customized collator object. For example, number 1 to be sorted
 * equivalent to word 'one'.
*/ #include <string.h> #include <stdlib.h> #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION #include "unicode/ucol.h" #include "unicode/uloc.h" #include "unicode/ures.h" #include "unicode/udata.h" #include "unicode/ucoleitr.h" #include "unicode/ustring.h" #include "unicode/uclean.h" #include "unicode/putil.h" #include "unicode/uenum.h" #include "cintltst.h" #include "ccolltst.h" #include "callcoll.h" #include "calldata.h" #include "cstring.h" #include "cmemory.h" /* set to 1 to test offsets in backAndForth() */ #define TEST_OFFSETS 0 /* perform test with strength PRIMARY */ static void TestPrimary(void); /* perform test with strength SECONDARY */ static void TestSecondary(void); /* perform test with strength tertiary */ static void TestTertiary(void); /*perform tests with strength Identical */ static void TestIdentical(void); /* perform extra tests */ static void TestExtra(void); /* Test jitterbug 581 */ static void TestJB581(void); /* Test jitterbug 1401 */ static void TestJB1401(void); /* Test [variable top] in the rule syntax */ static void TestVariableTop(void); /* Test surrogates */ static void TestSurrogates(void); static void TestInvalidRules(void); static void TestJitterbug1098(void); static void TestFCDCrash(void); static void TestJ5298(void); const UCollationResult results[] = { UCOL_LESS, UCOL_LESS, /*UCOL_GREATER,*/ UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_GREATER, UCOL_GREATER, UCOL_LESS, /* 10 */ UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_LESS, /* test primary > 17 */ UCOL_EQUAL, UCOL_EQUAL, UCOL_EQUAL, /* 20 */ UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS, /* test secondary > 26 */ UCOL_EQUAL, UCOL_EQUAL, UCOL_EQUAL, UCOL_EQUAL, UCOL_EQUAL, /* 30 */ UCOL_EQUAL, UCOL_LESS, UCOL_EQUAL, /* 34 */ UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS /* 37 */ }; static void uprv_appendByteToHexString(char *dst, uint8_t val) { uint32_t len = (uint32_t)uprv_strlen(dst); *(dst+len) = 
T_CString_itosOffset((val >> 4)); *(dst+len+1) = T_CString_itosOffset((val & 0xF)); *(dst+len+2) = 0; } /* this function makes a string with representation of a sortkey */ static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) { int32_t strength = UCOL_PRIMARY; uint32_t res_size = 0; UBool doneCase = FALSE; UErrorCode errorCode = U_ZERO_ERROR; char *current = buffer; const uint8_t *currentSk = sortkey; uprv_strcpy(current, "["); while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) { if(strength > UCOL_PRIMARY) { uprv_strcat(current, " . "); } while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */ uprv_appendByteToHexString(current, *currentSk++); uprv_strcat(current, " "); } if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) { doneCase = TRUE; } else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) { strength ++; } if (*currentSk) { uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */ } if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) { break; } } if(ucol_getStrength(coll) == UCOL_IDENTICAL) { uprv_strcat(current, " . 
"); while(*currentSk != 0) { uprv_appendByteToHexString(current, *currentSk++); uprv_strcat(current, " "); } uprv_appendByteToHexString(current, *currentSk++); } uprv_strcat(current, "]"); if(res_size > *len) { return NULL; } return buffer; } void addAllCollTest(TestNode** root) { addTest(root, &TestPrimary, "tscoll/callcoll/TestPrimary"); addTest(root, &TestSecondary, "tscoll/callcoll/TestSecondary"); addTest(root, &TestTertiary, "tscoll/callcoll/TestTertiary"); addTest(root, &TestIdentical, "tscoll/callcoll/TestIdentical"); addTest(root, &TestExtra, "tscoll/callcoll/TestExtra"); addTest(root, &TestJB581, "tscoll/callcoll/TestJB581"); addTest(root, &TestVariableTop, "tscoll/callcoll/TestVariableTop"); addTest(root, &TestSurrogates, "tscoll/callcoll/TestSurrogates"); addTest(root, &TestInvalidRules, "tscoll/callcoll/TestInvalidRules"); addTest(root, &TestJB1401, "tscoll/callcoll/TestJB1401"); addTest(root, &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098"); addTest(root, &TestFCDCrash, "tscoll/callcoll/TestFCDCrash"); addTest(root, &TestJ5298, "tscoll/callcoll/TestJ5298"); } UBool hasCollationElements(const char *locName) { UErrorCode status = U_ZERO_ERROR; UResourceBundle *loc = ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll", locName, &status);; if(U_SUCCESS(status)) { status = U_ZERO_ERROR; loc = ures_getByKey(loc, "collations", loc, &status); ures_close(loc); if(status == U_ZERO_ERROR) { /* do the test - there are real elements */ return TRUE; } } return FALSE; } static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) { int32_t partialSKResult = 0; UCharIterator sIter, tIter; uint32_t sState[2], tState[2]; int32_t sSize = pieceSize, tSize = pieceSize; /*int32_t i = 0;*/ uint8_t sBuf[16384], tBuf[16384]; if(pieceSize > 16384) { log_err("Partial sortkey size buffer too small. 
Please consider increasing the buffer!\n"); *status = U_BUFFER_OVERFLOW_ERROR; return UCOL_EQUAL; } *status = U_ZERO_ERROR; sState[0] = 0; sState[1] = 0; tState[0] = 0; tState[1] = 0; while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) { uiter_setString(&sIter, source, sLen); uiter_setString(&tIter, target, tLen); sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status); tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status); if(sState[0] != 0 || tState[0] != 0) { /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/ } /*log_verbose("%i ", i++);*/ partialSKResult = memcmp(sBuf, tBuf, pieceSize); } if(partialSKResult < 0) { return UCOL_LESS; } else if(partialSKResult > 0) { return UCOL_GREATER; } else { return UCOL_EQUAL; } } static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result) { int32_t sortklen1, sortklen2, sortklenmax, sortklenmin; int temp=0, gSortklen1=0,gSortklen2=0; UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result; uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a; uint32_t sLen = u_strlen(source); uint32_t tLen = u_strlen(target); char buffer[256]; uint32_t len; UErrorCode status = U_ZERO_ERROR; UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); UCharIterator sIter, tIter; compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen); if (compareResult != result) { log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n", compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1)); } compareResulta = ucol_strcoll(myCollation, source, -1, target, -1); if (compareResulta != result) { log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. 
%i): %s, %s\n", compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1)); } uiter_setString(&sIter, source, sLen); uiter_setString(&tIter, target, tLen); compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); if(compareResultIter != result) { log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); } /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */ { char utf8Source[256], utf8Target[256]; int32_t utf8SourceLen = 0, utf8TargetLen = 0; u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status); if(U_FAILURE(status)) { /* probably buffer is not big enough */ log_verbose("Src UTF-8 buffer too small! Will not compare!\n"); } else { u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status); if(U_SUCCESS(status)) { { /* ucol_strcollUTF8 */ compareResulta = ucol_strcollUTF8(myCollation, utf8Source, utf8SourceLen, utf8Target, utf8TargetLen, &status); if (U_FAILURE(status)) { log_err("Error in ucol_strcollUTF8 with explicit length\n"); status = U_ZERO_ERROR; } else if (compareResulta != result) { log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n", compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1)); } compareResulta = ucol_strcollUTF8(myCollation, utf8Source, -1, utf8Target, -1, &status); if (U_FAILURE(status)) { log_err("Error in ucol_strcollUTF8 with null terminated strings\n"); status = U_ZERO_ERROR; } else if (compareResulta != result) { log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. 
%i): %s, %s\n", compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1)); } } { /* char iterator over UTF8 */ UCollationResult compareResultUTF8Iter = result, compareResultUTF8IterNorm = result; uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); compareResultUTF8Iter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); sIter.move(&sIter, 0, UITER_START); tIter.move(&tIter, 0, UITER_START); compareResultUTF8IterNorm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(compareResultUTF8Iter != compareResultIter) { log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); } if(compareResultUTF8Iter != compareResultUTF8IterNorm) { log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); } } } else { log_verbose("Target UTF-8 buffer too small! Did not compare!\n"); } if(U_FAILURE(status)) { log_verbose("UTF-8 strcoll failed! Ignoring result\n"); } } } /* testing the partial sortkeys */ if(1) { /*!QUICK*/ int32_t i = 0; int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ int32_t partialSizesSize = 1; if(getTestOption(QUICK_OPTION) <= 0) { partialSizesSize = 7; } /*log_verbose("partial sortkey test piecesize=");*/ for(i = 0; i < partialSizesSize; i++) { UCollationResult partialSKResult = result, partialNormalizedSKResult = result; /*log_verbose("%i ", partialSizes[i]);*/ partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status); if(partialSKResult != result) { log_err("Partial sortkey comparison returned wrong result (%i exp. 
%i): %s, %s (size %i)\n", partialSKResult, result, aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]); } if(getTestOption(QUICK_OPTION) <= 0 && norm != UCOL_ON) { /*log_verbose("N ");*/ ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(partialSKResult != partialNormalizedSKResult) { log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n", aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]); } } } /*log_verbose("\n");*/ } sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0); sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0); sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2); sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2); (void)sortklenmin; /* Suppress set but not used warning. */ sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1); ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1); sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1); ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1); /* Check that sort key generated with null terminated string is identical */ /* to that generted with a length specified. 
*/ if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 || uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) { log_err("Sort Keys from null terminated and explicit length strings differ.\n"); } /*memcmp(sortKey1, sortKey2,sortklenmax);*/ temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2); gSortklen1 = uprv_strlen((const char *)sortKey1)+1; gSortklen2 = uprv_strlen((const char *)sortKey2)+1; if(sortklen1 != gSortklen1){ log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1); log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len)); } if(sortklen2!= gSortklen2){ log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2); log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len)); } if(temp < 0) { keyResult=UCOL_LESS; } else if(temp > 0) { keyResult= UCOL_GREATER; } else { keyResult = UCOL_EQUAL; } reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result ); free(sortKey1); free(sortKey2); free(sortKey1a); free(sortKey2a); } void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result) { if(myCollation) { doTestVariant(myCollation, source, target, result); if(result == UCOL_LESS) { doTestVariant(myCollation, target, source, UCOL_GREATER); } else if(result == UCOL_GREATER) { doTestVariant(myCollation, target, source, UCOL_LESS); } else { doTestVariant(myCollation, target, source, UCOL_EQUAL); } } else { log_data_err("No collator! 
Any data around?\n"); } } /** * Return an integer array containing all of the collation orders * returned by calls to next on the specified iterator */ OrderAndOffset* getOrders(UCollationElements *iter, int32_t *orderLength) { UErrorCode status; int32_t order; int32_t maxSize = 100; int32_t size = 0; int32_t offset = ucol_getOffset(iter); OrderAndOffset *temp; OrderAndOffset *orders =(OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize); status= U_ZERO_ERROR; while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER) { if (size == maxSize) { maxSize *= 2; temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize); memcpy(temp, orders, size * sizeof(OrderAndOffset)); free(orders); orders = temp; } orders[size].order = order; orders[size].offset = offset; offset = ucol_getOffset(iter); size += 1; } if (maxSize > size && size > 0) { temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * size); memcpy(temp, orders, size * sizeof(OrderAndOffset)); free(orders); orders = temp; } *orderLength = size; return orders; } void backAndForth(UCollationElements *iter) { /* Run through the iterator forwards and stick it into an array */ int32_t idx, o; UErrorCode status = U_ZERO_ERROR; int32_t orderLength = 0; OrderAndOffset *orders = getOrders(iter, &orderLength); /* Now go through it backwards and make sure we get the same values */ idx = orderLength; ucol_reset(iter); /* synwee : changed */ while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) { #if TEST_OFFSETS int32_t offset = #endif ucol_getOffset(iter); idx -= 1; if (o != orders[idx].order) { if (o == 0) idx ++; else { while (idx > 0 && orders[-- idx].order == 0) { /* nothing... */ } if (o != orders[idx].order) { log_err("Mismatched order at index %d: 0x%8.8X vs. 0x%8.8X\n", idx, orders[idx].order, o); goto bail; } } } #if TEST_OFFSETS if (offset != orders[idx].offset) { log_err("Mismatched offset at index %d: %d vs. 
%d\n", idx, orders[idx].offset, offset); goto bail; } #endif } while (idx != 0 && orders[idx - 1].order == 0) { idx -= 1; } if (idx != 0) { log_err("Didn't get back to beginning - index is %d\n", idx); ucol_reset(iter); log_err("\nnext: "); if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER) { log_err("Error at %x\n", o); } log_err("\nprev: "); if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) { log_err("Error at %x\n", o); } log_verbose("\n"); } bail: free(orders); } void genericOrderingTestWithResult(UCollator *coll, const char * const s[], uint32_t size, UCollationResult result) { UChar t1[2048] = {0}; UChar t2[2048] = {0}; UCollationElements *iter; UErrorCode status = U_ZERO_ERROR; uint32_t i = 0, j = 0; log_verbose("testing sequence:\n"); for(i = 0; i < size; i++) { log_verbose("%s\n", s[i]); } iter = ucol_openElements(coll, t1, u_strlen(t1), &status); if (U_FAILURE(status)) { log_err("Creation of iterator failed\n"); } for(i = 0; i < size-1; i++) { for(j = i+1; j < size; j++) { u_unescape(s[i], t1, 2048); u_unescape(s[j], t2, 2048); doTest(coll, t1, t2, result); /* synwee : added collation element iterator test */ ucol_setText(iter, t1, u_strlen(t1), &status); backAndForth(iter); ucol_setText(iter, t2, u_strlen(t2), &status); backAndForth(iter); } } ucol_closeElements(iter); } void genericOrderingTest(UCollator *coll, const char * const s[], uint32_t size) { genericOrderingTestWithResult(coll, s, size, UCOL_LESS); } void genericLocaleStarter(const char *locale, const char * const s[], uint32_t size) { UErrorCode status = U_ZERO_ERROR; UCollator *coll = ucol_open(locale, &status); log_verbose("Locale starter for %s\n", locale); if(U_SUCCESS(status)) { genericOrderingTest(coll, s, size); } else if(status == U_FILE_ACCESS_ERROR) { log_data_err("Is your data around?\n"); return; } else { log_err("Unable to open collator for locale %s\n", locale); } ucol_close(coll); } void genericLocaleStarterWithResult(const char *locale, const char * const s[], 
uint32_t size, UCollationResult result) { UErrorCode status = U_ZERO_ERROR; UCollator *coll = ucol_open(locale, &status); log_verbose("Locale starter for %s\n", locale); if(U_SUCCESS(status)) { genericOrderingTestWithResult(coll, s, size, result); } else if(status == U_FILE_ACCESS_ERROR) { log_data_err("Is your data around?\n"); return; } else { log_err("Unable to open collator for locale %s\n", locale); } ucol_close(coll); } /* currently not used with options */ void genericRulesStarterWithOptionsAndResult(const char *rules, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) { UErrorCode status = U_ZERO_ERROR; UChar rlz[RULE_BUFFER_LEN] = { 0 }; uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN); uint32_t i; UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); log_verbose("Rules starter for %s\n", rules); if(U_SUCCESS(status)) { log_verbose("Setting attributes\n"); for(i = 0; i < attsize; i++) { ucol_setAttribute(coll, attrs[i], values[i], &status); } genericOrderingTestWithResult(coll, s, size, result); } else { log_err_status(status, "Unable to open collator with rules %s\n", rules); } ucol_close(coll); } void genericLocaleStarterWithOptionsAndResult(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) { UErrorCode status = U_ZERO_ERROR; uint32_t i; UCollator *coll = ucol_open(locale, &status); log_verbose("Locale starter for %s\n", locale); if(U_SUCCESS(status)) { log_verbose("Setting attributes\n"); for(i = 0; i < attsize; i++) { ucol_setAttribute(coll, attrs[i], values[i], &status); } genericOrderingTestWithResult(coll, s, size, result); } else { log_err_status(status, "Unable to open collator for locale %s\n", locale); } ucol_close(coll); } void genericLocaleStarterWithOptions(const char *locale, const char * const s[], 
uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) { genericLocaleStarterWithOptionsAndResult(locale, s, size, attrs, values, attsize, UCOL_LESS); } void genericRulesStarterWithResult(const char *rules, const char * const s[], uint32_t size, UCollationResult result) { UErrorCode status = U_ZERO_ERROR; UChar rlz[RULE_BUFFER_LEN] = { 0 }; uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN); UCollator *coll = NULL; coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); log_verbose("Rules starter for %s\n", rules); if(U_SUCCESS(status)) { genericOrderingTestWithResult(coll, s, size, result); ucol_close(coll); } else if(status == U_FILE_ACCESS_ERROR) { log_data_err("Is your data around?\n"); } else { log_err("Unable to open collator with rules %s\n", rules); } } void genericRulesStarter(const char *rules, const char * const s[], uint32_t size) { genericRulesStarterWithResult(rules, s, size, UCOL_LESS); } static void TestTertiary() { int32_t len,i; UCollator *myCollation; UErrorCode status=U_ZERO_ERROR; static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; UChar rules[sizeof(str)]; len = strlen(str); u_uastrcpy(rules, str); myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(myCollation, UCOL_TERTIARY); for (i = 0; i < 17 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); myCollation = 0; } static void TestPrimary( ) { int32_t len,i; UCollator *myCollation; UErrorCode status=U_ZERO_ERROR; static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; UChar rules[sizeof(str)]; len = strlen(str); u_uastrcpy(rules, str); myCollation=ucol_openRules(rules, len, UCOL_OFF, 
UCOL_DEFAULT_STRENGTH,NULL, &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(myCollation, UCOL_PRIMARY); for (i = 17; i < 26 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); myCollation = 0; } static void TestSecondary() { int32_t i; int32_t len; UCollator *myCollation; UErrorCode status=U_ZERO_ERROR; static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; UChar rules[sizeof(str)]; len = strlen(str); u_uastrcpy(rules, str); myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(myCollation, UCOL_SECONDARY); for (i = 26; i < 34 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); myCollation = 0; } static void TestIdentical() { int32_t i; int32_t len; UCollator *myCollation; UErrorCode status=U_ZERO_ERROR; static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; UChar rules[sizeof(str)]; len = strlen(str); u_uastrcpy(rules, str); myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } for(i= 34; i<37; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); myCollation = 0; } static void TestExtra() { int32_t i, j; int32_t len; UCollator *myCollation; UErrorCode status = U_ZERO_ERROR; static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; UChar rules[sizeof(str)]; len = strlen(str); u_uastrcpy(rules, str); 
myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(myCollation, UCOL_TERTIARY); for (i = 0; i < COUNT_TEST_CASES-1 ; i++) { for (j = i + 1; j < COUNT_TEST_CASES; j += 1) { doTest(myCollation, testCases[i], testCases[j], UCOL_LESS); } } ucol_close(myCollation); myCollation = 0; } static void TestJB581(void) { int32_t bufferLen = 0; UChar source [100]; UChar target [100]; UCollationResult result = UCOL_EQUAL; uint8_t sourceKeyArray [100]; uint8_t targetKeyArray [100]; int32_t sourceKeyOut = 0, targetKeyOut = 0; UCollator *myCollator = 0; UErrorCode status = U_ZERO_ERROR; /*u_uastrcpy(source, "This is a test.");*/ /*u_uastrcpy(target, "THISISATEST.");*/ u_uastrcpy(source, "THISISATEST."); u_uastrcpy(target, "Thisisatest."); myCollator = ucol_open("en_US", &status); if (U_FAILURE(status)){ log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status)); return; } result = ucol_strcoll(myCollator, source, -1, target, -1); /* result is 1, secondary differences only for ignorable space characters*/ if (result != 1) { log_err("Comparing two strings with only secondary differences in C failed.\n"); } /* To compare them with just primary differences */ ucol_setStrength(myCollator, UCOL_PRIMARY); result = ucol_strcoll(myCollator, source, -1, target, -1); /* result is 0 */ if (result != 0) { log_err("Comparing two strings with no differences in C failed.\n"); } /* Now, do the same comparison with keys */ sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100); (void)sourceKeyOut; /* Suppress set but not used warning. */ targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100); bufferLen = ((targetKeyOut > 100) ? 
100 : targetKeyOut); if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0) { log_err("Comparing two strings with sort keys in C failed.\n"); } ucol_close(myCollator); } static void TestJB1401(void) { UCollator *myCollator = 0; UErrorCode status = U_ZERO_ERROR; static UChar NFD_UnsafeStartChars[] = { 0x0f73, /* Tibetan Vowel Sign II */ 0x0f75, /* Tibetan Vowel Sign UU */ 0x0f81, /* Tibetan Vowel Sign Reversed II */ 0 }; int i; myCollator = ucol_open("en_US", &status); if (U_FAILURE(status)){ log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status)); return; } ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); if (U_FAILURE(status)){ log_err("ERROR: Failed to set normalization mode ON for collator.\n"); return; } for (i=0; ; i++) { UChar c; UChar X[4]; UChar Y[20]; UChar Z[20]; /* Get the next funny character to be tested, and set up the * three test strings X, Y, Z, consisting of an A-grave + test char, * in original form, NFD, and then NFC form. */ c = NFD_UnsafeStartChars[i]; if (c==0) {break;} X[0]=0xC0; X[1]=c; X[2]=0; /* \u00C0 is A Grave*/ unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status); unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status); if (U_FAILURE(status)){ log_err("ERROR: Failed to normalize test of character %x\n", c); return; } /* Collation test. All three strings should be equal. * doTest does both strcoll and sort keys, with params in both orders. */ doTest(myCollator, X, Y, UCOL_EQUAL); doTest(myCollator, X, Z, UCOL_EQUAL); doTest(myCollator, Y, Z, UCOL_EQUAL); /* Run collation element iterators over the three strings. Results should be same for each. 
*/ { UCollationElements *ceiX, *ceiY, *ceiZ; int32_t ceX, ceY, ceZ; int j; ceiX = ucol_openElements(myCollator, X, -1, &status); ceiY = ucol_openElements(myCollator, Y, -1, &status); ceiZ = ucol_openElements(myCollator, Z, -1, &status); if (U_FAILURE(status)) { log_err("ERROR: uucol_openElements failed.\n"); return; } for (j=0;; j++) { ceX = ucol_next(ceiX, &status); ceY = ucol_next(ceiY, &status); ceZ = ucol_next(ceiZ, &status); if (U_FAILURE(status)) { log_err("ERROR: ucol_next failed for iteration #%d.\n", j); break; } if (ceX != ceY || ceY != ceZ) { log_err("ERROR: ucol_next failed for iteration #%d.\n", j); break; } if (ceX == UCOL_NULLORDER) { break; } } ucol_closeElements(ceiX); ucol_closeElements(ceiY); ucol_closeElements(ceiZ); } } ucol_close(myCollator); } /** * Tests the [variable top] tag in rule syntax. Since the default [alternate] * tag has the value shifted, any codepoints before [variable top] should give * a primary ce of 0. */ static void TestVariableTop(void) { #if 0 /* * Starting with ICU 53, setting the variable top via a pseudo relation string * is not supported any more. * It was replaced by the [maxVariable symbol] setting. * See ICU tickets #9958 and #8032. 
*/ static const char str[] = "&z = [variable top]"; int len = strlen(str); UChar rules[sizeof(str)]; UCollator *myCollation; UCollator *enCollation; UErrorCode status = U_ZERO_ERROR; UChar source[1]; UChar ch; uint8_t result[20]; uint8_t expected[20]; u_uastrcpy(rules, str); enCollation = ucol_open("en_US", &status); if (U_FAILURE(status)) { log_err_status(status, "ERROR: in creation of collator :%s\n", myErrorName(status)); return; } myCollation = ucol_openRules(rules, len, UCOL_OFF, UCOL_PRIMARY,NULL, &status); if (U_FAILURE(status)) { ucol_close(enCollation); log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(enCollation, UCOL_PRIMARY); ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) != UCOL_SHIFTED || U_FAILURE(status)) { log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n"); } uprv_memset(expected, 0, 20); /* space is supposed to be a variable */ source[0] = ' '; len = ucol_getSortKey(enCollation, source, 1, result, sizeof(result)); if (uprv_memcmp(expected, result, len) != 0) { log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n"); } ch = 'a'; while (ch < 'z') { source[0] = ch; len = ucol_getSortKey(myCollation, source, 1, result, sizeof(result)); if (uprv_memcmp(expected, result, len) != 0) { log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", ch); } ch ++; } ucol_close(enCollation); ucol_close(myCollation); enCollation = NULL; myCollation = NULL; #endif } /** * Tests surrogate support. * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret * Therefore, another (unassigned) code point was used for this test. 
*/ static void TestSurrogates(void) { static const char str[] = "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A"; int len = strlen(str); int rlen = 0; UChar rules[sizeof(str)]; UCollator *myCollation; UCollator *enCollation; UErrorCode status = U_ZERO_ERROR; UChar source[][4] = {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}}; UChar target[][4] = {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}}; int count = 0; uint8_t enresult[20], myresult[20]; int enlen, mylen; /* tests for open rules with surrogate rules */ rlen = u_unescape(str, rules, len); enCollation = ucol_open("en_US", &status); if (U_FAILURE(status)) { log_err_status(status, "ERROR: in creation of collator :%s\n", myErrorName(status)); return; } myCollation = ucol_openRules(rules, rlen, UCOL_OFF, UCOL_TERTIARY,NULL, &status); if (U_FAILURE(status)) { ucol_close(enCollation); log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } /* this test is to verify the supplementary sort key order in the english collator */ log_verbose("start of english collation supplementary characters test\n"); while (count < 2) { doTest(enCollation, source[count], target[count], UCOL_LESS); count ++; } doTest(enCollation, source[count], target[count], UCOL_GREATER); log_verbose("start of tailored collation supplementary characters test\n"); count = 0; /* tests getting collation elements for surrogates for tailored rules */ while (count < 4) { doTest(myCollation, source[count], target[count], UCOL_LESS); count ++; } /* tests that \uD800\uDC02 still has the same value, not changed */ enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20); mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20); if (enlen != mylen || uprv_memcmp(enresult, myresult, enlen) != 0) { log_verbose("Failed : non-tailored supplementary characters should have the same value\n"); } ucol_close(enCollation); ucol_close(myCollation); 
enCollation = NULL; myCollation = NULL; } /* *### TODO: Add more invalid rules to test all different scenarios. * */ static void TestInvalidRules(){ #define MAX_ERROR_STATES 2 static const char* rulesArr[MAX_ERROR_STATES] = { "& C < ch, cH, Ch[this should fail]<d", "& C < ch, cH, & Ch[variable top]" }; static const char* preContextArr[MAX_ERROR_STATES] = { " C < ch, cH, Ch", "& C < ch, cH", }; static const char* postContextArr[MAX_ERROR_STATES] = { "[this should fa", ", & Ch[variable" }; int i; for(i = 0;i<MAX_ERROR_STATES;i++){ UChar rules[1000] = { '\0' }; UChar preContextExp[1000] = { '\0' }; UChar postContextExp[1000] = { '\0' }; UParseError parseError; UErrorCode status = U_ZERO_ERROR; UCollator* coll=0; u_charsToUChars(rulesArr[i],rules,uprv_strlen(rulesArr[i])+1); u_charsToUChars(preContextArr[i],preContextExp,uprv_strlen(preContextArr[i])+1); u_charsToUChars(postContextArr[i],postContextExp,uprv_strlen(postContextArr[i])+1); /* clean up stuff in parseError */ u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN); u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN); /* open the rules and test */ coll = ucol_openRules(rules,u_strlen(rules),UCOL_OFF,UCOL_DEFAULT_STRENGTH,&parseError,&status); (void)coll; /* Suppress set but not used warning. 
*/ if(u_strcmp(parseError.preContext,preContextExp)!=0){ log_err_status(status, "preContext in UParseError for ucol_openRules does not match: \"%s\"\n", aescstrdup(parseError.preContext, -1)); } if(u_strcmp(parseError.postContext,postContextExp)!=0){ log_err_status(status, "postContext in UParseError for ucol_openRules does not match: \"%s\"\n", aescstrdup(parseError.postContext, -1)); } } } static void TestJitterbug1098(){ UChar rule[1000]; UCollator* c1 = NULL; UErrorCode status = U_ZERO_ERROR; UParseError parseError; char preContext[200]={0}; char postContext[200]={0}; int i=0; const char* rules[] = { "&''<\\\\", "&\\'<\\\\", "&\\\"<'\\'", "&'\"'<\\'", NULL }; const UCollationResult results1098[] = { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_LESS, }; const UChar input[][2]= { {0x0027,0x005c}, {0x0027,0x005c}, {0x0022,0x005c}, {0x0022,0x0027}, }; UChar X[2] ={0}; UChar Y[2] ={0}; u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN); u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN); for(;rules[i]!=0;i++){ u_uastrcpy(rule, rules[i]); c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status); if(U_FAILURE(status)){ log_err_status(status, "Could not parse the rules syntax. 
Error: %s\n", u_errorName(status)); if (status == U_PARSE_ERROR) { u_UCharsToChars(parseError.preContext,preContext,20); u_UCharsToChars(parseError.postContext,postContext,20); log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext); } return; } X[0] = input[i][0]; Y[0] = input[i][1]; doTest(c1,X,Y,results1098[i]); ucol_close(c1); } } static void TestFCDCrash(void) { static const char *test[] = { "Gr\\u00F6\\u00DFe", "Grossist" }; UErrorCode status = U_ZERO_ERROR; UCollator *coll = ucol_open("es", &status); if(U_FAILURE(status)) { log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); return; } ucol_close(coll); coll = NULL; ctest_resetICU(); coll = ucol_open("de_DE", &status); if(U_FAILURE(status)) { log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); return; } ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); genericOrderingTest(coll, test, 2); ucol_close(coll); } /*static UBool find(UEnumeration* list, const char* str, UErrorCode* status){ const char* value = NULL; int32_t length=0; if(U_FAILURE(*status)){ return FALSE; } uenum_reset(list, status); while( (value= uenum_next(list, &length, status))!=NULL){ if(strcmp(value, str)==0){ return TRUE; } } return FALSE; }*/ static void TestJ5298(void) { UErrorCode status = U_ZERO_ERROR; char input[256], output[256]; UBool isAvailable; int32_t i = 0; UEnumeration* values = NULL; const char *keywordValue = NULL; log_verbose("Number of collator locales returned : %i \n", ucol_countAvailable()); values = ucol_getKeywordValues("collation", &status); while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) { if (strncmp(keywordValue, "private-", 8) == 0) { log_err("ucol_getKeywordValues() returns private collation keyword: %s\n", keywordValue); } } for (i = 0; i < ucol_countAvailable(); i++) { uenum_reset(values, &status); while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) { strcpy(input, 
ucol_getAvailable(i)); if (strcmp(keywordValue, "standard") != 0) { strcat(input, "@collation="); strcat(input, keywordValue); } ucol_getFunctionalEquivalent(output, 256, "collation", input, &isAvailable, &status); if (strcmp(input, output) == 0) { /* Unique locale, print it out */ log_verbose("%s, \n", output); } } } uenum_close(values); log_verbose("\n"); } #endif /* #if !UCONFIG_NO_COLLATION */