/*
*******************************************************************************
*
* Copyright (C) 2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: bidiconf.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009oct16
* created by: Markus W. Scherer
*
* BiDi conformance test, using the Unicode BidiTest.txt file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "unicode/utypes.h"
#include "unicode/ubidi.h"
#include "unicode/errorcode.h"
#include "unicode/localpointer.h"
#include "unicode/putil.h"
#include "unicode/unistr.h"
#include "intltest.h"
#include "uparse.h"
// IntlTest subclass that reads BidiTest.txt line by line and compares the
// levels and visual ordering computed by UBiDi with the expected values
// listed in that file.
class BiDiConformanceTest : public IntlTest {
public:
BiDiConformanceTest() :
directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
errorCount(0) {}
// Test-framework dispatcher; index 0 maps to TestBidiTest().
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
// The one test in this suite: processes the whole BidiTest.txt file.
void TestBidiTest();
private:
// Writes the unidata directory path into path; returns path, or NULL if
// UnicodeData.txt could not be opened in any of the probed locations.
char *getUnidataPath(char path[]);
// Parse the payload of an "@Levels:" line into levels[]/levelsCount/directionBits.
UBool parseLevels(const char *start);
// Parse the payload of an "@Reorder:" line into ordering[]/orderingCount.
UBool parseOrdering(const char *start);
// Parse a list of BiDi class names into inputString; start is advanced
// past the consumed class names.
UBool parseInputStringFromBiDiClasses(const char *&start);
// Compare the actual levels with levels[]; reports and returns FALSE on mismatch.
UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
const char *paraLevelName);
// Compare the actual visual ordering with ordering[]; reports and returns FALSE on mismatch.
UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
// Increments errorCount and logs the current line, input string and paragraph level.
void printErrorLine(const char *paraLevelName);
// Buffer for the current line read from BidiTest.txt.
char line[10000];
// Expected levels from the most recent @Levels: line.
// An 'x' in the file is stored as UBIDI_DEFAULT_LTR.
UBiDiLevel levels[1000];
// Bit 0 is set if any expected numeric level is even, bit 1 if any is odd.
uint32_t directionBits;
// Expected logical indexes in visual order from the most recent @Reorder: line.
int32_t ordering[1000];
// 1-based number of the line currently in the line buffer.
int32_t lineNumber;
// Number of valid entries in levels[].
int32_t levelsCount;
// Number of valid entries in ordering[].
int32_t orderingCount;
// Number of failures reported through printErrorLine();
// TestBidiTest() stops reading once this reaches 10.
int32_t errorCount;
// Input text built from the BiDi class names on the current data line,
// one stand-in character per class (see charFromBiDiClass).
UnicodeString inputString;
};
// Factory function: allocates a new BiDiConformanceTest and hands it back
// through the IntlTest base-class pointer. The caller receives a
// heap-allocated object.
extern IntlTest *createBiDiConformanceTest() {
    IntlTest *conformanceTest=new BiDiConformanceTest();
    return conformanceTest;
}
// Test-framework dispatcher for this suite.
// index: which test to select; exec: when true, the suite banner is logged;
// name: output, set to "" for an index with no test.
// par is not used in this function body.
void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par) {
if(exec) {
logln("TestSuite BiDiConformanceTest: ");
}
switch (index) {
// NOTE(review): TESTCASE is a macro defined outside this file (presumably
// intltest.h); it presumably expands to the case label, sets name and
// runs the test when exec is true -- confirm against its definition.
TESTCASE(0, TestBidiTest);
default:
// An empty name signals that there is no test with this index.
name="";
break; // needed to end the loop
}
}
// TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
char *BiDiConformanceTest::getUnidataPath(char path[]) {
IcuTestErrorCode errorCode(*this, "getUnidataPath");
const int kUnicodeDataTxtLength=15; // strlen("UnicodeData.txt")
// Look inside ICU_DATA first.
strcpy(path, pathToDataDirectory());
strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
FILE *f=fopen(path, "r");
if(f!=NULL) {
fclose(f);
*(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename.
return path;
}
// As a fallback, try to guess where the source data was located
// at the time ICU was built, and look there.
# ifdef U_TOPSRCDIR
strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data");
# else
strcpy(path, loadTestData(errorCode));
strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
U_FILE_SEP_STRING "data");
# endif
strcat(path, U_FILE_SEP_STRING);
strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
f=fopen(path, "r");
if(f!=NULL) {
fclose(f);
*(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename.
return path;
}
return NULL;
}
// Declares the LocalStdioFilePointer type used by TestBidiTest() to hold the
// FILE * returned by fopen().
// NOTE(review): the macro comes from outside this file (presumably
// unicode/localpointer.h) and presumably generates a scoped owner that calls
// fclose() on destruction -- confirm against the macro definition.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
// TODO: Make "public" in uparse.h.
// True if c is a space, tab, carriage return or line feed.
// The argument is evaluated up to four times, so it must not have side effects.
#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
// Parses the payload of an "@Levels:" line.
// start points just past "@Levels:"; the rest of the string is a
// whitespace-separated list of decimal level values and/or 'x' placeholders.
// Results:
//   levels[0..levelsCount-1]  one entry per token; 'x' is stored as
//                             UBIDI_DEFAULT_LTR, which checkLevels() and
//                             checkOrdering() treat as "do not compare".
//   directionBits             bit 0 set if any numeric level is even,
//                             bit 1 set if any numeric level is odd.
// Returns TRUE on success. Returns FALSE after logging an error if a token is
// malformed, a level is out of range, or the line has more tokens than
// levels[] can hold.
UBool BiDiConformanceTest::parseLevels(const char *start) {
    const int32_t levelsCapacity=(int32_t)(sizeof(levels)/sizeof(levels[0]));
    directionBits=0;
    levelsCount=0;
    while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
        // The line buffer can hold many more tokens than levels[] has slots;
        // refuse to write past the end of the array.
        if(levelsCount>=levelsCapacity) {
            errln("@Levels: too many values (more than %d)", (int)levelsCapacity);
            return FALSE;
        }
        if(*start=='x') {
            levels[levelsCount++]=UBIDI_DEFAULT_LTR;
            ++start;
        } else {
            char *end;
            uint32_t value=(uint32_t)strtoul(start, &end, 10);
            // Require at least one digit, a clean token end, and a level in range.
            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
                errln("@Levels: parse error at %s", start);
                return FALSE;
            }
            levels[levelsCount++]=(UBiDiLevel)value;
            // Even level -> bit 0, odd level -> bit 1.
            directionBits|=(1<<(value&1));
            start=end;
        }
    }
    return TRUE;
}
// Parses the payload of an "@Reorder:" line.
// start points just past "@Reorder:"; the rest of the string is a
// whitespace-separated list of decimal index values.
// Results: ordering[0..orderingCount-1] holds the parsed values in the order
// they appear on the line.
// Returns TRUE on success. Returns FALSE after logging an error if a token is
// malformed, a value is 1000 or greater, or the line has more tokens than
// ordering[] can hold.
UBool BiDiConformanceTest::parseOrdering(const char *start) {
    const int32_t orderingCapacity=(int32_t)(sizeof(ordering)/sizeof(ordering[0]));
    orderingCount=0;
    while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
        // The line buffer can hold many more tokens than ordering[] has slots;
        // refuse to write past the end of the array.
        if(orderingCount>=orderingCapacity) {
            errln("@Reorder: too many values (more than %d)", (int)orderingCapacity);
            return FALSE;
        }
        char *end;
        uint32_t value=(uint32_t)strtoul(start, &end, 10);
        // Require at least one digit, a clean token end, and a sane index value.
        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) {
            errln("@Reorder: parse error at %s", start);
            return FALSE;
        }
        ordering[orderingCount++]=(int32_t)value;
        start=end;
    }
    return TRUE;
}
// Stand-in characters for the BiDi classes, indexed by UCharDirection value.
// parseInputStringFromBiDiClasses() appends charFromBiDiClass[biDiClass] to the
// input string, and biDiConfUBiDiClassCallback() maps such a character back to
// its class by searching this table, so every entry must be unique.
// NOTE(review): values are written as hex code points rather than char
// literals, presumably so that they do not depend on the compiler's
// character set -- confirm before changing the notation.
static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
0x6c, // 'l' for L
0x52, // 'R' for R
0x33, // '3' for EN
0x2d, // '-' for ES
0x25, // '%' for ET
0x39, // '9' for AN
0x2c, // ',' for CS
0x2f, // '/' for B
0x5f, // '_' for S
0x20, // ' ' for WS
0x3d, // '=' for ON
0x65, // 'e' for LRE
0x6f, // 'o' for LRO
0x41, // 'A' for AL
0x45, // 'E' for RLE
0x4f, // 'O' for RLO
0x2a, // '*' for PDF
0x60, // '`' for NSM
0x7c // '|' for BN
};
U_CDECL_BEGIN
// Class callback installed via ubidi_setClassCallback() in TestBidiTest().
// Maps a stand-in character back to its BiDi class by a linear search of
// charFromBiDiClass[]; the table index is the UCharDirection value.
// context is not used.
// Returns U_BIDI_CLASS_DEFAULT for a character that is not in the table,
// which should not occur during testing.
static UCharDirection U_CALLCONV
biDiConfUBiDiClassCallback(const void *context, UChar32 c) {
    int classIndex=0;
    while(classIndex<U_CHAR_DIRECTION_COUNT) {
        if(charFromBiDiClass[classIndex]==c) {
            return (UCharDirection)classIndex;
        }
        ++classIndex;
    }
    // Not one of our stand-in characters.
    return U_BIDI_CLASS_DEFAULT;
}
U_CDECL_END
// Length of the short name of each BiDi class, indexed by UCharDirection
// value (same order as charFromBiDiClass[]). The extra last entry, at index
// U_CHAR_DIRECTION_COUNT, is 0 and is used when no class name was recognized.
static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
    1,  // L
    1,  // R
    2,  // EN
    2,  // ES
    2,  // ET
    2,  // AN
    2,  // CS
    1,  // B
    1,  // S
    2,  // WS
    2,  // ON
    3,  // LRE
    3,  // LRO
    2,  // AL
    3,  // RLE
    3,  // RLO
    3,  // PDF
    3,  // NSM
    2,  // BN
    0   // no class recognized
};
// Parses a whitespace-separated list of short BiDi class names (L, R, EN,
// LRE, ...) and builds inputString with one stand-in character from
// charFromBiDiClass[] per class name.
// start: in/out. On entry points at the class list; parsing stops at the end
//        of the string or at a ';', and start is left pointing there.
// Returns TRUE on success. Returns FALSE after logging an error if a name is
// not recognized or is not followed by whitespace, ';' or the end of the
// string.
//
// The names are matched character by character instead of being extracted and
// looked up with u_getPropertyValueEnum(UCHAR_BIDI_CLASS, ...), because the
// lookup makes this test take significantly more time.
UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
    inputString.remove();
    for(;;) {
        if(*start==0) {
            break;
        }
        start=u_skipWhitespace(start);
        if(*start==0 || *start==';') {
            break;
        }
        // U_CHAR_DIRECTION_COUNT means "nothing recognized yet".
        UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
        // Dispatch on the first letter, then look at as few further
        // characters as needed to pick a complete short class name.
        switch(start[0]) {
        case 'L':
            if(start[1]!='R') {
                biDiClass=U_LEFT_TO_RIGHT;
            } else if(start[2]=='E') {
                biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
            } else if(start[2]=='O') {
                biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
            }
            break;
        case 'R':
            if(start[1]!='L') {
                biDiClass=U_RIGHT_TO_LEFT;
            } else if(start[2]=='E') {
                biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
            } else if(start[2]=='O') {
                biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
            }
            break;
        case 'E':
            switch(start[1]) {
            case 'N':
                biDiClass=U_EUROPEAN_NUMBER;
                break;
            case 'S':
                biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
                break;
            case 'T':
                biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
                break;
            default:
                break;
            }
            break;
        case 'A':
            if(start[1]=='L') {
                biDiClass=U_RIGHT_TO_LEFT_ARABIC;
            } else if(start[1]=='N') {
                biDiClass=U_ARABIC_NUMBER;
            }
            break;
        case 'C':
            if(start[1]=='S') {
                biDiClass=U_COMMON_NUMBER_SEPARATOR;
            }
            break;
        case 'B':
            biDiClass= start[1]=='N' ? U_BOUNDARY_NEUTRAL : U_BLOCK_SEPARATOR;
            break;
        case 'S':
            biDiClass=U_SEGMENT_SEPARATOR;
            break;
        case 'W':
            if(start[1]=='S') {
                biDiClass=U_WHITE_SPACE_NEUTRAL;
            }
            break;
        case 'O':
            if(start[1]=='N') {
                biDiClass=U_OTHER_NEUTRAL;
            }
            break;
        case 'P':
            if(start[1]=='D' && start[2]=='F') {
                biDiClass=U_POP_DIRECTIONAL_FORMAT;
            }
            break;
        case 'N':
            if(start[1]=='S' && start[2]=='M') {
                biDiClass=U_DIR_NON_SPACING_MARK;
            }
            break;
        default:
            break;
        }
        // Make sure the matched name is a whole word and not merely the
        // prefix of something longer. For an unrecognized name the length
        // table yields 0, so the character examined is start[0] itself.
        const int8_t nameLength=biDiClassNameLengths[biDiClass];
        const char terminator=start[nameLength];
        const UBool endsCleanly=
            U_IS_INV_WHITESPACE(terminator) || terminator==';' || terminator==0;
        if(biDiClass==U_CHAR_DIRECTION_COUNT || !endsCleanly) {
            errln("BiDi class string not recognized at %s", start);
            return FALSE;
        }
        inputString.append(charFromBiDiClass[biDiClass]);
        start+=nameLength;
    }
    return TRUE;
}
// Runs the conformance test driven by BidiTest.txt in the source test data
// folder.
//
// The file is read line by line:
//   - '#' starts a comment; empty and comment-only lines are skipped.
//   - "@Levels:" and "@Reorder:" lines set the expected levels and ordering
//     for the data lines that follow; other "@..." lines are ignored.
//   - A data line is "<BiDi class names> ; <bitset>". The class names are
//     turned into an input string; for each of bits 0..2 set in the bitset
//     the string is processed with paragraph level auto/LTR, LTR or RTL
//     respectively, and the resulting levels and ordering are checked.
//
// A parse error or an ICU API failure aborts the test. Mismatches are
// reported through checkLevels()/checkOrdering(); reading stops once 10 such
// failures have been counted in errorCount.
void BiDiConformanceTest::TestBidiTest() {
    IcuTestErrorCode errorCode(*this, "TestBidiTest");
    const char *sourceTestDataPath=getSourceTestData(errorCode);
    if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
                                      "folder (getSourceTestData())")) {
        return;
    }
    static const char bidiTestFileName[]="BidiTest.txt";
    char bidiTestPath[400];
    // Make sure folder + file name + NUL fit into the fixed-size buffer.
    // sizeof(bidiTestFileName) already counts the terminating NUL.
    if(strlen(sourceTestDataPath)+sizeof(bidiTestFileName)>sizeof(bidiTestPath)) {
        errln("source test data path is too long (%d chars) to append BidiTest.txt",
              (int)strlen(sourceTestDataPath));
        return;
    }
    strcpy(bidiTestPath, sourceTestDataPath);
    strcat(bidiTestPath, bidiTestFileName);
    LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
    if(bidiTestFile.isNull()) {
        errln("unable to open %s", bidiTestPath);
        return;
    }
    LocalUBiDiPointer ubidi(ubidi_open());
    // Have UBiDi take its character classes from our stand-in characters.
    ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
                           NULL, NULL, errorCode);
    if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
        return;
    }
    lineNumber=0;
    levelsCount=0;
    orderingCount=0;
    errorCount=0;
    while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
        ++lineNumber;
        // Remove trailing comments and whitespace.
        char *commentStart=strchr(line, '#');
        if(commentStart!=NULL) {
            *commentStart=0;
        }
        u_rtrim(line);
        const char *start=u_skipWhitespace(line);
        if(*start==0) {
            continue;  // Skip empty and comment-only lines.
        }
        if(*start=='@') {
            ++start;
            if(0==strncmp(start, "Levels:", 7)) {
                if(!parseLevels(start+7)) {
                    return;
                }
            } else if(0==strncmp(start, "Reorder:", 8)) {
                if(!parseOrdering(start+8)) {
                    return;
                }
            }
            // Skip unknown @Xyz: ...
        } else {
            if(!parseInputStringFromBiDiClasses(start)) {
                return;
            }
            start=u_skipWhitespace(start);
            if(*start!=';') {
                errln("missing ; separator on input line %s", line);
                return;
            }
            start=u_skipWhitespace(start+1);
            char *end;
            uint32_t bitset=(uint32_t)strtoul(start, &end, 10);
            if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
                errln("input bitset parse error at %s", start);
                return;
            }
            // Loop over the bitset.
            // Bit i selects paraLevels[i] as the paragraph level to test.
            static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1 };
            static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL" };
            for(int i=0; i<=2; ++i) {
                if(bitset&(1<<i)) {
                    ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
                                  paraLevels[i], NULL, errorCode);
                    const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
                    if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
                        errln("Input line %d: %s", (int)lineNumber, line);
                        return;
                    }
                    if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
                                    paraLevelNames[i])) {
                        // continue outerLoop; does not exist in C++
                        // so just break out of the inner loop.
                        break;
                    }
                    if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
                        // continue outerLoop; does not exist in C++
                        // so just break out of the inner loop.
                        break;
                    }
                }
            }
        }
    }
}
// Returns the character used to display one level value in error output:
// the character at code point 0x30 ('0') plus the level for levels below
// UBIDI_DEFAULT_LTR, and 'x' for anything else.
static UChar printLevel(UBiDiLevel level) {
    return level<UBIDI_DEFAULT_LTR ? (UChar)(0x30+level) : (UChar)0x78;  // 0x78 is 'x'
}
// Summarizes which level parities occur in actualLevels[0..actualCount-1]:
// bit 0 of the result is set if any level is even, bit 1 if any level is odd.
// Returns 0 when actualCount is not positive.
static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
    uint32_t bits=0;
    // The result is an OR over all entries, so the visiting order is irrelevant.
    int32_t remaining=actualCount;
    while(remaining>0) {
        --remaining;
        bits|=(1<<(actualLevels[remaining]&1));
    }
    return bits;
}
// Compares the levels computed by UBiDi with the expected levels[] from the
// most recent @Levels: line.
// actualLevels/actualCount: the levels returned by UBiDi and their number.
// paraLevelName: name of the paragraph level under test, for error output.
// Expected entries that are >= UBIDI_DEFAULT_LTR (the 'x' placeholders) are
// not compared.
// Returns TRUE if the levels are acceptable; otherwise logs the details,
// counts the failure via printErrorLine() and returns FALSE.
UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
                                       const char *paraLevelName) {
    UBool levelsMatch=TRUE;
    if(levelsCount==actualCount) {
        for(int32_t i=0; i<actualCount; ++i) {
            if(levels[i]>=UBIDI_DEFAULT_LTR || levels[i]==actualLevels[i]) {
                continue;  // Placeholder or matching value.
            }
            // The first real difference decides the outcome.
            // ICU may use a shortcut:
            // For unidirectional text it does not store the resolved levels
            // but just returns all levels as the paragraph level 0 or 1.
            // The reordering result is the same, so that is fine as long as
            // expected and actual levels agree on the single direction.
            if(directionBits==3 || directionBits!=getDirectionBits(actualLevels, actualCount)) {
                errln("Wrong level value at index %d; expected %d actual %d",
                      (int)i, levels[i], actualLevels[i]);
                levelsMatch=FALSE;
            }
            break;
        }
    } else {
        errln("Wrong number of level values; expected %d actual %d",
              (int)levelsCount, (int)actualCount);
        levelsMatch=FALSE;
    }
    if(levelsMatch) {
        return TRUE;
    }
    // Failure: show the context and both level sequences.
    printErrorLine(paraLevelName);
    UnicodeString expectedText("Expected levels: ");
    for(int32_t e=0; e<levelsCount; ++e) {
        expectedText.append((UChar)0x20);
        expectedText.append(printLevel(levels[e]));
    }
    UnicodeString actualText("Actual levels: ");
    for(int32_t a=0; a<actualCount; ++a) {
        actualText.append((UChar)0x20);
        actualText.append(printLevel(actualLevels[a]));
    }
    errln(expectedText);
    errln(actualText);
    return FALSE;
}
// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
// does not work for custom BiDi class assignments
// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
// Therefore we just skip the indexes for BiDi controls while comparing
// with the expected ordering that has them omitted.
// Compares the visual ordering computed by UBiDi with the expected ordering[]
// from the most recent @Reorder: line.
// ubidi: the UBiDi object on which the paragraph has already been set.
// paraLevelName: name of the paragraph level under test, for error output.
// Positions whose expected level is >= UBIDI_DEFAULT_LTR (the 'x'
// placeholders) are skipped, since they are omitted from the expected ordering.
// Returns TRUE if the ordering matches. Returns FALSE after logging if
// ubidi_getLogicalIndex() fails, or, with full details and a counted failure
// via printErrorLine(), if the ordering differs.
UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
    IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
    const int32_t resultLength=ubidi_getResultLength(ubidi);  // visual length including BiDi controls
    UBool orderingMatches=TRUE;
    int32_t visualIndex=0;  // visual position, not counting skipped BiDi controls
    // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
    // and loop over each run's indexes, but that seems unnecessary for this test code.
    for(int32_t resultIndex=0; orderingMatches && resultIndex<resultLength; ++resultIndex) {
        const int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, resultIndex, errorCode);
        if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
            errln("Input line %d: %s", (int)lineNumber, line);
            return FALSE;
        }
        if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
            // Not a BiDi control: it must line up with the expected ordering.
            if(visualIndex<orderingCount && ordering[visualIndex]!=logicalIndex) {
                errln("Wrong ordering value at visual index %d; expected %d actual %d",
                      (int)visualIndex, ordering[visualIndex], logicalIndex);
                orderingMatches=FALSE;
            } else {
                ++visualIndex;
            }
        }
    }
    // visualIndex is now the visual length minus the BiDi controls,
    // which should match the length of the BidiTest.txt ordering.
    if(orderingMatches && visualIndex!=orderingCount) {
        errln("Wrong number of ordering values; expected %d actual %d",
              (int)orderingCount, (int)visualIndex);
        orderingMatches=FALSE;
    }
    if(orderingMatches) {
        return TRUE;
    }
    // Failure: show the context and both orderings.
    printErrorLine(paraLevelName);
    UnicodeString expectedText("Expected ordering: ");
    for(int32_t e=0; e<orderingCount; ++e) {
        expectedText.append((UChar)0x20);
        expectedText.append((UChar)(0x30+ordering[e]));
    }
    UnicodeString actualText("Actual ordering: ");
    for(int32_t a=0; a<resultLength; ++a) {
        const int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, a, errorCode);
        if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
            actualText.append((UChar)0x20);
            actualText.append((UChar)(0x30+logicalIndex));
        }
    }
    errln(expectedText);
    errln(actualText);
    return FALSE;
}
void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
++errorCount;
errln("Input line %5d: %s", (int)lineNumber, line);
errln(UnicodeString("Input string: ")+inputString);
errln("Para level: %s", paraLevelName);
}