// C++ source  |  141 lines  |  4.66 KB

/*
**********************************************************************
* Copyright (C) 2011-2011, International Business Machines Corporation 
* and others.  All Rights Reserved.
**********************************************************************
************************************************************************
*   Date          Name        Description
*   05/14/2011    grhoten     Creation.
************************************************************************/

#include "unicode/utypes.h"

#if !UCONFIG_NO_BREAK_ITERATION

#include "dicttest.h"
#include "textfile.h"
#include "uvector.h"
#include "unicode/rbbi.h"

void DictionaryWordTest::TestThaiBreaks() {
    UErrorCode status=U_ZERO_ERROR;
    BreakIterator* b;
    Locale locale = Locale("th");
    int32_t p, index;
    UChar c[]= { 
            0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B, 
            0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19, 
            0x0E16, 0x0E49, 0x0E33, 0x0000
    };
    int32_t expectedWordResult[] = {
            2, 3, 6, 10, 11, 15, 17, 20, 22
    };
    int32_t expectedLineResult[] = {
            3, 6, 11, 15, 17, 20, 22
    };

    int32_t size = u_strlen(c);
    UnicodeString text=UnicodeString(c);
    
    b = BreakIterator::createWordInstance(locale, status);
    if (U_FAILURE(status)) {
        errcheckln(status, "Unable to create thai word break iterator. - %s", u_errorName(status));
        return;
    }
    b->setText(text);
    p = index = 0;
    while ((p=b->next())!=BreakIterator::DONE && p < size) {
        if (p != expectedWordResult[index++]) {
            errln("Incorrect break given by thai word break iterator. Expected: %d  Got: %d", expectedWordResult[index-1], p);
        }
    }
    delete b;
    
    b = BreakIterator::createLineInstance(locale, status);
    if (U_FAILURE(status)) {
        errln("Unable to create thai line break iterator.");
        return;
    }
    b->setText(text);
    p = index = 0;
    while ((p=b->next())!=BreakIterator::DONE && p < size) {
        if (p != expectedLineResult[index++]) {
            errln("Incorrect break given by thai line break iterator. Expected: %d  Got: %d", expectedLineResult[index-1], p);
        }
    }

    delete b;
}

// Name of the data file opened by TestWordBoundaries(). Each non-comment line
// is a phrase in which '|' (U+007C) marks an expected word boundary.
#define DICTIONARY_TEST_FILE "wordsegments.txt"

void DictionaryWordTest::TestWordBoundaries() {
    UErrorCode      status  = U_ZERO_ERROR;

    TextFile phrases(DICTIONARY_TEST_FILE, "UTF8", status);
    if (U_FAILURE(status)) {
        dataerrln("Can't open "DICTIONARY_TEST_FILE": %s; skipping test",
              u_errorName(status));
        return;
    }

    // Due to how the word break iterator works,
    // scripts for languages that use no spaces should use the correct dictionary by default.
    BreakIterator *wb = BreakIterator::createWordInstance("en", status);
    if (U_FAILURE(status)) {
        dataerrln("Word break iterator can not be opened: %s; skipping test",
              u_errorName(status));
        return;
    }

    int32_t pos, pIdx;
    int32_t testLines = 0;
    UnicodeString phrase;
    while (phrases.readLineSkippingComments(phrase, status, FALSE) && U_SUCCESS(status)) {
        UVector breaks(status);

        for (pIdx = 0; pIdx < phrase.length(); pIdx++) {
            if (phrase.charAt(pIdx) == 0x007C /* | */) {
                breaks.addElement(pIdx, status);
                phrase.remove(pIdx, 1);
            }
        }
        breaks.addElement(pIdx, status);

        wb->setText(phrase);
        int32_t brkArrPos = 0;
        while ((pos=wb->next())!=BreakIterator::DONE) {
            int32_t expectedPos = breaks.elementAti(brkArrPos);
            if (expectedPos != pos) {
                errln("Incorrect forward word break on line %d. Expected: %d  Got: %d",
                    phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos);
            }
            brkArrPos++;
        }
        brkArrPos = breaks.size() - 1;
        while ((pos=wb->previous())!=BreakIterator::DONE) {
            brkArrPos--;
            int32_t expectedPos = breaks.elementAti(brkArrPos);
            if (expectedPos != pos) {
                errln("Incorrect backward word break on line %d. Expected: %d  Got: %d",
                    phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos);
            }
        }
        testLines++;
    }
    delete wb;
    logln("%d tests were run.", testLines);
}

/**
 * Test dispatcher for this suite.
 *
 * @param index  selects which test case is addressed
 * @param exec   when true, the suite banner is logged here
 * @param name   reference through which a test name is handed back
 *               (presumably assigned inside the TESTCASE_AUTO macros - confirm)
 *
 * The last parameter is unnamed and unused in this body.
 * NOTE(review): the TESTCASE_AUTO* macros are defined outside this file; they
 * appear to map consecutive index values to the listed tests in the order
 * written, so reordering the lines below would renumber the tests - confirm.
 */
void DictionaryWordTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
{
    if (exec) logln("TestSuite DictionaryWordTest: ");
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestThaiBreaks);
    TESTCASE_AUTO(TestWordBoundaries);
    TESTCASE_AUTO_END;
}


#endif