// C++ source  |  484 lines  |  16.72 KB

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2001-2014 IBM and others. All rights reserved.
**********************************************************************
*   Date        Name        Description
*  03/22/2000   helena      Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION

#include "unicode/stsearch.h"
#include "usrchimp.h"
#include "cmemory.h"

U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)

// public constructors and destructors -----------------------------------

/**
 * Builds a searcher for `pattern` inside `text`, opening the underlying
 * search handle for the collation of `locale`.
 *
 * If `status` already signals failure on entry, nothing is opened and
 * m_strsrch_ is left NULL. Otherwise the handle is opened, the m_search_
 * block created by the SearchIterator base class is freed, and m_search_ is
 * re-pointed at the handle's own `search` member on success (NULL on
 * failure).
 *
 * @param pattern   text to look for; copied into m_pattern_
 * @param text      text to search in; handed to the SearchIterator base
 * @param locale    locale whose name is passed to usearch_open
 * @param breakiter optional break iterator, forwarded to base and handle
 * @param status    in/out error code
 */
StringSearch::StringSearch(const UnicodeString &pattern,
                           const UnicodeString &text,
                           const Locale        &locale,
                                 BreakIterator *breakiter,
                                 UErrorCode    &status) :
                           SearchIterator(text, breakiter),
                           m_pattern_(pattern)
{
    m_strsrch_ = NULL;

    if (U_SUCCESS(status)) {
        m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                                  m_text_.getBuffer(), m_text_.length(),
                                  locale.getName(), (UBreakIterator *)breakiter,
                                  &status);

        // m_search_ was created by the base SearchIterator class; release it
        // and share the handle's search data instead (if the open succeeded).
        uprv_free(m_search_);
        m_search_ = U_SUCCESS(status) ? m_strsrch_->search : NULL;
    }
}

/**
 * Builds a searcher for `pattern` inside `text` using an explicit collator.
 *
 * Leaves m_strsrch_ NULL and returns early when `status` is already a
 * failure, or when `coll` is NULL (in which case `status` is set to
 * U_ILLEGAL_ARGUMENT_ERROR). Otherwise the handle is opened from the
 * collator, the m_search_ block created by the SearchIterator base class is
 * freed, and m_search_ is re-pointed at the handle's `search` member when
 * the open succeeded.
 *
 * @param pattern   text to look for; copied into m_pattern_
 * @param text      text to search in; handed to the SearchIterator base
 * @param coll      collator to search with; must not be NULL
 * @param breakiter optional break iterator, forwarded to base and handle
 * @param status    in/out error code
 */
StringSearch::StringSearch(const UnicodeString     &pattern,
                           const UnicodeString     &text,
                                 RuleBasedCollator *coll,
                                 BreakIterator     *breakiter,
                                 UErrorCode        &status) :
                           SearchIterator(text, breakiter),
                           m_pattern_(pattern)
{
    m_strsrch_ = NULL;

    if (U_FAILURE(status)) {
        return;
    }
    if (coll == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          coll->toUCollator(),
                                          (UBreakIterator *)breakiter,
                                          &status);

    // m_search_ was created by the base SearchIterator class; it is replaced
    // by the handle's own search data below.
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_FAILURE(status)) {
        return;
    }
    m_search_ = m_strsrch_->search;
}

/**
 * Builds a searcher for `pattern` over the text supplied through a
 * CharacterIterator, using the collation of `locale`.
 *
 * The iterator is handed to the SearchIterator base class; this body then
 * searches over m_text_. If `status` is already a failure nothing is opened
 * and m_strsrch_ is NULL. Otherwise the handle is opened, the base-created
 * m_search_ block is freed, and m_search_ is re-pointed at the handle's
 * `search` member on success (NULL on failure).
 *
 * @param pattern   text to look for; copied into m_pattern_
 * @param text      iterator over the text to search in
 * @param locale    locale whose name is passed to usearch_open
 * @param breakiter optional break iterator, forwarded to base and handle
 * @param status    in/out error code
 */
StringSearch::StringSearch(const UnicodeString     &pattern,
                                 CharacterIterator &text,
                           const Locale            &locale,
                                 BreakIterator     *breakiter,
                                 UErrorCode        &status) :
                           SearchIterator(text, breakiter),
                           m_pattern_(pattern)
{
    m_strsrch_ = NULL;
    if (U_FAILURE(status)) {
        return;
    }

    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                              m_text_.getBuffer(), m_text_.length(),
                              locale.getName(), (UBreakIterator *)breakiter,
                              &status);

    // Swap the base-class search data for the one owned by the new handle.
    uprv_free(m_search_);
    m_search_ = U_SUCCESS(status) ? m_strsrch_->search : NULL;
}

/**
 * Builds a searcher for `pattern` over the text supplied through a
 * CharacterIterator, using an explicit collator.
 *
 * Returns early with m_strsrch_ == NULL when `status` is already a failure,
 * or when `coll` is NULL (`status` becomes U_ILLEGAL_ARGUMENT_ERROR).
 * Otherwise the handle is opened from the collator over m_text_, the
 * base-created m_search_ block is freed, and m_search_ is re-pointed at the
 * handle's `search` member when the open succeeded.
 *
 * @param pattern   text to look for; copied into m_pattern_
 * @param text      iterator over the text to search in
 * @param coll      collator to search with; must not be NULL
 * @param breakiter optional break iterator, forwarded to base and handle
 * @param status    in/out error code
 */
StringSearch::StringSearch(const UnicodeString     &pattern,
                                 CharacterIterator &text,
                                 RuleBasedCollator *coll,
                                 BreakIterator     *breakiter,
                                 UErrorCode        &status) :
                           SearchIterator(text, breakiter),
                           m_pattern_(pattern)
{
    m_strsrch_ = NULL;

    if (U_FAILURE(status)) {
        return;
    }
    if (coll == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          coll->toUCollator(),
                                          (UBreakIterator *)breakiter,
                                          &status);

    // The base-class search data is no longer needed once the handle exists.
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_SUCCESS(status)) {
        m_search_ = m_strsrch_->search;
    }
}

/**
 * Copy constructor.
 *
 * Copies the text, break iterator pointer and pattern from `that`, then
 * opens a fresh search handle over the copied buffers using the collator
 * held by `that`'s handle. If `that` has no handle (m_strsrch_ == NULL) the
 * copy has none either. The m_search_ block created by the SearchIterator
 * base class is always freed; m_search_ ends up pointing at the new
 * handle's `search` member only when the open succeeded.
 *
 * Errors are tracked in a local status and are not reported to the caller.
 */
StringSearch::StringSearch(const StringSearch &that) :
                       SearchIterator(that.m_text_, that.m_breakiterator_),
                       m_pattern_(that.m_pattern_)
{
    // Free m_search_ from the superclass
    uprv_free(m_search_);
    m_search_  = NULL;
    m_strsrch_ = NULL;

    if (that.m_strsrch_ != NULL) {
        // Make a deep copy: a separate handle over this object's own buffers.
        UErrorCode status = U_ZERO_ERROR;
        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                              m_pattern_.length(),
                                              m_text_.getBuffer(),
                                              m_text_.length(),
                                              that.m_strsrch_->collator,
                                             (UBreakIterator *)that.m_breakiterator_,
                                              &status);
        if (U_SUCCESS(status)) {
            m_search_ = m_strsrch_->search;
        }
    }
    // else: the source was not a good object, so neither is this copy.
}

/**
 * Destructor. Closes the search handle if one was opened and clears
 * m_search_, which pointed at data belonging to that handle. When no handle
 * exists, m_search_ is left untouched.
 */
StringSearch::~StringSearch()
{
    if (m_strsrch_ == NULL) {
        return;
    }
    usearch_close(m_strsrch_);
    // m_search_ pointed at the handle's search data; don't keep the pointer.
    m_search_ = NULL;
}

/**
 * Returns a heap-allocated copy of this object made with the copy
 * constructor. The result of `new` is returned as-is.
 */
StringSearch *
StringSearch::clone() const {
    StringSearch *duplicate = new StringSearch(*this);
    return duplicate;
}

// operator overloading ---------------------------------------------
/**
 * Assignment operator.
 *
 * Copies text, break iterator pointer and pattern from `that`, discards the
 * current search handle and opens a new one over the copied buffers using
 * the collator of `that`'s handle.
 *
 * Nothing is done when `that` is this very object, or when both objects own
 * a handle and already compare equal.
 *
 * If `that` owns no handle (its construction failed), this object ends up
 * without one as well, mirroring the copy constructor. If re-opening fails,
 * m_strsrch_ and m_search_ are both NULL. Errors are tracked in a local
 * status and are not reported to the caller.
 */
StringSearch & StringSearch::operator=(const StringSearch &that)
{
    if (this == &that) {
        return *this;
    }
    // The equality short-cut is only evaluated when both handles exist,
    // because operator== reads through m_strsrch_.
    if (m_strsrch_ != NULL && that.m_strsrch_ != NULL && !((*this) != that)) {
        return *this;
    }

    m_text_          = that.m_text_;
    m_breakiterator_ = that.m_breakiterator_;
    m_pattern_       = that.m_pattern_;

    // all m_search_ in the parent class is linked up with m_strsrch_, so
    // closing the handle leaves m_search_ stale; it must be cleared before
    // anything else can observe (or free) it.
    if (m_strsrch_ != NULL) {
        usearch_close(m_strsrch_);
        m_strsrch_ = NULL;
    }
    else if (m_search_ != NULL) {
        // No handle: m_search_ is still the block created by the base class
        // (the constructors release it the same way).
        uprv_free(m_search_);
    }
    m_search_ = NULL;

    if (that.m_strsrch_ != NULL) {
        UErrorCode status = U_ZERO_ERROR;
        // NOTE(review): the break iterator argument is NULL here although
        // m_breakiterator_ was copied above and the copy constructor passes
        // it through - confirm whether that difference is intended.
        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                              m_pattern_.length(),
                                              m_text_.getBuffer(),
                                              m_text_.length(),
                                              that.m_strsrch_->collator,
                                              NULL, &status);
        // Check null pointer
        if (m_strsrch_ != NULL) {
            m_search_ = m_strsrch_->search;
        }
    }
    return *this;
}

/**
 * Equality test.
 *
 * Two objects are equal when they are the same object, or when the
 * SearchIterator base comparison succeeds, the patterns are equal and both
 * handles refer to the same collator (pointer comparison).
 *
 * A handle may be missing after a failed construction; two objects without
 * a handle compare equal here (given equal base state and pattern), and an
 * object with a handle never equals one without.
 *
 * @param that object to compare with; treated as a StringSearch once the
 *             base comparison has succeeded
 * @return TRUE if equal, FALSE otherwise
 */
UBool StringSearch::operator==(const SearchIterator &that) const
{
    if (this == &that) {
        return TRUE;
    }
    if (!SearchIterator::operator ==(that)) {
        return FALSE;
    }

    const StringSearch &other = static_cast<const StringSearch &>(that);
    if (!(m_pattern_ == other.m_pattern_)) {
        return FALSE;
    }
    if (m_strsrch_ == NULL || other.m_strsrch_ == NULL) {
        // Never read through a missing handle.
        return m_strsrch_ == other.m_strsrch_;
    }
    return m_strsrch_->collator == other.m_strsrch_->collator;
}

// public get and set methods ----------------------------------------

/**
 * Sets the search offset by delegating to usearch_setOffset on the
 * underlying handle.
 *
 * @param position new offset, passed through unchanged
 * @param status   in/out error code; not inspected here
 */
void StringSearch::setOffset(int32_t position, UErrorCode &status)
{
    // status checked in usearch_setOffset
    usearch_setOffset(m_strsrch_, position, &status);
}

/**
 * Returns the current offset as reported by usearch_getOffset for the
 * underlying handle.
 */
int32_t StringSearch::getOffset(void) const
{
    const int32_t currentOffset = usearch_getOffset(m_strsrch_);
    return currentOffset;
}

void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        m_text_ = text;
        usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
    }
}

/**
 * Replaces the text to be searched with the text of a CharacterIterator.
 *
 * The iterator's text is extracted into m_text_ via getText(), and the
 * handle is then pointed at m_text_'s buffer.
 *
 * @param text   iterator supplying the new text
 * @param status in/out error code; nothing happens if it is already a failure
 */
void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    text.getText(m_text_);
    usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
}

/**
 * Returns the collator held by the underlying handle, converted to a
 * RuleBasedCollator through RuleBasedCollator::rbcFromUCollator.
 *
 * Constness is cast away on the way out; it would be cleaner if this const
 * method returned a const collator.
 */
RuleBasedCollator * StringSearch::getCollator() const
{
    const UCollator *handleCollator = m_strsrch_->collator;
    return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(handleCollator));
}

/**
 * Switches the search to a different collator.
 *
 * A NULL collator is rejected with U_ILLEGAL_ARGUMENT_ERROR, the same way
 * the collator-taking constructors reject it, instead of being dereferenced.
 *
 * @param coll   collator to use from now on; must not be NULL
 * @param status in/out error code; nothing happens if it is already a failure
 */
void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    if (coll == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
}

void StringSearch::setPattern(const UnicodeString &pattern,
                                    UErrorCode    &status)
{
    if (U_SUCCESS(status)) {
        m_pattern_ = pattern;
        usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
                           &status);
    }
}

/**
 * Returns a reference to the pattern stored in this object (m_pattern_).
 */
const UnicodeString & StringSearch::getPattern() const
{
    const UnicodeString &storedPattern = m_pattern_;
    return storedPattern;
}

// public methods ----------------------------------------------------

/**
 * Resets the search by delegating to usearch_reset on the underlying
 * handle; no other state is touched here.
 */
void StringSearch::reset()
{
    usearch_reset(m_strsrch_);
}

/**
 * Creates a new StringSearch with the same pattern, text, collator and
 * break iterator, then carries over the current offset and match position.
 *
 * @return the clone, or NULL when this object has no search handle, when
 *         allocation fails, or when constructing or positioning the clone
 *         fails. A partially built clone is deleted before NULL is returned,
 *         so nothing is leaked on the failure paths.
 */
SearchIterator * StringSearch::safeClone(void) const
{
    if (m_strsrch_ == NULL) {
        // Nothing to clone from: collator and match state live in the handle.
        return NULL;
    }

    UErrorCode status = U_ZERO_ERROR;
    StringSearch *result = new StringSearch(m_pattern_, m_text_,
                                            getCollator(),
                                            m_breakiterator_,
                                            status);
    /* test for NULL */
    if (result == NULL) {
        return NULL;
    }

    if (U_SUCCESS(status)) {
        result->setOffset(getOffset(), status);
    }
    if (U_FAILURE(status)) {
        // Construction or positioning failed; do not hand out (or leak) a
        // half-initialised object.
        delete result;
        return NULL;
    }

    result->setMatchStart(m_strsrch_->search->matchedIndex);
    result->setMatchLength(m_strsrch_->search->matchedLength);
    return result;
}

// protected method -------------------------------------------------

/**
 * Finds the next match at or after `position`.
 *
 * When the pattern's cesLength is 0, a zero-length match is recorded one
 * position after the previous match (or at the current offset if there was
 * none), until the end of the text is reached. Otherwise the search is
 * delegated to usearch_handleNextCanonical or usearch_handleNextExact,
 * depending on m_search_->isCanonicalMatch.
 *
 * @param position offset to start from; already in the pre-shift position
 * @param status   in/out error code
 * @return m_search_->matchedIndex after the search, i.e. the start of the
 *         match or USEARCH_DONE when there is none; USEARCH_DONE as well
 *         whenever `status` is (or becomes) a failure
 */
int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
{
    // values passed here are already in the pre-shift position
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    if (m_strsrch_->pattern.cesLength == 0) {
        m_search_->matchedIndex =
                                m_search_->matchedIndex == USEARCH_DONE ?
                                getOffset() : m_search_->matchedIndex + 1;
        m_search_->matchedLength = 0;
        ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
                       &status);
        if (m_search_->matchedIndex == m_search_->textLength) {
            m_search_->matchedIndex = USEARCH_DONE;
        }
        if (U_FAILURE(status)) {
            return USEARCH_DONE;
        }
        // Report the recorded match position, as handlePrev() does for the
        // same case, rather than an unconditional USEARCH_DONE.
        return m_search_->matchedIndex;
    }

    // looking at usearch.cpp, this part is shifted out to
    // StringSearch instead of SearchIterator because m_strsrch_ is
    // not accessible in SearchIterator
    if (m_search_->matchedLength <= 0) {
        // the flipping direction issue has already been handled
        // in next()
        // for boundary check purposes. this will ensure that the
        // next match will not precede the current offset
        // note search->matchedIndex will always be set to something
        // in the code
        m_search_->matchedIndex = position - 1;
    }

    ucol_setOffset(m_strsrch_->textIter, position, &status);

    // if m_strsrch_->breakIter is always the same as m_breakiterator_
    // then we don't need to check the match boundaries here because
    // usearch_handleNextXXX will already have done it.
    if (m_search_->isCanonicalMatch) {
        // *could* actually use exact here 'cause no extra accents allowed...
        usearch_handleNextCanonical(m_strsrch_, &status);
    } else {
        usearch_handleNextExact(m_strsrch_, &status);
    }

    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    // Leave the text iterator at the match, or at the end of the text when
    // nothing was found.
    if (m_search_->matchedIndex == USEARCH_DONE) {
        ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
    } else {
        ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
    }

    return m_search_->matchedIndex;
}

/**
 * Finds the previous match before `position`.
 *
 * When the pattern's cesLength is 0, a zero-length match is recorded one
 * position before the previous match (or before the current offset if there
 * was none); once index 0 has been reached, setMatchNotFound() is called
 * instead. Otherwise the search is delegated to
 * usearch_handlePreviousCanonical or usearch_handlePreviousExact, depending
 * on m_search_->isCanonicalMatch.
 *
 * @param position offset to start from; already in the pre-shift position
 * @param status   in/out error code
 * @return m_search_->matchedIndex after the search; USEARCH_DONE when
 *         `status` is a failure on entry or after the delegated search
 */
int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
{
    // values passed here are already in the pre-shift position
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }

    if (m_strsrch_->pattern.cesLength != 0) {
        // looking at usearch.cpp, this part is shifted out to
        // StringSearch instead of SearchIterator because m_strsrch_ is
        // not accessible in SearchIterator
        ucol_setOffset(m_strsrch_->textIter, position, &status);

        if (m_search_->isCanonicalMatch) {
            // *could* use exact match here since extra accents *not* allowed!
            usearch_handlePreviousCanonical(m_strsrch_, &status);
        } else {
            usearch_handlePreviousExact(m_strsrch_, &status);
        }

        if (U_FAILURE(status)) {
            return USEARCH_DONE;
        }
        return m_search_->matchedIndex;
    }

    // cesLength == 0: step backwards one position at a time.
    if (m_search_->matchedIndex == USEARCH_DONE) {
        m_search_->matchedIndex = getOffset();
    }

    if (m_search_->matchedIndex == 0) {
        setMatchNotFound();
    }
    else {
        m_search_->matchedIndex --;
        ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
                       &status);
        m_search_->matchedLength = 0;
    }

    return m_search_->matchedIndex;
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */