/*
**********************************************************************
*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
**********************************************************************
*   Date        Name        Description
*  03/22/2000   helena      Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION

#include "unicode/brkiter.h"
#include "unicode/schriter.h"
#include "unicode/search.h"
#include "usrchimp.h"
#include "cmemory.h"

// public constructors and destructors -----------------------------------
U_NAMESPACE_BEGIN

/**
 * Copy constructor.
 * Copies the break iterator pointer, the text and every field of the
 * search state, including the direction and reset flags that next() and
 * previous() consult.
 * NOTE(review): the uprv_malloc result is not checked; this constructor has
 * no way to report an allocation failure.
 * @param other the iterator to copy
 */
SearchIterator::SearchIterator(const SearchIterator &other)
    : UObject(other)
{
    m_breakiterator_            = other.m_breakiterator_;
    m_text_                     = other.m_text_;
    m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter        = other.m_search_->breakIter;
    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
    m_search_->isOverlap        = other.m_search_->isOverlap;
    // The struct comes straight from uprv_malloc, so these two flags must be
    // set explicitly; next() and previous() read both of them.
    m_search_->isForwardSearching = other.m_search_->isForwardSearching;
    m_search_->reset            = other.m_search_->reset;
    m_search_->matchedIndex     = other.m_search_->matchedIndex;
    m_search_->matchedLength    = other.m_search_->matchedLength;
    // Point at our own copy of the text (as the other constructors and
    // setText do) so the pointer does not depend on other's lifetime.
    m_search_->text             = m_text_.getBuffer();
    m_search_->textLength       = m_text_.length();
}

/**
 * Destructor. Releases the search state if one is still attached.
 */
SearchIterator::~SearchIterator()
{
    if (m_search_ != NULL) {
        uprv_free(m_search_);
    }
}

// public get and set methods ----------------------------------------

/**
 * Sets a search attribute.
 * @param attribute USEARCH_OVERLAP or USEARCH_CANONICAL_MATCH; anything else
 *                  sets status to U_ILLEGAL_ARGUMENT_ERROR
 * @param value     USEARCH_ON switches the attribute on, every other value
 *                  switches it off; USEARCH_ATTRIBUTE_VALUE_COUNT additionally
 *                  sets status to U_ILLEGAL_ARGUMENT_ERROR
 * @param status    in/out error code; nothing is modified if it already
 *                  indicates a failure
 */
void SearchIterator::setAttribute(USearchAttribute       attribute,
                                  USearchAttributeValue  value,
                                  UErrorCode            &status)
{
    if (U_SUCCESS(status)) {
        switch (attribute)
        {
        case USEARCH_OVERLAP :
            m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
            break;
        case USEARCH_CANONICAL_MATCH :
            m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
            break;
        default:
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
    }
    // This check runs regardless of the incoming status.
    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
}

/**
 * Returns the current value of a search attribute.
 * @param attribute the attribute to query
 * @return USEARCH_ON or USEARCH_OFF for USEARCH_OVERLAP and
 *         USEARCH_CANONICAL_MATCH, USEARCH_DEFAULT for any other attribute
 */
USearchAttributeValue SearchIterator::getAttribute(
                                          USearchAttribute  attribute) const
{
    switch (attribute) {
    case USEARCH_OVERLAP :
        return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
    case USEARCH_CANONICAL_MATCH :
        return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
                                                      USEARCH_OFF);
    default :
        return USEARCH_DEFAULT;
    }
}

/**
 * @return the stored start index of the current match (USEARCH_DONE once
 *         setMatchNotFound() has been called)
 */
int32_t SearchIterator::getMatchedStart() const
{
    return m_search_->matchedIndex;
}

/**
 * @return the stored length of the current match (0 once
 *         setMatchNotFound() has been called)
 */
int32_t SearchIterator::getMatchedLength() const
{
    return m_search_->matchedLength;
}

/**
 * Copies the currently matched text into result.
 * @param result receives the matched substring; it is emptied when the
 *               match index is USEARCH_DONE or the match length is 0
 */
void SearchIterator::getMatchedText(UnicodeString &result) const
{
    int32_t matchedindex  = m_search_->matchedIndex;
    int32_t matchedlength = m_search_->matchedLength;
    if (matchedindex != USEARCH_DONE && matchedlength != 0) {
        result.setTo(m_search_->text + matchedindex, matchedlength);
    }
    else {
        result.remove();
    }
}

/**
 * Sets the break iterator used by this search iterator.
 * @param breakiter the break iterator to use; stored as is, not copied
 * @param status    in/out error code; nothing happens if it already
 *                  indicates a failure
 */
void SearchIterator::setBreakIterator(BreakIterator *breakiter,
                                      UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        // Well, gee... the Constructors that take a BreakIterator
        // all cast the BreakIterator to a UBreakIterator and
        // pass it to the corresponding usearch_openFromXXX
        // routine, so there's no reason not to do this.
        //
        // Besides, a UBreakIterator is a BreakIterator, so
        // any subclass of BreakIterator should work fine here...
        m_search_->breakIter = (UBreakIterator *) breakiter;

        m_breakiterator_ = breakiter;
    }
}

/**
 * @return the break iterator pointer held by this object (may be NULL)
 */
const BreakIterator * SearchIterator::getBreakIterator(void) const
{
    return m_breakiterator_;
}

/**
 * Replaces the text to be searched.
 * @param text   the new text; an empty string sets status to
 *               U_ILLEGAL_ARGUMENT_ERROR and leaves the current text in place
 * @param status in/out error code; nothing happens if it already indicates
 *               a failure
 */
void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        if (text.length() == 0) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        else {
            m_text_ = text;
            m_search_->text = m_text_.getBuffer();
            m_search_->textLength = m_text_.length();
        }
    }
}

/**
 * Replaces the text to be searched with the text of a character iterator.
 * NOTE(review): the text is extracted directly into m_text_ before it is
 * validated, so m_text_ is overwritten even when setText() then rejects it.
 * @param text   iterator supplying the new text
 * @param status in/out error code; nothing happens if it already indicates
 *               a failure
 */
void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        text.getText(m_text_);
        setText(m_text_, status);
    }
}

/**
 * @return the text held by this iterator
 */
const UnicodeString & SearchIterator::getText(void) const
{
    return m_text_;
}

// operator overloading ----------------------------------------------

/**
 * Equality operator.
 * Two iterators are equal when they are the same object, or when they hold
 * the same break iterator pointer, the same attribute settings, the same
 * match position and length, the same offset and text with equal content.
 * @param that the iterator to compare with
 * @return TRUE if both iterators are equal, FALSE otherwise
 */
UBool SearchIterator::operator==(const SearchIterator &that) const
{
    if (this == &that) {
        return TRUE;
    }
    return (m_breakiterator_            == that.m_breakiterator_ &&
            m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
            m_search_->isOverlap        == that.m_search_->isOverlap &&
            m_search_->matchedIndex     == that.m_search_->matchedIndex &&
            m_search_->matchedLength    == that.m_search_->matchedLength &&
            m_search_->textLength       == that.m_search_->textLength &&
            getOffset() == that.getOffset() &&
            (uprv_memcmp(m_search_->text, that.m_search_->text,
                         m_search_->textLength * sizeof(UChar)) == 0));
}

// public methods ----------------------------------------------------

/**
 * Sets the offset to 0 and searches forward from there via handleNext().
 * @param status in/out error code
 * @return the result of handleNext(), or USEARCH_DONE if status already
 *         indicated a failure on entry
 */
int32_t SearchIterator::first(UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }
    setOffset(0, status);
    return handleNext(0, status);
}

/**
 * Sets the offset to position and searches forward from there via
 * handleNext().
 * @param position text offset at which to start
 * @param status   in/out error code
 * @return the result of handleNext(), or USEARCH_DONE if status already
 *         indicated a failure on entry
 */
int32_t SearchIterator::following(int32_t position,
                                  UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }
    setOffset(position, status);
    return handleNext(position, status);
}

/**
 * Sets the offset to the text length and searches backward from there via
 * handlePrev().
 * @param status in/out error code
 * @return the result of handlePrev(), or USEARCH_DONE if status already
 *         indicated a failure on entry
 */
int32_t SearchIterator::last(UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }
    setOffset(m_search_->textLength, status);
    return handlePrev(m_search_->textLength, status);
}

/**
 * Sets the offset to position and searches backward from there via
 * handlePrev().
 * @param position text offset at which to start
 * @param status   in/out error code
 * @return the result of handlePrev(), or USEARCH_DONE if status already
 *         indicated a failure on entry
 */
int32_t SearchIterator::preceding(int32_t position,
                                  UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return USEARCH_DONE;
    }
    setOffset(position, status);
    return handlePrev(position, status);
}

/**
 * Advances to the next match in the forward direction.
 * If the previous call searched backward and a match is current, that match
 * index is returned again and only the direction flag is flipped.
 * @param status in/out error code
 * @return the index of the match, or USEARCH_DONE if there is none or status
 *         already indicated a failure on entry
 */
int32_t SearchIterator::next(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        int32_t offset      = getOffset();
        int32_t matchindex  = m_search_->matchedIndex;
        int32_t matchlength = m_search_->matchedLength;
        m_search_->reset = FALSE;
        if (m_search_->isForwardSearching == TRUE) {
            int32_t textlength = m_search_->textLength;
            if (offset == textlength || matchindex == textlength ||
                (matchindex != USEARCH_DONE &&
                 matchindex + matchlength >= textlength)) {
                // not enough characters to match
                setMatchNotFound();
                return USEARCH_DONE;
            }
        }
        else {
            // switching direction.
            // if matchedIndex == USEARCH_DONE, it means that either a
            // setOffset has been called or that previous ran off the text
            // string. the iterator would have been set to offset 0 if a
            // match is not found.
            m_search_->isForwardSearching = TRUE;
            if (m_search_->matchedIndex != USEARCH_DONE) {
                // there's no need to set the collation element iterator
                // the next call to next will set the offset.
                return matchindex;
            }
        }

        if (matchlength > 0) {
            // if matchlength is 0 we are at the start of the iteration
            if (m_search_->isOverlap) {
                offset ++;
            }
            else {
                offset += matchlength;
            }
        }
        return handleNext(offset, status);
    }
    return USEARCH_DONE;
}

/**
 * Moves to the previous match in the backward direction.
 * After a reset the search starts from the end of the text. If the previous
 * call searched forward and a match is current, that match index is returned
 * again and only the direction flag is flipped.
 * @param status in/out error code
 * @return the index of the match, or USEARCH_DONE if there is none or status
 *         already indicated a failure on entry
 */
int32_t SearchIterator::previous(UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        int32_t offset;
        if (m_search_->reset) {
            offset                        = m_search_->textLength;
            m_search_->isForwardSearching = FALSE;
            m_search_->reset              = FALSE;
            setOffset(offset, status);
        }
        else {
            offset = getOffset();
        }

        int32_t matchindex = m_search_->matchedIndex;
        if (m_search_->isForwardSearching == TRUE) {
            // switching direction.
            // if matchedIndex == USEARCH_DONE, it means that either a
            // setOffset has been called or that next ran off the text
            // string. the iterator would have been set to offset textLength if
            // a match is not found.
            m_search_->isForwardSearching = FALSE;
            if (matchindex != USEARCH_DONE) {
                return matchindex;
            }
        }
        else {
            if (offset == 0 || matchindex == 0) {
                // not enough characters to match
                setMatchNotFound();
                return USEARCH_DONE;
            }
        }

        if (matchindex != USEARCH_DONE) {
            if (m_search_->isOverlap) {
                matchindex += m_search_->matchedLength - 2;
            }

            return handlePrev(matchindex, status);
        }

        return handlePrev(offset, status);
    }
    return USEARCH_DONE;
}

/**
 * Resets the iteration state: clears the current match, sets the offset to
 * 0, switches both attributes off and selects forward searching.
 * Any error from setOffset() is discarded.
 */
void SearchIterator::reset()
{
    UErrorCode status = U_ZERO_ERROR;
    setMatchNotFound();
    setOffset(0, status);
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
}

// protected constructors and destructors -----------------------------

/**
 * Default constructor.
 * Creates an iterator with no text, no break iterator, both attributes off
 * and no current match.
 * NOTE(review): the uprv_malloc result is not checked; this constructor has
 * no way to report an allocation failure.
 */
SearchIterator::SearchIterator()
{
    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter          = NULL;
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
    m_search_->matchedIndex       = USEARCH_DONE;
    m_search_->matchedLength      = 0;
    m_search_->text               = NULL;
    m_search_->textLength         = 0;
    m_breakiterator_              = NULL;
}

/**
 * Constructs an iterator over a copy of text.
 * NOTE(review): the uprv_malloc result is not checked; this constructor has
 * no way to report an allocation failure.
 * @param text      the text to search
 * @param breakiter break iterator to associate with the search; stored as
 *                  is, may be NULL
 */
SearchIterator::SearchIterator(const UnicodeString &text,
                                     BreakIterator *breakiter) :
                                     m_breakiterator_(breakiter),
                                     m_text_(text)
{
    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter          = NULL;
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
    m_search_->matchedIndex       = USEARCH_DONE;
    m_search_->matchedLength      = 0;
    m_search_->text               = m_text_.getBuffer();
    m_search_->textLength         = text.length();
}

/**
 * Constructs an iterator over the text supplied by a character iterator.
 * NOTE(review): the uprv_malloc result is not checked; this constructor has
 * no way to report an allocation failure.
 * @param text      iterator whose text is extracted and searched
 * @param breakiter break iterator to associate with the search; stored as
 *                  is, may be NULL
 */
SearchIterator::SearchIterator(CharacterIterator &text,
                               BreakIterator     *breakiter) :
                               m_breakiterator_(breakiter)
{
    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter          = NULL;
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
    m_search_->matchedIndex       = USEARCH_DONE;
    m_search_->matchedLength      = 0;
    text.getText(m_text_);
    m_search_->text               = m_text_.getBuffer();
    m_search_->textLength         = m_text_.length();
}

// protected methods ------------------------------------------------------

/**
 * Assignment operator.
 * Copies the break iterator pointer, the text and the search state of that
 * into this object, including the direction and reset flags, so that both
 * iterators continue in the same way. Self-assignment is a no-op.
 * @param that the iterator to copy from
 * @return *this
 */
SearchIterator & SearchIterator::operator=(const SearchIterator &that)
{
    if (this != &that) {
        m_breakiterator_            = that.m_breakiterator_;
        m_text_                     = that.m_text_;
        m_search_->breakIter        = that.m_search_->breakIter;
        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
        m_search_->isOverlap        = that.m_search_->isOverlap;
        m_search_->isForwardSearching = that.m_search_->isForwardSearching;
        m_search_->reset            = that.m_search_->reset;
        m_search_->matchedIndex     = that.m_search_->matchedIndex;
        m_search_->matchedLength    = that.m_search_->matchedLength;
        // Point at our own copy of the text (as the constructors and setText
        // do) so the pointer does not depend on that's lifetime.
        m_search_->text             = m_text_.getBuffer();
        m_search_->textLength       = m_text_.length();
    }
    return *this;
}

/**
 * Stores the length of the current match.
 * @param length the match length
 */
void SearchIterator::setMatchLength(int32_t length)
{
    m_search_->matchedLength = length;
}

/**
 * Stores the start index of the current match.
 * @param position the match start index
 */
void SearchIterator::setMatchStart(int32_t position)
{
    m_search_->matchedIndex = position;
}

/**
 * Records that no match is current: sets the match start to USEARCH_DONE and
 * the match length to 0, then moves the offset to the end of the text when
 * searching forward or to 0 when searching backward.
 */
void SearchIterator::setMatchNotFound()
{
    setMatchStart(USEARCH_DONE);
    setMatchLength(0);
    UErrorCode status = U_ZERO_ERROR;
    // by default no errors should be returned here since offsets are within
    // range.
    if (m_search_->isForwardSearching) {
        setOffset(m_search_->textLength, status);
    }
    else {
        setOffset(0, status);
    }
}


U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */