// © 2018 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING // Allow implicit conversion from char16_t* to UnicodeString for this file: // Helpful in toString methods and elsewhere. #define UNISTR_FROM_STRING_EXPLICIT #include "numparse_types.h" #include "numparse_currency.h" #include "ucurrimp.h" #include "unicode/errorcode.h" #include "numparse_utils.h" using namespace icu; using namespace icu::numparse; using namespace icu::numparse::impl; CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, parse_flags_t parseFlags, UErrorCode& status) : fCurrency1(currencySymbols.getCurrencySymbol(status)), fCurrency2(currencySymbols.getIntlCurrencySymbol(status)), fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)), afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)), beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)), fLocaleName(dfs.getLocale().getName(), -1, status) { utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode()); // Pre-load the long names for the current locale and currency // if we are parsing without the full currency data. if (!fUseFullCurrencyData) { for (int32_t i=0; i<StandardPlural::COUNT; i++) { auto plural = static_cast<StandardPlural::Form>(i); fLocalLongNames[i] = currencySymbols.getPluralName(plural, status); } } // TODO: Figure out how to make this faster and re-enable. // Computing the "lead code points" set for fastpathing is too slow to use in production. // See http://bugs.icu-project.org/trac/ticket/13584 // // Compute the full set of characters that could be the first in a currency to allow for // // efficient smoke test. // fLeadCodePoints.add(fCurrency1.char32At(0)); // fLeadCodePoints.add(fCurrency2.char32At(0)); // fLeadCodePoints.add(beforeSuffixInsert.char32At(0)); // uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status); // // Always apply case mapping closure for currencies // fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS); // fLeadCodePoints.freeze(); } bool CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { if (result.currencyCode[0] != 0) { return false; } // Try to match a currency spacing separator. int32_t initialOffset = segment.getOffset(); bool maybeMore = false; if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) { int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert); if (overlap == beforeSuffixInsert.length()) { segment.adjustOffset(overlap); // Note: let currency spacing be a weak match. Don't update chars consumed. } maybeMore = maybeMore || overlap == segment.length(); } // Match the currency string, and reset if we didn't find one. maybeMore = maybeMore || matchCurrency(segment, result, status); if (result.currencyCode[0] == 0) { segment.setOffset(initialOffset); return maybeMore; } // Try to match a currency spacing separator. if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) { int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert); if (overlap == afterPrefixInsert.length()) { segment.adjustOffset(overlap); // Note: let currency spacing be a weak match. Don't update chars consumed. } maybeMore = maybeMore || overlap == segment.length(); } return maybeMore; } bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { bool maybeMore = false; int32_t overlap1; if (!fCurrency1.isEmpty()) { overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1); } else { overlap1 = -1; } maybeMore = maybeMore || overlap1 == segment.length(); if (overlap1 == fCurrency1.length()) { utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); segment.adjustOffset(overlap1); result.setCharsConsumed(segment); return maybeMore; } int32_t overlap2; if (!fCurrency2.isEmpty()) { // ISO codes should be accepted case-insensitive. // https://unicode-org.atlassian.net/browse/ICU-13696 overlap2 = segment.getCommonPrefixLength(fCurrency2); } else { overlap2 = -1; } maybeMore = maybeMore || overlap2 == segment.length(); if (overlap2 == fCurrency2.length()) { utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); segment.adjustOffset(overlap2); result.setCharsConsumed(segment); return maybeMore; } if (fUseFullCurrencyData) { // Use the full currency data. // NOTE: This call site should be improved with #13584. const UnicodeString segmentString = segment.toTempUnicodeString(); // Try to parse the currency ParsePosition ppos(0); int32_t partialMatchLen = 0; uprv_parseCurrency( fLocaleName.data(), segmentString, ppos, UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME &partialMatchLen, result.currencyCode, status); maybeMore = maybeMore || partialMatchLen == segment.length(); if (U_SUCCESS(status) && ppos.getIndex() != 0) { // Complete match. // NOTE: The currency code should already be saved in the ParsedNumber. segment.adjustOffset(ppos.getIndex()); result.setCharsConsumed(segment); return maybeMore; } } else { // Use the locale long names. int32_t longestFullMatch = 0; for (int32_t i=0; i<StandardPlural::COUNT; i++) { const UnicodeString& name = fLocalLongNames[i]; int32_t overlap = segment.getCommonPrefixLength(name); if (overlap == name.length() && name.length() > longestFullMatch) { longestFullMatch = name.length(); } maybeMore = maybeMore || overlap > 0; } if (longestFullMatch > 0) { utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); segment.adjustOffset(longestFullMatch); result.setCharsConsumed(segment); return maybeMore; } } // No match found. return maybeMore; } bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const { // TODO: See constructor return true; //return segment.startsWith(fLeadCodePoints); } UnicodeString CombinedCurrencyMatcher::toString() const { return u"<CombinedCurrencyMatcher>"; } #endif /* #if !UCONFIG_NO_FORMATTING */