// © 2017 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // casemap.h // created: 2017jan12 Markus W. Scherer #ifndef __CASEMAP_H__ #define __CASEMAP_H__ #include "unicode/utypes.h" #include "unicode/stringpiece.h" #include "unicode/uobject.h" /** * \file * \brief C++ API: Low-level C++ case mapping functions. */ U_NAMESPACE_BEGIN class BreakIterator; class ByteSink; class Edits; /** * Low-level C++ case mapping functions. * * @stable ICU 59 */ class U_COMMON_API CaseMap U_FINAL : public UMemory { public: /** * Lowercases a UTF-16 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. * When the result would be longer than destCapacity, * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strToLower * @stable ICU 59 */ static int32_t toLower( const char *locale, uint32_t options, const char16_t *src, int32_t srcLength, char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); /** * Uppercases a UTF-16 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. * When the result would be longer than destCapacity, * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strToUpper * @stable ICU 59 */ static int32_t toUpper( const char *locale, uint32_t options, const char16_t *src, int32_t srcLength, char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #if !UCONFIG_NO_BREAK_ITERATION /** * Titlecases a UTF-16 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * * Titlecasing uses a break iterator to find the first characters of words * that are to be titlecased. It titlecases those characters and lowercases * all others. (This can be modified with options bits.) * * @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_TITLECASE_NO_LOWERCASE, * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. * @param iter A break iterator to find the first characters of words that are to be titlecased. * It is set to the source string (setText()) * and used one or more times for iteration (first() and next()). * If NULL, then a word break iterator for the locale is used * (or something equivalent). * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. * When the result would be longer than destCapacity, * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strToTitle * @see ucasemap_toTitle * @stable ICU 59 */ static int32_t toTitle( const char *locale, uint32_t options, BreakIterator *iter, const char16_t *src, int32_t srcLength, char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #endif // UCONFIG_NO_BREAK_ITERATION /** * Case-folds a UTF-16 string and optionally records edits. * * Case folding is locale-independent and not context-sensitive, * but there is an option for whether to include or exclude mappings for dotted I * and dotless i that are marked with 'T' in CaseFolding.txt. * * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. * When the result would be longer than destCapacity, * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strFoldCase * @stable ICU 59 */ static int32_t fold( uint32_t options, const char16_t *src, int32_t srcLength, char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); /** * Lowercases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param sink A ByteSink to which the result string is written. * sink.Flush() is called at the end. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * * @see ucasemap_utf8ToLower * @stable ICU 60 */ static void utf8ToLower( const char *locale, uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode); /** * Uppercases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param sink A ByteSink to which the result string is written. * sink.Flush() is called at the end. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * * @see ucasemap_utf8ToUpper * @stable ICU 60 */ static void utf8ToUpper( const char *locale, uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode); #if !UCONFIG_NO_BREAK_ITERATION /** * Titlecases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * * Titlecasing uses a break iterator to find the first characters of words * that are to be titlecased. It titlecases those characters and lowercases * all others. (This can be modified with options bits.) * * @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_TITLECASE_NO_LOWERCASE, * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. * @param iter A break iterator to find the first characters of words that are to be titlecased. * It is set to the source string (setUText()) * and used one or more times for iteration (first() and next()). * If NULL, then a word break iterator for the locale is used * (or something equivalent). * @param src The original string. * @param sink A ByteSink to which the result string is written. * sink.Flush() is called at the end. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * * @see ucasemap_utf8ToTitle * @stable ICU 60 */ static void utf8ToTitle( const char *locale, uint32_t options, BreakIterator *iter, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode); #endif // UCONFIG_NO_BREAK_ITERATION /** * Case-folds a UTF-8 string and optionally records edits. * * Case folding is locale-independent and not context-sensitive, * but there is an option for whether to include or exclude mappings for dotted I * and dotless i that are marked with 'T' in CaseFolding.txt. * * The result may be longer or shorter than the original. * * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param sink A ByteSink to which the result string is written. * sink.Flush() is called at the end. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * * @see ucasemap_utf8FoldCase * @stable ICU 60 */ static void utf8Fold( uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode); /** * Lowercases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. * When the result would be longer than destCapacity, * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToLower * @stable ICU 59 */ static int32_t utf8ToLower( const char *locale, uint32_t options, const char *src, int32_t srcLength, char *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); /** * Uppercases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. * When the result would be longer than destCapacity, * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToUpper * @stable ICU 59 */ static int32_t utf8ToUpper( const char *locale, uint32_t options, const char *src, int32_t srcLength, char *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #if !UCONFIG_NO_BREAK_ITERATION /** * Titlecases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * * Titlecasing uses a break iterator to find the first characters of words * that are to be titlecased. It titlecases those characters and lowercases * all others. (This can be modified with options bits.) * * @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_TITLECASE_NO_LOWERCASE, * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. * @param iter A break iterator to find the first characters of words that are to be titlecased. * It is set to the source string (setUText()) * and used one or more times for iteration (first() and next()). * If NULL, then a word break iterator for the locale is used * (or something equivalent). * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. * When the result would be longer than destCapacity, * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToTitle * @stable ICU 59 */ static int32_t utf8ToTitle( const char *locale, uint32_t options, BreakIterator *iter, const char *src, int32_t srcLength, char *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); #endif // UCONFIG_NO_BREAK_ITERATION /** * Case-folds a UTF-8 string and optionally records edits. * * Case folding is locale-independent and not context-sensitive, * but there is an option for whether to include or exclude mappings for dotted I * and dotless i that are marked with 'T' in CaseFolding.txt. * * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. * @param destCapacity The size of the buffer (number of bytes). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. * This function calls edits->reset() first unless * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. * When the result would be longer than destCapacity, * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8FoldCase * @stable ICU 59 */ static int32_t utf8Fold( uint32_t options, const char *src, int32_t srcLength, char *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); private: CaseMap() = delete; CaseMap(const CaseMap &other) = delete; CaseMap &operator=(const CaseMap &other) = delete; }; U_NAMESPACE_END #endif // __CASEMAP_H__