// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2003-2014, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * * File prscmnts.cpp * * Modification History: * * Date Name Description * 08/22/2003 ram Creation. ******************************************************************************* */ // Safer use of UnicodeString. #ifndef UNISTR_FROM_CHAR_EXPLICIT # define UNISTR_FROM_CHAR_EXPLICIT explicit #endif // Less important, but still a good idea. #ifndef UNISTR_FROM_STRING_EXPLICIT # define UNISTR_FROM_STRING_EXPLICIT explicit #endif #include "unicode/regex.h" #include "unicode/unistr.h" #include "unicode/parseerr.h" #include "prscmnts.h" #include <stdio.h> #include <stdlib.h> U_NAMESPACE_USE #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ #define MAX_SPLIT_STRINGS 20 const char *patternStrings[UPC_LIMIT]={ "^translate\\s*(.*)", "^note\\s*(.*)" }; U_CFUNC int32_t removeText(UChar *source, int32_t srcLen, UnicodeString patString,uint32_t options, UnicodeString replaceText, UErrorCode *status){ if(status == NULL || U_FAILURE(*status)){ return 0; } UnicodeString src(source, srcLen); RegexMatcher myMatcher(patString, src, options, *status); if(U_FAILURE(*status)){ return 0; } UnicodeString dest; dest = myMatcher.replaceAll(replaceText,*status); return dest.extract(source, srcLen, *status); } U_CFUNC int32_t trim(UChar *src, int32_t srcLen, UErrorCode *status){ srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes return srcLen; } U_CFUNC int32_t removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ srcLen = trim(source, srcLen, status); UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; } U_CFUNC int32_t getText(const UChar* source, int32_t srcLen, UChar** dest, int32_t destCapacity, UnicodeString patternString, UErrorCode* status){ if(status == NULL || U_FAILURE(*status)){ return 0; } UnicodeString stringArray[MAX_SPLIT_STRINGS]; RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status); UnicodeString src (source,srcLen); if (U_FAILURE(*status)) { return 0; } pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); if (U_FAILURE(*status)) { return 0; } for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ matcher.reset(stringArray[i]); if(matcher.lookingAt(*status)){ UnicodeString out = matcher.group(1, *status); return out.extract(*dest, destCapacity,*status); } } return 0; } #define AT_SIGN 0x0040 U_CFUNC int32_t getDescription( const UChar* source, int32_t srcLen, UChar** dest, int32_t destCapacity, UErrorCode* status){ if(status == NULL || U_FAILURE(*status)){ return 0; } UnicodeString stringArray[MAX_SPLIT_STRINGS]; RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); UnicodeString src(source, srcLen); if (U_FAILURE(*status)) { return 0; } pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); return trim(*dest, destLen, status); } return 0; } U_CFUNC int32_t getCount(const UChar* source, int32_t srcLen, UParseCommentsOption option, UErrorCode *status){ if(status == NULL || U_FAILURE(*status)){ return 0; } UnicodeString stringArray[MAX_SPLIT_STRINGS]; RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); UnicodeString src (source, srcLen); if (U_FAILURE(*status)) { return 0; } int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); UnicodeString patternString(patternStrings[option]); RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); if (U_FAILURE(*status)) { return 0; } int32_t count = 0; for(int32_t i=0; i<retLen; i++){ matcher.reset(stringArray[i]); if(matcher.lookingAt(*status)){ count++; } } if(option == UPC_TRANSLATE && count > 1){ fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); exit(U_UNSUPPORTED_ERROR); } return count; } U_CFUNC int32_t getAt(const UChar* source, int32_t srcLen, UChar** dest, int32_t destCapacity, int32_t index, UParseCommentsOption option, UErrorCode* status){ if(status == NULL || U_FAILURE(*status)){ return 0; } UnicodeString stringArray[MAX_SPLIT_STRINGS]; RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); UnicodeString src (source, srcLen); if (U_FAILURE(*status)) { return 0; } int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); UnicodeString patternString(patternStrings[option]); RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); if (U_FAILURE(*status)) { return 0; } int32_t count = 0; for(int32_t i=0; i<retLen; i++){ matcher.reset(stringArray[i]); if(matcher.lookingAt(*status)){ if(count == index){ UnicodeString out = matcher.group(1, *status); return out.extract(*dest, destCapacity,*status); } count++; } } return 0; } U_CFUNC int32_t getTranslate( const UChar* source, int32_t srcLen, UChar** dest, int32_t destCapacity, UErrorCode* status){ UnicodeString notePatternString("^translate\\s*?(.*)"); int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); return trim(*dest, destLen, status); } U_CFUNC int32_t getNote(const UChar* source, int32_t srcLen, UChar** dest, int32_t destCapacity, UErrorCode* status){ UnicodeString notePatternString("^note\\s*?(.*)"); int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); return trim(*dest, destLen, status); } #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */