C++程序  |  249行  |  7.24 KB

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *   Copyright (C) 2003-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 *
 * File prscmnts.cpp
 *
 * Modification History:
 *
 *   Date          Name        Description
 *   08/22/2003    ram         Creation.
 *******************************************************************************
 */

// Safer use of UnicodeString.
#ifndef UNISTR_FROM_CHAR_EXPLICIT
#   define UNISTR_FROM_CHAR_EXPLICIT explicit
#endif

// Less important, but still a good idea.
#ifndef UNISTR_FROM_STRING_EXPLICIT
#   define UNISTR_FROM_STRING_EXPLICIT explicit
#endif

#include "unicode/regex.h"
#include "unicode/unistr.h"
#include "unicode/parseerr.h"
#include "prscmnts.h"
#include <stdio.h>
#include <stdlib.h>

U_NAMESPACE_USE

#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */

#define MAX_SPLIT_STRINGS 20

const char *patternStrings[UPC_LIMIT]={
    "^translate\\s*(.*)",
    "^note\\s*(.*)"
};

U_CFUNC int32_t 
removeText(UChar *source, int32_t srcLen, 
           UnicodeString patString,uint32_t options,  
           UnicodeString replaceText, UErrorCode *status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString src(source, srcLen);

    RegexMatcher    myMatcher(patString, src, options, *status);
    if(U_FAILURE(*status)){
        return 0;
    }
    UnicodeString dest;


    dest = myMatcher.replaceAll(replaceText,*status);
    
    
    return dest.extract(source, srcLen, *status);

}
U_CFUNC int32_t
trim(UChar *src, int32_t srcLen, UErrorCode *status){
     srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
     srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
     srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes
     return srcLen;
}

U_CFUNC int32_t 
removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
    srcLen = trim(source, srcLen, status);
    UnicodeString patString("^\\s*?\\*\\s*?");  // remove pattern like " * " at the begining of the line
    srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
    return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
}

U_CFUNC int32_t 
getText(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        UnicodeString patternString, 
        UErrorCode* status){
    
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
    UnicodeString src (source,srcLen);
    
    if (U_FAILURE(*status)) {
        return 0;
    }
    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            UnicodeString out = matcher.group(1, *status);

            return out.extract(*dest, destCapacity,*status);
        }
    }
    return 0;
}


#define AT_SIGN  0x0040

U_CFUNC int32_t
getDescription( const UChar* source, int32_t srcLen,
                UChar** dest, int32_t destCapacity,
                UErrorCode* status){
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    UnicodeString src(source, srcLen);
    
    if (U_FAILURE(*status)) {
        return 0;
    }
    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);

    if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
        int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
        return trim(*dest, destLen, status);
    }
    return 0;
}

U_CFUNC int32_t
getCount(const UChar* source, int32_t srcLen, 
         UParseCommentsOption option, UErrorCode *status){
    
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    UnicodeString src (source, srcLen);


    if (U_FAILURE(*status)) {
        return 0;
    }
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    
    UnicodeString patternString(patternStrings[option]);
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    } 
    int32_t count = 0;
    for(int32_t i=0; i<retLen; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            count++;
        }
    }
    if(option == UPC_TRANSLATE && count > 1){
        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
        exit(U_UNSUPPORTED_ERROR);
    }
    return count;
}

U_CFUNC int32_t 
getAt(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        int32_t index,
        UParseCommentsOption option,
        UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    UnicodeString src (source, srcLen);


    if (U_FAILURE(*status)) {
        return 0;
    }
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    
    UnicodeString patternString(patternStrings[option]);
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    } 
    int32_t count = 0;
    for(int32_t i=0; i<retLen; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            if(count == index){
                UnicodeString out = matcher.group(1, *status);
                return out.extract(*dest, destCapacity,*status);
            }
            count++;
            
        }
    }
    return 0;

}

U_CFUNC int32_t
getTranslate( const UChar* source, int32_t srcLen,
              UChar** dest, int32_t destCapacity,
              UErrorCode* status){
    UnicodeString     notePatternString("^translate\\s*?(.*)");
    
    int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
    return trim(*dest, destLen, status);
}

U_CFUNC int32_t 
getNote(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        UErrorCode* status){

    UnicodeString     notePatternString("^note\\s*?(.*)");
    int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
    return trim(*dest, destLen, status);

}

#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */