C++程序  |  177行  |  4.46 KB

/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "RegexParser.h"

#include "LexUtil.h"

RegexNode RegexParser::parse(std::string source) {
    fSource = source;
    fIndex = 0;
    ASSERT(fStack.size() == 0);
    this->regex();
    ASSERT(fStack.size() == 1);
    ASSERT(fIndex == source.size());
    return this->pop();
}

char RegexParser::peek() {
    if (fIndex >= fSource.size()) {
        return END;
    }
    return fSource[fIndex];
}

void RegexParser::expect(char c) {
    if (this->peek() != c) {
        printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
        exit(1);
    }
    ++fIndex;
}

RegexNode RegexParser::pop() {
    RegexNode result = fStack.top();
    fStack.pop();
    return result;
}

void RegexParser::term() {
    switch (this->peek()) {
        case '(': this->group();  break;
        case '[': this->set();    break;
        case '.': this->dot();    break;
        default: this->literal();
    }
}

void RegexParser::quantifiedTerm() {
    this->term();
    switch (this->peek()) {
        case '*': fStack.push(RegexNode(RegexNode::kStar_Kind,     this->pop())); ++fIndex; break;
        case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind,     this->pop())); ++fIndex; break;
        case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
        default:  break;
    }
}

void RegexParser::sequence() {
    this->quantifiedTerm();
    for (;;) {
        switch (this->peek()) {
            case END: // fall through
            case '|': // fall through
            case ')': return;
            default:
                this->sequence();
                RegexNode right = this->pop();
                RegexNode left = this->pop();
                fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
        }
    }
}

RegexNode RegexParser::escapeSequence(char c) {
    switch (c) {
        case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
        case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
        case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
        case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
        default:  return RegexNode(RegexNode::kChar_Kind, c);
    }
}

void RegexParser::literal() {
    char c = this->peek();
    if (c == '\\') {
        ++fIndex;
        fStack.push(this->escapeSequence(peek()));
        ++fIndex;
    }
    else {
        fStack.push(RegexNode(RegexNode::kChar_Kind, c));
        ++fIndex;
    }
}

void RegexParser::dot() {
    this->expect('.');
    fStack.push(RegexNode(RegexNode::kDot_Kind));
}

void RegexParser::group() {
    this->expect('(');
    this->regex();
    this->expect(')');
}

void RegexParser::setItem() {
    this->literal();
    if (this->peek() == '-') {
        ++fIndex;
        if (peek() == ']') {
            fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
        }
        else {
            literal();
            RegexNode end = this->pop();
            ASSERT(end.fKind == RegexNode::kChar_Kind);
            RegexNode start = this->pop();
            ASSERT(start.fKind == RegexNode::kChar_Kind);
            fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
        }
    }
}

void RegexParser::set() {
    expect('[');
    size_t depth = fStack.size();
    RegexNode set(RegexNode::kCharset_Kind);
    if (this->peek() == '^') {
        ++fIndex;
        set.fPayload.fBool = true;
    }
    else {
        set.fPayload.fBool = false;
    }
    for (;;) {
        switch (this->peek()) {
            case ']':
                ++fIndex;
                while (fStack.size() > depth) {
                    set.fChildren.push_back(this->pop());
                }
                fStack.push(std::move(set));
                return;
            case END:
                printf("unterminated character set\n");
                exit(1);
            default:
                this->setItem();
        }
    }
}

void RegexParser::regex() {
    this->sequence();
    switch (this->peek()) {
        case '|': {
            ++fIndex;
            this->regex();
            RegexNode right = this->pop();
            RegexNode left = this->pop();
            fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
            break;
        }
        case END: // fall through
        case ')':
            return;
        default:
            ASSERT(false);
    }
}