Lexer.cpp - Android社区 - https://www.androidos.net.cn/

/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#include "config.h"
#include "Lexer.h"

#include "JSFunction.h"
#include "JSGlobalObjectFunctions.h"
#include "NodeInfo.h"
#include "Nodes.h"
#include "dtoa.h"
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wtf/Assertions.h>

using namespace WTF;
using namespace Unicode;

// We can't specify the namespace in yacc's C output, so do it here instead.
using namespace JSC;

#include "Grammar.h"
#include "Lookup.h"
#include "Lexer.lut.h"

namespace JSC {

static const UChar byteOrderMark = 0xFEFF;

Lexer::Lexer(JSGlobalData* globalData)
    : m_isReparsing(false)
    , m_globalData(globalData)
    , m_keywordTable(JSC::mainTable)
{
    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
}

Lexer::~Lexer()
{
    m_keywordTable.deleteTable();
}

inline const UChar* Lexer::currentCharacter() const
{
    return m_code - 4;
}

inline int Lexer::currentOffset() const
{
    return currentCharacter() - m_codeStart;
}

ALWAYS_INLINE void Lexer::shift1()
{
    m_current = m_next1;
    m_next1 = m_next2;
    m_next2 = m_next3;
    if (LIKELY(m_code < m_codeEnd))
        m_next3 = m_code[0];
    else
        m_next3 = -1;

++m_code;
}

ALWAYS_INLINE void Lexer::shift2()
{
    m_current = m_next2;
    m_next1 = m_next3;
    if (LIKELY(m_code + 1 < m_codeEnd)) {
        m_next2 = m_code[0];
        m_next3 = m_code[1];
    } else {
        m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
        m_next3 = -1;
    }

m_code += 2;
}

ALWAYS_INLINE void Lexer::shift3()
{
    m_current = m_next3;
    if (LIKELY(m_code + 2 < m_codeEnd)) {
        m_next1 = m_code[0];
        m_next2 = m_code[1];
        m_next3 = m_code[2];
    } else {
        m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
        m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
        m_next3 = -1;
    }

m_code += 3;
}

ALWAYS_INLINE void Lexer::shift4()
{
    if (LIKELY(m_code + 3 < m_codeEnd)) {
        m_current = m_code[0];
        m_next1 = m_code[1];
        m_next2 = m_code[2];
        m_next3 = m_code[3];
    } else {
        m_current = m_code < m_codeEnd ? m_code[0] : -1;
        m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
        m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
        m_next3 = -1;
    }

m_code += 4;
}

void Lexer::setCode(const SourceCode& source, ParserArena& arena)
{
    m_arena = &arena.identifierArena();

m_lineNumber = source.firstLine();
    m_delimited = false;
    m_lastToken = -1;

const UChar* data = source.provider()->data();

m_source = &source;
    m_codeStart = data;
    m_code = data + source.startOffset();
    m_codeEnd = data + source.endOffset();
    m_error = false;
    m_atLineStart = true;

// ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
    // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
    if (source.provider()->hasBOMs()) {
        for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
            if (UNLIKELY(*p == byteOrderMark)) {
                copyCodeWithoutBOMs();
                break;
            }
        }
    }

// Read the first characters into the 4-character buffer.
    shift4();
    ASSERT(currentOffset() == source.startOffset());
}

void Lexer::copyCodeWithoutBOMs()
{
    // Note: In this case, the character offset data for debugging will be incorrect.
    // If it's important to correctly debug code with extraneous BOMs, then the caller
    // should strip the BOMs when creating the SourceProvider object and do its own
    // mapping of offsets within the stripped text to original text offset.

m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
    for (const UChar* p = m_code; p < m_codeEnd; ++p) {
        UChar c = *p;
        if (c != byteOrderMark)
            m_codeWithoutBOMs.append(c);
    }
    ptrdiff_t startDelta = m_codeStart - m_code;
    m_code = m_codeWithoutBOMs.data();
    m_codeStart = m_code + startDelta;
    m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
}

void Lexer::shiftLineTerminator()
{
    ASSERT(isLineTerminator(m_current));

// Allow both CRLF and LFCR.
    if (m_current + m_next1 == '\n' + '\r')
        shift2();
    else
        shift1();

++m_lineNumber;
}

ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
{
    return &m_arena->makeIdentifier(m_globalData, characters, length);
}

inline bool Lexer::lastTokenWasRestrKeyword() const
{
    return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
}

static NEVER_INLINE bool isNonASCIIIdentStart(int c)
{
    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
}

static inline bool isIdentStart(int c)
{
    return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
}

static NEVER_INLINE bool isNonASCIIIdentPart(int c)
{
    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
        | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
}

static inline bool isIdentPart(int c)
{
    return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
}

static inline int singleEscape(int c)
{
    switch (c) {
        case 'b':
            return 0x08;
        case 't':
            return 0x09;
        case 'n':
            return 0x0A;
        case 'v':
            return 0x0B;
        case 'f':
            return 0x0C;
        case 'r':
            return 0x0D;
        default:
            return c;
    }
}

inline void Lexer::record8(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= 0xFF);
    m_buffer8.append(static_cast<char>(c));
}

inline void Lexer::record16(UChar c)
{
    m_buffer16.append(c);
}

inline void Lexer::record16(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= USHRT_MAX);
    record16(UChar(static_cast<unsigned short>(c)));
}

int Lexer::lex(void* p1, void* p2)
{
    ASSERT(!m_error);
    ASSERT(m_buffer8.isEmpty());
    ASSERT(m_buffer16.isEmpty());

YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
    int token = 0;
    m_terminator = false;

start:
    while (isWhiteSpace(m_current))
        shift1();

int startOffset = currentOffset();

if (m_current == -1) {
        if (!m_terminator && !m_delimited && !m_isReparsing) {
            // automatic semicolon insertion if program incomplete
            token = ';';
            goto doneSemicolon;
        }
        return 0;
    }

m_delimited = false;
    switch (m_current) {
        case '>':
            if (m_next1 == '>' && m_next2 == '>') {
                if (m_next3 == '=') {
                    shift4();
                    token = URSHIFTEQUAL;
                    break;
                }
                shift3();
                token = URSHIFT;
                break;
            }
            if (m_next1 == '>') {
                if (m_next2 == '=') {
                    shift3();
                    token = RSHIFTEQUAL;
                    break;
                }
                shift2();
                token = RSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = GE;
                break;
            }
            shift1();
            token = '>';
            break;
        case '=':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STREQ;
                    break;
                }
                shift2();
                token = EQEQ;
                break;
            }
            shift1();
            token = '=';
            break;
        case '!':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STRNEQ;
                    break;
                }
                shift2();
                token = NE;
                break;
            }
            shift1();
            token = '!';
            break;
        case '<':
            if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
                // <!-- marks the beginning of a line comment (for www usage)
                shift4();
                goto inSingleLineComment;
            }
            if (m_next1 == '<') {
                if (m_next2 == '=') {
                    shift3();
                    token = LSHIFTEQUAL;
                    break;
                }
                shift2();
                token = LSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = LE;
                break;
            }
            shift1();
            token = '<';
            break;
        case '+':
            if (m_next1 == '+') {
                shift2();
                if (m_terminator) {
                    token = AUTOPLUSPLUS;
                    break;
                }
                token = PLUSPLUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = PLUSEQUAL;
                break;
            }
            shift1();
            token = '+';
            break;
        case '-':
            if (m_next1 == '-') {
                if (m_atLineStart && m_next2 == '>') {
                    shift3();
                    goto inSingleLineComment;
                }
                shift2();
                if (m_terminator) {
                    token = AUTOMINUSMINUS;
                    break;
                }
                token = MINUSMINUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = MINUSEQUAL;
                break;
            }
            shift1();
            token = '-';
            break;
        case '*':
            if (m_next1 == '=') {
                shift2();
                token = MULTEQUAL;
                break;
            }
            shift1();
            token = '*';
            break;
        case '/':
            if (m_next1 == '/') {
                shift2();
                goto inSingleLineComment;
            }
            if (m_next1 == '*')
                goto inMultiLineComment;
            if (m_next1 == '=') {
                shift2();
                token = DIVEQUAL;
                break;
            }
            shift1();
            token = '/';
            break;
        case '&':
            if (m_next1 == '&') {
                shift2();
                token = AND;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = ANDEQUAL;
                break;
            }
            shift1();
            token = '&';
            break;
        case '^':
            if (m_next1 == '=') {
                shift2();
                token = XOREQUAL;
                break;
            }
            shift1();
            token = '^';
            break;
        case '%':
            if (m_next1 == '=') {
                shift2();
                token = MODEQUAL;
                break;
            }
            shift1();
            token = '%';
            break;
        case '|':
            if (m_next1 == '=') {
                shift2();
                token = OREQUAL;
                break;
            }
            if (m_next1 == '|') {
                shift2();
                token = OR;
                break;
            }
            shift1();
            token = '|';
            break;
        case '.':
            if (isASCIIDigit(m_next1)) {
                record8('.');
                shift1();
                goto inNumberAfterDecimalPoint;
            }
            token = '.';
            shift1();
            break;
        case ',':
        case '~':
        case '?':
        case ':':
        case '(':
        case ')':
        case '[':
        case ']':
            token = m_current;
            shift1();
            break;
        case ';':
            shift1();
            m_delimited = true;
            token = ';';
            break;
        case '{':
            lvalp->intValue = currentOffset();
            shift1();
            token = OPENBRACE;
            break;
        case '}':
            lvalp->intValue = currentOffset();
            shift1();
            m_delimited = true;
            token = CLOSEBRACE;
            break;
        case '\\':
            goto startIdentifierWithBackslash;
        case '0':
            goto startNumberWithZeroDigit;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            goto startNumber;
        case '"':
        case '\'':
            goto startString;
        default:
            if (isIdentStart(m_current))
                goto startIdentifierOrKeyword;
            if (isLineTerminator(m_current)) {
                shiftLineTerminator();
                m_atLineStart = true;
                m_terminator = true;
                if (lastTokenWasRestrKeyword()) {
                    token = ';';
                    goto doneSemicolon;
                }
                goto start;
            }
            goto returnError;
    }

m_atLineStart = false;
    goto returnToken;

startString: {
    int stringQuoteCharacter = m_current;
    shift1();

const UChar* stringStart = currentCharacter();
    while (m_current != stringQuoteCharacter) {
        // Fast check for characters that require special handling.
        // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
        // as possible, and lets through all common ASCII characters.
        if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
            m_buffer16.append(stringStart, currentCharacter() - stringStart);
            goto inString;
        }
        shift1();
    }
    lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    token = STRING;
    goto returnToken;

inString:
    while (m_current != stringQuoteCharacter) {
        if (m_current == '\\')
            goto inStringEscapeSequence;
        if (UNLIKELY(isLineTerminator(m_current)))
            goto returnError;
        if (UNLIKELY(m_current == -1))
            goto returnError;
        record16(m_current);
        shift1();
    }
    goto doneString;

inStringEscapeSequence:
    shift1();
    if (m_current == 'x') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
            record16(convertHex(m_current, m_next1));
            shift2();
            goto inString;
        }
        record16('x');
        if (m_current == stringQuoteCharacter)
            goto doneString;
        goto inString;
    }
    if (m_current == 'u') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
            record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
            shift4();
            goto inString;
        }
        if (m_current == stringQuoteCharacter) {
            record16('u');
            goto doneString;
        }
        goto returnError;
    }
    if (isASCIIOctalDigit(m_current)) {
        if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
            record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
            shift3();
            goto inString;
        }
        if (isASCIIOctalDigit(m_next1)) {
            record16((m_current - '0') * 8 + m_next1 - '0');
            shift2();
            goto inString;
        }
        record16(m_current - '0');
        shift1();
        goto inString;
    }
    if (isLineTerminator(m_current)) {
        shiftLineTerminator();
        goto inString;
    }
    if (m_current == -1)
        goto returnError;
    record16(singleEscape(m_current));
    shift1();
    goto inString;
}

startIdentifierWithBackslash:
    shift1();
    if (UNLIKELY(m_current != 'u'))
        goto returnError;
    shift1();
    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
        goto returnError;
    token = convertUnicode(m_current, m_next1, m_next2, m_next3);
    if (UNLIKELY(!isIdentStart(token)))
        goto returnError;
    goto inIdentifierAfterCharacterCheck;

startIdentifierOrKeyword: {
    const UChar* identifierStart = currentCharacter();
    shift1();
    while (isIdentPart(m_current))
        shift1();
    if (LIKELY(m_current != '\\')) {
        lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
        goto doneIdentifierOrKeyword;
    }
    m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
}

do {
        shift1();
        if (UNLIKELY(m_current != 'u'))
            goto returnError;
        shift1();
        if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
            goto returnError;
        token = convertUnicode(m_current, m_next1, m_next2, m_next3);
        if (UNLIKELY(!isIdentPart(token)))
            goto returnError;
inIdentifierAfterCharacterCheck:
        record16(token);
        shift4();

while (isIdentPart(m_current)) {
            record16(m_current);
            shift1();
        }
    } while (UNLIKELY(m_current == '\\'));
    goto doneIdentifier;

inSingleLineComment:
    while (!isLineTerminator(m_current)) {
        if (UNLIKELY(m_current == -1))
            return 0;
        shift1();
    }
    shiftLineTerminator();
    m_atLineStart = true;
    m_terminator = true;
    if (lastTokenWasRestrKeyword())
        goto doneSemicolon;
    goto start;

inMultiLineComment:
    shift2();
    while (m_current != '*' || m_next1 != '/') {
        if (isLineTerminator(m_current))
            shiftLineTerminator();
        else {
            shift1();
            if (UNLIKELY(m_current == -1))
                goto returnError;
        }
    }
    shift2();
    m_atLineStart = false;
    goto start;

startNumberWithZeroDigit:
    shift1();
    if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
        shift1();
        goto inHex;
    }
    if (m_current == '.') {
        record8('0');
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('0');
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    if (isASCIIOctalDigit(m_current))
        goto inOctal;
    if (isASCIIDigit(m_current))
        goto startNumber;
    lvalp->doubleValue = 0;
    goto doneNumeric;

inNumberAfterDecimalPoint:
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    goto doneNumber;

inExponentIndicator:
    if (m_current == '+' || m_current == '-') {
        record8(m_current);
        shift1();
    }
    if (!isASCIIDigit(m_current))
        goto returnError;
    do {
        record8(m_current);
        shift1();
    } while (isASCIIDigit(m_current));
    goto doneNumber;

inOctal: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIOctalDigit(m_current));
    if (isASCIIDigit(m_current))
        goto startNumber;

double dval = 0;

const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 8;
        dval += *p - '0';
    }
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);

m_buffer8.resize(0);

lvalp->doubleValue = dval;
    goto doneNumeric;
}

inHex: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIHexDigit(m_current));

double dval = 0;

const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 16;
        dval += toASCIIHexValue(*p);
    }
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);

m_buffer8.resize(0);

lvalp->doubleValue = dval;
    goto doneNumeric;
}

startNumber:
    record8(m_current);
    shift1();
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if (m_current == '.') {
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }

// Fall through into doneNumber.

doneNumber:
    // Null-terminate string for strtod.
    m_buffer8.append('\0');
    lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
    m_buffer8.resize(0);

// Fall through into doneNumeric.

doneNumeric:
    // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
    if (UNLIKELY(isIdentStart(m_current)))
        goto returnError;

m_atLineStart = false;
    m_delimited = false;
    token = NUMBER;
    goto returnToken;

doneSemicolon:
    token = ';';
    m_delimited = true;
    goto returnToken;

doneIdentifier:
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = IDENT;
    goto returnToken;

doneIdentifierOrKeyword: {
    m_atLineStart = false;
    m_delimited = false;
    m_buffer16.resize(0);
    const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
    token = entry ? entry->lexerValue() : IDENT;
    goto returnToken;
}

doneString:
    // Atomize constant strings in case they're later used in property lookup.
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = STRING;

// Fall through into returnToken.

returnToken: {
    int lineNumber = m_lineNumber;
    llocp->first_line = lineNumber;
    llocp->last_line = lineNumber;
    llocp->first_column = startOffset;
    llocp->last_column = currentOffset();

m_lastToken = token;
    return token;
}

returnError:
    m_error = true;
    return -1;
}

bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
{
    ASSERT(m_buffer16.isEmpty());

bool lastWasEscape = false;
    bool inBrackets = false;

if (patternPrefix) {
        ASSERT(!isLineTerminator(patternPrefix));
        ASSERT(patternPrefix != '/');
        ASSERT(patternPrefix != '[');
        record16(patternPrefix);
    }

while (true) {
        int current = m_current;

if (isLineTerminator(current) || current == -1) {
            m_buffer16.resize(0);
            return false;
        }

shift1();

if (current == '/' && !lastWasEscape && !inBrackets)
            break;

record16(current);

if (lastWasEscape) {
            lastWasEscape = false;
            continue;
        }

switch (current) {
        case '[':
            inBrackets = true;
            break;
        case ']':
            inBrackets = false;
            break;
        case '\\':
            lastWasEscape = true;
            break;
        }
    }

pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);

while (isIdentPart(m_current)) {
        record16(m_current);
        shift1();
    }

flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);

return true;
}

bool Lexer::skipRegExp()
{
    bool lastWasEscape = false;
    bool inBrackets = false;

while (true) {
        int current = m_current;

if (isLineTerminator(current) || current == -1)
            return false;

shift1();

if (current == '/' && !lastWasEscape && !inBrackets)
            break;

if (lastWasEscape) {
            lastWasEscape = false;
            continue;
        }

while (isIdentPart(m_current))
        shift1();

return true;
}

void Lexer::clear()
{
    m_arena = 0;
    m_codeWithoutBOMs.clear();

Vector<char> newBuffer8;
    newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer8.swap(newBuffer8);

Vector<UChar> newBuffer16;
    newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer16.swap(newBuffer16);

m_isReparsing = false;
}

SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
{
    if (m_codeWithoutBOMs.isEmpty())
        return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);

const UChar* data = m_source->provider()->data();

ASSERT(openBrace < closeBrace);

int numBOMsBeforeOpenBrace = 0;
    int numBOMsBetweenBraces = 0;

int i;
    for (i = m_source->startOffset(); i < openBrace; ++i)
        numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
    for (; i < closeBrace; ++i)
        numBOMsBetweenBraces += data[i] == byteOrderMark;

return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
        closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
}

} // namespace JSC

C++程序 | 1045行 | 26.39 KB

/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#include "config.h"
#include "Lexer.h"

#include "JSFunction.h"
#include "JSGlobalObjectFunctions.h"
#include "NodeInfo.h"
#include "Nodes.h"
#include "dtoa.h"
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wtf/Assertions.h>

using namespace WTF;
using namespace Unicode;

// We can't specify the namespace in yacc's C output, so do it here instead.
using namespace JSC;

#include "Grammar.h"
#include "Lookup.h"
#include "Lexer.lut.h"

namespace JSC {

static const UChar byteOrderMark = 0xFEFF;

Lexer::Lexer(JSGlobalData* globalData)
    : m_isReparsing(false)
    , m_globalData(globalData)
    , m_keywordTable(JSC::mainTable)
{
    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
}

Lexer::~Lexer()
{
    m_keywordTable.deleteTable();
}

inline const UChar* Lexer::currentCharacter() const
{
    return m_code - 4;
}

inline int Lexer::currentOffset() const
{
    return currentCharacter() - m_codeStart;
}

ALWAYS_INLINE void Lexer::shift1()
{
    m_current = m_next1;
    m_next1 = m_next2;
    m_next2 = m_next3;
    if (LIKELY(m_code < m_codeEnd))
        m_next3 = m_code[0];
    else
        m_next3 = -1;

    ++m_code;
}

ALWAYS_INLINE void Lexer::shift2()
{
    m_current = m_next2;
    m_next1 = m_next3;
    if (LIKELY(m_code + 1 < m_codeEnd)) {
        m_next2 = m_code[0];
        m_next3 = m_code[1];
    } else {
        m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
        m_next3 = -1;
    }

    m_code += 2;
}

ALWAYS_INLINE void Lexer::shift3()
{
    m_current = m_next3;
    if (LIKELY(m_code + 2 < m_codeEnd)) {
        m_next1 = m_code[0];
        m_next2 = m_code[1];
        m_next3 = m_code[2];
    } else {
        m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
        m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
        m_next3 = -1;
    }

    m_code += 3;
}

ALWAYS_INLINE void Lexer::shift4()
{
    if (LIKELY(m_code + 3 < m_codeEnd)) {
        m_current = m_code[0];
        m_next1 = m_code[1];
        m_next2 = m_code[2];
        m_next3 = m_code[3];
    } else {
        m_current = m_code < m_codeEnd ? m_code[0] : -1;
        m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
        m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
        m_next3 = -1;
    }

    m_code += 4;
}

void Lexer::setCode(const SourceCode& source, ParserArena& arena)
{
    m_arena = &arena.identifierArena();

    m_lineNumber = source.firstLine();
    m_delimited = false;
    m_lastToken = -1;

    const UChar* data = source.provider()->data();

    m_source = &source;
    m_codeStart = data;
    m_code = data + source.startOffset();
    m_codeEnd = data + source.endOffset();
    m_error = false;
    m_atLineStart = true;

    // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
    // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
    if (source.provider()->hasBOMs()) {
        for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
            if (UNLIKELY(*p == byteOrderMark)) {
                copyCodeWithoutBOMs();
                break;
            }
        }
    }

    // Read the first characters into the 4-character buffer.
    shift4();
    ASSERT(currentOffset() == source.startOffset());
}

void Lexer::copyCodeWithoutBOMs()
{
    // Note: In this case, the character offset data for debugging will be incorrect.
    // If it's important to correctly debug code with extraneous BOMs, then the caller
    // should strip the BOMs when creating the SourceProvider object and do its own
    // mapping of offsets within the stripped text to original text offset.

    m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
    for (const UChar* p = m_code; p < m_codeEnd; ++p) {
        UChar c = *p;
        if (c != byteOrderMark)
            m_codeWithoutBOMs.append(c);
    }
    ptrdiff_t startDelta = m_codeStart - m_code;
    m_code = m_codeWithoutBOMs.data();
    m_codeStart = m_code + startDelta;
    m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
}

void Lexer::shiftLineTerminator()
{
    ASSERT(isLineTerminator(m_current));

    // Allow both CRLF and LFCR.
    if (m_current + m_next1 == '\n' + '\r')
        shift2();
    else
        shift1();

    ++m_lineNumber;
}

ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
{
    return &m_arena->makeIdentifier(m_globalData, characters, length);
}

inline bool Lexer::lastTokenWasRestrKeyword() const
{
    return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
}

static NEVER_INLINE bool isNonASCIIIdentStart(int c)
{
    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
}

static inline bool isIdentStart(int c)
{
    return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
}

static NEVER_INLINE bool isNonASCIIIdentPart(int c)
{
    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
        | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
}

static inline bool isIdentPart(int c)
{
    return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
}

static inline int singleEscape(int c)
{
    switch (c) {
        case 'b':
            return 0x08;
        case 't':
            return 0x09;
        case 'n':
            return 0x0A;
        case 'v':
            return 0x0B;
        case 'f':
            return 0x0C;
        case 'r':
            return 0x0D;
        default:
            return c;
    }
}

inline void Lexer::record8(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= 0xFF);
    m_buffer8.append(static_cast<char>(c));
}

inline void Lexer::record16(UChar c)
{
    m_buffer16.append(c);
}

inline void Lexer::record16(int c)
{
    ASSERT(c >= 0);
    ASSERT(c <= USHRT_MAX);
    record16(UChar(static_cast<unsigned short>(c)));
}

int Lexer::lex(void* p1, void* p2)
{
    ASSERT(!m_error);
    ASSERT(m_buffer8.isEmpty());
    ASSERT(m_buffer16.isEmpty());

    YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
    YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
    int token = 0;
    m_terminator = false;

start:
    while (isWhiteSpace(m_current))
        shift1();

    int startOffset = currentOffset();

    if (m_current == -1) {
        if (!m_terminator && !m_delimited && !m_isReparsing) {
            // automatic semicolon insertion if program incomplete
            token = ';';
            goto doneSemicolon;
        }
        return 0;
    }

    m_delimited = false;
    switch (m_current) {
        case '>':
            if (m_next1 == '>' && m_next2 == '>') {
                if (m_next3 == '=') {
                    shift4();
                    token = URSHIFTEQUAL;
                    break;
                }
                shift3();
                token = URSHIFT;
                break;
            }
            if (m_next1 == '>') {
                if (m_next2 == '=') {
                    shift3();
                    token = RSHIFTEQUAL;
                    break;
                }
                shift2();
                token = RSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = GE;
                break;
            }
            shift1();
            token = '>';
            break;
        case '=':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STREQ;
                    break;
                }
                shift2();
                token = EQEQ;
                break;
            }
            shift1();
            token = '=';
            break;
        case '!':
            if (m_next1 == '=') {
                if (m_next2 == '=') {
                    shift3();
                    token = STRNEQ;
                    break;
                }
                shift2();
                token = NE;
                break;
            }
            shift1();
            token = '!';
            break;
        case '<':
            if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
                // <!-- marks the beginning of a line comment (for www usage)
                shift4();
                goto inSingleLineComment;
            }
            if (m_next1 == '<') {
                if (m_next2 == '=') {
                    shift3();
                    token = LSHIFTEQUAL;
                    break;
                }
                shift2();
                token = LSHIFT;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = LE;
                break;
            }
            shift1();
            token = '<';
            break;
        case '+':
            if (m_next1 == '+') {
                shift2();
                if (m_terminator) {
                    token = AUTOPLUSPLUS;
                    break;
                }
                token = PLUSPLUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = PLUSEQUAL;
                break;
            }
            shift1();
            token = '+';
            break;
        case '-':
            if (m_next1 == '-') {
                if (m_atLineStart && m_next2 == '>') {
                    shift3();
                    goto inSingleLineComment;
                }
                shift2();
                if (m_terminator) {
                    token = AUTOMINUSMINUS;
                    break;
                }
                token = MINUSMINUS;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = MINUSEQUAL;
                break;
            }
            shift1();
            token = '-';
            break;
        case '*':
            if (m_next1 == '=') {
                shift2();
                token = MULTEQUAL;
                break;
            }
            shift1();
            token = '*';
            break;
        case '/':
            if (m_next1 == '/') {
                shift2();
                goto inSingleLineComment;
            }
            if (m_next1 == '*')
                goto inMultiLineComment;
            if (m_next1 == '=') {
                shift2();
                token = DIVEQUAL;
                break;
            }
            shift1();
            token = '/';
            break;
        case '&':
            if (m_next1 == '&') {
                shift2();
                token = AND;
                break;
            }
            if (m_next1 == '=') {
                shift2();
                token = ANDEQUAL;
                break;
            }
            shift1();
            token = '&';
            break;
        case '^':
            if (m_next1 == '=') {
                shift2();
                token = XOREQUAL;
                break;
            }
            shift1();
            token = '^';
            break;
        case '%':
            if (m_next1 == '=') {
                shift2();
                token = MODEQUAL;
                break;
            }
            shift1();
            token = '%';
            break;
        case '|':
            if (m_next1 == '=') {
                shift2();
                token = OREQUAL;
                break;
            }
            if (m_next1 == '|') {
                shift2();
                token = OR;
                break;
            }
            shift1();
            token = '|';
            break;
        case '.':
            if (isASCIIDigit(m_next1)) {
                record8('.');
                shift1();
                goto inNumberAfterDecimalPoint;
            }
            token = '.';
            shift1();
            break;
        case ',':
        case '~':
        case '?':
        case ':':
        case '(':
        case ')':
        case '[':
        case ']':
            token = m_current;
            shift1();
            break;
        case ';':
            shift1();
            m_delimited = true;
            token = ';';
            break;
        case '{':
            lvalp->intValue = currentOffset();
            shift1();
            token = OPENBRACE;
            break;
        case '}':
            lvalp->intValue = currentOffset();
            shift1();
            m_delimited = true;
            token = CLOSEBRACE;
            break;
        case '\\':
            goto startIdentifierWithBackslash;
        case '0':
            goto startNumberWithZeroDigit;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            goto startNumber;
        case '"':
        case '\'':
            goto startString;
        default:
            if (isIdentStart(m_current))
                goto startIdentifierOrKeyword;
            if (isLineTerminator(m_current)) {
                shiftLineTerminator();
                m_atLineStart = true;
                m_terminator = true;
                if (lastTokenWasRestrKeyword()) {
                    token = ';';
                    goto doneSemicolon;
                }
                goto start;
            }
            goto returnError;
    }

    m_atLineStart = false;
    goto returnToken;

startString: {
    int stringQuoteCharacter = m_current;
    shift1();

    const UChar* stringStart = currentCharacter();
    while (m_current != stringQuoteCharacter) {
        // Fast check for characters that require special handling.
        // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
        // as possible, and lets through all common ASCII characters.
        if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
            m_buffer16.append(stringStart, currentCharacter() - stringStart);
            goto inString;
        }
        shift1();
    }
    lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    token = STRING;
    goto returnToken;

inString:
    while (m_current != stringQuoteCharacter) {
        if (m_current == '\\')
            goto inStringEscapeSequence;
        if (UNLIKELY(isLineTerminator(m_current)))
            goto returnError;
        if (UNLIKELY(m_current == -1))
            goto returnError;
        record16(m_current);
        shift1();
    }
    goto doneString;

inStringEscapeSequence:
    shift1();
    if (m_current == 'x') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
            record16(convertHex(m_current, m_next1));
            shift2();
            goto inString;
        }
        record16('x');
        if (m_current == stringQuoteCharacter)
            goto doneString;
        goto inString;
    }
    if (m_current == 'u') {
        shift1();
        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
            record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
            shift4();
            goto inString;
        }
        if (m_current == stringQuoteCharacter) {
            record16('u');
            goto doneString;
        }
        goto returnError;
    }
    if (isASCIIOctalDigit(m_current)) {
        if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
            record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
            shift3();
            goto inString;
        }
        if (isASCIIOctalDigit(m_next1)) {
            record16((m_current - '0') * 8 + m_next1 - '0');
            shift2();
            goto inString;
        }
        record16(m_current - '0');
        shift1();
        goto inString;
    }
    if (isLineTerminator(m_current)) {
        shiftLineTerminator();
        goto inString;
    }
    if (m_current == -1)
        goto returnError;
    record16(singleEscape(m_current));
    shift1();
    goto inString;
}

startIdentifierWithBackslash:
    shift1();
    if (UNLIKELY(m_current != 'u'))
        goto returnError;
    shift1();
    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
        goto returnError;
    token = convertUnicode(m_current, m_next1, m_next2, m_next3);
    if (UNLIKELY(!isIdentStart(token)))
        goto returnError;
    goto inIdentifierAfterCharacterCheck;

startIdentifierOrKeyword: {
    const UChar* identifierStart = currentCharacter();
    shift1();
    while (isIdentPart(m_current))
        shift1();
    if (LIKELY(m_current != '\\')) {
        lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
        goto doneIdentifierOrKeyword;
    }
    m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
}

    do {
        shift1();
        if (UNLIKELY(m_current != 'u'))
            goto returnError;
        shift1();
        if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
            goto returnError;
        token = convertUnicode(m_current, m_next1, m_next2, m_next3);
        if (UNLIKELY(!isIdentPart(token)))
            goto returnError;
inIdentifierAfterCharacterCheck:
        record16(token);
        shift4();

        while (isIdentPart(m_current)) {
            record16(m_current);
            shift1();
        }
    } while (UNLIKELY(m_current == '\\'));
    goto doneIdentifier;

inSingleLineComment:
    while (!isLineTerminator(m_current)) {
        if (UNLIKELY(m_current == -1))
            return 0;
        shift1();
    }
    shiftLineTerminator();
    m_atLineStart = true;
    m_terminator = true;
    if (lastTokenWasRestrKeyword())
        goto doneSemicolon;
    goto start;

inMultiLineComment:
    shift2();
    while (m_current != '*' || m_next1 != '/') {
        if (isLineTerminator(m_current))
            shiftLineTerminator();
        else {
            shift1();
            if (UNLIKELY(m_current == -1))
                goto returnError;
        }
    }
    shift2();
    m_atLineStart = false;
    goto start;

startNumberWithZeroDigit:
    shift1();
    if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
        shift1();
        goto inHex;
    }
    if (m_current == '.') {
        record8('0');
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('0');
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    if (isASCIIOctalDigit(m_current))
        goto inOctal;
    if (isASCIIDigit(m_current))
        goto startNumber;
    lvalp->doubleValue = 0;
    goto doneNumeric;

inNumberAfterDecimalPoint:
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }
    goto doneNumber;

inExponentIndicator:
    if (m_current == '+' || m_current == '-') {
        record8(m_current);
        shift1();
    }
    if (!isASCIIDigit(m_current))
        goto returnError;
    do {
        record8(m_current);
        shift1();
    } while (isASCIIDigit(m_current));
    goto doneNumber;

inOctal: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIOctalDigit(m_current));
    if (isASCIIDigit(m_current))
        goto startNumber;

    double dval = 0;

    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 8;
        dval += *p - '0';
    }
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

inHex: {
    do {
        record8(m_current);
        shift1();
    } while (isASCIIHexDigit(m_current));

    double dval = 0;

    const char* end = m_buffer8.end();
    for (const char* p = m_buffer8.data(); p < end; ++p) {
        dval *= 16;
        dval += toASCIIHexValue(*p);
    }
    if (dval >= mantissaOverflowLowerBound)
        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);

    m_buffer8.resize(0);

    lvalp->doubleValue = dval;
    goto doneNumeric;
}

startNumber:
    record8(m_current);
    shift1();
    while (isASCIIDigit(m_current)) {
        record8(m_current);
        shift1();
    }
    if (m_current == '.') {
        record8('.');
        shift1();
        goto inNumberAfterDecimalPoint;
    }
    if ((m_current | 0x20) == 'e') {
        record8('e');
        shift1();
        goto inExponentIndicator;
    }

    // Fall through into doneNumber.

doneNumber:
    // Null-terminate string for strtod.
    m_buffer8.append('\0');
    lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
    m_buffer8.resize(0);

    // Fall through into doneNumeric.

doneNumeric:
    // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
    if (UNLIKELY(isIdentStart(m_current)))
        goto returnError;

    m_atLineStart = false;
    m_delimited = false;
    token = NUMBER;
    goto returnToken;

doneSemicolon:
    token = ';';
    m_delimited = true;
    goto returnToken;

doneIdentifier:
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = IDENT;
    goto returnToken;

doneIdentifierOrKeyword: {
    m_atLineStart = false;
    m_delimited = false;
    m_buffer16.resize(0);
    const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
    token = entry ? entry->lexerValue() : IDENT;
    goto returnToken;
}

doneString:
    // Atomize constant strings in case they're later used in property lookup.
    shift1();
    m_atLineStart = false;
    m_delimited = false;
    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);
    token = STRING;

    // Fall through into returnToken.

returnToken: {
    int lineNumber = m_lineNumber;
    llocp->first_line = lineNumber;
    llocp->last_line = lineNumber;
    llocp->first_column = startOffset;
    llocp->last_column = currentOffset();

    m_lastToken = token;
    return token;
}

returnError:
    m_error = true;
    return -1;
}

bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
{
    ASSERT(m_buffer16.isEmpty());

    bool lastWasEscape = false;
    bool inBrackets = false;

    if (patternPrefix) {
        ASSERT(!isLineTerminator(patternPrefix));
        ASSERT(patternPrefix != '/');
        ASSERT(patternPrefix != '[');
        record16(patternPrefix);
    }

    while (true) {
        int current = m_current;

        if (isLineTerminator(current) || current == -1) {
            m_buffer16.resize(0);
            return false;
        }

        shift1();

        if (current == '/' && !lastWasEscape && !inBrackets)
            break;

        record16(current);

        if (lastWasEscape) {
            lastWasEscape = false;
            continue;
        }

        switch (current) {
        case '[':
            inBrackets = true;
            break;
        case ']':
            inBrackets = false;
            break;
        case '\\':
            lastWasEscape = true;
            break;
        }
    }

    pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);

    while (isIdentPart(m_current)) {
        record16(m_current);
        shift1();
    }

    flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    m_buffer16.resize(0);

    return true;
}

bool Lexer::skipRegExp()
{
    bool lastWasEscape = false;
    bool inBrackets = false;

    while (true) {
        int current = m_current;

        if (isLineTerminator(current) || current == -1)
            return false;

        shift1();

        if (current == '/' && !lastWasEscape && !inBrackets)
            break;

        if (lastWasEscape) {
            lastWasEscape = false;
            continue;
        }

        switch (current) {
        case '[':
            inBrackets = true;
            break;
        case ']':
            inBrackets = false;
            break;
        case '\\':
            lastWasEscape = true;
            break;
        }
    }

    while (isIdentPart(m_current))
        shift1();

    return true;
}

void Lexer::clear()
{
    m_arena = 0;
    m_codeWithoutBOMs.clear();

    Vector<char> newBuffer8;
    newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer8.swap(newBuffer8);

    Vector<UChar> newBuffer16;
    newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer16.swap(newBuffer16);

    m_isReparsing = false;
}

SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
{
    if (m_codeWithoutBOMs.isEmpty())
        return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);

    const UChar* data = m_source->provider()->data();

    ASSERT(openBrace < closeBrace);

    int numBOMsBeforeOpenBrace = 0;
    int numBOMsBetweenBraces = 0;

    int i;
    for (i = m_source->startOffset(); i < openBrace; ++i)
        numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
    for (; i < closeBrace; ++i)
        numBOMsBetweenBraces += data[i] == byteOrderMark;

    return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
        closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
}

} // namespace JSC

登录后可以享受更多权益