/*
 * Copyright (C) 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "LiteralParser.h"

#include "JSArray.h"
#include "JSString.h"
#include "Lexer.h"
#include "UStringBuilder.h"
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>

namespace JSC {

// Returns true when c is space, tab (0x9), line feed (0xA) or carriage
// return (0xD); the lexer skips these between tokens.
static inline bool isJSONWhiteSpace(const UChar& c)
{
    // The JSON RFC 4627 defines a list of allowed characters to be considered
    // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
    return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
}

// Scans the next token starting at m_ptr, after skipping JSON white space.
//
// token: receives the token's type and its [start, end) range; string and
//        number tokens additionally get stringToken / numberToken filled in
//        by lexString() / lexNumber().
//
// Returns the type of the scanned token, which is always the same value that
// was stored in token.type:
//   - TokEnd when the input is exhausted (start == end == m_ptr).
//   - TokError when the leading character does not start a token or a
//     keyword (true / false / null) is incomplete; in that case m_ptr is left
//     at token.start and token.end is not written.
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    // Pessimistically mark the token as an error; every successful path
    // below (including lexString / lexNumber) overwrites the type.
    token.type = TokError;
    token.start = m_ptr;
    switch (*m_ptr) {
        case '[':
            token.type = TokLBracket;
            token.end = ++m_ptr;
            return TokLBracket;
        case ']':
            token.type = TokRBracket;
            token.end = ++m_ptr;
            return TokRBracket;
        case '(':
            // The return value must agree with token.type. parse() compares
            // the result of m_lexer.next() against bracket types, so
            // reporting a parenthesis as a bracket here would let "(" / ")"
            // be mistaken for "[" / "]" by such checks.
            // NOTE(review): assumes Lexer::next() forwards this return value;
            // its definition is not in this file.
            token.type = TokLParen;
            token.end = ++m_ptr;
            return TokLParen;
        case ')':
            token.type = TokRParen;
            token.end = ++m_ptr;
            return TokRParen;
        case '{':
            token.type = TokLBrace;
            token.end = ++m_ptr;
            return TokLBrace;
        case '}':
            token.type = TokRBrace;
            token.end = ++m_ptr;
            return TokRBrace;
        case ',':
            token.type = TokComma;
            token.end = ++m_ptr;
            return TokComma;
        case ':':
            token.type = TokColon;
            token.end = ++m_ptr;
            return TokColon;
        case '"':
            if (m_mode == StrictJSON)
                return lexString<StrictJSON>(token);
            return lexString<NonStrictJSON>(token);
        case 't':
            // The length check guards the m_ptr[1..3] reads.
            if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
                m_ptr += 4;
                token.type = TokTrue;
                token.end = m_ptr;
                return TokTrue;
            }
            break;
        case 'f':
            if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
                m_ptr += 5;
                token.type = TokFalse;
                token.end = m_ptr;
                return TokFalse;
            }
            break;
        case 'n':
            if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
                m_ptr += 4;
                token.type = TokNull;
                token.end = m_ptr;
                return TokNull;
            }
            break;
        case '-':
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return lexNumber(token);
    }
    return TokError;
}

// Returns true when c may be copied verbatim into a string token: either a
// tab, or any code unit >= ' ' other than backslash and double quote. In
// non-strict mode the code unit must additionally be <= 0xff.
template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
{
    return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
}

// Scans a double-quoted string; on entry m_ptr points at the opening quote.
//
// Runs of "safe" characters are appended to the result in bulk. In StrictJSON
// mode, backslash escapes (\" \\ \/ \b \f \n \r \t \uNNNN) are decoded and
// the scan loops until the closing quote. In NonStrictJSON mode the loop body
// runs exactly once and escapes are not handled, so anything other than the
// closing quote after the first safe run yields TokError.
//
// On success sets token.stringToken, token.type (TokString) and token.end
// (one past the closing quote) and returns TokString. On failure returns
// TokError without writing token.type or token.end; lex() has already set
// token.type to TokError before dispatching here.
//
// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
{
    ++m_ptr; // Skip the opening quote.
    const UChar* runStart;
    UStringBuilder builder;
    do {
        runStart = m_ptr;
        while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
            ++m_ptr;
        if (runStart < m_ptr)
            builder.append(runStart, m_ptr - runStart);
        if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
            ++m_ptr;
            if (m_ptr >= m_end)
                return TokError;
            switch (*m_ptr) {
                case '"':
                    builder.append('"');
                    m_ptr++;
                    break;
                case '\\':
                    builder.append('\\');
                    m_ptr++;
                    break;
                case '/':
                    builder.append('/');
                    m_ptr++;
                    break;
                case 'b':
                    builder.append('\b');
                    m_ptr++;
                    break;
                case 'f':
                    builder.append('\f');
                    m_ptr++;
                    break;
                case 'n':
                    builder.append('\n');
                    m_ptr++;
                    break;
                case 'r':
                    builder.append('\r');
                    m_ptr++;
                    break;
                case 't':
                    builder.append('\t');
                    m_ptr++;
                    break;

                case 'u':
                    if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
                        return TokError;
                    for (int i = 1; i < 5; i++) {
                        if (!isASCIIHexDigit(m_ptr[i]))
                            return TokError;
                    }
                    builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                    m_ptr += 5;
                    break;

                default:
                    return TokError;
            }
        }
        // Keep going (strict mode only) while this iteration consumed
        // something and the closing quote has not been reached. An iteration
        // that consumed nothing means m_ptr sits on a character that is
        // neither safe nor an escape; the check below then reports the error.
    } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');

    if (m_ptr >= m_end || *m_ptr != '"')
        return TokError;

    token.stringToken = builder.toUString();
    token.type = TokString;
    token.end = ++m_ptr;
    return TokString;
}

// Scans a JSON number starting at token.start (== m_ptr on entry).
//
// The characters are first validated against the grammar below; the matched
// range is then copied into a NUL-terminated char buffer and converted with
// WTF::strtod. On success sets token.type (TokNumber), token.end and
// token.numberToken and returns TokNumber. On a grammar violation returns
// TokError without writing token.type or token.end.
LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
{
    // ES5 and json.org define numbers as
    // number
    //     int
    //     int frac? exp?
    //
    // int
    //     -? 0
    //     -? digit1-9 digits?
    //
    // digits
    //     digit digits?
    //
    // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?

    if (m_ptr < m_end && *m_ptr == '-') // -?
        ++m_ptr;

    // (0 | [1-9][0-9]*)
    if (m_ptr < m_end && *m_ptr == '0') // 0
        ++m_ptr;
    else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
        ++m_ptr;
        // [0-9]*
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else
        return TokError;

    // ('.' [0-9]+)?
    if (m_ptr < m_end && *m_ptr == '.') {
        ++m_ptr;
        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
            return TokError;

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    }

    // ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
        ++m_ptr;

        // [-+]?
        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
            ++m_ptr;

        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
            return TokError;

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    }

    token.type = TokNumber;
    token.end = m_ptr;

    // Narrow the validated characters into a NUL-terminated char buffer for
    // strtod; the extra element holds the terminator. The grammar above only
    // admits ASCII characters, which the ASSERT in the loop double-checks.
    Vector<char, 64> buffer(token.end - token.start + 1);
    int i;
    for (i = 0; i < token.end - token.start; i++) {
        ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
        buffer[i] = static_cast<char>(token.start[i]);
    }
    buffer[i] = 0;
    char* end;
    token.numberToken = WTF::strtod(buffer.data(), &end);
    // strtod must have consumed exactly the characters validated above.
    ASSERT(buffer.data() + (token.end - token.start) == end);
    return TokNumber;
}

// Parses the token stream produced by m_lexer into a JSValue, starting in
// initialState and beginning with the lexer's current token.
//
// The parser is an explicit state machine rather than a recursive descent:
//   - stateStack holds the state to resume once a nested value is complete,
//   - objectStack holds the arrays / objects currently under construction,
//   - identifierStack holds the pending property name for each open object,
//   - lastValue carries the most recently completed value between states.
// After a state finishes with "break", the next state is popped from
// stateStack; when the stack is empty, lastValue is the result.
//
// Returns the parsed value, or an empty JSValue() on any syntax error.
JSValue LiteralParser::parse(ParserState initialState)
{
    ParserState state = initialState;
    MarkedArgumentBuffer objectStack;
    JSValue lastValue;
    Vector<ParserState, 16> stateStack;
    Vector<Identifier, 16> identifierStack;
    while (1) {
        switch(state) {
            startParseArray:
            case StartParseArray: {
                // Current token is '['; open a new array.
                JSArray* array = constructEmptyArray(m_exec);
                objectStack.append(array);
                // fallthrough
            }
            doParseArrayStartExpression:
            case DoParseArrayStartExpression: {
                // Current token is '[' or ','. Remember which, so that a ']'
                // directly after a ',' (trailing comma) can be rejected.
                TokenType lastToken = m_lexer.currentToken().type;
                if (m_lexer.next() == TokRBracket) {
                    if (lastToken == TokComma)
                        return JSValue();
                    m_lexer.next();
                    lastValue = objectStack.last();
                    objectStack.removeLast();
                    break;
                }

                stateStack.append(DoParseArrayEndExpression);
                goto startParseExpression;
            }
            case DoParseArrayEndExpression: {
                // An element has just been parsed into lastValue.
                 asArray(objectStack.last())->push(m_exec, lastValue);

                if (m_lexer.currentToken().type == TokComma)
                    goto doParseArrayStartExpression;

                if (m_lexer.currentToken().type != TokRBracket)
                    return JSValue();

                m_lexer.next();
                lastValue = objectStack.last();
                objectStack.removeLast();
                break;
            }
            startParseObject:
            case StartParseObject: {
                // Current token is '{'; open a new object. The first token
                // inside may be a property name or, for an empty object, '}'.
                JSObject* object = constructEmptyObject(m_exec);
                objectStack.append(object);

                TokenType type = m_lexer.next();
                if (type == TokString) {
                    Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();

                    // Check for colon
                    if (m_lexer.next() != TokColon)
                        return JSValue();

                    m_lexer.next();
                    identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
                    stateStack.append(DoParseObjectEndExpression);
                    goto startParseExpression;
                } else if (type != TokRBrace)
                    return JSValue();
                m_lexer.next();
                lastValue = objectStack.last();
                objectStack.removeLast();
                break;
            }
            doParseObjectStartExpression:
            case DoParseObjectStartExpression: {
                // Reached after a ','; unlike StartParseObject, a property
                // name is mandatory here, so a trailing comma is an error.
                TokenType type = m_lexer.next();
                if (type != TokString)
                    return JSValue();
                Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();

                // Check for colon
                if (m_lexer.next() != TokColon)
                    return JSValue();

                m_lexer.next();
                identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
                stateStack.append(DoParseObjectEndExpression);
                goto startParseExpression;
            }
            case DoParseObjectEndExpression:
            {
                // A property value has just been parsed into lastValue; store
                // it under the pending property name.
                asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue);
                identifierStack.removeLast();
                if (m_lexer.currentToken().type == TokComma)
                    goto doParseObjectStartExpression;
                if (m_lexer.currentToken().type != TokRBrace)
                    return JSValue();
                m_lexer.next();
                lastValue = objectStack.last();
                objectStack.removeLast();
                break;
            }
            startParseExpression:
            case StartParseExpression: {
                // Dispatch on the current token: containers jump to their
                // start state, scalars are converted directly and the lexer
                // is advanced past them.
                switch (m_lexer.currentToken().type) {
                    case TokLBracket:
                        goto startParseArray;
                    case TokLBrace:
                        goto startParseObject;
                    case TokString: {
                        Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
                        m_lexer.next();
                        lastValue = jsString(m_exec, stringToken.stringToken);
                        break;
                    }
                    case TokNumber: {
                        Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
                        m_lexer.next();
                        lastValue = jsNumber(numberToken.numberToken);
                        break;
                    }
                    case TokNull:
                        m_lexer.next();
                        lastValue = jsNull();
                        break;

                    case TokTrue:
                        m_lexer.next();
                        lastValue = jsBoolean(true);
                        break;

                    case TokFalse:
                        m_lexer.next();
                        lastValue = jsBoolean(false);
                        break;

                    default:
                        // Error
                        return JSValue();
                }
                break;
            }
            case StartParseStatement: {
                // At statement level only an array, number or string literal,
                // or a parenthesised expression, is accepted; any other
                // leading token is an error.
                switch (m_lexer.currentToken().type) {
                    case TokLBracket:
                    case TokNumber:
                    case TokString:
                        goto startParseExpression;

                    case TokLParen: {
                        m_lexer.next();
                        stateStack.append(StartParseStatementEndStatement);
                        goto startParseExpression;
                    }
                    default:
                        return JSValue();
                }
            }
            case StartParseStatementEndStatement: {
                // The parenthesised expression is complete: require ')' and
                // then the end of input.
                ASSERT(stateStack.isEmpty());
                if (m_lexer.currentToken().type != TokRParen)
                    return JSValue();
                if (m_lexer.next() == TokEnd)
                    return lastValue;
                return JSValue();
            }
            default:
                ASSERT_NOT_REACHED();
        }
        // A value was completed: either it is the final result, or control
        // returns to the state that was waiting for it.
        if (stateStack.isEmpty())
            return lastValue;
        state = stateStack.last();
        stateStack.removeLast();
        continue;
    }
}

}