// Copyright 2007-2011 Baptiste Lepilleur // Distributed under MIT license, or public domain if desired and // recognized in your jurisdiction. // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE #if !defined(JSON_IS_AMALGAMATION) #include <json/assertions.h> #include <json/reader.h> #include <json/value.h> #include "json_tool.h" #endif // if !defined(JSON_IS_AMALGAMATION) #include <iostream> #include <fstream> #include <utility> #include <cstdio> #include <cassert> #include <cstring> #include <istream> #if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below #define snprintf _snprintf #endif #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0 // Disable warning about strdup being deprecated. #pragma warning(disable : 4996) #endif namespace Json { // Implementation of class Features // //////////////////////////////// Features::Features() : allowComments_(true), strictRoot_(false), allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {} Features Features::all() { return Features(); } Features Features::strictMode() { Features features; features.allowComments_ = false; features.strictRoot_ = true; features.allowDroppedNullPlaceholders_ = false; features.allowNumericKeys_ = false; return features; } // Implementation of class Reader // //////////////////////////////// static inline bool in(Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4) { return c == c1 || c == c2 || c == c3 || c == c4; } static inline bool in(Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5) { return c == c1 || c == c2 || c == c3 || c == c4 || c == c5; } static bool containsNewLine(Reader::Location begin, Reader::Location end) { for (; begin < end; ++begin) if (*begin == '\n' || *begin == '\r') return true; return false; } // Class Reader // ////////////////////////////////////////////////////////////////// Reader::Reader() : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), lastValue_(), commentsBefore_(), features_(Features::all()), collectComments_() {} Reader::Reader(const Features& features) : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), lastValue_(), commentsBefore_(), features_(features), collectComments_() { } bool Reader::parse(const std::string& document, Value& root, bool collectComments) { document_ = document; const char* begin = document_.c_str(); const char* end = begin + document_.length(); return parse(begin, end, root, collectComments); } bool Reader::parse(std::istream& sin, Value& root, bool collectComments) { // std::istream_iterator<char> begin(sin); // std::istream_iterator<char> end; // Those would allow streamed input from a file, if parse() were a // template function. // Since std::string is reference-counted, this at least does not // create an extra copy. std::string doc; std::getline(sin, doc, (char)EOF); return parse(doc, root, collectComments); } bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root, bool collectComments) { if (!features_.allowComments_) { collectComments = false; } begin_ = beginDoc; end_ = endDoc; collectComments_ = collectComments; current_ = begin_; lastValueEnd_ = 0; lastValue_ = 0; commentsBefore_ = ""; errors_.clear(); while (!nodes_.empty()) nodes_.pop(); nodes_.push(&root); bool successful = readValue(); Token token; skipCommentTokens(token); if (collectComments_ && !commentsBefore_.empty()) root.setComment(commentsBefore_, commentAfter); if (features_.strictRoot_) { if (!root.isArray() && !root.isObject()) { // Set error location to start of doc, ideally should be first token found // in doc token.type_ = tokenError; token.start_ = beginDoc; token.end_ = endDoc; addError( "A valid JSON document must be either an array or an object value.", token); return false; } } return successful; } bool Reader::readValue() { Token token; skipCommentTokens(token); bool successful = true; if (collectComments_ && !commentsBefore_.empty()) { // Remove newline characters at the end of the comments size_t lastNonNewline = commentsBefore_.find_last_not_of("\r\n"); if (lastNonNewline != std::string::npos) { commentsBefore_.erase(lastNonNewline + 1); } else { commentsBefore_.clear(); } currentValue().setComment(commentsBefore_, commentBefore); commentsBefore_ = ""; } switch (token.type_) { case tokenObjectBegin: successful = readObject(token); currentValue().setOffsetLimit(current_ - begin_); break; case tokenArrayBegin: successful = readArray(token); currentValue().setOffsetLimit(current_ - begin_); break; case tokenNumber: successful = decodeNumber(token); break; case tokenString: successful = decodeString(token); break; case tokenTrue: currentValue() = true; currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); break; case tokenFalse: currentValue() = false; currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); break; case tokenNull: currentValue() = Value(); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); break; case tokenArraySeparator: if (features_.allowDroppedNullPlaceholders_) { // "Un-read" the current token and mark the current value as a null // token. current_--; currentValue() = Value(); currentValue().setOffsetStart(current_ - begin_ - 1); currentValue().setOffsetLimit(current_ - begin_); break; } // Else, fall through... default: currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return addError("Syntax error: value, object or array expected.", token); } if (collectComments_) { lastValueEnd_ = current_; lastValue_ = ¤tValue(); } return successful; } void Reader::skipCommentTokens(Token& token) { if (features_.allowComments_) { do { readToken(token); } while (token.type_ == tokenComment); } else { readToken(token); } } bool Reader::expectToken(TokenType type, Token& token, const char* message) { readToken(token); if (token.type_ != type) return addError(message, token); return true; } bool Reader::readToken(Token& token) { skipSpaces(); token.start_ = current_; Char c = getNextChar(); bool ok = true; switch (c) { case '{': token.type_ = tokenObjectBegin; break; case '}': token.type_ = tokenObjectEnd; break; case '[': token.type_ = tokenArrayBegin; break; case ']': token.type_ = tokenArrayEnd; break; case '"': token.type_ = tokenString; ok = readString(); break; case '/': token.type_ = tokenComment; ok = readComment(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': token.type_ = tokenNumber; readNumber(); break; case 't': token.type_ = tokenTrue; ok = match("rue", 3); break; case 'f': token.type_ = tokenFalse; ok = match("alse", 4); break; case 'n': token.type_ = tokenNull; ok = match("ull", 3); break; case ',': token.type_ = tokenArraySeparator; break; case ':': token.type_ = tokenMemberSeparator; break; case 0: token.type_ = tokenEndOfStream; break; default: ok = false; break; } if (!ok) token.type_ = tokenError; token.end_ = current_; return true; } void Reader::skipSpaces() { while (current_ != end_) { Char c = *current_; if (c == ' ' || c == '\t' || c == '\r' || c == '\n') ++current_; else break; } } bool Reader::match(Location pattern, int patternLength) { if (end_ - current_ < patternLength) return false; int index = patternLength; while (index--) if (current_[index] != pattern[index]) return false; current_ += patternLength; return true; } bool Reader::readComment() { Location commentBegin = current_ - 1; Char c = getNextChar(); bool successful = false; if (c == '*') successful = readCStyleComment(); else if (c == '/') successful = readCppStyleComment(); if (!successful) return false; if (collectComments_) { CommentPlacement placement = commentBefore; if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) { if (c != '*' || !containsNewLine(commentBegin, current_)) placement = commentAfterOnSameLine; } addComment(commentBegin, current_, placement); } return true; } void Reader::addComment(Location begin, Location end, CommentPlacement placement) { assert(collectComments_); if (placement == commentAfterOnSameLine) { assert(lastValue_ != 0); lastValue_->setComment(std::string(begin, end), placement); } else { commentsBefore_ += std::string(begin, end); } } bool Reader::readCStyleComment() { while (current_ != end_) { Char c = getNextChar(); if (c == '*' && *current_ == '/') break; } return getNextChar() == '/'; } bool Reader::readCppStyleComment() { while (current_ != end_) { Char c = getNextChar(); if (c == '\r' || c == '\n') break; } return true; } void Reader::readNumber() { while (current_ != end_) { if (!(*current_ >= '0' && *current_ <= '9') && !in(*current_, '.', 'e', 'E', '+', '-')) break; ++current_; } } bool Reader::readString() { Char c = 0; while (current_ != end_) { c = getNextChar(); if (c == '\\') getNextChar(); else if (c == '"') break; } return c == '"'; } bool Reader::readObject(Token& tokenStart) { Token tokenName; std::string name; currentValue() = Value(objectValue); currentValue().setOffsetStart(tokenStart.start_ - begin_); while (readToken(tokenName)) { bool initialTokenOk = true; while (tokenName.type_ == tokenComment && initialTokenOk) initialTokenOk = readToken(tokenName); if (!initialTokenOk) break; if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object return true; name = ""; if (tokenName.type_ == tokenString) { if (!decodeString(tokenName, name)) return recoverFromError(tokenObjectEnd); } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) { Value numberName; if (!decodeNumber(tokenName, numberName)) return recoverFromError(tokenObjectEnd); name = numberName.asString(); } else { break; } Token colon; if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { return addErrorAndRecover( "Missing ':' after object member name", colon, tokenObjectEnd); } Value& value = currentValue()[name]; nodes_.push(&value); bool ok = readValue(); nodes_.pop(); if (!ok) // error already set return recoverFromError(tokenObjectEnd); Token comma; if (!readToken(comma) || (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && comma.type_ != tokenComment)) { return addErrorAndRecover( "Missing ',' or '}' in object declaration", comma, tokenObjectEnd); } bool finalizeTokenOk = true; while (comma.type_ == tokenComment && finalizeTokenOk) finalizeTokenOk = readToken(comma); if (comma.type_ == tokenObjectEnd) return true; } return addErrorAndRecover( "Missing '}' or object member name", tokenName, tokenObjectEnd); } bool Reader::readArray(Token& tokenStart) { currentValue() = Value(arrayValue); currentValue().setOffsetStart(tokenStart.start_ - begin_); skipSpaces(); if (*current_ == ']') // empty array { Token endArray; readToken(endArray); return true; } int index = 0; for (;;) { Value& value = currentValue()[index++]; nodes_.push(&value); bool ok = readValue(); nodes_.pop(); if (!ok) // error already set return recoverFromError(tokenArrayEnd); Token token; // Accept Comment after last item in the array. ok = readToken(token); while (token.type_ == tokenComment && ok) { ok = readToken(token); } bool badTokenType = (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd); if (!ok || badTokenType) { return addErrorAndRecover( "Missing ',' or ']' in array declaration", token, tokenArrayEnd); } if (token.type_ == tokenArrayEnd) break; } return true; } bool Reader::decodeNumber(Token& token) { Value decoded; if (!decodeNumber(token, decoded)) return false; currentValue() = decoded; currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool Reader::decodeNumber(Token& token, Value& decoded) { bool isDouble = false; for (Location inspect = token.start_; inspect != token.end_; ++inspect) { isDouble = isDouble || in(*inspect, '.', 'e', 'E', '+') || (*inspect == '-' && inspect != token.start_); } if (isDouble) return decodeDouble(token, decoded); // Attempts to parse the number as an integer. If the number is // larger than the maximum supported value of an integer then // we decode the number as a double. Location current = token.start_; bool isNegative = *current == '-'; if (isNegative) ++current; Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt) : Value::maxLargestUInt; Value::LargestUInt threshold = maxIntegerValue / 10; Value::LargestUInt value = 0; while (current < token.end_) { Char c = *current++; if (c < '0' || c > '9') return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token); Value::UInt digit(c - '0'); if (value >= threshold) { // We've hit or exceeded the max value divided by 10 (rounded down). If // a) we've only just touched the limit, b) this is the last digit, and // c) it's small enough to fit in that rounding delta, we're okay. // Otherwise treat this number as a double to avoid overflow. if (value > threshold || current != token.end_ || digit > maxIntegerValue % 10) { return decodeDouble(token, decoded); } } value = value * 10 + digit; } if (isNegative) decoded = -Value::LargestInt(value); else if (value <= Value::LargestUInt(Value::maxInt)) decoded = Value::LargestInt(value); else decoded = value; return true; } bool Reader::decodeDouble(Token& token) { Value decoded; if (!decodeDouble(token, decoded)) return false; currentValue() = decoded; currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool Reader::decodeDouble(Token& token, Value& decoded) { double value = 0; const int bufferSize = 32; int count; int length = int(token.end_ - token.start_); // Sanity check to avoid buffer overflow exploits. if (length < 0) { return addError("Unable to parse token length", token); } // Avoid using a string constant for the format control string given to // sscanf, as this can cause hard to debug crashes on OS X. See here for more // info: // // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html char format[] = "%lf"; if (length <= bufferSize) { Char buffer[bufferSize + 1]; memcpy(buffer, token.start_, length); buffer[length] = 0; count = sscanf(buffer, format, &value); } else { std::string buffer(token.start_, token.end_); count = sscanf(buffer.c_str(), format, &value); } if (count != 1) return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token); decoded = value; return true; } bool Reader::decodeString(Token& token) { std::string decoded; if (!decodeString(token, decoded)) return false; currentValue() = decoded; currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool Reader::decodeString(Token& token, std::string& decoded) { decoded.reserve(token.end_ - token.start_ - 2); Location current = token.start_ + 1; // skip '"' Location end = token.end_ - 1; // do not include '"' while (current != end) { Char c = *current++; if (c == '"') break; else if (c == '\\') { if (current == end) return addError("Empty escape sequence in string", token, current); Char escape = *current++; switch (escape) { case '"': decoded += '"'; break; case '/': decoded += '/'; break; case '\\': decoded += '\\'; break; case 'b': decoded += '\b'; break; case 'f': decoded += '\f'; break; case 'n': decoded += '\n'; break; case 'r': decoded += '\r'; break; case 't': decoded += '\t'; break; case 'u': { unsigned int unicode; if (!decodeUnicodeCodePoint(token, current, end, unicode)) return false; decoded += codePointToUTF8(unicode); } break; default: return addError("Bad escape sequence in string", token, current); } } else { decoded += c; } } return true; } bool Reader::decodeUnicodeCodePoint(Token& token, Location& current, Location end, unsigned int& unicode) { if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) return false; if (unicode >= 0xD800 && unicode <= 0xDBFF) { // surrogate pairs if (end - current < 6) return addError( "additional six characters expected to parse unicode surrogate pair.", token, current); unsigned int surrogatePair; if (*(current++) == '\\' && *(current++) == 'u') { if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); } else return false; } else return addError("expecting another \\u token to begin the second half of " "a unicode surrogate pair", token, current); } return true; } bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current, Location end, unsigned int& unicode) { if (end - current < 4) return addError( "Bad unicode escape sequence in string: four digits expected.", token, current); unicode = 0; for (int index = 0; index < 4; ++index) { Char c = *current++; unicode *= 16; if (c >= '0' && c <= '9') unicode += c - '0'; else if (c >= 'a' && c <= 'f') unicode += c - 'a' + 10; else if (c >= 'A' && c <= 'F') unicode += c - 'A' + 10; else return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current); } return true; } bool Reader::addError(const std::string& message, Token& token, Location extra) { ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = extra; errors_.push_back(info); return false; } bool Reader::recoverFromError(TokenType skipUntilToken) { int errorCount = int(errors_.size()); Token skip; for (;;) { if (!readToken(skip)) errors_.resize(errorCount); // discard errors caused by recovery if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream) break; } errors_.resize(errorCount); return false; } bool Reader::addErrorAndRecover(const std::string& message, Token& token, TokenType skipUntilToken) { addError(message, token); return recoverFromError(skipUntilToken); } Value& Reader::currentValue() { return *(nodes_.top()); } Reader::Char Reader::getNextChar() { if (current_ == end_) return 0; return *current_++; } void Reader::getLocationLineAndColumn(Location location, int& line, int& column) const { Location current = begin_; Location lastLineStart = current; line = 0; while (current < location && current != end_) { Char c = *current++; if (c == '\r') { if (*current == '\n') ++current; lastLineStart = current; ++line; } else if (c == '\n') { lastLineStart = current; ++line; } } // column & line start at 1 column = int(location - lastLineStart) + 1; ++line; } std::string Reader::getLocationLineAndColumn(Location location) const { int line, column; getLocationLineAndColumn(location, line, column); char buffer[18 + 16 + 16 + 1]; #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) #if defined(WINCE) _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); #else sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column); #endif #else snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); #endif return buffer; } // Deprecated. Preserved for backward compatibility std::string Reader::getFormatedErrorMessages() const { return getFormattedErrorMessages(); } std::string Reader::getFormattedErrorMessages() const { std::string formattedMessage; for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError) { const ErrorInfo& error = *itError; formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; formattedMessage += " " + error.message_ + "\n"; if (error.extra_) formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n"; } return formattedMessage; } std::vector<Reader::StructuredError> Reader::getStructuredErrors() const { std::vector<Reader::StructuredError> allErrors; for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError) { const ErrorInfo& error = *itError; Reader::StructuredError structured; structured.offset_start = error.token_.start_ - begin_; structured.offset_limit = error.token_.end_ - begin_; structured.message = error.message_; allErrors.push_back(structured); } return allErrors; } bool Reader::pushError(const Value& value, const std::string& message) { size_t length = end_ - begin_; if(value.getOffsetStart() > length || value.getOffsetLimit() > length) return false; Token token; token.type_ = tokenError; token.start_ = begin_ + value.getOffsetStart(); token.end_ = end_ + value.getOffsetLimit(); ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = 0; errors_.push_back(info); return true; } bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) { size_t length = end_ - begin_; if(value.getOffsetStart() > length || value.getOffsetLimit() > length || extra.getOffsetLimit() > length) return false; Token token; token.type_ = tokenError; token.start_ = begin_ + value.getOffsetStart(); token.end_ = begin_ + value.getOffsetLimit(); ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = begin_ + extra.getOffsetStart(); errors_.push_back(info); return true; } bool Reader::good() const { return !errors_.size(); } std::istream& operator>>(std::istream& sin, Value& root) { Json::Reader reader; bool ok = reader.parse(sin, root, true); if (!ok) { fprintf(stderr, "Error from reader: %s", reader.getFormattedErrorMessages().c_str()); JSON_FAIL_MESSAGE("reader error"); } return sin; } } // namespace Json