JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1// Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2// Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3// Distributed under MIT license, or public domain if desired and
4// recognized in your jurisdiction.
5// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7#if !defined(JSON_IS_AMALGAMATION)
8#include "json_tool.h"
9#include <json/assertions.h>
10#include <json/reader.h>
11#include <json/value.h>
12#endif // if !defined(JSON_IS_AMALGAMATION)
13#include <algorithm>
14#include <cassert>
15#include <cmath>
16#include <cstring>
17#include <iostream>
18#include <istream>
19#include <iterator>
20#include <limits>
21#include <memory>
22#include <set>
23#include <sstream>
24#include <utility>
25
26#include <cstdio>
27
28#if defined(_MSC_VER)
29#if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
30#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
31#endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
32#endif //_MSC_VER
33
34#if defined(_MSC_VER)
35// Disable warning about strdup being deprecated.
36#pragma warning(disable : 4996)
37#endif
38
39// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
40// time to change the stack limit
41#if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
42#define JSONCPP_DEPRECATED_STACK_LIMIT 256
43#endif
44
45static size_t const stackLimit_g =
46 JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
47
48namespace Json {
49
50using CharReaderPtr = std::unique_ptr<CharReader>;
51
52// Implementation of class Features
53// ////////////////////////////////
54
55Features::Features() = default;
56
57Features Features::all() { return {}; }
58
60 Features features;
61 features.allowComments_ = false;
62 features.strictRoot_ = true;
63 features.allowDroppedNullPlaceholders_ = false;
64 features.allowNumericKeys_ = false;
65 return features;
66}
67
68// Implementation of class Reader
69// ////////////////////////////////
70
71bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
72 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
73}
74
75// Class Reader
76// //////////////////////////////////////////////////////////////////
77
78Reader::Reader() : features_(Features::all()) {}
79
80Reader::Reader(const Features& features) : features_(features) {}
81
82bool Reader::parse(const std::string& document, Value& root,
83 bool collectComments) {
84 document_.assign(document.begin(), document.end());
85 const char* begin = document_.c_str();
86 const char* end = begin + document_.length();
87 return parse(begin, end, root, collectComments);
88}
89
90bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
91 document_.assign(std::istreambuf_iterator<char>(is),
92 std::istreambuf_iterator<char>());
93 return parse(document_.data(), document_.data() + document_.size(), root,
94 collectComments);
95}
96
97bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
98 bool collectComments) {
99 if (!features_.allowComments_) {
100 collectComments = false;
101 }
102
103 begin_ = beginDoc;
104 end_ = endDoc;
105 collectComments_ = collectComments;
106 current_ = begin_;
107 lastValueEnd_ = nullptr;
108 lastValue_ = nullptr;
109 commentsBefore_.clear();
110 errors_.clear();
111 while (!nodes_.empty())
112 nodes_.pop();
113 nodes_.push(&root);
114
115 bool successful = readValue();
116 Token token;
117 readTokenSkippingComments(token);
118 if (collectComments_ && !commentsBefore_.empty())
119 root.setComment(commentsBefore_, commentAfter);
120 if (features_.strictRoot_) {
121 if (!root.isArray() && !root.isObject()) {
122 // Set error location to start of doc, ideally should be first token found
123 // in doc
124 token.type_ = tokenError;
125 token.start_ = beginDoc;
126 token.end_ = endDoc;
127 addError(
128 "A valid JSON document must be either an array or an object value.",
129 token);
130 return false;
131 }
132 }
133 return successful;
134}
135
136bool Reader::readValue() {
137 // readValue() may call itself only if it calls readObject() or ReadArray().
138 // These methods execute nodes_.push() just before and nodes_.pop)() just
139 // after calling readValue(). parse() executes one nodes_.push(), so > instead
140 // of >=.
141 if (nodes_.size() > stackLimit_g)
142#if JSON_USE_EXCEPTION
143 throwRuntimeError("Exceeded stackLimit in readValue().");
144#else
145 // throwRuntimeError aborts. Don't abort here.
146 return false;
147#endif
148
149 Token token;
150 readTokenSkippingComments(token);
151 bool successful = true;
152
153 if (collectComments_ && !commentsBefore_.empty()) {
154 currentValue().setComment(commentsBefore_, commentBefore);
155 commentsBefore_.clear();
156 }
157
158 switch (token.type_) {
159 case tokenObjectBegin:
160 successful = readObject(token);
161 currentValue().setOffsetLimit(current_ - begin_);
162 break;
163 case tokenArrayBegin:
164 successful = readArray(token);
165 currentValue().setOffsetLimit(current_ - begin_);
166 break;
167 case tokenNumber:
168 successful = decodeNumber(token);
169 break;
170 case tokenString:
171 successful = decodeString(token);
172 break;
173 case tokenTrue: {
174 Value v(true);
175 currentValue().swapPayload(v);
176 currentValue().setOffsetStart(token.start_ - begin_);
177 currentValue().setOffsetLimit(token.end_ - begin_);
178 } break;
179 case tokenFalse: {
180 Value v(false);
181 currentValue().swapPayload(v);
182 currentValue().setOffsetStart(token.start_ - begin_);
183 currentValue().setOffsetLimit(token.end_ - begin_);
184 } break;
185 case tokenNull: {
186 Value v;
187 currentValue().swapPayload(v);
188 currentValue().setOffsetStart(token.start_ - begin_);
189 currentValue().setOffsetLimit(token.end_ - begin_);
190 } break;
191 case tokenArraySeparator:
192 case tokenObjectEnd:
193 case tokenArrayEnd:
194 if (features_.allowDroppedNullPlaceholders_) {
195 // "Un-read" the current token and mark the current value as a null
196 // token.
197 current_--;
198 Value v;
199 currentValue().swapPayload(v);
200 currentValue().setOffsetStart(current_ - begin_ - 1);
201 currentValue().setOffsetLimit(current_ - begin_);
202 break;
203 } // Else, fall through...
204 default:
205 currentValue().setOffsetStart(token.start_ - begin_);
206 currentValue().setOffsetLimit(token.end_ - begin_);
207 return addError("Syntax error: value, object or array expected.", token);
208 }
209
210 if (collectComments_) {
211 lastValueEnd_ = current_;
212 lastValue_ = &currentValue();
213 }
214
215 return successful;
216}
217
218bool Reader::readTokenSkippingComments(Token& token) {
219 bool success = readToken(token);
220 if (features_.allowComments_) {
221 while (success && token.type_ == tokenComment) {
222 success = readToken(token);
223 }
224 }
225 return success;
226}
227
228bool Reader::readToken(Token& token) {
229 skipSpaces();
230 token.start_ = current_;
231 Char c = getNextChar();
232 bool ok = true;
233 switch (c) {
234 case '{':
235 token.type_ = tokenObjectBegin;
236 break;
237 case '}':
238 token.type_ = tokenObjectEnd;
239 break;
240 case '[':
241 token.type_ = tokenArrayBegin;
242 break;
243 case ']':
244 token.type_ = tokenArrayEnd;
245 break;
246 case '"':
247 token.type_ = tokenString;
248 ok = readString();
249 break;
250 case '/':
251 token.type_ = tokenComment;
252 ok = readComment();
253 break;
254 case '0':
255 case '1':
256 case '2':
257 case '3':
258 case '4':
259 case '5':
260 case '6':
261 case '7':
262 case '8':
263 case '9':
264 case '-':
265 token.type_ = tokenNumber;
266 readNumber();
267 break;
268 case 't':
269 token.type_ = tokenTrue;
270 ok = match("rue", 3);
271 break;
272 case 'f':
273 token.type_ = tokenFalse;
274 ok = match("alse", 4);
275 break;
276 case 'n':
277 token.type_ = tokenNull;
278 ok = match("ull", 3);
279 break;
280 case ',':
281 token.type_ = tokenArraySeparator;
282 break;
283 case ':':
284 token.type_ = tokenMemberSeparator;
285 break;
286 case 0:
287 token.type_ = tokenEndOfStream;
288 break;
289 default:
290 ok = false;
291 break;
292 }
293 if (!ok)
294 token.type_ = tokenError;
295 token.end_ = current_;
296 return ok;
297}
298
299void Reader::skipSpaces() {
300 while (current_ != end_) {
301 Char c = *current_;
302 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
303 ++current_;
304 else
305 break;
306 }
307}
308
309bool Reader::match(const Char* pattern, int patternLength) {
310 if (end_ - current_ < patternLength)
311 return false;
312 int index = patternLength;
313 while (index--)
314 if (current_[index] != pattern[index])
315 return false;
316 current_ += patternLength;
317 return true;
318}
319
320bool Reader::readComment() {
321 Location commentBegin = current_ - 1;
322 Char c = getNextChar();
323 bool successful = false;
324 if (c == '*')
325 successful = readCStyleComment();
326 else if (c == '/')
327 successful = readCppStyleComment();
328 if (!successful)
329 return false;
330
331 if (collectComments_) {
333 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
334 if (c != '*' || !containsNewLine(commentBegin, current_))
335 placement = commentAfterOnSameLine;
336 }
337
338 addComment(commentBegin, current_, placement);
339 }
340 return true;
341}
342
343String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
344 String normalized;
345 normalized.reserve(static_cast<size_t>(end - begin));
346 Reader::Location current = begin;
347 while (current != end) {
348 char c = *current++;
349 if (c == '\r') {
350 if (current != end && *current == '\n')
351 // convert dos EOL
352 ++current;
353 // convert Mac EOL
354 normalized += '\n';
355 } else {
356 normalized += c;
357 }
358 }
359 return normalized;
360}
361
362void Reader::addComment(Location begin, Location end,
363 CommentPlacement placement) {
364 assert(collectComments_);
365 const String& normalized = normalizeEOL(begin, end);
366 if (placement == commentAfterOnSameLine) {
367 assert(lastValue_ != nullptr);
368 lastValue_->setComment(normalized, placement);
369 } else {
370 commentsBefore_ += normalized;
371 }
372}
373
374bool Reader::readCStyleComment() {
375 while ((current_ + 1) < end_) {
376 Char c = getNextChar();
377 if (c == '*' && *current_ == '/')
378 break;
379 }
380 return getNextChar() == '/';
381}
382
383bool Reader::readCppStyleComment() {
384 while (current_ != end_) {
385 Char c = getNextChar();
386 if (c == '\n')
387 break;
388 if (c == '\r') {
389 // Consume DOS EOL. It will be normalized in addComment.
390 if (current_ != end_ && *current_ == '\n')
391 getNextChar();
392 // Break on Moc OS 9 EOL.
393 break;
394 }
395 }
396 return true;
397}
398
399void Reader::readNumber() {
400 Location p = current_;
401 char c = '0'; // stopgap for already consumed character
402 // integral part
403 while (c >= '0' && c <= '9')
404 c = (current_ = p) < end_ ? *p++ : '\0';
405 // fractional part
406 if (c == '.') {
407 c = (current_ = p) < end_ ? *p++ : '\0';
408 while (c >= '0' && c <= '9')
409 c = (current_ = p) < end_ ? *p++ : '\0';
410 }
411 // exponential part
412 if (c == 'e' || c == 'E') {
413 c = (current_ = p) < end_ ? *p++ : '\0';
414 if (c == '+' || c == '-')
415 c = (current_ = p) < end_ ? *p++ : '\0';
416 while (c >= '0' && c <= '9')
417 c = (current_ = p) < end_ ? *p++ : '\0';
418 }
419}
420
421bool Reader::readString() {
422 Char c = '\0';
423 while (current_ != end_) {
424 c = getNextChar();
425 if (c == '\\')
426 getNextChar();
427 else if (c == '"')
428 break;
429 }
430 return c == '"';
431}
432
433bool Reader::readObject(Token& token) {
434 Token tokenName;
435 String name;
436 Value init(objectValue);
437 currentValue().swapPayload(init);
438 currentValue().setOffsetStart(token.start_ - begin_);
439 while (readTokenSkippingComments(tokenName)) {
440 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
441 return true;
442 name.clear();
443 if (tokenName.type_ == tokenString) {
444 if (!decodeString(tokenName, name))
445 return recoverFromError(tokenObjectEnd);
446 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
447 Value numberName;
448 if (!decodeNumber(tokenName, numberName))
449 return recoverFromError(tokenObjectEnd);
450 name = numberName.asString();
451 } else {
452 break;
453 }
454
455 Token colon;
456 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
457 return addErrorAndRecover("Missing ':' after object member name", colon,
458 tokenObjectEnd);
459 }
460 Value& value = currentValue()[name];
461 nodes_.push(&value);
462 bool ok = readValue();
463 nodes_.pop();
464 if (!ok) // error already set
465 return recoverFromError(tokenObjectEnd);
466
467 Token comma;
468 if (!readTokenSkippingComments(comma) ||
469 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
470 return addErrorAndRecover("Missing ',' or '}' in object declaration",
471 comma, tokenObjectEnd);
472 }
473 if (comma.type_ == tokenObjectEnd)
474 return true;
475 }
476 return addErrorAndRecover("Missing '}' or object member name", tokenName,
477 tokenObjectEnd);
478}
479
480bool Reader::readArray(Token& token) {
481 Value init(arrayValue);
482 currentValue().swapPayload(init);
483 currentValue().setOffsetStart(token.start_ - begin_);
484 skipSpaces();
485 if (current_ != end_ && *current_ == ']') // empty array
486 {
487 Token endArray;
488 readToken(endArray);
489 return true;
490 }
491 int index = 0;
492 for (;;) {
493 Value& value = currentValue()[index++];
494 nodes_.push(&value);
495 bool ok = readValue();
496 nodes_.pop();
497 if (!ok) // error already set
498 return recoverFromError(tokenArrayEnd);
499
500 Token currentToken;
501 // Accept Comment after last item in the array.
502 ok = readTokenSkippingComments(currentToken);
503 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
504 currentToken.type_ != tokenArrayEnd);
505 if (!ok || badTokenType) {
506 return addErrorAndRecover("Missing ',' or ']' in array declaration",
507 currentToken, tokenArrayEnd);
508 }
509 if (currentToken.type_ == tokenArrayEnd)
510 break;
511 }
512 return true;
513}
514
515bool Reader::decodeNumber(Token& token) {
516 Value decoded;
517 if (!decodeNumber(token, decoded))
518 return false;
519 currentValue().swapPayload(decoded);
520 currentValue().setOffsetStart(token.start_ - begin_);
521 currentValue().setOffsetLimit(token.end_ - begin_);
522 return true;
523}
524
525bool Reader::decodeNumber(Token& token, Value& decoded) {
526 // Attempts to parse the number as an integer. If the number is
527 // larger than the maximum supported value of an integer then
528 // we decode the number as a double.
529 Location current = token.start_;
530 bool isNegative = *current == '-';
531 if (isNegative)
532 ++current;
533 // TODO: Help the compiler do the div and mod at compile time or get rid of
534 // them.
535 Value::LargestUInt maxIntegerValue =
538 Value::LargestUInt threshold = maxIntegerValue / 10;
539 Value::LargestUInt value = 0;
540 while (current < token.end_) {
541 Char c = *current++;
542 if (c < '0' || c > '9')
543 return decodeDouble(token, decoded);
544 auto digit(static_cast<Value::UInt>(c - '0'));
545 if (value >= threshold) {
546 // We've hit or exceeded the max value divided by 10 (rounded down). If
547 // a) we've only just touched the limit, b) this is the last digit, and
548 // c) it's small enough to fit in that rounding delta, we're okay.
549 // Otherwise treat this number as a double to avoid overflow.
550 if (value > threshold || current != token.end_ ||
551 digit > maxIntegerValue % 10) {
552 return decodeDouble(token, decoded);
553 }
554 }
555 value = value * 10 + digit;
556 }
557 if (isNegative && value == maxIntegerValue)
558 decoded = Value::minLargestInt;
559 else if (isNegative)
560 decoded = -Value::LargestInt(value);
561 else if (value <= Value::LargestUInt(Value::maxInt))
562 decoded = Value::LargestInt(value);
563 else
564 decoded = value;
565 return true;
566}
567
568bool Reader::decodeDouble(Token& token) {
569 Value decoded;
570 if (!decodeDouble(token, decoded))
571 return false;
572 currentValue().swapPayload(decoded);
573 currentValue().setOffsetStart(token.start_ - begin_);
574 currentValue().setOffsetLimit(token.end_ - begin_);
575 return true;
576}
577
578bool Reader::decodeDouble(Token& token, Value& decoded) {
579 double value = 0;
580 IStringStream is(String(token.start_, token.end_));
581 is.imbue(std::locale::classic());
582 if (!(is >> value)) {
583 if (value == std::numeric_limits<double>::max())
584 value = std::numeric_limits<double>::infinity();
585 else if (value == std::numeric_limits<double>::lowest())
586 value = -std::numeric_limits<double>::infinity();
587 else if (!std::isinf(value))
588 return addError(
589 "'" + String(token.start_, token.end_) + "' is not a number.", token);
590 }
591 decoded = value;
592 return true;
593}
594
595bool Reader::decodeString(Token& token) {
596 String decoded_string;
597 if (!decodeString(token, decoded_string))
598 return false;
599 Value decoded(decoded_string);
600 currentValue().swapPayload(decoded);
601 currentValue().setOffsetStart(token.start_ - begin_);
602 currentValue().setOffsetLimit(token.end_ - begin_);
603 return true;
604}
605
606bool Reader::decodeString(Token& token, String& decoded) {
607 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
608 Location current = token.start_ + 1; // skip '"'
609 Location end = token.end_ - 1; // do not include '"'
610 while (current != end) {
611 Char c = *current++;
612 if (c == '"')
613 break;
614 if (c == '\\') {
615 if (current == end)
616 return addError("Empty escape sequence in string", token, current);
617 Char escape = *current++;
618 switch (escape) {
619 case '"':
620 decoded += '"';
621 break;
622 case '/':
623 decoded += '/';
624 break;
625 case '\\':
626 decoded += '\\';
627 break;
628 case 'b':
629 decoded += '\b';
630 break;
631 case 'f':
632 decoded += '\f';
633 break;
634 case 'n':
635 decoded += '\n';
636 break;
637 case 'r':
638 decoded += '\r';
639 break;
640 case 't':
641 decoded += '\t';
642 break;
643 case 'u': {
644 unsigned int unicode;
645 if (!decodeUnicodeCodePoint(token, current, end, unicode))
646 return false;
647 decoded += codePointToUTF8(unicode);
648 } break;
649 default:
650 return addError("Bad escape sequence in string", token, current);
651 }
652 } else {
653 if (static_cast<unsigned char>(c) < 0x20)
654 return addError("Control character in string", token, current - 1);
655 decoded += c;
656 }
657 }
658 return true;
659}
660
661bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
662 Location end, unsigned int& unicode) {
663
664 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
665 return false;
666 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
667 // surrogate pairs
668 if (end - current < 6)
669 return addError(
670 "additional six characters expected to parse unicode surrogate pair.",
671 token, current);
672 if (*(current++) == '\\' && *(current++) == 'u') {
673 unsigned int surrogatePair;
674 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
675 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
676 } else
677 return false;
678 } else
679 return addError("expecting another \\u token to begin the second half of "
680 "a unicode surrogate pair",
681 token, current);
682 }
683 return true;
684}
685
686bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
687 Location end,
688 unsigned int& ret_unicode) {
689 if (end - current < 4)
690 return addError(
691 "Bad unicode escape sequence in string: four digits expected.", token,
692 current);
693 int unicode = 0;
694 for (int index = 0; index < 4; ++index) {
695 Char c = *current++;
696 unicode *= 16;
697 if (c >= '0' && c <= '9')
698 unicode += c - '0';
699 else if (c >= 'a' && c <= 'f')
700 unicode += c - 'a' + 10;
701 else if (c >= 'A' && c <= 'F')
702 unicode += c - 'A' + 10;
703 else
704 return addError(
705 "Bad unicode escape sequence in string: hexadecimal digit expected.",
706 token, current);
707 }
708 ret_unicode = static_cast<unsigned int>(unicode);
709 return true;
710}
711
712bool Reader::addError(const String& message, Token& token, Location extra) {
713 ErrorInfo info;
714 info.token_ = token;
715 info.message_ = message;
716 info.extra_ = extra;
717 errors_.push_back(info);
718 return false;
719}
720
721bool Reader::recoverFromError(TokenType skipUntilToken) {
722 size_t const errorCount = errors_.size();
723 Token skip;
724 for (;;) {
725 if (!readToken(skip))
726 errors_.resize(errorCount); // discard errors caused by recovery
727 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
728 break;
729 }
730 errors_.resize(errorCount);
731 return false;
732}
733
734bool Reader::addErrorAndRecover(const String& message, Token& token,
735 TokenType skipUntilToken) {
736 addError(message, token);
737 return recoverFromError(skipUntilToken);
738}
739
740Value& Reader::currentValue() { return *(nodes_.top()); }
741
742Reader::Char Reader::getNextChar() {
743 if (current_ == end_)
744 return 0;
745 return *current_++;
746}
747
748void Reader::getLocationLineAndColumn(Location location, int& line,
749 int& column) const {
750 Location current = begin_;
751 Location lastLineStart = current;
752 line = 0;
753 while (current < location && current != end_) {
754 Char c = *current++;
755 if (c == '\r') {
756 if (current != end_ && *current == '\n')
757 ++current;
758 lastLineStart = current;
759 ++line;
760 } else if (c == '\n') {
761 lastLineStart = current;
762 ++line;
763 }
764 }
765 // column & line start at 1
766 column = int(location - lastLineStart) + 1;
767 ++line;
768}
769
770String Reader::getLocationLineAndColumn(Location location) const {
771 int line, column;
772 getLocationLineAndColumn(location, line, column);
773 char buffer[18 + 16 + 16 + 1];
774 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
775 return buffer;
776}
777
778// Deprecated. Preserved for backward compatibility
779String Reader::getFormatedErrorMessages() const {
781}
782
784 String formattedMessage;
785 for (const auto& error : errors_) {
786 formattedMessage +=
787 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
788 formattedMessage += " " + error.message_ + "\n";
789 if (error.extra_)
790 formattedMessage +=
791 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
792 }
793 return formattedMessage;
794}
795
796std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
797 std::vector<Reader::StructuredError> allErrors;
798 for (const auto& error : errors_) {
799 Reader::StructuredError structured;
800 structured.offset_start = error.token_.start_ - begin_;
801 structured.offset_limit = error.token_.end_ - begin_;
802 structured.message = error.message_;
803 allErrors.push_back(structured);
804 }
805 return allErrors;
806}
807
808bool Reader::pushError(const Value& value, const String& message) {
809 ptrdiff_t const length = end_ - begin_;
810 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
811 return false;
812 Token token;
813 token.type_ = tokenError;
814 token.start_ = begin_ + value.getOffsetStart();
815 token.end_ = begin_ + value.getOffsetLimit();
816 ErrorInfo info;
817 info.token_ = token;
818 info.message_ = message;
819 info.extra_ = nullptr;
820 errors_.push_back(info);
821 return true;
822}
823
824bool Reader::pushError(const Value& value, const String& message,
825 const Value& extra) {
826 ptrdiff_t const length = end_ - begin_;
827 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
828 extra.getOffsetLimit() > length)
829 return false;
830 Token token;
831 token.type_ = tokenError;
832 token.start_ = begin_ + value.getOffsetStart();
833 token.end_ = begin_ + value.getOffsetLimit();
834 ErrorInfo info;
835 info.token_ = token;
836 info.message_ = message;
837 info.extra_ = begin_ + extra.getOffsetStart();
838 errors_.push_back(info);
839 return true;
840}
841
842bool Reader::good() const { return errors_.empty(); }
843
844// Originally copied from the Features class (now deprecated), used internally
845// for features implementation.
// Feature switches consumed by OurReader. Plain data with no in-class
// defaults: all() returns a value-initialized instance.
struct OurFeatures {
  static OurFeatures all();

  bool allowComments_;                // comment tokens are skipped / collected
  bool allowTrailingCommas_;
  bool strictRoot_;                   // root must be an array or an object
  bool allowDroppedNullPlaceholders_; // a missing value is read as null
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;                  // error on text after the root value
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  bool skipBom_;                      // passed to skipBom() before parsing
  size_t stackLimit_;                 // nesting limit checked in readValue()
}; // OurFeatures
861
862OurFeatures OurFeatures::all() { return {}; }
863
864// Implementation of class Reader
865// ////////////////////////////////
866
867// Originally copied from the Reader class (now deprecated), used internally
868// for implementing JSON reading.
869class OurReader {
870public:
871 using Char = char;
872 using Location = const Char*;
873
874 explicit OurReader(OurFeatures const& features);
875 bool parse(const char* beginDoc, const char* endDoc, Value& root,
876 bool collectComments = true);
877 String getFormattedErrorMessages() const;
878 std::vector<CharReader::StructuredError> getStructuredErrors() const;
879
880private:
881 OurReader(OurReader const&); // no impl
882 void operator=(OurReader const&); // no impl
883
884 enum TokenType {
885 tokenEndOfStream = 0,
886 tokenObjectBegin,
887 tokenObjectEnd,
888 tokenArrayBegin,
889 tokenArrayEnd,
890 tokenString,
891 tokenNumber,
892 tokenTrue,
893 tokenFalse,
894 tokenNull,
895 tokenNaN,
896 tokenPosInf,
897 tokenNegInf,
898 tokenArraySeparator,
899 tokenMemberSeparator,
900 tokenComment,
901 tokenError
902 };
903
904 class Token {
905 public:
906 TokenType type_;
907 Location start_;
908 Location end_;
909 };
910
911 class ErrorInfo {
912 public:
913 Token token_;
914 String message_;
915 Location extra_;
916 };
917
918 using Errors = std::deque<ErrorInfo>;
919
920 bool readToken(Token& token);
921 bool readTokenSkippingComments(Token& token);
922 void skipSpaces();
923 void skipBom(bool skipBom);
924 bool match(const Char* pattern, int patternLength);
925 bool readComment();
926 bool readCStyleComment(bool* containsNewLineResult);
927 bool readCppStyleComment();
928 bool readString();
929 bool readStringSingleQuote();
930 bool readNumber(bool checkInf);
931 bool readValue();
932 bool readObject(Token& token);
933 bool readArray(Token& token);
934 bool decodeNumber(Token& token);
935 bool decodeNumber(Token& token, Value& decoded);
936 bool decodeString(Token& token);
937 bool decodeString(Token& token, String& decoded);
938 bool decodeDouble(Token& token);
939 bool decodeDouble(Token& token, Value& decoded);
940 bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
941 unsigned int& unicode);
942 bool decodeUnicodeEscapeSequence(Token& token, Location& current,
943 Location end, unsigned int& unicode);
944 bool addError(const String& message, Token& token, Location extra = nullptr);
945 bool recoverFromError(TokenType skipUntilToken);
946 bool addErrorAndRecover(const String& message, Token& token,
947 TokenType skipUntilToken);
948 void skipUntilSpace();
949 Value& currentValue();
950 Char getNextChar();
951 void getLocationLineAndColumn(Location location, int& line,
952 int& column) const;
953 String getLocationLineAndColumn(Location location) const;
954 void addComment(Location begin, Location end, CommentPlacement placement);
955
956 static String normalizeEOL(Location begin, Location end);
957 static bool containsNewLine(Location begin, Location end);
958
959 using Nodes = std::stack<Value*>;
960
961 Nodes nodes_{};
962 Errors errors_{};
963 String document_{};
964 Location begin_ = nullptr;
965 Location end_ = nullptr;
966 Location current_ = nullptr;
967 Location lastValueEnd_ = nullptr;
968 Value* lastValue_ = nullptr;
969 bool lastValueHasAComment_ = false;
970 String commentsBefore_{};
971
972 OurFeatures const features_;
973 bool collectComments_ = false;
974}; // OurReader
975
976// complete copy of Reader impl, for OurReader
977
978// Test-only instrumentation: total bytes examined by
979// OurReader::containsNewLine, so unit tests can assert that comment handling
980// stays linear in the input rather than quadratic in the comment count (see
981// CharReaderTest/parseCommentsAfterValueScansLinearly). thread_local so it
982// never races during concurrent parsing; the increment is negligible and only
983// runs while parsing comments. Not part of the supported public API.
// Returns a mutable reference to the per-thread byte counter.
// NOTE(review): the signature was reconstructed from the body and from the
// `newlineScanByteCountForTesting() += ...` call in
// OurReader::containsNewLine; confirm any export/linkage qualifier against
// the declaration used by the unit tests.
size_t& newlineScanByteCountForTesting() {
  static thread_local size_t count = 0;
  return count;
}
988
989bool OurReader::containsNewLine(OurReader::Location begin,
990 OurReader::Location end) {
991 newlineScanByteCountForTesting() += static_cast<size_t>(end - begin);
992 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
993}
994
995OurReader::OurReader(OurFeatures const& features) : features_(features) {}
996
997bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
998 bool collectComments) {
999 if (!features_.allowComments_) {
1000 collectComments = false;
1001 }
1002
1003 begin_ = beginDoc;
1004 end_ = endDoc;
1005 collectComments_ = collectComments;
1006 current_ = begin_;
1007 lastValueEnd_ = nullptr;
1008 lastValue_ = nullptr;
1009 commentsBefore_.clear();
1010 errors_.clear();
1011 while (!nodes_.empty())
1012 nodes_.pop();
1013 nodes_.push(&root);
1014
1015 // skip byte order mark if it exists at the beginning of the UTF-8 text.
1016 skipBom(features_.skipBom_);
1017 bool successful = readValue();
1018 nodes_.pop();
1019 Token token;
1020 readTokenSkippingComments(token);
1021 if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1022 addError("Extra non-whitespace after JSON value.", token);
1023 return false;
1024 }
1025 if (collectComments_ && !commentsBefore_.empty())
1026 root.setComment(commentsBefore_, commentAfter);
1027 if (features_.strictRoot_) {
1028 if (!root.isArray() && !root.isObject()) {
1029 // Set error location to start of doc, ideally should be first token found
1030 // in doc
1031 token.type_ = tokenError;
1032 token.start_ = beginDoc;
1033 token.end_ = endDoc;
1034 addError(
1035 "A valid JSON document must be either an array or an object value.",
1036 token);
1037 return false;
1038 }
1039 }
1040 return successful;
1041}
1042
1043bool OurReader::readValue() {
1044 // To preserve the old behaviour we cast size_t to int.
1045 if (nodes_.size() > features_.stackLimit_)
1046 throwRuntimeError("Exceeded stackLimit in readValue().");
1047 Token token;
1048 readTokenSkippingComments(token);
1049 bool successful = true;
1050
1051 if (collectComments_ && !commentsBefore_.empty()) {
1052 currentValue().setComment(commentsBefore_, commentBefore);
1053 commentsBefore_.clear();
1054 }
1055
1056 switch (token.type_) {
1057 case tokenObjectBegin:
1058 successful = readObject(token);
1059 currentValue().setOffsetLimit(current_ - begin_);
1060 break;
1061 case tokenArrayBegin:
1062 successful = readArray(token);
1063 currentValue().setOffsetLimit(current_ - begin_);
1064 break;
1065 case tokenNumber:
1066 successful = decodeNumber(token);
1067 break;
1068 case tokenString:
1069 successful = decodeString(token);
1070 break;
1071 case tokenTrue: {
1072 Value v(true);
1073 currentValue().swapPayload(v);
1074 currentValue().setOffsetStart(token.start_ - begin_);
1075 currentValue().setOffsetLimit(token.end_ - begin_);
1076 } break;
1077 case tokenFalse: {
1078 Value v(false);
1079 currentValue().swapPayload(v);
1080 currentValue().setOffsetStart(token.start_ - begin_);
1081 currentValue().setOffsetLimit(token.end_ - begin_);
1082 } break;
1083 case tokenNull: {
1084 Value v;
1085 currentValue().swapPayload(v);
1086 currentValue().setOffsetStart(token.start_ - begin_);
1087 currentValue().setOffsetLimit(token.end_ - begin_);
1088 } break;
1089 case tokenNaN: {
1090 Value v(std::numeric_limits<double>::quiet_NaN());
1091 currentValue().swapPayload(v);
1092 currentValue().setOffsetStart(token.start_ - begin_);
1093 currentValue().setOffsetLimit(token.end_ - begin_);
1094 } break;
1095 case tokenPosInf: {
1096 Value v(std::numeric_limits<double>::infinity());
1097 currentValue().swapPayload(v);
1098 currentValue().setOffsetStart(token.start_ - begin_);
1099 currentValue().setOffsetLimit(token.end_ - begin_);
1100 } break;
1101 case tokenNegInf: {
1102 Value v(-std::numeric_limits<double>::infinity());
1103 currentValue().swapPayload(v);
1104 currentValue().setOffsetStart(token.start_ - begin_);
1105 currentValue().setOffsetLimit(token.end_ - begin_);
1106 } break;
1107 case tokenArraySeparator:
1108 case tokenObjectEnd:
1109 case tokenArrayEnd:
1110 if (features_.allowDroppedNullPlaceholders_) {
1111 // "Un-read" the current token and mark the current value as a null
1112 // token.
1113 current_--;
1114 Value v;
1115 currentValue().swapPayload(v);
1116 currentValue().setOffsetStart(current_ - begin_ - 1);
1117 currentValue().setOffsetLimit(current_ - begin_);
1118 break;
1119 } // else, fall through ...
1120 default:
1121 currentValue().setOffsetStart(token.start_ - begin_);
1122 currentValue().setOffsetLimit(token.end_ - begin_);
1123 return addError("Syntax error: value, object or array expected.", token);
1124 }
1125
1126 if (collectComments_) {
1127 lastValueEnd_ = current_;
1128 lastValueHasAComment_ = false;
1129 lastValue_ = &currentValue();
1130 }
1131
1132 return successful;
1133}
1134
1135bool OurReader::readTokenSkippingComments(Token& token) {
1136 bool success = readToken(token);
1137 if (features_.allowComments_) {
1138 while (success && token.type_ == tokenComment) {
1139 success = readToken(token);
1140 }
1141 }
1142 return success;
1143}
1144
1145bool OurReader::readToken(Token& token) {
1146 skipSpaces();
1147 token.start_ = current_;
1148 Char c = getNextChar();
1149 bool ok = true;
1150 switch (c) {
1151 case '{':
1152 token.type_ = tokenObjectBegin;
1153 break;
1154 case '}':
1155 token.type_ = tokenObjectEnd;
1156 break;
1157 case '[':
1158 token.type_ = tokenArrayBegin;
1159 break;
1160 case ']':
1161 token.type_ = tokenArrayEnd;
1162 break;
1163 case '"':
1164 token.type_ = tokenString;
1165 ok = readString();
1166 break;
1167 case '\'':
1168 if (features_.allowSingleQuotes_) {
1169 token.type_ = tokenString;
1170 ok = readStringSingleQuote();
1171 } else {
1172 // If we don't allow single quotes, this is a failure case.
1173 ok = false;
1174 }
1175 break;
1176 case '/':
1177 token.type_ = tokenComment;
1178 ok = readComment();
1179 break;
1180 case '0':
1181 case '1':
1182 case '2':
1183 case '3':
1184 case '4':
1185 case '5':
1186 case '6':
1187 case '7':
1188 case '8':
1189 case '9':
1190 token.type_ = tokenNumber;
1191 readNumber(false);
1192 break;
1193 case '-':
1194 if (readNumber(true)) {
1195 token.type_ = tokenNumber;
1196 } else {
1197 token.type_ = tokenNegInf;
1198 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1199 }
1200 break;
1201 case '+':
1202 if (readNumber(true)) {
1203 token.type_ = tokenNumber;
1204 } else {
1205 token.type_ = tokenPosInf;
1206 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1207 }
1208 break;
1209 case 't':
1210 token.type_ = tokenTrue;
1211 ok = match("rue", 3);
1212 break;
1213 case 'f':
1214 token.type_ = tokenFalse;
1215 ok = match("alse", 4);
1216 break;
1217 case 'n':
1218 token.type_ = tokenNull;
1219 ok = match("ull", 3);
1220 break;
1221 case 'N':
1222 if (features_.allowSpecialFloats_) {
1223 token.type_ = tokenNaN;
1224 ok = match("aN", 2);
1225 } else {
1226 ok = false;
1227 }
1228 break;
1229 case 'I':
1230 if (features_.allowSpecialFloats_) {
1231 token.type_ = tokenPosInf;
1232 ok = match("nfinity", 7);
1233 } else {
1234 ok = false;
1235 }
1236 break;
1237 case ',':
1238 token.type_ = tokenArraySeparator;
1239 break;
1240 case ':':
1241 token.type_ = tokenMemberSeparator;
1242 break;
1243 case 0:
1244 token.type_ = tokenEndOfStream;
1245 break;
1246 default:
1247 ok = false;
1248 break;
1249 }
1250 if (!ok)
1251 token.type_ = tokenError;
1252 token.end_ = current_;
1253 return ok;
1254}
1255
1256void OurReader::skipSpaces() {
1257 while (current_ != end_) {
1258 Char c = *current_;
1259 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1260 ++current_;
1261 else
1262 break;
1263 }
1264}
1265
1266void OurReader::skipBom(bool skipBom) {
1267 // The default behavior is to skip BOM.
1268 if (skipBom) {
1269 if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1270 begin_ += 3;
1271 current_ = begin_;
1272 }
1273 }
1274}
1275
1276bool OurReader::match(const Char* pattern, int patternLength) {
1277 if (end_ - current_ < patternLength)
1278 return false;
1279 int index = patternLength;
1280 while (index--)
1281 if (current_[index] != pattern[index])
1282 return false;
1283 current_ += patternLength;
1284 return true;
1285}
1286
1287bool OurReader::readComment() {
1288 const Location commentBegin = current_ - 1;
1289 const Char c = getNextChar();
1290 bool successful = false;
1291 bool cStyleWithEmbeddedNewline = false;
1292
1293 const bool isCStyleComment = (c == '*');
1294 const bool isCppStyleComment = (c == '/');
1295 if (isCStyleComment) {
1296 successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1297 } else if (isCppStyleComment) {
1298 successful = readCppStyleComment();
1299 }
1300
1301 if (!successful)
1302 return false;
1303
1304 if (collectComments_) {
1305 CommentPlacement placement = commentBefore;
1306
1307 if (!lastValueHasAComment_) {
1308 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1309 if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1310 placement = commentAfterOnSameLine;
1311 }
1312 }
1313 // The gap between the last value and this comment only grows as more
1314 // comments are consumed, so a later comment can never be on the same
1315 // line as that value. Mark it handled to avoid re-scanning the same
1316 // growing prefix for every following comment (quadratic behavior).
1317 lastValueHasAComment_ = true;
1318 }
1319
1320 addComment(commentBegin, current_, placement);
1321 }
1322 return true;
1323}
1324
1325String OurReader::normalizeEOL(OurReader::Location begin,
1326 OurReader::Location end) {
1327 String normalized;
1328 normalized.reserve(static_cast<size_t>(end - begin));
1329 OurReader::Location current = begin;
1330 while (current != end) {
1331 char c = *current++;
1332 if (c == '\r') {
1333 if (current != end && *current == '\n')
1334 // convert dos EOL
1335 ++current;
1336 // convert Mac EOL
1337 normalized += '\n';
1338 } else {
1339 normalized += c;
1340 }
1341 }
1342 return normalized;
1343}
1344
1345void OurReader::addComment(Location begin, Location end,
1346 CommentPlacement placement) {
1347 assert(collectComments_);
1348 const String& normalized = normalizeEOL(begin, end);
1349 if (placement == commentAfterOnSameLine) {
1350 assert(lastValue_ != nullptr);
1351 lastValue_->setComment(normalized, placement);
1352 } else {
1353 commentsBefore_ += normalized;
1354 }
1355}
1356
1357bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1358 *containsNewLineResult = false;
1359
1360 while ((current_ + 1) < end_) {
1361 Char c = getNextChar();
1362 if (c == '*' && *current_ == '/')
1363 break;
1364 if (c == '\n')
1365 *containsNewLineResult = true;
1366 }
1367
1368 return getNextChar() == '/';
1369}
1370
1371bool OurReader::readCppStyleComment() {
1372 while (current_ != end_) {
1373 Char c = getNextChar();
1374 if (c == '\n')
1375 break;
1376 if (c == '\r') {
1377 // Consume DOS EOL. It will be normalized in addComment.
1378 if (current_ != end_ && *current_ == '\n')
1379 getNextChar();
1380 // Break on Moc OS 9 EOL.
1381 break;
1382 }
1383 }
1384 return true;
1385}
1386
1387bool OurReader::readNumber(bool checkInf) {
1388 Location p = current_;
1389 if (checkInf && p != end_ && *p == 'I') {
1390 current_ = ++p;
1391 return false;
1392 }
1393 char c = '0'; // stopgap for already consumed character
1394 // integral part
1395 while (c >= '0' && c <= '9')
1396 c = (current_ = p) < end_ ? *p++ : '\0';
1397 // fractional part
1398 if (c == '.') {
1399 c = (current_ = p) < end_ ? *p++ : '\0';
1400 while (c >= '0' && c <= '9')
1401 c = (current_ = p) < end_ ? *p++ : '\0';
1402 }
1403 // exponential part
1404 if (c == 'e' || c == 'E') {
1405 c = (current_ = p) < end_ ? *p++ : '\0';
1406 if (c == '+' || c == '-')
1407 c = (current_ = p) < end_ ? *p++ : '\0';
1408 while (c >= '0' && c <= '9')
1409 c = (current_ = p) < end_ ? *p++ : '\0';
1410 }
1411 return true;
1412}
1413bool OurReader::readString() {
1414 Char c = 0;
1415 while (current_ != end_) {
1416 c = getNextChar();
1417 if (c == '\\')
1418 getNextChar();
1419 else if (c == '"')
1420 break;
1421 }
1422 return c == '"';
1423}
1424
1425bool OurReader::readStringSingleQuote() {
1426 Char c = 0;
1427 while (current_ != end_) {
1428 c = getNextChar();
1429 if (c == '\\')
1430 getNextChar();
1431 else if (c == '\'')
1432 break;
1433 }
1434 return c == '\'';
1435}
1436
1437bool OurReader::readObject(Token& token) {
1438 Token tokenName;
1439 String name;
1440 Value init(objectValue);
1441 currentValue().swapPayload(init);
1442 currentValue().setOffsetStart(token.start_ - begin_);
1443 while (readTokenSkippingComments(tokenName)) {
1444 if (tokenName.type_ == tokenObjectEnd &&
1445 (name.empty() ||
1446 features_.allowTrailingCommas_)) // empty object or trailing comma
1447 return true;
1448 name.clear();
1449 if (tokenName.type_ == tokenString) {
1450 if (!decodeString(tokenName, name))
1451 return recoverFromError(tokenObjectEnd);
1452 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1453 Value numberName;
1454 if (!decodeNumber(tokenName, numberName))
1455 return recoverFromError(tokenObjectEnd);
1456 name = numberName.asString();
1457 } else {
1458 break;
1459 }
1460 if (name.length() >= (1U << 30))
1461 throwRuntimeError("keylength >= 2^30");
1462 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1463 String msg = "Duplicate key: '" + name + "'";
1464 return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1465 }
1466
1467 Token colon;
1468 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1469 return addErrorAndRecover("Missing ':' after object member name", colon,
1470 tokenObjectEnd);
1471 }
1472 Value& value = currentValue()[name];
1473 nodes_.push(&value);
1474 bool ok = readValue();
1475 nodes_.pop();
1476 if (!ok) // error already set
1477 return recoverFromError(tokenObjectEnd);
1478
1479 Token comma;
1480 if (!readTokenSkippingComments(comma) ||
1481 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
1482 return addErrorAndRecover("Missing ',' or '}' in object declaration",
1483 comma, tokenObjectEnd);
1484 }
1485 if (comma.type_ == tokenObjectEnd)
1486 return true;
1487 }
1488 return addErrorAndRecover("Missing '}' or object member name", tokenName,
1489 tokenObjectEnd);
1490}
1491
1492bool OurReader::readArray(Token& token) {
1493 Value init(arrayValue);
1494 currentValue().swapPayload(init);
1495 currentValue().setOffsetStart(token.start_ - begin_);
1496 int index = 0;
1497 for (;;) {
1498 skipSpaces();
1499 if (current_ != end_ && *current_ == ']' &&
1500 (index == 0 ||
1501 (features_.allowTrailingCommas_ &&
1502 !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1503 // comma
1504 {
1505 Token endArray;
1506 readToken(endArray);
1507 return true;
1508 }
1509 Value& value = currentValue()[index++];
1510 nodes_.push(&value);
1511 bool ok = readValue();
1512 nodes_.pop();
1513 if (!ok) // error already set
1514 return recoverFromError(tokenArrayEnd);
1515
1516 Token currentToken;
1517 // Accept Comment after last item in the array.
1518 ok = readTokenSkippingComments(currentToken);
1519 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1520 currentToken.type_ != tokenArrayEnd);
1521 if (!ok || badTokenType) {
1522 return addErrorAndRecover("Missing ',' or ']' in array declaration",
1523 currentToken, tokenArrayEnd);
1524 }
1525 if (currentToken.type_ == tokenArrayEnd)
1526 break;
1527 }
1528 return true;
1529}
1530
1531bool OurReader::decodeNumber(Token& token) {
1532 Value decoded;
1533 if (!decodeNumber(token, decoded))
1534 return false;
1535 currentValue().swapPayload(decoded);
1536 currentValue().setOffsetStart(token.start_ - begin_);
1537 currentValue().setOffsetLimit(token.end_ - begin_);
1538 return true;
1539}
1540
1541bool OurReader::decodeNumber(Token& token, Value& decoded) {
1542 // Attempts to parse the number as an integer. If the number is
1543 // larger than the maximum supported value of an integer then
1544 // we decode the number as a double.
1545 Location current = token.start_;
1546 const bool isNegative = *current == '-';
1547 if (isNegative) {
1548 ++current;
1549 }
1550
1551 // We assume we can represent the largest and smallest integer types as
1552 // unsigned integers with separate sign. This is only true if they can fit
1553 // into an unsigned integer.
1554 static_assert(Value::maxLargestInt <= Value::maxLargestUInt,
1555 "Int must be smaller than UInt");
1556
1557 // We need to convert minLargestInt into a positive number. The easiest way
1558 // to do this conversion is to assume our "threshold" value of minLargestInt
1559 // divided by 10 can fit in maxLargestInt when absolute valued. This should
1560 // be a safe assumption.
1561 static_assert(Value::minLargestInt <= -Value::maxLargestInt,
1562 "The absolute value of minLargestInt must be greater than or "
1563 "equal to maxLargestInt");
1564 static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1565 "The absolute value of minLargestInt must be only 1 magnitude "
1566 "larger than maxLargest Int");
1567
1568 static constexpr Value::LargestUInt positive_threshold =
1569 Value::maxLargestUInt / 10;
1570 static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1571
1572 // For the negative values, we have to be more careful. Since typically
1573 // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1574 // then take the inverse. This assumes that minLargestInt is only a single
1575 // power of 10 different in magnitude, which we check above. For the last
1576 // digit, we take the modulus before negating for the same reason.
1577 static constexpr auto negative_threshold =
1578 Value::LargestUInt(-(Value::minLargestInt / 10));
1579 static constexpr auto negative_last_digit =
1580 Value::UInt(-(Value::minLargestInt % 10));
1581
1582 const Value::LargestUInt threshold =
1583 isNegative ? negative_threshold : positive_threshold;
1584 const Value::UInt max_last_digit =
1585 isNegative ? negative_last_digit : positive_last_digit;
1586
1587 Value::LargestUInt value = 0;
1588 while (current < token.end_) {
1589 Char c = *current++;
1590 if (c < '0' || c > '9')
1591 return decodeDouble(token, decoded);
1592
1593 const auto digit(static_cast<Value::UInt>(c - '0'));
1594 if (value >= threshold) {
1595 // We've hit or exceeded the max value divided by 10 (rounded down). If
1596 // a) we've only just touched the limit, meaning value == threshold,
1597 // b) this is the last digit, or
1598 // c) it's small enough to fit in that rounding delta, we're okay.
1599 // Otherwise treat this number as a double to avoid overflow.
1600 if (value > threshold || current != token.end_ ||
1601 digit > max_last_digit) {
1602 return decodeDouble(token, decoded);
1603 }
1604 }
1605 value = value * 10 + digit;
1606 }
1607
1608 if (isNegative) {
1609 // We use the same magnitude assumption here, just in case.
1610 const auto last_digit = static_cast<Value::UInt>(value % 10);
1611 decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1612 } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1613 decoded = Value::LargestInt(value);
1614 } else {
1615 decoded = value;
1616 }
1617
1618 return true;
1619}
1620
1621bool OurReader::decodeDouble(Token& token) {
1622 Value decoded;
1623 if (!decodeDouble(token, decoded))
1624 return false;
1625 currentValue().swapPayload(decoded);
1626 currentValue().setOffsetStart(token.start_ - begin_);
1627 currentValue().setOffsetLimit(token.end_ - begin_);
1628 return true;
1629}
1630
1631bool OurReader::decodeDouble(Token& token, Value& decoded) {
1632 double value = 0;
1633 IStringStream is(String(token.start_, token.end_));
1634 is.imbue(std::locale::classic());
1635 if (!(is >> value)) {
1636 if (value == std::numeric_limits<double>::max())
1637 value = std::numeric_limits<double>::infinity();
1638 else if (value == std::numeric_limits<double>::lowest())
1639 value = -std::numeric_limits<double>::infinity();
1640 else if (!std::isinf(value))
1641 return addError(
1642 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1643 }
1644 decoded = value;
1645 return true;
1646}
1647
1648bool OurReader::decodeString(Token& token) {
1649 String decoded_string;
1650 if (!decodeString(token, decoded_string))
1651 return false;
1652 Value decoded(decoded_string);
1653 currentValue().swapPayload(decoded);
1654 currentValue().setOffsetStart(token.start_ - begin_);
1655 currentValue().setOffsetLimit(token.end_ - begin_);
1656 return true;
1657}
1658
1659bool OurReader::decodeString(Token& token, String& decoded) {
1660 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1661 Location current = token.start_ + 1; // skip '"'
1662 Location end = token.end_ - 1; // do not include '"'
1663 while (current != end) {
1664 Char c = *current++;
1665 if (c == '"')
1666 break;
1667 if (c == '\\') {
1668 if (current == end)
1669 return addError("Empty escape sequence in string", token, current);
1670 Char escape = *current++;
1671 switch (escape) {
1672 case '"':
1673 decoded += '"';
1674 break;
1675 case '/':
1676 decoded += '/';
1677 break;
1678 case '\\':
1679 decoded += '\\';
1680 break;
1681 case 'b':
1682 decoded += '\b';
1683 break;
1684 case 'f':
1685 decoded += '\f';
1686 break;
1687 case 'n':
1688 decoded += '\n';
1689 break;
1690 case 'r':
1691 decoded += '\r';
1692 break;
1693 case 't':
1694 decoded += '\t';
1695 break;
1696 case 'u': {
1697 unsigned int unicode;
1698 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1699 return false;
1700 decoded += codePointToUTF8(unicode);
1701 } break;
1702 default:
1703 return addError("Bad escape sequence in string", token, current);
1704 }
1705 } else {
1706 if (static_cast<unsigned char>(c) < 0x20)
1707 return addError("Control character in string", token, current - 1);
1708 decoded += c;
1709 }
1710 }
1711 return true;
1712}
1713
1714bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1715 Location end, unsigned int& unicode) {
1716
1717 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1718 return false;
1719 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1720 // surrogate pairs
1721 if (end - current < 6)
1722 return addError(
1723 "additional six characters expected to parse unicode surrogate pair.",
1724 token, current);
1725 if (*(current++) == '\\' && *(current++) == 'u') {
1726 unsigned int surrogatePair;
1727 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1728 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1729 } else
1730 return false;
1731 } else
1732 return addError("expecting another \\u token to begin the second half of "
1733 "a unicode surrogate pair",
1734 token, current);
1735 }
1736 return true;
1737}
1738
1739bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1740 Location end,
1741 unsigned int& ret_unicode) {
1742 if (end - current < 4)
1743 return addError(
1744 "Bad unicode escape sequence in string: four digits expected.", token,
1745 current);
1746 int unicode = 0;
1747 for (int index = 0; index < 4; ++index) {
1748 Char c = *current++;
1749 unicode *= 16;
1750 if (c >= '0' && c <= '9')
1751 unicode += c - '0';
1752 else if (c >= 'a' && c <= 'f')
1753 unicode += c - 'a' + 10;
1754 else if (c >= 'A' && c <= 'F')
1755 unicode += c - 'A' + 10;
1756 else
1757 return addError(
1758 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1759 token, current);
1760 }
1761 ret_unicode = static_cast<unsigned int>(unicode);
1762 return true;
1763}
1764
1765bool OurReader::addError(const String& message, Token& token, Location extra) {
1766 ErrorInfo info;
1767 info.token_ = token;
1768 info.message_ = message;
1769 info.extra_ = extra;
1770 errors_.push_back(info);
1771 return false;
1772}
1773
1774bool OurReader::recoverFromError(TokenType skipUntilToken) {
1775 size_t errorCount = errors_.size();
1776 Token skip;
1777 for (;;) {
1778 if (!readToken(skip))
1779 errors_.resize(errorCount); // discard errors caused by recovery
1780 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1781 break;
1782 }
1783 errors_.resize(errorCount);
1784 return false;
1785}
1786
1787bool OurReader::addErrorAndRecover(const String& message, Token& token,
1788 TokenType skipUntilToken) {
1789 addError(message, token);
1790 return recoverFromError(skipUntilToken);
1791}
1792
1793Value& OurReader::currentValue() { return *(nodes_.top()); }
1794
1795OurReader::Char OurReader::getNextChar() {
1796 if (current_ == end_)
1797 return 0;
1798 return *current_++;
1799}
1800
1801void OurReader::getLocationLineAndColumn(Location location, int& line,
1802 int& column) const {
1803 Location current = begin_;
1804 Location lastLineStart = current;
1805 line = 0;
1806 while (current < location && current != end_) {
1807 Char c = *current++;
1808 if (c == '\r') {
1809 if (current != end_ && *current == '\n')
1810 ++current;
1811 lastLineStart = current;
1812 ++line;
1813 } else if (c == '\n') {
1814 lastLineStart = current;
1815 ++line;
1816 }
1817 }
1818 // column & line start at 1
1819 column = int(location - lastLineStart) + 1;
1820 ++line;
1821}
1822
1823String OurReader::getLocationLineAndColumn(Location location) const {
1824 int line, column;
1825 getLocationLineAndColumn(location, line, column);
1826 char buffer[18 + 16 + 16 + 1];
1827 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1828 return buffer;
1829}
1830
1831String OurReader::getFormattedErrorMessages() const {
1832 String formattedMessage;
1833 for (const auto& error : errors_) {
1834 formattedMessage +=
1835 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1836 formattedMessage += " " + error.message_ + "\n";
1837 if (error.extra_)
1838 formattedMessage +=
1839 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1840 }
1841 return formattedMessage;
1842}
1843
1844std::vector<CharReader::StructuredError>
1845OurReader::getStructuredErrors() const {
1846 std::vector<CharReader::StructuredError> allErrors;
1847 for (const auto& error : errors_) {
1848 CharReader::StructuredError structured;
1849 structured.offset_start = error.token_.start_ - begin_;
1850 structured.offset_limit = error.token_.end_ - begin_;
1851 structured.message = error.message_;
1852 allErrors.push_back(structured);
1853 }
1854 return allErrors;
1855}
1856
1857class OurCharReader : public CharReader {
1858
1859public:
1860 OurCharReader(bool collectComments, OurFeatures const& features)
1861 : CharReader(
1862 std::unique_ptr<OurImpl>(new OurImpl(collectComments, features))) {}
1863
1864protected:
1865 class OurImpl : public Impl {
1866 public:
1867 OurImpl(bool collectComments, OurFeatures const& features)
1868 : collectComments_(collectComments), reader_(features) {}
1869
1870 bool parse(char const* beginDoc, char const* endDoc, Value* root,
1871 String* errs) override {
1872 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1873 if (errs) {
1874 *errs = reader_.getFormattedErrorMessages();
1875 }
1876 return ok;
1877 }
1878
1879 std::vector<CharReader::StructuredError>
1880 getStructuredErrors() const override {
1881 return reader_.getStructuredErrors();
1882 }
1883
1884 private:
1885 bool const collectComments_;
1886 OurReader reader_;
1887 };
1888};
1889
1890CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
1891CharReaderBuilder::~CharReaderBuilder() = default;
1892CharReader* CharReaderBuilder::newCharReader() const {
1893 bool collectComments = settings_["collectComments"].asBool();
1894 OurFeatures features = OurFeatures::all();
1895 features.allowComments_ = settings_["allowComments"].asBool();
1896 features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1897 features.strictRoot_ = settings_["strictRoot"].asBool();
1898 features.allowDroppedNullPlaceholders_ =
1899 settings_["allowDroppedNullPlaceholders"].asBool();
1900 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1901 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1902
1903 // Stack limit is always a size_t, so we get this as an unsigned int
1904 // regardless of it we have 64-bit integer support enabled.
1905 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1906 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1907 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1908 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1909 features.skipBom_ = settings_["skipBom"].asBool();
1910 return new OurCharReader(collectComments, features);
1911}
1912
1914 static const auto& valid_keys = *new std::set<String>{
1915 "collectComments",
1916 "allowComments",
1917 "allowTrailingCommas",
1918 "strictRoot",
1919 "allowDroppedNullPlaceholders",
1920 "allowNumericKeys",
1921 "allowSingleQuotes",
1922 "stackLimit",
1923 "failIfExtra",
1924 "rejectDupKeys",
1925 "allowSpecialFloats",
1926 "skipBom",
1927 };
1928 for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1929 auto key = si.name();
1930 if (valid_keys.count(key))
1931 continue;
1932 if (invalid)
1933 (*invalid)[key] = *si;
1934 else
1935 return false;
1936 }
1937 return invalid ? invalid->empty() : true;
1938}
1939
1941 return settings_[key];
1942}
1943// static
1946 (*settings)["allowComments"] = false;
1947 (*settings)["allowTrailingCommas"] = false;
1948 (*settings)["strictRoot"] = true;
1949 (*settings)["allowDroppedNullPlaceholders"] = false;
1950 (*settings)["allowNumericKeys"] = false;
1951 (*settings)["allowSingleQuotes"] = false;
1952 (*settings)["stackLimit"] = 256;
1953 (*settings)["failIfExtra"] = true;
1954 (*settings)["rejectDupKeys"] = true;
1955 (*settings)["allowSpecialFloats"] = false;
1956 (*settings)["skipBom"] = true;
1958}
1959// static
1962 (*settings)["collectComments"] = true;
1963 (*settings)["allowComments"] = true;
1964 (*settings)["allowTrailingCommas"] = true;
1965 (*settings)["strictRoot"] = false;
1966 (*settings)["allowDroppedNullPlaceholders"] = false;
1967 (*settings)["allowNumericKeys"] = false;
1968 (*settings)["allowSingleQuotes"] = false;
1969 (*settings)["stackLimit"] = 256;
1970 (*settings)["failIfExtra"] = false;
1971 (*settings)["rejectDupKeys"] = false;
1972 (*settings)["allowSpecialFloats"] = false;
1973 (*settings)["skipBom"] = true;
1975}
1976// static
1979 (*settings)["allowComments"] = false;
1980 (*settings)["allowTrailingCommas"] = false;
1981 (*settings)["strictRoot"] = false;
1982 (*settings)["allowDroppedNullPlaceholders"] = false;
1983 (*settings)["allowNumericKeys"] = false;
1984 (*settings)["allowSingleQuotes"] = false;
1985 (*settings)["stackLimit"] = 256;
1986 (*settings)["failIfExtra"] = true;
1987 (*settings)["rejectDupKeys"] = false;
1988 (*settings)["allowSpecialFloats"] = false;
1989 (*settings)["skipBom"] = false;
1991}
1992
1993std::vector<CharReader::StructuredError>
1995 return _impl->getStructuredErrors();
1996}
1997
1998bool CharReader::parse(char const* beginDoc, char const* endDoc, Value* root,
1999 String* errs) {
2000 return _impl->parse(beginDoc, endDoc, root, errs);
2001}
2002
2004// global functions
2005
2006bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
2007 String* errs) {
2008 OStringStream ssin;
2009 ssin << sin.rdbuf();
2010 String doc = std::move(ssin).str();
2011 char const* begin = doc.data();
2012 char const* end = begin + doc.size();
2013 // Note that we do not actually need a null-terminator.
2014 CharReaderPtr const reader(fact.newCharReader());
2015 return reader->parse(begin, end, root, errs);
2016}
2017
2020 String errs;
2021 bool ok = parseFromStream(b, sin, &root, &errs);
2022 if (!ok) {
2023 throwRuntimeError(errs);
2024 }
2025 return sin;
2026}
2027
2028} // namespace Json
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
Build a CharReader implementation.
Definition reader.h:317
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
static void ecma404Mode(Json::Value *settings)
ECMA-404 mode.
Value & operator[](const String &key)
A simple way to update a specific setting.
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
bool validate(Json::Value *invalid) const
Configuration of this builder.
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
virtual bool parse(char const *beginDoc, char const *endDoc, Value *root, String *errs)
Read a Value from a JSON document.
Configuration passed to reader and writer.
bool strictRoot_
true if root must be either an array or an object value.
bool allowComments_
true if comments are allowed. Default: true.
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Features()
Initialize the configuration like JsonConfig::allFeatures;.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
bool allowNumericKeys_
true if numeric object keys are allowed. Default: false.
char Char
Definition reader.h:39
Reader()
Constructs a Reader allowing all features for parsing.
bool pushError(const Value &value, const String &message)
Add a semantic error message.
bool good() const
Return whether there are any errors.
const Char * Location
Definition reader.h:40
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
String getFormattedErrorMessages() const
Returns a user-friendly string that lists errors in the parsed document.
Represents a JSON value.
Definition value.h:207
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
static constexpr LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition value.h:241
Json::UInt UInt
Definition value.h:215
bool isArray() const
void setComment(const char *comment, size_t len, CommentPlacement placement)
Comments must be //... or /* ... *‍/.
Definition value.h:668
ptrdiff_t getOffsetLimit() const
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
void setOffsetLimit(ptrdiff_t limit)
Json::LargestInt LargestInt
Definition value.h:221
Json::LargestUInt LargestUInt
Definition value.h:222
bool isObject() const
void setOffsetStart(ptrdiff_t start)
static constexpr Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition value.h:248
static constexpr LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition value.h:243
static constexpr LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition value.h:238
ptrdiff_t getOffsetStart() const
#define JSON_API
If defined, indicates that the source file is amalgamated to prevent private header inclusion.
Definition config.h:50
#define jsoncpp_snprintf
Definition config.h:63
#define JSONCPP_DEPRECATED_STACK_LIMIT
static size_t const stackLimit_g
JSON (JavaScript Object Notation).
Definition allocator.h:16
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition config.h:139
size_t & newlineScanByteCountForTesting()
CommentPlacement
Definition value.h:132
@ commentAfterOnSameLine
a comment just after a value on the same line
Definition value.h:134
@ commentBefore
a comment placed on the line before a value
Definition value.h:133
@ commentAfter
a comment on the line after a value (only make sense for
Definition value.h:135
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition config.h:136
std::unique_ptr< CharReader > CharReaderPtr
@ arrayValue
array value (ordered list)
Definition value.h:128
@ objectValue
object value (collection of name/value pairs).
Definition value.h:129
std::istream IStream
Definition config.h:142
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition json_tool.h:39
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition config.h:135
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
An error tagged with where in the JSON text it was encountered.
Definition reader.h:47