JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1// Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2// Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3// Distributed under MIT license, or public domain if desired and
4// recognized in your jurisdiction.
5// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7#if !defined(JSON_IS_AMALGAMATION)
8#include "json_tool.h"
9#include <json/assertions.h>
10#include <json/reader.h>
11#include <json/value.h>
12#endif // if !defined(JSON_IS_AMALGAMATION)
13#include <algorithm>
14#include <cassert>
15#include <cmath>
16#include <cstring>
17#include <iostream>
18#include <istream>
19#include <limits>
20#include <memory>
21#include <set>
22#include <sstream>
23#include <utility>
24
25#include <cstdio>
26#if __cplusplus >= 201103L
27
28#if !defined(sscanf)
29#define sscanf std::sscanf
30#endif
31
32#endif //__cplusplus
33
34#if defined(_MSC_VER)
35#if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
36#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
37#endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
38#endif //_MSC_VER
39
40#if defined(_MSC_VER)
41// Disable warning about strdup being deprecated.
42#pragma warning(disable : 4996)
43#endif
44
45// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
46// time to change the stack limit
47#if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
48#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
49#endif
50
51static size_t const stackLimit_g =
52 JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
53
54namespace Json {
55
56#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
57using CharReaderPtr = std::unique_ptr<CharReader>;
58#else
59using CharReaderPtr = std::auto_ptr<CharReader>;
60#endif
61
62// Implementation of class Features
63// ////////////////////////////////
64
65Features::Features() = default;
66
67Features Features::all() { return {}; }
68
70 Features features;
71 features.allowComments_ = false;
72 features.strictRoot_ = true;
73 features.allowDroppedNullPlaceholders_ = false;
74 features.allowNumericKeys_ = false;
75 return features;
76}
77
78// Implementation of class Reader
79// ////////////////////////////////
80
81bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
82 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
83}
84
85// Class Reader
86// //////////////////////////////////////////////////////////////////
87
88Reader::Reader() : features_(Features::all()) {}
89
90Reader::Reader(const Features& features) : features_(features) {}
91
92bool Reader::parse(const std::string& document, Value& root,
93 bool collectComments) {
94 document_.assign(document.begin(), document.end());
95 const char* begin = document_.c_str();
96 const char* end = begin + document_.length();
97 return parse(begin, end, root, collectComments);
98}
99
100bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
101 // std::istream_iterator<char> begin(is);
102 // std::istream_iterator<char> end;
103 // Those would allow streamed input from a file, if parse() were a
104 // template function.
105
106 // Since String is reference-counted, this at least does not
107 // create an extra copy.
108 String doc(std::istreambuf_iterator<char>(is), {});
109 return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
110}
111
112bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
113 bool collectComments) {
114 if (!features_.allowComments_) {
115 collectComments = false;
116 }
117
118 begin_ = beginDoc;
119 end_ = endDoc;
120 collectComments_ = collectComments;
121 current_ = begin_;
122 lastValueEnd_ = nullptr;
123 lastValue_ = nullptr;
124 commentsBefore_.clear();
125 errors_.clear();
126 while (!nodes_.empty())
127 nodes_.pop();
128 nodes_.push(&root);
129
130 bool successful = readValue();
131 Token token;
132 readTokenSkippingComments(token);
133 if (collectComments_ && !commentsBefore_.empty())
134 root.setComment(commentsBefore_, commentAfter);
135 if (features_.strictRoot_) {
136 if (!root.isArray() && !root.isObject()) {
137 // Set error location to start of doc, ideally should be first token found
138 // in doc
139 token.type_ = tokenError;
140 token.start_ = beginDoc;
141 token.end_ = endDoc;
142 addError(
143 "A valid JSON document must be either an array or an object value.",
144 token);
145 return false;
146 }
147 }
148 return successful;
149}
150
151bool Reader::readValue() {
152 // readValue() may call itself only if it calls readObject() or ReadArray().
153 // These methods execute nodes_.push() just before and nodes_.pop)() just
154 // after calling readValue(). parse() executes one nodes_.push(), so > instead
155 // of >=.
156 if (nodes_.size() > stackLimit_g)
157 throwRuntimeError("Exceeded stackLimit in readValue().");
158
159 Token token;
160 readTokenSkippingComments(token);
161 bool successful = true;
162
163 if (collectComments_ && !commentsBefore_.empty()) {
164 currentValue().setComment(commentsBefore_, commentBefore);
165 commentsBefore_.clear();
166 }
167
168 switch (token.type_) {
169 case tokenObjectBegin:
170 successful = readObject(token);
171 currentValue().setOffsetLimit(current_ - begin_);
172 break;
173 case tokenArrayBegin:
174 successful = readArray(token);
175 currentValue().setOffsetLimit(current_ - begin_);
176 break;
177 case tokenNumber:
178 successful = decodeNumber(token);
179 break;
180 case tokenString:
181 successful = decodeString(token);
182 break;
183 case tokenTrue: {
184 Value v(true);
185 currentValue().swapPayload(v);
186 currentValue().setOffsetStart(token.start_ - begin_);
187 currentValue().setOffsetLimit(token.end_ - begin_);
188 } break;
189 case tokenFalse: {
190 Value v(false);
191 currentValue().swapPayload(v);
192 currentValue().setOffsetStart(token.start_ - begin_);
193 currentValue().setOffsetLimit(token.end_ - begin_);
194 } break;
195 case tokenNull: {
196 Value v;
197 currentValue().swapPayload(v);
198 currentValue().setOffsetStart(token.start_ - begin_);
199 currentValue().setOffsetLimit(token.end_ - begin_);
200 } break;
201 case tokenArraySeparator:
202 case tokenObjectEnd:
203 case tokenArrayEnd:
204 if (features_.allowDroppedNullPlaceholders_) {
205 // "Un-read" the current token and mark the current value as a null
206 // token.
207 current_--;
208 Value v;
209 currentValue().swapPayload(v);
210 currentValue().setOffsetStart(current_ - begin_ - 1);
211 currentValue().setOffsetLimit(current_ - begin_);
212 break;
213 } // Else, fall through...
214 default:
215 currentValue().setOffsetStart(token.start_ - begin_);
216 currentValue().setOffsetLimit(token.end_ - begin_);
217 return addError("Syntax error: value, object or array expected.", token);
218 }
219
220 if (collectComments_) {
221 lastValueEnd_ = current_;
222 lastValue_ = &currentValue();
223 }
224
225 return successful;
226}
227
228bool Reader::readTokenSkippingComments(Token& token) {
229 bool success = readToken(token);
230 if (features_.allowComments_) {
231 while (success && token.type_ == tokenComment) {
232 success = readToken(token);
233 }
234 }
235 return success;
236}
237
238bool Reader::readToken(Token& token) {
239 skipSpaces();
240 token.start_ = current_;
241 Char c = getNextChar();
242 bool ok = true;
243 switch (c) {
244 case '{':
245 token.type_ = tokenObjectBegin;
246 break;
247 case '}':
248 token.type_ = tokenObjectEnd;
249 break;
250 case '[':
251 token.type_ = tokenArrayBegin;
252 break;
253 case ']':
254 token.type_ = tokenArrayEnd;
255 break;
256 case '"':
257 token.type_ = tokenString;
258 ok = readString();
259 break;
260 case '/':
261 token.type_ = tokenComment;
262 ok = readComment();
263 break;
264 case '0':
265 case '1':
266 case '2':
267 case '3':
268 case '4':
269 case '5':
270 case '6':
271 case '7':
272 case '8':
273 case '9':
274 case '-':
275 token.type_ = tokenNumber;
276 readNumber();
277 break;
278 case 't':
279 token.type_ = tokenTrue;
280 ok = match("rue", 3);
281 break;
282 case 'f':
283 token.type_ = tokenFalse;
284 ok = match("alse", 4);
285 break;
286 case 'n':
287 token.type_ = tokenNull;
288 ok = match("ull", 3);
289 break;
290 case ',':
291 token.type_ = tokenArraySeparator;
292 break;
293 case ':':
294 token.type_ = tokenMemberSeparator;
295 break;
296 case 0:
297 token.type_ = tokenEndOfStream;
298 break;
299 default:
300 ok = false;
301 break;
302 }
303 if (!ok)
304 token.type_ = tokenError;
305 token.end_ = current_;
306 return ok;
307}
308
309void Reader::skipSpaces() {
310 while (current_ != end_) {
311 Char c = *current_;
312 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
313 ++current_;
314 else
315 break;
316 }
317}
318
319bool Reader::match(const Char* pattern, int patternLength) {
320 if (end_ - current_ < patternLength)
321 return false;
322 int index = patternLength;
323 while (index--)
324 if (current_[index] != pattern[index])
325 return false;
326 current_ += patternLength;
327 return true;
328}
329
330bool Reader::readComment() {
331 Location commentBegin = current_ - 1;
332 Char c = getNextChar();
333 bool successful = false;
334 if (c == '*')
335 successful = readCStyleComment();
336 else if (c == '/')
337 successful = readCppStyleComment();
338 if (!successful)
339 return false;
340
341 if (collectComments_) {
343 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
344 if (c != '*' || !containsNewLine(commentBegin, current_))
345 placement = commentAfterOnSameLine;
346 }
347
348 addComment(commentBegin, current_, placement);
349 }
350 return true;
351}
352
353String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
354 String normalized;
355 normalized.reserve(static_cast<size_t>(end - begin));
356 Reader::Location current = begin;
357 while (current != end) {
358 char c = *current++;
359 if (c == '\r') {
360 if (current != end && *current == '\n')
361 // convert dos EOL
362 ++current;
363 // convert Mac EOL
364 normalized += '\n';
365 } else {
366 normalized += c;
367 }
368 }
369 return normalized;
370}
371
372void Reader::addComment(Location begin, Location end,
373 CommentPlacement placement) {
374 assert(collectComments_);
375 const String& normalized = normalizeEOL(begin, end);
376 if (placement == commentAfterOnSameLine) {
377 assert(lastValue_ != nullptr);
378 lastValue_->setComment(normalized, placement);
379 } else {
380 commentsBefore_ += normalized;
381 }
382}
383
384bool Reader::readCStyleComment() {
385 while ((current_ + 1) < end_) {
386 Char c = getNextChar();
387 if (c == '*' && *current_ == '/')
388 break;
389 }
390 return getNextChar() == '/';
391}
392
393bool Reader::readCppStyleComment() {
394 while (current_ != end_) {
395 Char c = getNextChar();
396 if (c == '\n')
397 break;
398 if (c == '\r') {
399 // Consume DOS EOL. It will be normalized in addComment.
400 if (current_ != end_ && *current_ == '\n')
401 getNextChar();
402 // Break on Moc OS 9 EOL.
403 break;
404 }
405 }
406 return true;
407}
408
409void Reader::readNumber() {
410 Location p = current_;
411 char c = '0'; // stopgap for already consumed character
412 // integral part
413 while (c >= '0' && c <= '9')
414 c = (current_ = p) < end_ ? *p++ : '\0';
415 // fractional part
416 if (c == '.') {
417 c = (current_ = p) < end_ ? *p++ : '\0';
418 while (c >= '0' && c <= '9')
419 c = (current_ = p) < end_ ? *p++ : '\0';
420 }
421 // exponential part
422 if (c == 'e' || c == 'E') {
423 c = (current_ = p) < end_ ? *p++ : '\0';
424 if (c == '+' || c == '-')
425 c = (current_ = p) < end_ ? *p++ : '\0';
426 while (c >= '0' && c <= '9')
427 c = (current_ = p) < end_ ? *p++ : '\0';
428 }
429}
430
431bool Reader::readString() {
432 Char c = '\0';
433 while (current_ != end_) {
434 c = getNextChar();
435 if (c == '\\')
436 getNextChar();
437 else if (c == '"')
438 break;
439 }
440 return c == '"';
441}
442
443bool Reader::readObject(Token& token) {
444 Token tokenName;
445 String name;
446 Value init(objectValue);
447 currentValue().swapPayload(init);
448 currentValue().setOffsetStart(token.start_ - begin_);
449 while (readTokenSkippingComments(tokenName)) {
450 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
451 return true;
452 name.clear();
453 if (tokenName.type_ == tokenString) {
454 if (!decodeString(tokenName, name))
455 return recoverFromError(tokenObjectEnd);
456 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
457 Value numberName;
458 if (!decodeNumber(tokenName, numberName))
459 return recoverFromError(tokenObjectEnd);
460 name = numberName.asString();
461 } else {
462 break;
463 }
464
465 Token colon;
466 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
467 return addErrorAndRecover("Missing ':' after object member name", colon,
468 tokenObjectEnd);
469 }
470 Value& value = currentValue()[name];
471 nodes_.push(&value);
472 bool ok = readValue();
473 nodes_.pop();
474 if (!ok) // error already set
475 return recoverFromError(tokenObjectEnd);
476
477 Token comma;
478 if (!readTokenSkippingComments(comma) ||
479 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
480 return addErrorAndRecover("Missing ',' or '}' in object declaration",
481 comma, tokenObjectEnd);
482 }
483 if (comma.type_ == tokenObjectEnd)
484 return true;
485 }
486 return addErrorAndRecover("Missing '}' or object member name", tokenName,
487 tokenObjectEnd);
488}
489
490bool Reader::readArray(Token& token) {
491 Value init(arrayValue);
492 currentValue().swapPayload(init);
493 currentValue().setOffsetStart(token.start_ - begin_);
494 skipSpaces();
495 if (current_ != end_ && *current_ == ']') // empty array
496 {
497 Token endArray;
498 readToken(endArray);
499 return true;
500 }
501 int index = 0;
502 for (;;) {
503 Value& value = currentValue()[index++];
504 nodes_.push(&value);
505 bool ok = readValue();
506 nodes_.pop();
507 if (!ok) // error already set
508 return recoverFromError(tokenArrayEnd);
509
510 Token currentToken;
511 // Accept Comment after last item in the array.
512 ok = readTokenSkippingComments(currentToken);
513 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
514 currentToken.type_ != tokenArrayEnd);
515 if (!ok || badTokenType) {
516 return addErrorAndRecover("Missing ',' or ']' in array declaration",
517 currentToken, tokenArrayEnd);
518 }
519 if (currentToken.type_ == tokenArrayEnd)
520 break;
521 }
522 return true;
523}
524
525bool Reader::decodeNumber(Token& token) {
526 Value decoded;
527 if (!decodeNumber(token, decoded))
528 return false;
529 currentValue().swapPayload(decoded);
530 currentValue().setOffsetStart(token.start_ - begin_);
531 currentValue().setOffsetLimit(token.end_ - begin_);
532 return true;
533}
534
535bool Reader::decodeNumber(Token& token, Value& decoded) {
536 // Attempts to parse the number as an integer. If the number is
537 // larger than the maximum supported value of an integer then
538 // we decode the number as a double.
539 Location current = token.start_;
540 bool isNegative = *current == '-';
541 if (isNegative)
542 ++current;
543 // TODO: Help the compiler do the div and mod at compile time or get rid of
544 // them.
545 Value::LargestUInt maxIntegerValue =
548 Value::LargestUInt threshold = maxIntegerValue / 10;
549 Value::LargestUInt value = 0;
550 while (current < token.end_) {
551 Char c = *current++;
552 if (c < '0' || c > '9')
553 return decodeDouble(token, decoded);
554 auto digit(static_cast<Value::UInt>(c - '0'));
555 if (value >= threshold) {
556 // We've hit or exceeded the max value divided by 10 (rounded down). If
557 // a) we've only just touched the limit, b) this is the last digit, and
558 // c) it's small enough to fit in that rounding delta, we're okay.
559 // Otherwise treat this number as a double to avoid overflow.
560 if (value > threshold || current != token.end_ ||
561 digit > maxIntegerValue % 10) {
562 return decodeDouble(token, decoded);
563 }
564 }
565 value = value * 10 + digit;
566 }
567 if (isNegative && value == maxIntegerValue)
568 decoded = Value::minLargestInt;
569 else if (isNegative)
570 decoded = -Value::LargestInt(value);
571 else if (value <= Value::LargestUInt(Value::maxInt))
572 decoded = Value::LargestInt(value);
573 else
574 decoded = value;
575 return true;
576}
577
578bool Reader::decodeDouble(Token& token) {
579 Value decoded;
580 if (!decodeDouble(token, decoded))
581 return false;
582 currentValue().swapPayload(decoded);
583 currentValue().setOffsetStart(token.start_ - begin_);
584 currentValue().setOffsetLimit(token.end_ - begin_);
585 return true;
586}
587
588bool Reader::decodeDouble(Token& token, Value& decoded) {
589 double value = 0;
590 IStringStream is(String(token.start_, token.end_));
591 if (!(is >> value)) {
592 if (value == std::numeric_limits<double>::max())
593 value = std::numeric_limits<double>::infinity();
594 else if (value == std::numeric_limits<double>::lowest())
595 value = -std::numeric_limits<double>::infinity();
596 else if (!std::isinf(value))
597 return addError(
598 "'" + String(token.start_, token.end_) + "' is not a number.", token);
599 }
600 decoded = value;
601 return true;
602}
603
604bool Reader::decodeString(Token& token) {
605 String decoded_string;
606 if (!decodeString(token, decoded_string))
607 return false;
608 Value decoded(decoded_string);
609 currentValue().swapPayload(decoded);
610 currentValue().setOffsetStart(token.start_ - begin_);
611 currentValue().setOffsetLimit(token.end_ - begin_);
612 return true;
613}
614
615bool Reader::decodeString(Token& token, String& decoded) {
616 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
617 Location current = token.start_ + 1; // skip '"'
618 Location end = token.end_ - 1; // do not include '"'
619 while (current != end) {
620 Char c = *current++;
621 if (c == '"')
622 break;
623 if (c == '\\') {
624 if (current == end)
625 return addError("Empty escape sequence in string", token, current);
626 Char escape = *current++;
627 switch (escape) {
628 case '"':
629 decoded += '"';
630 break;
631 case '/':
632 decoded += '/';
633 break;
634 case '\\':
635 decoded += '\\';
636 break;
637 case 'b':
638 decoded += '\b';
639 break;
640 case 'f':
641 decoded += '\f';
642 break;
643 case 'n':
644 decoded += '\n';
645 break;
646 case 'r':
647 decoded += '\r';
648 break;
649 case 't':
650 decoded += '\t';
651 break;
652 case 'u': {
653 unsigned int unicode;
654 if (!decodeUnicodeCodePoint(token, current, end, unicode))
655 return false;
656 decoded += codePointToUTF8(unicode);
657 } break;
658 default:
659 return addError("Bad escape sequence in string", token, current);
660 }
661 } else {
662 decoded += c;
663 }
664 }
665 return true;
666}
667
668bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
669 Location end, unsigned int& unicode) {
670
671 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
672 return false;
673 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
674 // surrogate pairs
675 if (end - current < 6)
676 return addError(
677 "additional six characters expected to parse unicode surrogate pair.",
678 token, current);
679 if (*(current++) == '\\' && *(current++) == 'u') {
680 unsigned int surrogatePair;
681 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
682 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
683 } else
684 return false;
685 } else
686 return addError("expecting another \\u token to begin the second half of "
687 "a unicode surrogate pair",
688 token, current);
689 }
690 return true;
691}
692
693bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
694 Location end,
695 unsigned int& ret_unicode) {
696 if (end - current < 4)
697 return addError(
698 "Bad unicode escape sequence in string: four digits expected.", token,
699 current);
700 int unicode = 0;
701 for (int index = 0; index < 4; ++index) {
702 Char c = *current++;
703 unicode *= 16;
704 if (c >= '0' && c <= '9')
705 unicode += c - '0';
706 else if (c >= 'a' && c <= 'f')
707 unicode += c - 'a' + 10;
708 else if (c >= 'A' && c <= 'F')
709 unicode += c - 'A' + 10;
710 else
711 return addError(
712 "Bad unicode escape sequence in string: hexadecimal digit expected.",
713 token, current);
714 }
715 ret_unicode = static_cast<unsigned int>(unicode);
716 return true;
717}
718
719bool Reader::addError(const String& message, Token& token, Location extra) {
720 ErrorInfo info;
721 info.token_ = token;
722 info.message_ = message;
723 info.extra_ = extra;
724 errors_.push_back(info);
725 return false;
726}
727
728bool Reader::recoverFromError(TokenType skipUntilToken) {
729 size_t const errorCount = errors_.size();
730 Token skip;
731 for (;;) {
732 if (!readToken(skip))
733 errors_.resize(errorCount); // discard errors caused by recovery
734 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
735 break;
736 }
737 errors_.resize(errorCount);
738 return false;
739}
740
741bool Reader::addErrorAndRecover(const String& message, Token& token,
742 TokenType skipUntilToken) {
743 addError(message, token);
744 return recoverFromError(skipUntilToken);
745}
746
747Value& Reader::currentValue() { return *(nodes_.top()); }
748
749Reader::Char Reader::getNextChar() {
750 if (current_ == end_)
751 return 0;
752 return *current_++;
753}
754
755void Reader::getLocationLineAndColumn(Location location, int& line,
756 int& column) const {
757 Location current = begin_;
758 Location lastLineStart = current;
759 line = 0;
760 while (current < location && current != end_) {
761 Char c = *current++;
762 if (c == '\r') {
763 if (current != end_ && *current == '\n')
764 ++current;
765 lastLineStart = current;
766 ++line;
767 } else if (c == '\n') {
768 lastLineStart = current;
769 ++line;
770 }
771 }
772 // column & line start at 1
773 column = int(location - lastLineStart) + 1;
774 ++line;
775}
776
777String Reader::getLocationLineAndColumn(Location location) const {
778 int line, column;
779 getLocationLineAndColumn(location, line, column);
780 char buffer[18 + 16 + 16 + 1];
781 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
782 return buffer;
783}
784
785// Deprecated. Preserved for backward compatibility
786String Reader::getFormatedErrorMessages() const {
788}
789
791 String formattedMessage;
792 for (const auto& error : errors_) {
793 formattedMessage +=
794 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
795 formattedMessage += " " + error.message_ + "\n";
796 if (error.extra_)
797 formattedMessage +=
798 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
799 }
800 return formattedMessage;
801}
802
803std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
804 std::vector<Reader::StructuredError> allErrors;
805 for (const auto& error : errors_) {
806 Reader::StructuredError structured;
807 structured.offset_start = error.token_.start_ - begin_;
808 structured.offset_limit = error.token_.end_ - begin_;
809 structured.message = error.message_;
810 allErrors.push_back(structured);
811 }
812 return allErrors;
813}
814
815bool Reader::pushError(const Value& value, const String& message) {
816 ptrdiff_t const length = end_ - begin_;
817 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
818 return false;
819 Token token;
820 token.type_ = tokenError;
821 token.start_ = begin_ + value.getOffsetStart();
822 token.end_ = begin_ + value.getOffsetLimit();
823 ErrorInfo info;
824 info.token_ = token;
825 info.message_ = message;
826 info.extra_ = nullptr;
827 errors_.push_back(info);
828 return true;
829}
830
831bool Reader::pushError(const Value& value, const String& message,
832 const Value& extra) {
833 ptrdiff_t const length = end_ - begin_;
834 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
835 extra.getOffsetLimit() > length)
836 return false;
837 Token token;
838 token.type_ = tokenError;
839 token.start_ = begin_ + value.getOffsetStart();
840 token.end_ = begin_ + value.getOffsetLimit();
841 ErrorInfo info;
842 info.token_ = token;
843 info.message_ = message;
844 info.extra_ = begin_ + extra.getOffsetStart();
845 errors_.push_back(info);
846 return true;
847}
848
849bool Reader::good() const { return errors_.empty(); }
850
851// Originally copied from the Features class (now deprecated), used internally
852// for features implementation.
// Originally copied from the Features class (now deprecated), used internally
// for features implementation. The members carry no in-class initializers.
class OurFeatures {
public:
  // Returns a value-initialized feature set.
  static OurFeatures all();

  bool allowComments_; // when false, OurReader::parse() never collects comments
  bool allowTrailingCommas_;
  bool strictRoot_; // root must be an array or an object
  bool allowDroppedNullPlaceholders_; // a missing value is read as null
  bool allowNumericKeys_;
  bool allowSingleQuotes_; // '\'' may open a string token
  bool failIfExtra_; // any token after the root value is an error
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  bool skipBom_;      // passed to OurReader::skipBom() at the start of parse()
  size_t stackLimit_; // node-stack depth checked in OurReader::readValue()
};                    // OurFeatures

OurFeatures OurFeatures::all() { return OurFeatures{}; }
870
871// Implementation of class Reader
872// ////////////////////////////////
873
874// Originally copied from the Reader class (now deprecated), used internally
875// for implementing JSON reading.
876class OurReader {
877public:
878 using Char = char;
879 using Location = const Char*;
880
881 explicit OurReader(OurFeatures const& features);
882 bool parse(const char* beginDoc, const char* endDoc, Value& root,
883 bool collectComments = true);
884 String getFormattedErrorMessages() const;
885 std::vector<CharReader::StructuredError> getStructuredErrors() const;
886
887private:
888 OurReader(OurReader const&); // no impl
889 void operator=(OurReader const&); // no impl
890
891 enum TokenType {
892 tokenEndOfStream = 0,
893 tokenObjectBegin,
894 tokenObjectEnd,
895 tokenArrayBegin,
896 tokenArrayEnd,
897 tokenString,
898 tokenNumber,
899 tokenTrue,
900 tokenFalse,
901 tokenNull,
902 tokenNaN,
903 tokenPosInf,
904 tokenNegInf,
905 tokenArraySeparator,
906 tokenMemberSeparator,
907 tokenComment,
908 tokenError
909 };
910
911 class Token {
912 public:
913 TokenType type_;
914 Location start_;
915 Location end_;
916 };
917
918 class ErrorInfo {
919 public:
920 Token token_;
921 String message_;
922 Location extra_;
923 };
924
925 using Errors = std::deque<ErrorInfo>;
926
927 bool readToken(Token& token);
928 bool readTokenSkippingComments(Token& token);
929 void skipSpaces();
930 void skipBom(bool skipBom);
931 bool match(const Char* pattern, int patternLength);
932 bool readComment();
933 bool readCStyleComment(bool* containsNewLineResult);
934 bool readCppStyleComment();
935 bool readString();
936 bool readStringSingleQuote();
937 bool readNumber(bool checkInf);
938 bool readValue();
939 bool readObject(Token& token);
940 bool readArray(Token& token);
941 bool decodeNumber(Token& token);
942 bool decodeNumber(Token& token, Value& decoded);
943 bool decodeString(Token& token);
944 bool decodeString(Token& token, String& decoded);
945 bool decodeDouble(Token& token);
946 bool decodeDouble(Token& token, Value& decoded);
947 bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
948 unsigned int& unicode);
949 bool decodeUnicodeEscapeSequence(Token& token, Location& current,
950 Location end, unsigned int& unicode);
951 bool addError(const String& message, Token& token, Location extra = nullptr);
952 bool recoverFromError(TokenType skipUntilToken);
953 bool addErrorAndRecover(const String& message, Token& token,
954 TokenType skipUntilToken);
955 void skipUntilSpace();
956 Value& currentValue();
957 Char getNextChar();
958 void getLocationLineAndColumn(Location location, int& line,
959 int& column) const;
960 String getLocationLineAndColumn(Location location) const;
961 void addComment(Location begin, Location end, CommentPlacement placement);
962
963 static String normalizeEOL(Location begin, Location end);
964 static bool containsNewLine(Location begin, Location end);
965
966 using Nodes = std::stack<Value*>;
967
968 Nodes nodes_{};
969 Errors errors_{};
970 String document_{};
971 Location begin_ = nullptr;
972 Location end_ = nullptr;
973 Location current_ = nullptr;
974 Location lastValueEnd_ = nullptr;
975 Value* lastValue_ = nullptr;
976 bool lastValueHasAComment_ = false;
977 String commentsBefore_{};
978
979 OurFeatures const features_;
980 bool collectComments_ = false;
981}; // OurReader
982
983// complete copy of Read impl, for OurReader
984
985bool OurReader::containsNewLine(OurReader::Location begin,
986 OurReader::Location end) {
987 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
988}
989
990OurReader::OurReader(OurFeatures const& features) : features_(features) {}
991
992bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
993 bool collectComments) {
994 if (!features_.allowComments_) {
995 collectComments = false;
996 }
997
998 begin_ = beginDoc;
999 end_ = endDoc;
1000 collectComments_ = collectComments;
1001 current_ = begin_;
1002 lastValueEnd_ = nullptr;
1003 lastValue_ = nullptr;
1004 commentsBefore_.clear();
1005 errors_.clear();
1006 while (!nodes_.empty())
1007 nodes_.pop();
1008 nodes_.push(&root);
1009
1010 // skip byte order mark if it exists at the beginning of the UTF-8 text.
1011 skipBom(features_.skipBom_);
1012 bool successful = readValue();
1013 nodes_.pop();
1014 Token token;
1015 readTokenSkippingComments(token);
1016 if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1017 addError("Extra non-whitespace after JSON value.", token);
1018 return false;
1019 }
1020 if (collectComments_ && !commentsBefore_.empty())
1021 root.setComment(commentsBefore_, commentAfter);
1022 if (features_.strictRoot_) {
1023 if (!root.isArray() && !root.isObject()) {
1024 // Set error location to start of doc, ideally should be first token found
1025 // in doc
1026 token.type_ = tokenError;
1027 token.start_ = beginDoc;
1028 token.end_ = endDoc;
1029 addError(
1030 "A valid JSON document must be either an array or an object value.",
1031 token);
1032 return false;
1033 }
1034 }
1035 return successful;
1036}
1037
1038bool OurReader::readValue() {
1039 // To preserve the old behaviour we cast size_t to int.
1040 if (nodes_.size() > features_.stackLimit_)
1041 throwRuntimeError("Exceeded stackLimit in readValue().");
1042 Token token;
1043 readTokenSkippingComments(token);
1044 bool successful = true;
1045
1046 if (collectComments_ && !commentsBefore_.empty()) {
1047 currentValue().setComment(commentsBefore_, commentBefore);
1048 commentsBefore_.clear();
1049 }
1050
1051 switch (token.type_) {
1052 case tokenObjectBegin:
1053 successful = readObject(token);
1054 currentValue().setOffsetLimit(current_ - begin_);
1055 break;
1056 case tokenArrayBegin:
1057 successful = readArray(token);
1058 currentValue().setOffsetLimit(current_ - begin_);
1059 break;
1060 case tokenNumber:
1061 successful = decodeNumber(token);
1062 break;
1063 case tokenString:
1064 successful = decodeString(token);
1065 break;
1066 case tokenTrue: {
1067 Value v(true);
1068 currentValue().swapPayload(v);
1069 currentValue().setOffsetStart(token.start_ - begin_);
1070 currentValue().setOffsetLimit(token.end_ - begin_);
1071 } break;
1072 case tokenFalse: {
1073 Value v(false);
1074 currentValue().swapPayload(v);
1075 currentValue().setOffsetStart(token.start_ - begin_);
1076 currentValue().setOffsetLimit(token.end_ - begin_);
1077 } break;
1078 case tokenNull: {
1079 Value v;
1080 currentValue().swapPayload(v);
1081 currentValue().setOffsetStart(token.start_ - begin_);
1082 currentValue().setOffsetLimit(token.end_ - begin_);
1083 } break;
1084 case tokenNaN: {
1085 Value v(std::numeric_limits<double>::quiet_NaN());
1086 currentValue().swapPayload(v);
1087 currentValue().setOffsetStart(token.start_ - begin_);
1088 currentValue().setOffsetLimit(token.end_ - begin_);
1089 } break;
1090 case tokenPosInf: {
1091 Value v(std::numeric_limits<double>::infinity());
1092 currentValue().swapPayload(v);
1093 currentValue().setOffsetStart(token.start_ - begin_);
1094 currentValue().setOffsetLimit(token.end_ - begin_);
1095 } break;
1096 case tokenNegInf: {
1097 Value v(-std::numeric_limits<double>::infinity());
1098 currentValue().swapPayload(v);
1099 currentValue().setOffsetStart(token.start_ - begin_);
1100 currentValue().setOffsetLimit(token.end_ - begin_);
1101 } break;
1102 case tokenArraySeparator:
1103 case tokenObjectEnd:
1104 case tokenArrayEnd:
1105 if (features_.allowDroppedNullPlaceholders_) {
1106 // "Un-read" the current token and mark the current value as a null
1107 // token.
1108 current_--;
1109 Value v;
1110 currentValue().swapPayload(v);
1111 currentValue().setOffsetStart(current_ - begin_ - 1);
1112 currentValue().setOffsetLimit(current_ - begin_);
1113 break;
1114 } // else, fall through ...
1115 default:
1116 currentValue().setOffsetStart(token.start_ - begin_);
1117 currentValue().setOffsetLimit(token.end_ - begin_);
1118 return addError("Syntax error: value, object or array expected.", token);
1119 }
1120
1121 if (collectComments_) {
1122 lastValueEnd_ = current_;
1123 lastValueHasAComment_ = false;
1124 lastValue_ = &currentValue();
1125 }
1126
1127 return successful;
1128}
1129
1130bool OurReader::readTokenSkippingComments(Token& token) {
1131 bool success = readToken(token);
1132 if (features_.allowComments_) {
1133 while (success && token.type_ == tokenComment) {
1134 success = readToken(token);
1135 }
1136 }
1137 return success;
1138}
1139
1140bool OurReader::readToken(Token& token) {
1141 skipSpaces();
1142 token.start_ = current_;
1143 Char c = getNextChar();
1144 bool ok = true;
1145 switch (c) {
1146 case '{':
1147 token.type_ = tokenObjectBegin;
1148 break;
1149 case '}':
1150 token.type_ = tokenObjectEnd;
1151 break;
1152 case '[':
1153 token.type_ = tokenArrayBegin;
1154 break;
1155 case ']':
1156 token.type_ = tokenArrayEnd;
1157 break;
1158 case '"':
1159 token.type_ = tokenString;
1160 ok = readString();
1161 break;
1162 case '\'':
1163 if (features_.allowSingleQuotes_) {
1164 token.type_ = tokenString;
1165 ok = readStringSingleQuote();
1166 } else {
1167 // If we don't allow single quotes, this is a failure case.
1168 ok = false;
1169 }
1170 break;
1171 case '/':
1172 token.type_ = tokenComment;
1173 ok = readComment();
1174 break;
1175 case '0':
1176 case '1':
1177 case '2':
1178 case '3':
1179 case '4':
1180 case '5':
1181 case '6':
1182 case '7':
1183 case '8':
1184 case '9':
1185 token.type_ = tokenNumber;
1186 readNumber(false);
1187 break;
1188 case '-':
1189 if (readNumber(true)) {
1190 token.type_ = tokenNumber;
1191 } else {
1192 token.type_ = tokenNegInf;
1193 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1194 }
1195 break;
1196 case '+':
1197 if (readNumber(true)) {
1198 token.type_ = tokenNumber;
1199 } else {
1200 token.type_ = tokenPosInf;
1201 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1202 }
1203 break;
1204 case 't':
1205 token.type_ = tokenTrue;
1206 ok = match("rue", 3);
1207 break;
1208 case 'f':
1209 token.type_ = tokenFalse;
1210 ok = match("alse", 4);
1211 break;
1212 case 'n':
1213 token.type_ = tokenNull;
1214 ok = match("ull", 3);
1215 break;
1216 case 'N':
1217 if (features_.allowSpecialFloats_) {
1218 token.type_ = tokenNaN;
1219 ok = match("aN", 2);
1220 } else {
1221 ok = false;
1222 }
1223 break;
1224 case 'I':
1225 if (features_.allowSpecialFloats_) {
1226 token.type_ = tokenPosInf;
1227 ok = match("nfinity", 7);
1228 } else {
1229 ok = false;
1230 }
1231 break;
1232 case ',':
1233 token.type_ = tokenArraySeparator;
1234 break;
1235 case ':':
1236 token.type_ = tokenMemberSeparator;
1237 break;
1238 case 0:
1239 token.type_ = tokenEndOfStream;
1240 break;
1241 default:
1242 ok = false;
1243 break;
1244 }
1245 if (!ok)
1246 token.type_ = tokenError;
1247 token.end_ = current_;
1248 return ok;
1249}
1250
1251void OurReader::skipSpaces() {
1252 while (current_ != end_) {
1253 Char c = *current_;
1254 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1255 ++current_;
1256 else
1257 break;
1258 }
1259}
1260
1261void OurReader::skipBom(bool skipBom) {
1262 // The default behavior is to skip BOM.
1263 if (skipBom) {
1264 if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1265 begin_ += 3;
1266 current_ = begin_;
1267 }
1268 }
1269}
1270
1271bool OurReader::match(const Char* pattern, int patternLength) {
1272 if (end_ - current_ < patternLength)
1273 return false;
1274 int index = patternLength;
1275 while (index--)
1276 if (current_[index] != pattern[index])
1277 return false;
1278 current_ += patternLength;
1279 return true;
1280}
1281
1282bool OurReader::readComment() {
1283 const Location commentBegin = current_ - 1;
1284 const Char c = getNextChar();
1285 bool successful = false;
1286 bool cStyleWithEmbeddedNewline = false;
1287
1288 const bool isCStyleComment = (c == '*');
1289 const bool isCppStyleComment = (c == '/');
1290 if (isCStyleComment) {
1291 successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1292 } else if (isCppStyleComment) {
1293 successful = readCppStyleComment();
1294 }
1295
1296 if (!successful)
1297 return false;
1298
1299 if (collectComments_) {
1300 CommentPlacement placement = commentBefore;
1301
1302 if (!lastValueHasAComment_) {
1303 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1304 if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1305 placement = commentAfterOnSameLine;
1306 lastValueHasAComment_ = true;
1307 }
1308 }
1309 }
1310
1311 addComment(commentBegin, current_, placement);
1312 }
1313 return true;
1314}
1315
1316String OurReader::normalizeEOL(OurReader::Location begin,
1317 OurReader::Location end) {
1318 String normalized;
1319 normalized.reserve(static_cast<size_t>(end - begin));
1320 OurReader::Location current = begin;
1321 while (current != end) {
1322 char c = *current++;
1323 if (c == '\r') {
1324 if (current != end && *current == '\n')
1325 // convert dos EOL
1326 ++current;
1327 // convert Mac EOL
1328 normalized += '\n';
1329 } else {
1330 normalized += c;
1331 }
1332 }
1333 return normalized;
1334}
1335
1336void OurReader::addComment(Location begin, Location end,
1337 CommentPlacement placement) {
1338 assert(collectComments_);
1339 const String& normalized = normalizeEOL(begin, end);
1340 if (placement == commentAfterOnSameLine) {
1341 assert(lastValue_ != nullptr);
1342 lastValue_->setComment(normalized, placement);
1343 } else {
1344 commentsBefore_ += normalized;
1345 }
1346}
1347
1348bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1349 *containsNewLineResult = false;
1350
1351 while ((current_ + 1) < end_) {
1352 Char c = getNextChar();
1353 if (c == '*' && *current_ == '/')
1354 break;
1355 if (c == '\n')
1356 *containsNewLineResult = true;
1357 }
1358
1359 return getNextChar() == '/';
1360}
1361
1362bool OurReader::readCppStyleComment() {
1363 while (current_ != end_) {
1364 Char c = getNextChar();
1365 if (c == '\n')
1366 break;
1367 if (c == '\r') {
1368 // Consume DOS EOL. It will be normalized in addComment.
1369 if (current_ != end_ && *current_ == '\n')
1370 getNextChar();
1371 // Break on Moc OS 9 EOL.
1372 break;
1373 }
1374 }
1375 return true;
1376}
1377
1378bool OurReader::readNumber(bool checkInf) {
1379 Location p = current_;
1380 if (checkInf && p != end_ && *p == 'I') {
1381 current_ = ++p;
1382 return false;
1383 }
1384 char c = '0'; // stopgap for already consumed character
1385 // integral part
1386 while (c >= '0' && c <= '9')
1387 c = (current_ = p) < end_ ? *p++ : '\0';
1388 // fractional part
1389 if (c == '.') {
1390 c = (current_ = p) < end_ ? *p++ : '\0';
1391 while (c >= '0' && c <= '9')
1392 c = (current_ = p) < end_ ? *p++ : '\0';
1393 }
1394 // exponential part
1395 if (c == 'e' || c == 'E') {
1396 c = (current_ = p) < end_ ? *p++ : '\0';
1397 if (c == '+' || c == '-')
1398 c = (current_ = p) < end_ ? *p++ : '\0';
1399 while (c >= '0' && c <= '9')
1400 c = (current_ = p) < end_ ? *p++ : '\0';
1401 }
1402 return true;
1403}
1404bool OurReader::readString() {
1405 Char c = 0;
1406 while (current_ != end_) {
1407 c = getNextChar();
1408 if (c == '\\')
1409 getNextChar();
1410 else if (c == '"')
1411 break;
1412 }
1413 return c == '"';
1414}
1415
1416bool OurReader::readStringSingleQuote() {
1417 Char c = 0;
1418 while (current_ != end_) {
1419 c = getNextChar();
1420 if (c == '\\')
1421 getNextChar();
1422 else if (c == '\'')
1423 break;
1424 }
1425 return c == '\'';
1426}
1427
1428bool OurReader::readObject(Token& token) {
1429 Token tokenName;
1430 String name;
1431 Value init(objectValue);
1432 currentValue().swapPayload(init);
1433 currentValue().setOffsetStart(token.start_ - begin_);
1434 while (readTokenSkippingComments(tokenName)) {
1435 if (tokenName.type_ == tokenObjectEnd &&
1436 (name.empty() ||
1437 features_.allowTrailingCommas_)) // empty object or trailing comma
1438 return true;
1439 name.clear();
1440 if (tokenName.type_ == tokenString) {
1441 if (!decodeString(tokenName, name))
1442 return recoverFromError(tokenObjectEnd);
1443 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1444 Value numberName;
1445 if (!decodeNumber(tokenName, numberName))
1446 return recoverFromError(tokenObjectEnd);
1447 name = numberName.asString();
1448 } else {
1449 break;
1450 }
1451 if (name.length() >= (1U << 30))
1452 throwRuntimeError("keylength >= 2^30");
1453 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1454 String msg = "Duplicate key: '" + name + "'";
1455 return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1456 }
1457
1458 Token colon;
1459 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1460 return addErrorAndRecover("Missing ':' after object member name", colon,
1461 tokenObjectEnd);
1462 }
1463 Value& value = currentValue()[name];
1464 nodes_.push(&value);
1465 bool ok = readValue();
1466 nodes_.pop();
1467 if (!ok) // error already set
1468 return recoverFromError(tokenObjectEnd);
1469
1470 Token comma;
1471 if (!readTokenSkippingComments(comma) ||
1472 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
1473 return addErrorAndRecover("Missing ',' or '}' in object declaration",
1474 comma, tokenObjectEnd);
1475 }
1476 if (comma.type_ == tokenObjectEnd)
1477 return true;
1478 }
1479 return addErrorAndRecover("Missing '}' or object member name", tokenName,
1480 tokenObjectEnd);
1481}
1482
1483bool OurReader::readArray(Token& token) {
1484 Value init(arrayValue);
1485 currentValue().swapPayload(init);
1486 currentValue().setOffsetStart(token.start_ - begin_);
1487 int index = 0;
1488 for (;;) {
1489 skipSpaces();
1490 if (current_ != end_ && *current_ == ']' &&
1491 (index == 0 ||
1492 (features_.allowTrailingCommas_ &&
1493 !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1494 // comma
1495 {
1496 Token endArray;
1497 readToken(endArray);
1498 return true;
1499 }
1500 Value& value = currentValue()[index++];
1501 nodes_.push(&value);
1502 bool ok = readValue();
1503 nodes_.pop();
1504 if (!ok) // error already set
1505 return recoverFromError(tokenArrayEnd);
1506
1507 Token currentToken;
1508 // Accept Comment after last item in the array.
1509 ok = readTokenSkippingComments(currentToken);
1510 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1511 currentToken.type_ != tokenArrayEnd);
1512 if (!ok || badTokenType) {
1513 return addErrorAndRecover("Missing ',' or ']' in array declaration",
1514 currentToken, tokenArrayEnd);
1515 }
1516 if (currentToken.type_ == tokenArrayEnd)
1517 break;
1518 }
1519 return true;
1520}
1521
1522bool OurReader::decodeNumber(Token& token) {
1523 Value decoded;
1524 if (!decodeNumber(token, decoded))
1525 return false;
1526 currentValue().swapPayload(decoded);
1527 currentValue().setOffsetStart(token.start_ - begin_);
1528 currentValue().setOffsetLimit(token.end_ - begin_);
1529 return true;
1530}
1531
1532bool OurReader::decodeNumber(Token& token, Value& decoded) {
1533 // Attempts to parse the number as an integer. If the number is
1534 // larger than the maximum supported value of an integer then
1535 // we decode the number as a double.
1536 Location current = token.start_;
1537 const bool isNegative = *current == '-';
1538 if (isNegative) {
1539 ++current;
1540 }
1541
1542 // We assume we can represent the largest and smallest integer types as
1543 // unsigned integers with separate sign. This is only true if they can fit
1544 // into an unsigned integer.
1546 "Int must be smaller than UInt");
1547
1548 // We need to convert minLargestInt into a positive number. The easiest way
1549 // to do this conversion is to assume our "threshold" value of minLargestInt
1550 // divided by 10 can fit in maxLargestInt when absolute valued. This should
1551 // be a safe assumption.
1553 "The absolute value of minLargestInt must be greater than or "
1554 "equal to maxLargestInt");
1555 static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1556 "The absolute value of minLargestInt must be only 1 magnitude "
1557 "larger than maxLargest Int");
1558
1559 static constexpr Value::LargestUInt positive_threshold =
1561 static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1562
1563 // For the negative values, we have to be more careful. Since typically
1564 // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1565 // then take the inverse. This assumes that minLargestInt is only a single
1566 // power of 10 different in magnitude, which we check above. For the last
1567 // digit, we take the modulus before negating for the same reason.
1568 static constexpr auto negative_threshold =
1570 static constexpr auto negative_last_digit =
1572
1573 const Value::LargestUInt threshold =
1574 isNegative ? negative_threshold : positive_threshold;
1575 const Value::UInt max_last_digit =
1576 isNegative ? negative_last_digit : positive_last_digit;
1577
1578 Value::LargestUInt value = 0;
1579 while (current < token.end_) {
1580 Char c = *current++;
1581 if (c < '0' || c > '9')
1582 return decodeDouble(token, decoded);
1583
1584 const auto digit(static_cast<Value::UInt>(c - '0'));
1585 if (value >= threshold) {
1586 // We've hit or exceeded the max value divided by 10 (rounded down). If
1587 // a) we've only just touched the limit, meaning value == threshold,
1588 // b) this is the last digit, or
1589 // c) it's small enough to fit in that rounding delta, we're okay.
1590 // Otherwise treat this number as a double to avoid overflow.
1591 if (value > threshold || current != token.end_ ||
1592 digit > max_last_digit) {
1593 return decodeDouble(token, decoded);
1594 }
1595 }
1596 value = value * 10 + digit;
1597 }
1598
1599 if (isNegative) {
1600 // We use the same magnitude assumption here, just in case.
1601 const auto last_digit = static_cast<Value::UInt>(value % 10);
1602 decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1603 } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1604 decoded = Value::LargestInt(value);
1605 } else {
1606 decoded = value;
1607 }
1608
1609 return true;
1610}
1611
1612bool OurReader::decodeDouble(Token& token) {
1613 Value decoded;
1614 if (!decodeDouble(token, decoded))
1615 return false;
1616 currentValue().swapPayload(decoded);
1617 currentValue().setOffsetStart(token.start_ - begin_);
1618 currentValue().setOffsetLimit(token.end_ - begin_);
1619 return true;
1620}
1621
1622bool OurReader::decodeDouble(Token& token, Value& decoded) {
1623 double value = 0;
1624 IStringStream is(String(token.start_, token.end_));
1625 if (!(is >> value)) {
1626 if (value == std::numeric_limits<double>::max())
1627 value = std::numeric_limits<double>::infinity();
1628 else if (value == std::numeric_limits<double>::lowest())
1629 value = -std::numeric_limits<double>::infinity();
1630 else if (!std::isinf(value))
1631 return addError(
1632 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1633 }
1634 decoded = value;
1635 return true;
1636}
1637
1638bool OurReader::decodeString(Token& token) {
1639 String decoded_string;
1640 if (!decodeString(token, decoded_string))
1641 return false;
1642 Value decoded(decoded_string);
1643 currentValue().swapPayload(decoded);
1644 currentValue().setOffsetStart(token.start_ - begin_);
1645 currentValue().setOffsetLimit(token.end_ - begin_);
1646 return true;
1647}
1648
1649bool OurReader::decodeString(Token& token, String& decoded) {
1650 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1651 Location current = token.start_ + 1; // skip '"'
1652 Location end = token.end_ - 1; // do not include '"'
1653 while (current != end) {
1654 Char c = *current++;
1655 if (c == '"')
1656 break;
1657 if (c == '\\') {
1658 if (current == end)
1659 return addError("Empty escape sequence in string", token, current);
1660 Char escape = *current++;
1661 switch (escape) {
1662 case '"':
1663 decoded += '"';
1664 break;
1665 case '/':
1666 decoded += '/';
1667 break;
1668 case '\\':
1669 decoded += '\\';
1670 break;
1671 case 'b':
1672 decoded += '\b';
1673 break;
1674 case 'f':
1675 decoded += '\f';
1676 break;
1677 case 'n':
1678 decoded += '\n';
1679 break;
1680 case 'r':
1681 decoded += '\r';
1682 break;
1683 case 't':
1684 decoded += '\t';
1685 break;
1686 case 'u': {
1687 unsigned int unicode;
1688 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1689 return false;
1690 decoded += codePointToUTF8(unicode);
1691 } break;
1692 default:
1693 return addError("Bad escape sequence in string", token, current);
1694 }
1695 } else {
1696 decoded += c;
1697 }
1698 }
1699 return true;
1700}
1701
1702bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1703 Location end, unsigned int& unicode) {
1704
1705 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1706 return false;
1707 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1708 // surrogate pairs
1709 if (end - current < 6)
1710 return addError(
1711 "additional six characters expected to parse unicode surrogate pair.",
1712 token, current);
1713 if (*(current++) == '\\' && *(current++) == 'u') {
1714 unsigned int surrogatePair;
1715 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1716 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1717 } else
1718 return false;
1719 } else
1720 return addError("expecting another \\u token to begin the second half of "
1721 "a unicode surrogate pair",
1722 token, current);
1723 }
1724 return true;
1725}
1726
1727bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1728 Location end,
1729 unsigned int& ret_unicode) {
1730 if (end - current < 4)
1731 return addError(
1732 "Bad unicode escape sequence in string: four digits expected.", token,
1733 current);
1734 int unicode = 0;
1735 for (int index = 0; index < 4; ++index) {
1736 Char c = *current++;
1737 unicode *= 16;
1738 if (c >= '0' && c <= '9')
1739 unicode += c - '0';
1740 else if (c >= 'a' && c <= 'f')
1741 unicode += c - 'a' + 10;
1742 else if (c >= 'A' && c <= 'F')
1743 unicode += c - 'A' + 10;
1744 else
1745 return addError(
1746 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1747 token, current);
1748 }
1749 ret_unicode = static_cast<unsigned int>(unicode);
1750 return true;
1751}
1752
1753bool OurReader::addError(const String& message, Token& token, Location extra) {
1754 ErrorInfo info;
1755 info.token_ = token;
1756 info.message_ = message;
1757 info.extra_ = extra;
1758 errors_.push_back(info);
1759 return false;
1760}
1761
1762bool OurReader::recoverFromError(TokenType skipUntilToken) {
1763 size_t errorCount = errors_.size();
1764 Token skip;
1765 for (;;) {
1766 if (!readToken(skip))
1767 errors_.resize(errorCount); // discard errors caused by recovery
1768 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1769 break;
1770 }
1771 errors_.resize(errorCount);
1772 return false;
1773}
1774
1775bool OurReader::addErrorAndRecover(const String& message, Token& token,
1776 TokenType skipUntilToken) {
1777 addError(message, token);
1778 return recoverFromError(skipUntilToken);
1779}
1780
1781Value& OurReader::currentValue() { return *(nodes_.top()); }
1782
1783OurReader::Char OurReader::getNextChar() {
1784 if (current_ == end_)
1785 return 0;
1786 return *current_++;
1787}
1788
1789void OurReader::getLocationLineAndColumn(Location location, int& line,
1790 int& column) const {
1791 Location current = begin_;
1792 Location lastLineStart = current;
1793 line = 0;
1794 while (current < location && current != end_) {
1795 Char c = *current++;
1796 if (c == '\r') {
1797 if (current != end_ && *current == '\n')
1798 ++current;
1799 lastLineStart = current;
1800 ++line;
1801 } else if (c == '\n') {
1802 lastLineStart = current;
1803 ++line;
1804 }
1805 }
1806 // column & line start at 1
1807 column = int(location - lastLineStart) + 1;
1808 ++line;
1809}
1810
1811String OurReader::getLocationLineAndColumn(Location location) const {
1812 int line, column;
1813 getLocationLineAndColumn(location, line, column);
1814 char buffer[18 + 16 + 16 + 1];
1815 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1816 return buffer;
1817}
1818
1819String OurReader::getFormattedErrorMessages() const {
1820 String formattedMessage;
1821 for (const auto& error : errors_) {
1822 formattedMessage +=
1823 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1824 formattedMessage += " " + error.message_ + "\n";
1825 if (error.extra_)
1826 formattedMessage +=
1827 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1828 }
1829 return formattedMessage;
1830}
1831
1832std::vector<CharReader::StructuredError>
1833OurReader::getStructuredErrors() const {
1834 std::vector<CharReader::StructuredError> allErrors;
1835 for (const auto& error : errors_) {
1836 CharReader::StructuredError structured;
1837 structured.offset_start = error.token_.start_ - begin_;
1838 structured.offset_limit = error.token_.end_ - begin_;
1839 structured.message = error.message_;
1840 allErrors.push_back(structured);
1841 }
1842 return allErrors;
1843}
1844
1845class OurCharReader : public CharReader {
1846
1847public:
1848 OurCharReader(bool collectComments, OurFeatures const& features)
1849 : CharReader(
1850 std::unique_ptr<OurImpl>(new OurImpl(collectComments, features))) {}
1851
1852protected:
1853 class OurImpl : public Impl {
1854 public:
1855 OurImpl(bool collectComments, OurFeatures const& features)
1856 : collectComments_(collectComments), reader_(features) {}
1857
1858 bool parse(char const* beginDoc, char const* endDoc, Value* root,
1859 String* errs) override {
1860 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1861 if (errs) {
1862 *errs = reader_.getFormattedErrorMessages();
1863 }
1864 return ok;
1865 }
1866
1867 std::vector<CharReader::StructuredError>
1868 getStructuredErrors() const override {
1869 return reader_.getStructuredErrors();
1870 }
1871
1872 private:
1873 bool const collectComments_;
1874 OurReader reader_;
1875 };
1876};
1877
1881 bool collectComments = settings_["collectComments"].asBool();
1882 OurFeatures features = OurFeatures::all();
1883 features.allowComments_ = settings_["allowComments"].asBool();
1884 features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1885 features.strictRoot_ = settings_["strictRoot"].asBool();
1886 features.allowDroppedNullPlaceholders_ =
1887 settings_["allowDroppedNullPlaceholders"].asBool();
1888 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1889 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1890
1891 // Stack limit is always a size_t, so we get this as an unsigned int
1892 // regardless of it we have 64-bit integer support enabled.
1893 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1894 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1895 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1896 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1897 features.skipBom_ = settings_["skipBom"].asBool();
1898 return new OurCharReader(collectComments, features);
1899}
1900
1902 static const auto& valid_keys = *new std::set<String>{
1903 "collectComments",
1904 "allowComments",
1905 "allowTrailingCommas",
1906 "strictRoot",
1907 "allowDroppedNullPlaceholders",
1908 "allowNumericKeys",
1909 "allowSingleQuotes",
1910 "stackLimit",
1911 "failIfExtra",
1912 "rejectDupKeys",
1913 "allowSpecialFloats",
1914 "skipBom",
1915 };
1916 for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1917 auto key = si.name();
1918 if (valid_keys.count(key))
1919 continue;
1920 if (invalid)
1921 (*invalid)[key] = *si;
1922 else
1923 return false;
1924 }
1925 return invalid ? invalid->empty() : true;
1926}
1927
1929 return settings_[key];
1930}
1931// static
1934 (*settings)["allowComments"] = false;
1935 (*settings)["allowTrailingCommas"] = false;
1936 (*settings)["strictRoot"] = true;
1937 (*settings)["allowDroppedNullPlaceholders"] = false;
1938 (*settings)["allowNumericKeys"] = false;
1939 (*settings)["allowSingleQuotes"] = false;
1940 (*settings)["stackLimit"] = 1000;
1941 (*settings)["failIfExtra"] = true;
1942 (*settings)["rejectDupKeys"] = true;
1943 (*settings)["allowSpecialFloats"] = false;
1944 (*settings)["skipBom"] = true;
1946}
1947// static
1950 (*settings)["collectComments"] = true;
1951 (*settings)["allowComments"] = true;
1952 (*settings)["allowTrailingCommas"] = true;
1953 (*settings)["strictRoot"] = false;
1954 (*settings)["allowDroppedNullPlaceholders"] = false;
1955 (*settings)["allowNumericKeys"] = false;
1956 (*settings)["allowSingleQuotes"] = false;
1957 (*settings)["stackLimit"] = 1000;
1958 (*settings)["failIfExtra"] = false;
1959 (*settings)["rejectDupKeys"] = false;
1960 (*settings)["allowSpecialFloats"] = false;
1961 (*settings)["skipBom"] = true;
1963}
1964// static
1967 (*settings)["allowComments"] = false;
1968 (*settings)["allowTrailingCommas"] = false;
1969 (*settings)["strictRoot"] = false;
1970 (*settings)["allowDroppedNullPlaceholders"] = false;
1971 (*settings)["allowNumericKeys"] = false;
1972 (*settings)["allowSingleQuotes"] = false;
1973 (*settings)["stackLimit"] = 1000;
1974 (*settings)["failIfExtra"] = true;
1975 (*settings)["rejectDupKeys"] = false;
1976 (*settings)["allowSpecialFloats"] = false;
1977 (*settings)["skipBom"] = false;
1979}
1980
1981std::vector<CharReader::StructuredError>
1983 return _impl->getStructuredErrors();
1984}
1985
1986bool CharReader::parse(char const* beginDoc, char const* endDoc, Value* root,
1987 String* errs) {
1988 return _impl->parse(beginDoc, endDoc, root, errs);
1989}
1990
1992// global functions
1993
1994bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1995 String* errs) {
1996 OStringStream ssin;
1997 ssin << sin.rdbuf();
1998 String doc = std::move(ssin).str();
1999 char const* begin = doc.data();
2000 char const* end = begin + doc.size();
2001 // Note that we do not actually need a null-terminator.
2002 CharReaderPtr const reader(fact.newCharReader());
2003 return reader->parse(begin, end, root, errs);
2004}
2005
2008 String errs;
2009 bool ok = parseFromStream(b, sin, &root, &errs);
2010 if (!ok) {
2011 throwRuntimeError(errs);
2012 }
2013 return sin;
2014}
2015
2016} // namespace Json
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
virtual std::vector< StructuredError > getStructuredErrors() const =0
Build a CharReader implementation.
Definition reader.h:314
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
static void ecma404Mode(Json::Value *settings)
ECMA-404 mode.
Value & operator[](const String &key)
A simple way to update a specific setting.
CharReader * newCharReader() const override
Allocate a CharReader via operator new().
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
Json::Value settings_
Configuration of this builder.
Definition reader.h:360
~CharReaderBuilder() override
bool validate(Json::Value *invalid) const
Interface for reading JSON from a char array.
Definition reader.h:245
CharReader(std::unique_ptr< Impl > impl)
Definition reader.h:296
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
virtual bool parse(char const *beginDoc, char const *endDoc, Value *root, String *errs)
Read a Value from a JSON document.
Configuration passed to reader and writer.
bool strictRoot_
true if root must be either an array or an object value.
bool allowComments_
true if comments are allowed. Default: true.
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Features()
Initialize the configuration like JsonConfig::allFeatures.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
bool allowNumericKeys_
true if numeric object keys are allowed. Default: false.
char Char
Definition reader.h:39
Reader()
Constructs a Reader allowing all features for parsing.
bool pushError(const Value &value, const String &message)
Add a semantic error message.
bool good() const
Return whether there are any errors.
const Char * Location
Definition reader.h:40
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
String getFormattedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
Represents a JSON value.
Definition value.h:194
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
static constexpr LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition value.h:227
Json::UInt UInt
Definition value.h:201
bool isArray() const
void setComment(const char *comment, size_t len, CommentPlacement placement)
Comments must be //... or /* ... *‍/.
Definition value.h:574
ptrdiff_t getOffsetLimit() const
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
void setOffsetLimit(ptrdiff_t limit)
Json::LargestInt LargestInt
Definition value.h:207
Json::LargestUInt LargestUInt
Definition value.h:208
bool isObject() const
void setOffsetStart(ptrdiff_t start)
static constexpr Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition value.h:234
static constexpr LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition value.h:229
static constexpr LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition value.h:224
ptrdiff_t getOffsetStart() const
#define jsoncpp_snprintf
Definition config.h:63
#define JSONCPP_DEPRECATED_STACK_LIMIT
static size_t const stackLimit_g
JSON (JavaScript Object Notation).
Definition allocator.h:15
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition config.h:136
CommentPlacement
Definition value.h:119
@ commentAfterOnSameLine
a comment just after a value on the same line
Definition value.h:121
@ commentBefore
a comment placed on the line before a value
Definition value.h:120
@ commentAfter
a comment on the line after a value (only makes sense for the root value)
Definition value.h:122
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition config.h:133
@ arrayValue
array value (ordered list)
Definition value.h:115
@ objectValue
object value (collection of name/value pairs).
Definition value.h:116
std::istream IStream
Definition config.h:139
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition json_tool.h:39
std::auto_ptr< CharReader > CharReaderPtr
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition config.h:132
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
An error tagged with where in the JSON text it was encountered.
Definition reader.h:47