JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <iostream>
#include <istream>
#include <limits>
#include <locale>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
24 
25 #include <cstdio>
26 #if __cplusplus >= 201103L
27 
28 #if !defined(sscanf)
29 #define sscanf std::sscanf
30 #endif
31 
32 #endif //__cplusplus
33 
34 #if defined(_MSC_VER)
35 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
36 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
37 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
38 #endif //_MSC_VER
39 
40 #if defined(_MSC_VER)
41 // Disable warning about strdup being deprecated.
42 #pragma warning(disable : 4996)
43 #endif
44 
// JSONCPP_DEPRECATED_STACK_LIMIT may be defined as an integer at compile time
// to override the default limit of 1000.
#ifndef JSONCPP_DEPRECATED_STACK_LIMIT
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Limit that Reader::readValue() compares against the size of its node stack.
static const size_t stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
53 
54 namespace Json {
55 
56 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
57 using CharReaderPtr = std::unique_ptr<CharReader>;
58 #else
59 using CharReaderPtr = std::auto_ptr<CharReader>;
60 #endif
61 
62 // Implementation of class Features
63 // ////////////////////////////////
64 
65 Features::Features() = default;
66 
67 Features Features::all() { return {}; }
68 
70  Features features;
71  features.allowComments_ = false;
72  features.strictRoot_ = true;
73  features.allowDroppedNullPlaceholders_ = false;
74  features.allowNumericKeys_ = false;
75  return features;
76 }
77 
78 // Implementation of class Reader
79 // ////////////////////////////////
80 
81 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
82  return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
83 }
84 
85 // Class Reader
86 // //////////////////////////////////////////////////////////////////
87 
88 Reader::Reader() : features_(Features::all()) {}
89 
90 Reader::Reader(const Features& features) : features_(features) {}
91 
92 bool Reader::parse(const std::string& document, Value& root,
93  bool collectComments) {
94  document_.assign(document.begin(), document.end());
95  const char* begin = document_.c_str();
96  const char* end = begin + document_.length();
97  return parse(begin, end, root, collectComments);
98 }
99 
100 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
101  // std::istream_iterator<char> begin(is);
102  // std::istream_iterator<char> end;
103  // Those would allow streamed input from a file, if parse() were a
104  // template function.
105 
106  // Since String is reference-counted, this at least does not
107  // create an extra copy.
108  String doc(std::istreambuf_iterator<char>(is), {});
109  return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
110 }
111 
112 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
113  bool collectComments) {
114  if (!features_.allowComments_) {
115  collectComments = false;
116  }
117 
118  begin_ = beginDoc;
119  end_ = endDoc;
120  collectComments_ = collectComments;
121  current_ = begin_;
122  lastValueEnd_ = nullptr;
123  lastValue_ = nullptr;
124  commentsBefore_.clear();
125  errors_.clear();
126  while (!nodes_.empty())
127  nodes_.pop();
128  nodes_.push(&root);
129 
130  bool successful = readValue();
131  Token token;
132  readTokenSkippingComments(token);
133  if (collectComments_ && !commentsBefore_.empty())
134  root.setComment(commentsBefore_, commentAfter);
135  if (features_.strictRoot_) {
136  if (!root.isArray() && !root.isObject()) {
137  // Set error location to start of doc, ideally should be first token found
138  // in doc
139  token.type_ = tokenError;
140  token.start_ = beginDoc;
141  token.end_ = endDoc;
142  addError(
143  "A valid JSON document must be either an array or an object value.",
144  token);
145  return false;
146  }
147  }
148  return successful;
149 }
150 
151 bool Reader::readValue() {
152  // readValue() may call itself only if it calls readObject() or ReadArray().
153  // These methods execute nodes_.push() just before and nodes_.pop)() just
154  // after calling readValue(). parse() executes one nodes_.push(), so > instead
155  // of >=.
156  if (nodes_.size() > stackLimit_g)
157  throwRuntimeError("Exceeded stackLimit in readValue().");
158 
159  Token token;
160  readTokenSkippingComments(token);
161  bool successful = true;
162 
163  if (collectComments_ && !commentsBefore_.empty()) {
164  currentValue().setComment(commentsBefore_, commentBefore);
165  commentsBefore_.clear();
166  }
167 
168  switch (token.type_) {
169  case tokenObjectBegin:
170  successful = readObject(token);
171  currentValue().setOffsetLimit(current_ - begin_);
172  break;
173  case tokenArrayBegin:
174  successful = readArray(token);
175  currentValue().setOffsetLimit(current_ - begin_);
176  break;
177  case tokenNumber:
178  successful = decodeNumber(token);
179  break;
180  case tokenString:
181  successful = decodeString(token);
182  break;
183  case tokenTrue: {
184  Value v(true);
185  currentValue().swapPayload(v);
186  currentValue().setOffsetStart(token.start_ - begin_);
187  currentValue().setOffsetLimit(token.end_ - begin_);
188  } break;
189  case tokenFalse: {
190  Value v(false);
191  currentValue().swapPayload(v);
192  currentValue().setOffsetStart(token.start_ - begin_);
193  currentValue().setOffsetLimit(token.end_ - begin_);
194  } break;
195  case tokenNull: {
196  Value v;
197  currentValue().swapPayload(v);
198  currentValue().setOffsetStart(token.start_ - begin_);
199  currentValue().setOffsetLimit(token.end_ - begin_);
200  } break;
201  case tokenArraySeparator:
202  case tokenObjectEnd:
203  case tokenArrayEnd:
204  if (features_.allowDroppedNullPlaceholders_) {
205  // "Un-read" the current token and mark the current value as a null
206  // token.
207  current_--;
208  Value v;
209  currentValue().swapPayload(v);
210  currentValue().setOffsetStart(current_ - begin_ - 1);
211  currentValue().setOffsetLimit(current_ - begin_);
212  break;
213  } // Else, fall through...
214  default:
215  currentValue().setOffsetStart(token.start_ - begin_);
216  currentValue().setOffsetLimit(token.end_ - begin_);
217  return addError("Syntax error: value, object or array expected.", token);
218  }
219 
220  if (collectComments_) {
221  lastValueEnd_ = current_;
222  lastValue_ = &currentValue();
223  }
224 
225  return successful;
226 }
227 
228 bool Reader::readTokenSkippingComments(Token& token) {
229  bool success = readToken(token);
230  if (features_.allowComments_) {
231  while (success && token.type_ == tokenComment) {
232  success = readToken(token);
233  }
234  }
235  return success;
236 }
237 
238 bool Reader::readToken(Token& token) {
239  skipSpaces();
240  token.start_ = current_;
241  Char c = getNextChar();
242  bool ok = true;
243  switch (c) {
244  case '{':
245  token.type_ = tokenObjectBegin;
246  break;
247  case '}':
248  token.type_ = tokenObjectEnd;
249  break;
250  case '[':
251  token.type_ = tokenArrayBegin;
252  break;
253  case ']':
254  token.type_ = tokenArrayEnd;
255  break;
256  case '"':
257  token.type_ = tokenString;
258  ok = readString();
259  break;
260  case '/':
261  token.type_ = tokenComment;
262  ok = readComment();
263  break;
264  case '0':
265  case '1':
266  case '2':
267  case '3':
268  case '4':
269  case '5':
270  case '6':
271  case '7':
272  case '8':
273  case '9':
274  case '-':
275  token.type_ = tokenNumber;
276  readNumber();
277  break;
278  case 't':
279  token.type_ = tokenTrue;
280  ok = match("rue", 3);
281  break;
282  case 'f':
283  token.type_ = tokenFalse;
284  ok = match("alse", 4);
285  break;
286  case 'n':
287  token.type_ = tokenNull;
288  ok = match("ull", 3);
289  break;
290  case ',':
291  token.type_ = tokenArraySeparator;
292  break;
293  case ':':
294  token.type_ = tokenMemberSeparator;
295  break;
296  case 0:
297  token.type_ = tokenEndOfStream;
298  break;
299  default:
300  ok = false;
301  break;
302  }
303  if (!ok)
304  token.type_ = tokenError;
305  token.end_ = current_;
306  return ok;
307 }
308 
309 void Reader::skipSpaces() {
310  while (current_ != end_) {
311  Char c = *current_;
312  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
313  ++current_;
314  else
315  break;
316  }
317 }
318 
319 bool Reader::match(const Char* pattern, int patternLength) {
320  if (end_ - current_ < patternLength)
321  return false;
322  int index = patternLength;
323  while (index--)
324  if (current_[index] != pattern[index])
325  return false;
326  current_ += patternLength;
327  return true;
328 }
329 
330 bool Reader::readComment() {
331  Location commentBegin = current_ - 1;
332  Char c = getNextChar();
333  bool successful = false;
334  if (c == '*')
335  successful = readCStyleComment();
336  else if (c == '/')
337  successful = readCppStyleComment();
338  if (!successful)
339  return false;
340 
341  if (collectComments_) {
342  CommentPlacement placement = commentBefore;
343  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
344  if (c != '*' || !containsNewLine(commentBegin, current_))
345  placement = commentAfterOnSameLine;
346  }
347 
348  addComment(commentBegin, current_, placement);
349  }
350  return true;
351 }
352 
353 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
354  String normalized;
355  normalized.reserve(static_cast<size_t>(end - begin));
356  Reader::Location current = begin;
357  while (current != end) {
358  char c = *current++;
359  if (c == '\r') {
360  if (current != end && *current == '\n')
361  // convert dos EOL
362  ++current;
363  // convert Mac EOL
364  normalized += '\n';
365  } else {
366  normalized += c;
367  }
368  }
369  return normalized;
370 }
371 
372 void Reader::addComment(Location begin, Location end,
373  CommentPlacement placement) {
374  assert(collectComments_);
375  const String& normalized = normalizeEOL(begin, end);
376  if (placement == commentAfterOnSameLine) {
377  assert(lastValue_ != nullptr);
378  lastValue_->setComment(normalized, placement);
379  } else {
380  commentsBefore_ += normalized;
381  }
382 }
383 
384 bool Reader::readCStyleComment() {
385  while ((current_ + 1) < end_) {
386  Char c = getNextChar();
387  if (c == '*' && *current_ == '/')
388  break;
389  }
390  return getNextChar() == '/';
391 }
392 
393 bool Reader::readCppStyleComment() {
394  while (current_ != end_) {
395  Char c = getNextChar();
396  if (c == '\n')
397  break;
398  if (c == '\r') {
399  // Consume DOS EOL. It will be normalized in addComment.
400  if (current_ != end_ && *current_ == '\n')
401  getNextChar();
402  // Break on Moc OS 9 EOL.
403  break;
404  }
405  }
406  return true;
407 }
408 
409 void Reader::readNumber() {
410  Location p = current_;
411  char c = '0'; // stopgap for already consumed character
412  // integral part
413  while (c >= '0' && c <= '9')
414  c = (current_ = p) < end_ ? *p++ : '\0';
415  // fractional part
416  if (c == '.') {
417  c = (current_ = p) < end_ ? *p++ : '\0';
418  while (c >= '0' && c <= '9')
419  c = (current_ = p) < end_ ? *p++ : '\0';
420  }
421  // exponential part
422  if (c == 'e' || c == 'E') {
423  c = (current_ = p) < end_ ? *p++ : '\0';
424  if (c == '+' || c == '-')
425  c = (current_ = p) < end_ ? *p++ : '\0';
426  while (c >= '0' && c <= '9')
427  c = (current_ = p) < end_ ? *p++ : '\0';
428  }
429 }
430 
431 bool Reader::readString() {
432  Char c = '\0';
433  while (current_ != end_) {
434  c = getNextChar();
435  if (c == '\\')
436  getNextChar();
437  else if (c == '"')
438  break;
439  }
440  return c == '"';
441 }
442 
443 bool Reader::readObject(Token& token) {
444  Token tokenName;
445  String name;
446  Value init(objectValue);
447  currentValue().swapPayload(init);
448  currentValue().setOffsetStart(token.start_ - begin_);
449  while (readTokenSkippingComments(tokenName)) {
450  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
451  return true;
452  name.clear();
453  if (tokenName.type_ == tokenString) {
454  if (!decodeString(tokenName, name))
455  return recoverFromError(tokenObjectEnd);
456  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
457  Value numberName;
458  if (!decodeNumber(tokenName, numberName))
459  return recoverFromError(tokenObjectEnd);
460  name = numberName.asString();
461  } else {
462  break;
463  }
464 
465  Token colon;
466  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
467  return addErrorAndRecover("Missing ':' after object member name", colon,
468  tokenObjectEnd);
469  }
470  Value& value = currentValue()[name];
471  nodes_.push(&value);
472  bool ok = readValue();
473  nodes_.pop();
474  if (!ok) // error already set
475  return recoverFromError(tokenObjectEnd);
476 
477  Token comma;
478  if (!readTokenSkippingComments(comma) ||
479  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
480  return addErrorAndRecover("Missing ',' or '}' in object declaration",
481  comma, tokenObjectEnd);
482  }
483  if (comma.type_ == tokenObjectEnd)
484  return true;
485  }
486  return addErrorAndRecover("Missing '}' or object member name", tokenName,
487  tokenObjectEnd);
488 }
489 
490 bool Reader::readArray(Token& token) {
491  Value init(arrayValue);
492  currentValue().swapPayload(init);
493  currentValue().setOffsetStart(token.start_ - begin_);
494  skipSpaces();
495  if (current_ != end_ && *current_ == ']') // empty array
496  {
497  Token endArray;
498  readToken(endArray);
499  return true;
500  }
501  int index = 0;
502  for (;;) {
503  Value& value = currentValue()[index++];
504  nodes_.push(&value);
505  bool ok = readValue();
506  nodes_.pop();
507  if (!ok) // error already set
508  return recoverFromError(tokenArrayEnd);
509 
510  Token currentToken;
511  // Accept Comment after last item in the array.
512  ok = readTokenSkippingComments(currentToken);
513  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
514  currentToken.type_ != tokenArrayEnd);
515  if (!ok || badTokenType) {
516  return addErrorAndRecover("Missing ',' or ']' in array declaration",
517  currentToken, tokenArrayEnd);
518  }
519  if (currentToken.type_ == tokenArrayEnd)
520  break;
521  }
522  return true;
523 }
524 
525 bool Reader::decodeNumber(Token& token) {
526  Value decoded;
527  if (!decodeNumber(token, decoded))
528  return false;
529  currentValue().swapPayload(decoded);
530  currentValue().setOffsetStart(token.start_ - begin_);
531  currentValue().setOffsetLimit(token.end_ - begin_);
532  return true;
533 }
534 
535 bool Reader::decodeNumber(Token& token, Value& decoded) {
536  // Attempts to parse the number as an integer. If the number is
537  // larger than the maximum supported value of an integer then
538  // we decode the number as a double.
539  Location current = token.start_;
540  bool isNegative = *current == '-';
541  if (isNegative)
542  ++current;
543  // TODO: Help the compiler do the div and mod at compile time or get rid of
544  // them.
545  Value::LargestUInt maxIntegerValue =
546  isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
548  Value::LargestUInt threshold = maxIntegerValue / 10;
549  Value::LargestUInt value = 0;
550  while (current < token.end_) {
551  Char c = *current++;
552  if (c < '0' || c > '9')
553  return decodeDouble(token, decoded);
554  auto digit(static_cast<Value::UInt>(c - '0'));
555  if (value >= threshold) {
556  // We've hit or exceeded the max value divided by 10 (rounded down). If
557  // a) we've only just touched the limit, b) this is the last digit, and
558  // c) it's small enough to fit in that rounding delta, we're okay.
559  // Otherwise treat this number as a double to avoid overflow.
560  if (value > threshold || current != token.end_ ||
561  digit > maxIntegerValue % 10) {
562  return decodeDouble(token, decoded);
563  }
564  }
565  value = value * 10 + digit;
566  }
567  if (isNegative && value == maxIntegerValue)
568  decoded = Value::minLargestInt;
569  else if (isNegative)
570  decoded = -Value::LargestInt(value);
571  else if (value <= Value::LargestUInt(Value::maxInt))
572  decoded = Value::LargestInt(value);
573  else
574  decoded = value;
575  return true;
576 }
577 
578 bool Reader::decodeDouble(Token& token) {
579  Value decoded;
580  if (!decodeDouble(token, decoded))
581  return false;
582  currentValue().swapPayload(decoded);
583  currentValue().setOffsetStart(token.start_ - begin_);
584  currentValue().setOffsetLimit(token.end_ - begin_);
585  return true;
586 }
587 
588 bool Reader::decodeDouble(Token& token, Value& decoded) {
589  double value = 0;
590  IStringStream is(String(token.start_, token.end_));
591  if (!(is >> value)) {
592  if (value == std::numeric_limits<double>::max())
593  value = std::numeric_limits<double>::infinity();
594  else if (value == std::numeric_limits<double>::lowest())
595  value = -std::numeric_limits<double>::infinity();
596  else if (!std::isinf(value))
597  return addError(
598  "'" + String(token.start_, token.end_) + "' is not a number.", token);
599  }
600  decoded = value;
601  return true;
602 }
603 
604 bool Reader::decodeString(Token& token) {
605  String decoded_string;
606  if (!decodeString(token, decoded_string))
607  return false;
608  Value decoded(decoded_string);
609  currentValue().swapPayload(decoded);
610  currentValue().setOffsetStart(token.start_ - begin_);
611  currentValue().setOffsetLimit(token.end_ - begin_);
612  return true;
613 }
614 
615 bool Reader::decodeString(Token& token, String& decoded) {
616  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
617  Location current = token.start_ + 1; // skip '"'
618  Location end = token.end_ - 1; // do not include '"'
619  while (current != end) {
620  Char c = *current++;
621  if (c == '"')
622  break;
623  if (c == '\\') {
624  if (current == end)
625  return addError("Empty escape sequence in string", token, current);
626  Char escape = *current++;
627  switch (escape) {
628  case '"':
629  decoded += '"';
630  break;
631  case '/':
632  decoded += '/';
633  break;
634  case '\\':
635  decoded += '\\';
636  break;
637  case 'b':
638  decoded += '\b';
639  break;
640  case 'f':
641  decoded += '\f';
642  break;
643  case 'n':
644  decoded += '\n';
645  break;
646  case 'r':
647  decoded += '\r';
648  break;
649  case 't':
650  decoded += '\t';
651  break;
652  case 'u': {
653  unsigned int unicode;
654  if (!decodeUnicodeCodePoint(token, current, end, unicode))
655  return false;
656  decoded += codePointToUTF8(unicode);
657  } break;
658  default:
659  return addError("Bad escape sequence in string", token, current);
660  }
661  } else {
662  decoded += c;
663  }
664  }
665  return true;
666 }
667 
668 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
669  Location end, unsigned int& unicode) {
670 
671  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
672  return false;
673  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
674  // surrogate pairs
675  if (end - current < 6)
676  return addError(
677  "additional six characters expected to parse unicode surrogate pair.",
678  token, current);
679  if (*(current++) == '\\' && *(current++) == 'u') {
680  unsigned int surrogatePair;
681  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
682  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
683  } else
684  return false;
685  } else
686  return addError("expecting another \\u token to begin the second half of "
687  "a unicode surrogate pair",
688  token, current);
689  }
690  return true;
691 }
692 
693 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
694  Location end,
695  unsigned int& ret_unicode) {
696  if (end - current < 4)
697  return addError(
698  "Bad unicode escape sequence in string: four digits expected.", token,
699  current);
700  int unicode = 0;
701  for (int index = 0; index < 4; ++index) {
702  Char c = *current++;
703  unicode *= 16;
704  if (c >= '0' && c <= '9')
705  unicode += c - '0';
706  else if (c >= 'a' && c <= 'f')
707  unicode += c - 'a' + 10;
708  else if (c >= 'A' && c <= 'F')
709  unicode += c - 'A' + 10;
710  else
711  return addError(
712  "Bad unicode escape sequence in string: hexadecimal digit expected.",
713  token, current);
714  }
715  ret_unicode = static_cast<unsigned int>(unicode);
716  return true;
717 }
718 
719 bool Reader::addError(const String& message, Token& token, Location extra) {
720  ErrorInfo info;
721  info.token_ = token;
722  info.message_ = message;
723  info.extra_ = extra;
724  errors_.push_back(info);
725  return false;
726 }
727 
728 bool Reader::recoverFromError(TokenType skipUntilToken) {
729  size_t const errorCount = errors_.size();
730  Token skip;
731  for (;;) {
732  if (!readToken(skip))
733  errors_.resize(errorCount); // discard errors caused by recovery
734  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
735  break;
736  }
737  errors_.resize(errorCount);
738  return false;
739 }
740 
741 bool Reader::addErrorAndRecover(const String& message, Token& token,
742  TokenType skipUntilToken) {
743  addError(message, token);
744  return recoverFromError(skipUntilToken);
745 }
746 
747 Value& Reader::currentValue() { return *(nodes_.top()); }
748 
749 Reader::Char Reader::getNextChar() {
750  if (current_ == end_)
751  return 0;
752  return *current_++;
753 }
754 
755 void Reader::getLocationLineAndColumn(Location location, int& line,
756  int& column) const {
757  Location current = begin_;
758  Location lastLineStart = current;
759  line = 0;
760  while (current < location && current != end_) {
761  Char c = *current++;
762  if (c == '\r') {
763  if (current != end_ && *current == '\n')
764  ++current;
765  lastLineStart = current;
766  ++line;
767  } else if (c == '\n') {
768  lastLineStart = current;
769  ++line;
770  }
771  }
772  // column & line start at 1
773  column = int(location - lastLineStart) + 1;
774  ++line;
775 }
776 
777 String Reader::getLocationLineAndColumn(Location location) const {
778  int line, column;
779  getLocationLineAndColumn(location, line, column);
780  char buffer[18 + 16 + 16 + 1];
781  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
782  return buffer;
783 }
784 
785 // Deprecated. Preserved for backward compatibility
786 String Reader::getFormatedErrorMessages() const {
787  return getFormattedErrorMessages();
788 }
789 
791  String formattedMessage;
792  for (const auto& error : errors_) {
793  formattedMessage +=
794  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
795  formattedMessage += " " + error.message_ + "\n";
796  if (error.extra_)
797  formattedMessage +=
798  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
799  }
800  return formattedMessage;
801 }
802 
803 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
804  std::vector<Reader::StructuredError> allErrors;
805  for (const auto& error : errors_) {
806  Reader::StructuredError structured;
807  structured.offset_start = error.token_.start_ - begin_;
808  structured.offset_limit = error.token_.end_ - begin_;
809  structured.message = error.message_;
810  allErrors.push_back(structured);
811  }
812  return allErrors;
813 }
814 
815 bool Reader::pushError(const Value& value, const String& message) {
816  ptrdiff_t const length = end_ - begin_;
817  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
818  return false;
819  Token token;
820  token.type_ = tokenError;
821  token.start_ = begin_ + value.getOffsetStart();
822  token.end_ = begin_ + value.getOffsetLimit();
823  ErrorInfo info;
824  info.token_ = token;
825  info.message_ = message;
826  info.extra_ = nullptr;
827  errors_.push_back(info);
828  return true;
829 }
830 
831 bool Reader::pushError(const Value& value, const String& message,
832  const Value& extra) {
833  ptrdiff_t const length = end_ - begin_;
834  if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
835  extra.getOffsetLimit() > length)
836  return false;
837  Token token;
838  token.type_ = tokenError;
839  token.start_ = begin_ + value.getOffsetStart();
840  token.end_ = begin_ + value.getOffsetLimit();
841  ErrorInfo info;
842  info.token_ = token;
843  info.message_ = message;
844  info.extra_ = begin_ + extra.getOffsetStart();
845  errors_.push_back(info);
846  return true;
847 }
848 
849 bool Reader::good() const { return errors_.empty(); }
850 
851 // Originally copied from the Features class (now deprecated), used internally
852 // for features implementation.
// Feature switches used internally by OurReader.
// Every member has a default initializer, so a default-constructed object
// starts with all flags off and a zero stack limit instead of indeterminate
// values. This matches what all() already returned through
// value-initialisation.
class OurFeatures {
public:
  // Returns a feature set with every flag false and stackLimit_ == 0.
  static OurFeatures all();
  bool allowComments_ = false;
  bool allowTrailingCommas_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  bool skipBom_ = false;
  size_t stackLimit_ = 0;
}; // OurFeatures

OurFeatures OurFeatures::all() { return {}; }
870 
// Implementation of class OurReader
872 // ////////////////////////////////
873 
874 // Originally copied from the Reader class (now deprecated), used internally
875 // for implementing JSON reading.
876 class OurReader {
877 public:
878  using Char = char;
879  using Location = const Char*;
880 
881  explicit OurReader(OurFeatures const& features);
882  bool parse(const char* beginDoc, const char* endDoc, Value& root,
883  bool collectComments = true);
884  String getFormattedErrorMessages() const;
885  std::vector<CharReader::StructuredError> getStructuredErrors() const;
886 
887 private:
888  OurReader(OurReader const&); // no impl
889  void operator=(OurReader const&); // no impl
890 
891  enum TokenType {
892  tokenEndOfStream = 0,
893  tokenObjectBegin,
894  tokenObjectEnd,
895  tokenArrayBegin,
896  tokenArrayEnd,
897  tokenString,
898  tokenNumber,
899  tokenTrue,
900  tokenFalse,
901  tokenNull,
902  tokenNaN,
903  tokenPosInf,
904  tokenNegInf,
905  tokenArraySeparator,
906  tokenMemberSeparator,
907  tokenComment,
908  tokenError
909  };
910 
911  class Token {
912  public:
913  TokenType type_;
914  Location start_;
915  Location end_;
916  };
917 
918  class ErrorInfo {
919  public:
920  Token token_;
921  String message_;
922  Location extra_;
923  };
924 
925  using Errors = std::deque<ErrorInfo>;
926 
927  bool readToken(Token& token);
928  bool readTokenSkippingComments(Token& token);
929  void skipSpaces();
930  void skipBom(bool skipBom);
931  bool match(const Char* pattern, int patternLength);
932  bool readComment();
933  bool readCStyleComment(bool* containsNewLineResult);
934  bool readCppStyleComment();
935  bool readString();
936  bool readStringSingleQuote();
937  bool readNumber(bool checkInf);
938  bool readValue();
939  bool readObject(Token& token);
940  bool readArray(Token& token);
941  bool decodeNumber(Token& token);
942  bool decodeNumber(Token& token, Value& decoded);
943  bool decodeString(Token& token);
944  bool decodeString(Token& token, String& decoded);
945  bool decodeDouble(Token& token);
946  bool decodeDouble(Token& token, Value& decoded);
947  bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
948  unsigned int& unicode);
949  bool decodeUnicodeEscapeSequence(Token& token, Location& current,
950  Location end, unsigned int& unicode);
951  bool addError(const String& message, Token& token, Location extra = nullptr);
952  bool recoverFromError(TokenType skipUntilToken);
953  bool addErrorAndRecover(const String& message, Token& token,
954  TokenType skipUntilToken);
955  void skipUntilSpace();
956  Value& currentValue();
957  Char getNextChar();
958  void getLocationLineAndColumn(Location location, int& line,
959  int& column) const;
960  String getLocationLineAndColumn(Location location) const;
961  void addComment(Location begin, Location end, CommentPlacement placement);
962 
963  static String normalizeEOL(Location begin, Location end);
964  static bool containsNewLine(Location begin, Location end);
965 
966  using Nodes = std::stack<Value*>;
967 
968  Nodes nodes_{};
969  Errors errors_{};
970  String document_{};
971  Location begin_ = nullptr;
972  Location end_ = nullptr;
973  Location current_ = nullptr;
974  Location lastValueEnd_ = nullptr;
975  Value* lastValue_ = nullptr;
976  bool lastValueHasAComment_ = false;
977  String commentsBefore_{};
978 
979  OurFeatures const features_;
980  bool collectComments_ = false;
981 }; // OurReader
982 
// complete copy of Reader impl, for OurReader
984 
985 bool OurReader::containsNewLine(OurReader::Location begin,
986  OurReader::Location end) {
987  return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
988 }
989 
990 OurReader::OurReader(OurFeatures const& features) : features_(features) {}
991 
992 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
993  bool collectComments) {
994  if (!features_.allowComments_) {
995  collectComments = false;
996  }
997 
998  begin_ = beginDoc;
999  end_ = endDoc;
1000  collectComments_ = collectComments;
1001  current_ = begin_;
1002  lastValueEnd_ = nullptr;
1003  lastValue_ = nullptr;
1004  commentsBefore_.clear();
1005  errors_.clear();
1006  while (!nodes_.empty())
1007  nodes_.pop();
1008  nodes_.push(&root);
1009 
1010  // skip byte order mark if it exists at the beginning of the UTF-8 text.
1011  skipBom(features_.skipBom_);
1012  bool successful = readValue();
1013  nodes_.pop();
1014  Token token;
1015  readTokenSkippingComments(token);
1016  if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1017  addError("Extra non-whitespace after JSON value.", token);
1018  return false;
1019  }
1020  if (collectComments_ && !commentsBefore_.empty())
1021  root.setComment(commentsBefore_, commentAfter);
1022  if (features_.strictRoot_) {
1023  if (!root.isArray() && !root.isObject()) {
1024  // Set error location to start of doc, ideally should be first token found
1025  // in doc
1026  token.type_ = tokenError;
1027  token.start_ = beginDoc;
1028  token.end_ = endDoc;
1029  addError(
1030  "A valid JSON document must be either an array or an object value.",
1031  token);
1032  return false;
1033  }
1034  }
1035  return successful;
1036 }
1037 
1038 bool OurReader::readValue() {
1039  // To preserve the old behaviour we cast size_t to int.
1040  if (nodes_.size() > features_.stackLimit_)
1041  throwRuntimeError("Exceeded stackLimit in readValue().");
1042  Token token;
1043  readTokenSkippingComments(token);
1044  bool successful = true;
1045 
1046  if (collectComments_ && !commentsBefore_.empty()) {
1047  currentValue().setComment(commentsBefore_, commentBefore);
1048  commentsBefore_.clear();
1049  }
1050 
1051  switch (token.type_) {
1052  case tokenObjectBegin:
1053  successful = readObject(token);
1054  currentValue().setOffsetLimit(current_ - begin_);
1055  break;
1056  case tokenArrayBegin:
1057  successful = readArray(token);
1058  currentValue().setOffsetLimit(current_ - begin_);
1059  break;
1060  case tokenNumber:
1061  successful = decodeNumber(token);
1062  break;
1063  case tokenString:
1064  successful = decodeString(token);
1065  break;
1066  case tokenTrue: {
1067  Value v(true);
1068  currentValue().swapPayload(v);
1069  currentValue().setOffsetStart(token.start_ - begin_);
1070  currentValue().setOffsetLimit(token.end_ - begin_);
1071  } break;
1072  case tokenFalse: {
1073  Value v(false);
1074  currentValue().swapPayload(v);
1075  currentValue().setOffsetStart(token.start_ - begin_);
1076  currentValue().setOffsetLimit(token.end_ - begin_);
1077  } break;
1078  case tokenNull: {
1079  Value v;
1080  currentValue().swapPayload(v);
1081  currentValue().setOffsetStart(token.start_ - begin_);
1082  currentValue().setOffsetLimit(token.end_ - begin_);
1083  } break;
1084  case tokenNaN: {
1085  Value v(std::numeric_limits<double>::quiet_NaN());
1086  currentValue().swapPayload(v);
1087  currentValue().setOffsetStart(token.start_ - begin_);
1088  currentValue().setOffsetLimit(token.end_ - begin_);
1089  } break;
1090  case tokenPosInf: {
1091  Value v(std::numeric_limits<double>::infinity());
1092  currentValue().swapPayload(v);
1093  currentValue().setOffsetStart(token.start_ - begin_);
1094  currentValue().setOffsetLimit(token.end_ - begin_);
1095  } break;
1096  case tokenNegInf: {
1097  Value v(-std::numeric_limits<double>::infinity());
1098  currentValue().swapPayload(v);
1099  currentValue().setOffsetStart(token.start_ - begin_);
1100  currentValue().setOffsetLimit(token.end_ - begin_);
1101  } break;
1102  case tokenArraySeparator:
1103  case tokenObjectEnd:
1104  case tokenArrayEnd:
1105  if (features_.allowDroppedNullPlaceholders_) {
1106  // "Un-read" the current token and mark the current value as a null
1107  // token.
1108  current_--;
1109  Value v;
1110  currentValue().swapPayload(v);
1111  currentValue().setOffsetStart(current_ - begin_ - 1);
1112  currentValue().setOffsetLimit(current_ - begin_);
1113  break;
1114  } // else, fall through ...
1115  default:
1116  currentValue().setOffsetStart(token.start_ - begin_);
1117  currentValue().setOffsetLimit(token.end_ - begin_);
1118  return addError("Syntax error: value, object or array expected.", token);
1119  }
1120 
1121  if (collectComments_) {
1122  lastValueEnd_ = current_;
1123  lastValueHasAComment_ = false;
1124  lastValue_ = &currentValue();
1125  }
1126 
1127  return successful;
1128 }
1129 
1130 bool OurReader::readTokenSkippingComments(Token& token) {
1131  bool success = readToken(token);
1132  if (features_.allowComments_) {
1133  while (success && token.type_ == tokenComment) {
1134  success = readToken(token);
1135  }
1136  }
1137  return success;
1138 }
1139 
1140 bool OurReader::readToken(Token& token) {
1141  skipSpaces();
1142  token.start_ = current_;
1143  Char c = getNextChar();
1144  bool ok = true;
1145  switch (c) {
1146  case '{':
1147  token.type_ = tokenObjectBegin;
1148  break;
1149  case '}':
1150  token.type_ = tokenObjectEnd;
1151  break;
1152  case '[':
1153  token.type_ = tokenArrayBegin;
1154  break;
1155  case ']':
1156  token.type_ = tokenArrayEnd;
1157  break;
1158  case '"':
1159  token.type_ = tokenString;
1160  ok = readString();
1161  break;
1162  case '\'':
1163  if (features_.allowSingleQuotes_) {
1164  token.type_ = tokenString;
1165  ok = readStringSingleQuote();
1166  } else {
1167  // If we don't allow single quotes, this is a failure case.
1168  ok = false;
1169  }
1170  break;
1171  case '/':
1172  token.type_ = tokenComment;
1173  ok = readComment();
1174  break;
1175  case '0':
1176  case '1':
1177  case '2':
1178  case '3':
1179  case '4':
1180  case '5':
1181  case '6':
1182  case '7':
1183  case '8':
1184  case '9':
1185  token.type_ = tokenNumber;
1186  readNumber(false);
1187  break;
1188  case '-':
1189  if (readNumber(true)) {
1190  token.type_ = tokenNumber;
1191  } else {
1192  token.type_ = tokenNegInf;
1193  ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1194  }
1195  break;
1196  case '+':
1197  if (readNumber(true)) {
1198  token.type_ = tokenNumber;
1199  } else {
1200  token.type_ = tokenPosInf;
1201  ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1202  }
1203  break;
1204  case 't':
1205  token.type_ = tokenTrue;
1206  ok = match("rue", 3);
1207  break;
1208  case 'f':
1209  token.type_ = tokenFalse;
1210  ok = match("alse", 4);
1211  break;
1212  case 'n':
1213  token.type_ = tokenNull;
1214  ok = match("ull", 3);
1215  break;
1216  case 'N':
1217  if (features_.allowSpecialFloats_) {
1218  token.type_ = tokenNaN;
1219  ok = match("aN", 2);
1220  } else {
1221  ok = false;
1222  }
1223  break;
1224  case 'I':
1225  if (features_.allowSpecialFloats_) {
1226  token.type_ = tokenPosInf;
1227  ok = match("nfinity", 7);
1228  } else {
1229  ok = false;
1230  }
1231  break;
1232  case ',':
1233  token.type_ = tokenArraySeparator;
1234  break;
1235  case ':':
1236  token.type_ = tokenMemberSeparator;
1237  break;
1238  case 0:
1239  token.type_ = tokenEndOfStream;
1240  break;
1241  default:
1242  ok = false;
1243  break;
1244  }
1245  if (!ok)
1246  token.type_ = tokenError;
1247  token.end_ = current_;
1248  return ok;
1249 }
1250 
1251 void OurReader::skipSpaces() {
1252  while (current_ != end_) {
1253  Char c = *current_;
1254  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1255  ++current_;
1256  else
1257  break;
1258  }
1259 }
1260 
1261 void OurReader::skipBom(bool skipBom) {
1262  // The default behavior is to skip BOM.
1263  if (skipBom) {
1264  if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1265  begin_ += 3;
1266  current_ = begin_;
1267  }
1268  }
1269 }
1270 
1271 bool OurReader::match(const Char* pattern, int patternLength) {
1272  if (end_ - current_ < patternLength)
1273  return false;
1274  int index = patternLength;
1275  while (index--)
1276  if (current_[index] != pattern[index])
1277  return false;
1278  current_ += patternLength;
1279  return true;
1280 }
1281 
1282 bool OurReader::readComment() {
1283  const Location commentBegin = current_ - 1;
1284  const Char c = getNextChar();
1285  bool successful = false;
1286  bool cStyleWithEmbeddedNewline = false;
1287 
1288  const bool isCStyleComment = (c == '*');
1289  const bool isCppStyleComment = (c == '/');
1290  if (isCStyleComment) {
1291  successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1292  } else if (isCppStyleComment) {
1293  successful = readCppStyleComment();
1294  }
1295 
1296  if (!successful)
1297  return false;
1298 
1299  if (collectComments_) {
1300  CommentPlacement placement = commentBefore;
1301 
1302  if (!lastValueHasAComment_) {
1303  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1304  if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1305  placement = commentAfterOnSameLine;
1306  lastValueHasAComment_ = true;
1307  }
1308  }
1309  }
1310 
1311  addComment(commentBegin, current_, placement);
1312  }
1313  return true;
1314 }
1315 
1316 String OurReader::normalizeEOL(OurReader::Location begin,
1317  OurReader::Location end) {
1318  String normalized;
1319  normalized.reserve(static_cast<size_t>(end - begin));
1320  OurReader::Location current = begin;
1321  while (current != end) {
1322  char c = *current++;
1323  if (c == '\r') {
1324  if (current != end && *current == '\n')
1325  // convert dos EOL
1326  ++current;
1327  // convert Mac EOL
1328  normalized += '\n';
1329  } else {
1330  normalized += c;
1331  }
1332  }
1333  return normalized;
1334 }
1335 
1336 void OurReader::addComment(Location begin, Location end,
1337  CommentPlacement placement) {
1338  assert(collectComments_);
1339  const String& normalized = normalizeEOL(begin, end);
1340  if (placement == commentAfterOnSameLine) {
1341  assert(lastValue_ != nullptr);
1342  lastValue_->setComment(normalized, placement);
1343  } else {
1344  commentsBefore_ += normalized;
1345  }
1346 }
1347 
1348 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1349  *containsNewLineResult = false;
1350 
1351  while ((current_ + 1) < end_) {
1352  Char c = getNextChar();
1353  if (c == '*' && *current_ == '/')
1354  break;
1355  if (c == '\n')
1356  *containsNewLineResult = true;
1357  }
1358 
1359  return getNextChar() == '/';
1360 }
1361 
1362 bool OurReader::readCppStyleComment() {
1363  while (current_ != end_) {
1364  Char c = getNextChar();
1365  if (c == '\n')
1366  break;
1367  if (c == '\r') {
1368  // Consume DOS EOL. It will be normalized in addComment.
1369  if (current_ != end_ && *current_ == '\n')
1370  getNextChar();
1371  // Break on Moc OS 9 EOL.
1372  break;
1373  }
1374  }
1375  return true;
1376 }
1377 
1378 bool OurReader::readNumber(bool checkInf) {
1379  Location p = current_;
1380  if (checkInf && p != end_ && *p == 'I') {
1381  current_ = ++p;
1382  return false;
1383  }
1384  char c = '0'; // stopgap for already consumed character
1385  // integral part
1386  while (c >= '0' && c <= '9')
1387  c = (current_ = p) < end_ ? *p++ : '\0';
1388  // fractional part
1389  if (c == '.') {
1390  c = (current_ = p) < end_ ? *p++ : '\0';
1391  while (c >= '0' && c <= '9')
1392  c = (current_ = p) < end_ ? *p++ : '\0';
1393  }
1394  // exponential part
1395  if (c == 'e' || c == 'E') {
1396  c = (current_ = p) < end_ ? *p++ : '\0';
1397  if (c == '+' || c == '-')
1398  c = (current_ = p) < end_ ? *p++ : '\0';
1399  while (c >= '0' && c <= '9')
1400  c = (current_ = p) < end_ ? *p++ : '\0';
1401  }
1402  return true;
1403 }
1404 bool OurReader::readString() {
1405  Char c = 0;
1406  while (current_ != end_) {
1407  c = getNextChar();
1408  if (c == '\\')
1409  getNextChar();
1410  else if (c == '"')
1411  break;
1412  }
1413  return c == '"';
1414 }
1415 
1416 bool OurReader::readStringSingleQuote() {
1417  Char c = 0;
1418  while (current_ != end_) {
1419  c = getNextChar();
1420  if (c == '\\')
1421  getNextChar();
1422  else if (c == '\'')
1423  break;
1424  }
1425  return c == '\'';
1426 }
1427 
1428 bool OurReader::readObject(Token& token) {
1429  Token tokenName;
1430  String name;
1431  Value init(objectValue);
1432  currentValue().swapPayload(init);
1433  currentValue().setOffsetStart(token.start_ - begin_);
1434  while (readTokenSkippingComments(tokenName)) {
1435  if (tokenName.type_ == tokenObjectEnd &&
1436  (name.empty() ||
1437  features_.allowTrailingCommas_)) // empty object or trailing comma
1438  return true;
1439  name.clear();
1440  if (tokenName.type_ == tokenString) {
1441  if (!decodeString(tokenName, name))
1442  return recoverFromError(tokenObjectEnd);
1443  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1444  Value numberName;
1445  if (!decodeNumber(tokenName, numberName))
1446  return recoverFromError(tokenObjectEnd);
1447  name = numberName.asString();
1448  } else {
1449  break;
1450  }
1451  if (name.length() >= (1U << 30))
1452  throwRuntimeError("keylength >= 2^30");
1453  if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1454  String msg = "Duplicate key: '" + name + "'";
1455  return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1456  }
1457 
1458  Token colon;
1459  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1460  return addErrorAndRecover("Missing ':' after object member name", colon,
1461  tokenObjectEnd);
1462  }
1463  Value& value = currentValue()[name];
1464  nodes_.push(&value);
1465  bool ok = readValue();
1466  nodes_.pop();
1467  if (!ok) // error already set
1468  return recoverFromError(tokenObjectEnd);
1469 
1470  Token comma;
1471  if (!readTokenSkippingComments(comma) ||
1472  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
1473  return addErrorAndRecover("Missing ',' or '}' in object declaration",
1474  comma, tokenObjectEnd);
1475  }
1476  if (comma.type_ == tokenObjectEnd)
1477  return true;
1478  }
1479  return addErrorAndRecover("Missing '}' or object member name", tokenName,
1480  tokenObjectEnd);
1481 }
1482 
1483 bool OurReader::readArray(Token& token) {
1484  Value init(arrayValue);
1485  currentValue().swapPayload(init);
1486  currentValue().setOffsetStart(token.start_ - begin_);
1487  int index = 0;
1488  for (;;) {
1489  skipSpaces();
1490  if (current_ != end_ && *current_ == ']' &&
1491  (index == 0 ||
1492  (features_.allowTrailingCommas_ &&
1493  !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1494  // comma
1495  {
1496  Token endArray;
1497  readToken(endArray);
1498  return true;
1499  }
1500  Value& value = currentValue()[index++];
1501  nodes_.push(&value);
1502  bool ok = readValue();
1503  nodes_.pop();
1504  if (!ok) // error already set
1505  return recoverFromError(tokenArrayEnd);
1506 
1507  Token currentToken;
1508  // Accept Comment after last item in the array.
1509  ok = readTokenSkippingComments(currentToken);
1510  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1511  currentToken.type_ != tokenArrayEnd);
1512  if (!ok || badTokenType) {
1513  return addErrorAndRecover("Missing ',' or ']' in array declaration",
1514  currentToken, tokenArrayEnd);
1515  }
1516  if (currentToken.type_ == tokenArrayEnd)
1517  break;
1518  }
1519  return true;
1520 }
1521 
1522 bool OurReader::decodeNumber(Token& token) {
1523  Value decoded;
1524  if (!decodeNumber(token, decoded))
1525  return false;
1526  currentValue().swapPayload(decoded);
1527  currentValue().setOffsetStart(token.start_ - begin_);
1528  currentValue().setOffsetLimit(token.end_ - begin_);
1529  return true;
1530 }
1531 
1532 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1533  // Attempts to parse the number as an integer. If the number is
1534  // larger than the maximum supported value of an integer then
1535  // we decode the number as a double.
1536  Location current = token.start_;
1537  const bool isNegative = *current == '-';
1538  if (isNegative) {
1539  ++current;
1540  }
1541 
1542  // We assume we can represent the largest and smallest integer types as
1543  // unsigned integers with separate sign. This is only true if they can fit
1544  // into an unsigned integer.
1546  "Int must be smaller than UInt");
1547 
1548  // We need to convert minLargestInt into a positive number. The easiest way
1549  // to do this conversion is to assume our "threshold" value of minLargestInt
1550  // divided by 10 can fit in maxLargestInt when absolute valued. This should
1551  // be a safe assumption.
1552  static_assert(Value::minLargestInt <= -Value::maxLargestInt,
1553  "The absolute value of minLargestInt must be greater than or "
1554  "equal to maxLargestInt");
1555  static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1556  "The absolute value of minLargestInt must be only 1 magnitude "
1557  "larger than maxLargest Int");
1558 
1559  static constexpr Value::LargestUInt positive_threshold =
1560  Value::maxLargestUInt / 10;
1561  static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1562 
1563  // For the negative values, we have to be more careful. Since typically
1564  // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1565  // then take the inverse. This assumes that minLargestInt is only a single
1566  // power of 10 different in magnitude, which we check above. For the last
1567  // digit, we take the modulus before negating for the same reason.
1568  static constexpr auto negative_threshold =
1570  static constexpr auto negative_last_digit =
1572 
1573  const Value::LargestUInt threshold =
1574  isNegative ? negative_threshold : positive_threshold;
1575  const Value::UInt max_last_digit =
1576  isNegative ? negative_last_digit : positive_last_digit;
1577 
1578  Value::LargestUInt value = 0;
1579  while (current < token.end_) {
1580  Char c = *current++;
1581  if (c < '0' || c > '9')
1582  return decodeDouble(token, decoded);
1583 
1584  const auto digit(static_cast<Value::UInt>(c - '0'));
1585  if (value >= threshold) {
1586  // We've hit or exceeded the max value divided by 10 (rounded down). If
1587  // a) we've only just touched the limit, meaning value == threshold,
1588  // b) this is the last digit, or
1589  // c) it's small enough to fit in that rounding delta, we're okay.
1590  // Otherwise treat this number as a double to avoid overflow.
1591  if (value > threshold || current != token.end_ ||
1592  digit > max_last_digit) {
1593  return decodeDouble(token, decoded);
1594  }
1595  }
1596  value = value * 10 + digit;
1597  }
1598 
1599  if (isNegative) {
1600  // We use the same magnitude assumption here, just in case.
1601  const auto last_digit = static_cast<Value::UInt>(value % 10);
1602  decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1603  } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1604  decoded = Value::LargestInt(value);
1605  } else {
1606  decoded = value;
1607  }
1608 
1609  return true;
1610 }
1611 
1612 bool OurReader::decodeDouble(Token& token) {
1613  Value decoded;
1614  if (!decodeDouble(token, decoded))
1615  return false;
1616  currentValue().swapPayload(decoded);
1617  currentValue().setOffsetStart(token.start_ - begin_);
1618  currentValue().setOffsetLimit(token.end_ - begin_);
1619  return true;
1620 }
1621 
1622 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1623  double value = 0;
1624  IStringStream is(String(token.start_, token.end_));
1625  if (!(is >> value)) {
1626  if (value == std::numeric_limits<double>::max())
1627  value = std::numeric_limits<double>::infinity();
1628  else if (value == std::numeric_limits<double>::lowest())
1629  value = -std::numeric_limits<double>::infinity();
1630  else if (!std::isinf(value))
1631  return addError(
1632  "'" + String(token.start_, token.end_) + "' is not a number.", token);
1633  }
1634  decoded = value;
1635  return true;
1636 }
1637 
1638 bool OurReader::decodeString(Token& token) {
1639  String decoded_string;
1640  if (!decodeString(token, decoded_string))
1641  return false;
1642  Value decoded(decoded_string);
1643  currentValue().swapPayload(decoded);
1644  currentValue().setOffsetStart(token.start_ - begin_);
1645  currentValue().setOffsetLimit(token.end_ - begin_);
1646  return true;
1647 }
1648 
1649 bool OurReader::decodeString(Token& token, String& decoded) {
1650  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1651  Location current = token.start_ + 1; // skip '"'
1652  Location end = token.end_ - 1; // do not include '"'
1653  while (current != end) {
1654  Char c = *current++;
1655  if (c == '"')
1656  break;
1657  if (c == '\\') {
1658  if (current == end)
1659  return addError("Empty escape sequence in string", token, current);
1660  Char escape = *current++;
1661  switch (escape) {
1662  case '"':
1663  decoded += '"';
1664  break;
1665  case '/':
1666  decoded += '/';
1667  break;
1668  case '\\':
1669  decoded += '\\';
1670  break;
1671  case 'b':
1672  decoded += '\b';
1673  break;
1674  case 'f':
1675  decoded += '\f';
1676  break;
1677  case 'n':
1678  decoded += '\n';
1679  break;
1680  case 'r':
1681  decoded += '\r';
1682  break;
1683  case 't':
1684  decoded += '\t';
1685  break;
1686  case 'u': {
1687  unsigned int unicode;
1688  if (!decodeUnicodeCodePoint(token, current, end, unicode))
1689  return false;
1690  decoded += codePointToUTF8(unicode);
1691  } break;
1692  default:
1693  return addError("Bad escape sequence in string", token, current);
1694  }
1695  } else {
1696  decoded += c;
1697  }
1698  }
1699  return true;
1700 }
1701 
1702 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1703  Location end, unsigned int& unicode) {
1704 
1705  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1706  return false;
1707  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1708  // surrogate pairs
1709  if (end - current < 6)
1710  return addError(
1711  "additional six characters expected to parse unicode surrogate pair.",
1712  token, current);
1713  if (*(current++) == '\\' && *(current++) == 'u') {
1714  unsigned int surrogatePair;
1715  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1716  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1717  } else
1718  return false;
1719  } else
1720  return addError("expecting another \\u token to begin the second half of "
1721  "a unicode surrogate pair",
1722  token, current);
1723  }
1724  return true;
1725 }
1726 
1727 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1728  Location end,
1729  unsigned int& ret_unicode) {
1730  if (end - current < 4)
1731  return addError(
1732  "Bad unicode escape sequence in string: four digits expected.", token,
1733  current);
1734  int unicode = 0;
1735  for (int index = 0; index < 4; ++index) {
1736  Char c = *current++;
1737  unicode *= 16;
1738  if (c >= '0' && c <= '9')
1739  unicode += c - '0';
1740  else if (c >= 'a' && c <= 'f')
1741  unicode += c - 'a' + 10;
1742  else if (c >= 'A' && c <= 'F')
1743  unicode += c - 'A' + 10;
1744  else
1745  return addError(
1746  "Bad unicode escape sequence in string: hexadecimal digit expected.",
1747  token, current);
1748  }
1749  ret_unicode = static_cast<unsigned int>(unicode);
1750  return true;
1751 }
1752 
1753 bool OurReader::addError(const String& message, Token& token, Location extra) {
1754  ErrorInfo info;
1755  info.token_ = token;
1756  info.message_ = message;
1757  info.extra_ = extra;
1758  errors_.push_back(info);
1759  return false;
1760 }
1761 
1762 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1763  size_t errorCount = errors_.size();
1764  Token skip;
1765  for (;;) {
1766  if (!readToken(skip))
1767  errors_.resize(errorCount); // discard errors caused by recovery
1768  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1769  break;
1770  }
1771  errors_.resize(errorCount);
1772  return false;
1773 }
1774 
1775 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1776  TokenType skipUntilToken) {
1777  addError(message, token);
1778  return recoverFromError(skipUntilToken);
1779 }
1780 
1781 Value& OurReader::currentValue() { return *(nodes_.top()); }
1782 
1783 OurReader::Char OurReader::getNextChar() {
1784  if (current_ == end_)
1785  return 0;
1786  return *current_++;
1787 }
1788 
1789 void OurReader::getLocationLineAndColumn(Location location, int& line,
1790  int& column) const {
1791  Location current = begin_;
1792  Location lastLineStart = current;
1793  line = 0;
1794  while (current < location && current != end_) {
1795  Char c = *current++;
1796  if (c == '\r') {
1797  if (current != end_ && *current == '\n')
1798  ++current;
1799  lastLineStart = current;
1800  ++line;
1801  } else if (c == '\n') {
1802  lastLineStart = current;
1803  ++line;
1804  }
1805  }
1806  // column & line start at 1
1807  column = int(location - lastLineStart) + 1;
1808  ++line;
1809 }
1810 
1811 String OurReader::getLocationLineAndColumn(Location location) const {
1812  int line, column;
1813  getLocationLineAndColumn(location, line, column);
1814  char buffer[18 + 16 + 16 + 1];
1815  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1816  return buffer;
1817 }
1818 
1819 String OurReader::getFormattedErrorMessages() const {
1820  String formattedMessage;
1821  for (const auto& error : errors_) {
1822  formattedMessage +=
1823  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1824  formattedMessage += " " + error.message_ + "\n";
1825  if (error.extra_)
1826  formattedMessage +=
1827  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1828  }
1829  return formattedMessage;
1830 }
1831 
1832 std::vector<CharReader::StructuredError>
1833 OurReader::getStructuredErrors() const {
1834  std::vector<CharReader::StructuredError> allErrors;
1835  for (const auto& error : errors_) {
1836  CharReader::StructuredError structured;
1837  structured.offset_start = error.token_.start_ - begin_;
1838  structured.offset_limit = error.token_.end_ - begin_;
1839  structured.message = error.message_;
1840  allErrors.push_back(structured);
1841  }
1842  return allErrors;
1843 }
1844 
1845 class OurCharReader : public CharReader {
1846 
1847 public:
1848  OurCharReader(bool collectComments, OurFeatures const& features)
1849  : CharReader(
1850  std::unique_ptr<OurImpl>(new OurImpl(collectComments, features))) {}
1851 
1852 protected:
1853  class OurImpl : public Impl {
1854  public:
1855  OurImpl(bool collectComments, OurFeatures const& features)
1856  : collectComments_(collectComments), reader_(features) {}
1857 
1858  bool parse(char const* beginDoc, char const* endDoc, Value* root,
1859  String* errs) override {
1860  bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1861  if (errs) {
1862  *errs = reader_.getFormattedErrorMessages();
1863  }
1864  return ok;
1865  }
1866 
1867  std::vector<CharReader::StructuredError>
1868  getStructuredErrors() const override {
1869  return reader_.getStructuredErrors();
1870  }
1871 
1872  private:
1873  bool const collectComments_;
1874  OurReader reader_;
1875  };
1876 };
1877 
1881  bool collectComments = settings_["collectComments"].asBool();
1882  OurFeatures features = OurFeatures::all();
1883  features.allowComments_ = settings_["allowComments"].asBool();
1884  features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1885  features.strictRoot_ = settings_["strictRoot"].asBool();
1886  features.allowDroppedNullPlaceholders_ =
1887  settings_["allowDroppedNullPlaceholders"].asBool();
1888  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1889  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1890 
1891  // Stack limit is always a size_t, so we get this as an unsigned int
1892  // regardless of whether we have 64-bit integer support enabled.
1893  features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1894  features.failIfExtra_ = settings_["failIfExtra"].asBool();
1895  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1896  features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1897  features.skipBom_ = settings_["skipBom"].asBool();
1898  return new OurCharReader(collectComments, features);
1899 }
1900 
1902  static const auto& valid_keys = *new std::set<String>{
1903  "collectComments",
1904  "allowComments",
1905  "allowTrailingCommas",
1906  "strictRoot",
1907  "allowDroppedNullPlaceholders",
1908  "allowNumericKeys",
1909  "allowSingleQuotes",
1910  "stackLimit",
1911  "failIfExtra",
1912  "rejectDupKeys",
1913  "allowSpecialFloats",
1914  "skipBom",
1915  };
1916  for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1917  auto key = si.name();
1918  if (valid_keys.count(key))
1919  continue;
1920  if (invalid)
1921  (*invalid)[key] = *si;
1922  else
1923  return false;
1924  }
1925  return invalid ? invalid->empty() : true;
1926 }
1927 
1929  return settings_[key];
1930 }
1931 // static
1934  (*settings)["allowComments"] = false;
1935  (*settings)["allowTrailingCommas"] = false;
1936  (*settings)["strictRoot"] = true;
1937  (*settings)["allowDroppedNullPlaceholders"] = false;
1938  (*settings)["allowNumericKeys"] = false;
1939  (*settings)["allowSingleQuotes"] = false;
1940  (*settings)["stackLimit"] = 1000;
1941  (*settings)["failIfExtra"] = true;
1942  (*settings)["rejectDupKeys"] = true;
1943  (*settings)["allowSpecialFloats"] = false;
1944  (*settings)["skipBom"] = true;
1946 }
1947 // static
1950  (*settings)["collectComments"] = true;
1951  (*settings)["allowComments"] = true;
1952  (*settings)["allowTrailingCommas"] = true;
1953  (*settings)["strictRoot"] = false;
1954  (*settings)["allowDroppedNullPlaceholders"] = false;
1955  (*settings)["allowNumericKeys"] = false;
1956  (*settings)["allowSingleQuotes"] = false;
1957  (*settings)["stackLimit"] = 1000;
1958  (*settings)["failIfExtra"] = false;
1959  (*settings)["rejectDupKeys"] = false;
1960  (*settings)["allowSpecialFloats"] = false;
1961  (*settings)["skipBom"] = true;
1963 }
1964 // static
1967  (*settings)["allowComments"] = false;
1968  (*settings)["allowTrailingCommas"] = false;
1969  (*settings)["strictRoot"] = false;
1970  (*settings)["allowDroppedNullPlaceholders"] = false;
1971  (*settings)["allowNumericKeys"] = false;
1972  (*settings)["allowSingleQuotes"] = false;
1973  (*settings)["stackLimit"] = 1000;
1974  (*settings)["failIfExtra"] = true;
1975  (*settings)["rejectDupKeys"] = false;
1976  (*settings)["allowSpecialFloats"] = false;
1977  (*settings)["skipBom"] = false;
1979 }
1980 
1981 std::vector<CharReader::StructuredError>
1983  return _impl->getStructuredErrors();
1984 }
1985 
1986 bool CharReader::parse(char const* beginDoc, char const* endDoc, Value* root,
1987  String* errs) {
1988  return _impl->parse(beginDoc, endDoc, root, errs);
1989 }
1990 
1992 // global functions
1993 
1994 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1995  String* errs) {
1996  OStringStream ssin;
1997  ssin << sin.rdbuf();
1998  String doc = std::move(ssin).str();
1999  char const* begin = doc.data();
2000  char const* end = begin + doc.size();
2001  // Note that we do not actually need a null-terminator.
2002  CharReaderPtr const reader(fact.newCharReader());
2003  return reader->parse(begin, end, root, errs);
2004 }
2005 
2008  String errs;
2009  bool ok = parseFromStream(b, sin, &root, &errs);
2010  if (!ok) {
2011  throwRuntimeError(errs);
2012  }
2013  return sin;
2014 }
2015 
2016 } // namespace Json
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
Build a CharReader implementation.
Definition: reader.h:314
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
static void ecma404Mode(Json::Value *settings)
ECMA-404 mode.
Value & operator[](const String &key)
A simple way to update a specific setting.
CharReader * newCharReader() const override
Allocate a CharReader via operator new().
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
Json::Value settings_
Configuration of this builder.
Definition: reader.h:360
~CharReaderBuilder() override
bool validate(Json::Value *invalid) const
Interface for reading JSON from a char array.
Definition: reader.h:245
CharReader(std::unique_ptr< Impl > impl)
Definition: reader.h:296
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
virtual bool parse(char const *beginDoc, char const *endDoc, Value *root, String *errs)
Read a Value from a JSON document.
Configuration passed to reader and writer.
Definition: json_features.h:22
bool strictRoot_
true if root must be either an array or an object value.
Definition: json_features.h:49
bool allowComments_
true if comments are allowed. Default: true.
Definition: json_features.h:45
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
Definition: json_features.h:52
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:67
Features()
Initialize the configuration like JsonConfig::allFeatures;.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:69
bool allowNumericKeys_
true if numeric object key are allowed. Default: false.
Definition: json_features.h:55
char Char
Definition: reader.h:39
Reader()
Constructs a Reader allowing all features for parsing.
Definition: json_reader.cpp:88
bool pushError(const Value &value, const String &message)
Add a semantic error message.
bool good() const
Return whether there are any errors.
const Char * Location
Definition: reader.h:40
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
Definition: json_reader.cpp:92
String getFormattedErrorMessages() const
Returns a user-friendly string that lists the errors in the parsed document.
Represents a JSON value.
Definition: value.h:194
const_iterator begin() const
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
Definition: json_value.cpp:882
static constexpr LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition: value.h:227
Json::UInt UInt
Definition: value.h:201
bool isArray() const
void setComment(const char *comment, size_t len, CommentPlacement placement)
Comments must be //... or /* ... */.
Definition: value.h:574
ptrdiff_t getOffsetLimit() const
const_iterator end() const
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
Definition: json_value.cpp:456
void setOffsetLimit(ptrdiff_t limit)
Json::LargestInt LargestInt
Definition: value.h:207
Json::LargestUInt LargestUInt
Definition: value.h:208
UInt asUInt() const
Definition: json_value.cpp:676
bool isObject() const
void setOffsetStart(ptrdiff_t start)
static constexpr Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition: value.h:234
bool asBool() const
Definition: json_value.cpp:804
static constexpr LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition: value.h:229
static constexpr LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition: value.h:224
ptrdiff_t getOffsetStart() const
#define jsoncpp_snprintf
Definition: config.h:63
#define JSONCPP_DEPRECATED_STACK_LIMIT
Definition: json_reader.cpp:48
static size_t const stackLimit_g
Definition: json_reader.cpp:51
JSON (JavaScript Object Notation).
Definition: allocator.h:15
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition: config.h:135
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition: config.h:138
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition: config.h:132
CommentPlacement
Definition: value.h:119
@ commentAfterOnSameLine
a comment just after a value on the same line
Definition: value.h:121
@ commentBefore
a comment placed on the line before a value
Definition: value.h:120
@ commentAfter
a comment on the line after a value (only makes sense for the root value)
Definition: value.h:122
@ arrayValue
array value (ordered list)
Definition: value.h:115
@ objectValue
object value (collection of name/value pairs).
Definition: value.h:116
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition: json_tool.h:39
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
std::istream IStream
Definition: config.h:139
std::auto_ptr< CharReader > CharReaderPtr
Definition: json_reader.cpp:59
An error tagged with where in the JSON text it was encountered.
Definition: reader.h:47