JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
23 
24 #include <cstdio>
25 #if __cplusplus >= 201103L
26 
27 #if !defined(sscanf)
28 #define sscanf std::sscanf
29 #endif
30 
31 #endif //__cplusplus
32 
33 #if defined(_MSC_VER)
34 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
35 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
36 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
37 #endif //_MSC_VER
38 
39 #if defined(_MSC_VER)
40 // Disable warning about strdup being deprecated.
41 #pragma warning(disable : 4996)
42 #endif
43 
// Nesting limit checked by Reader::readValue().
// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
// time to change the stack limit.
#ifndef JSONCPP_DEPRECATED_STACK_LIMIT
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

static const size_t stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
52 
53 namespace Json {
54 
55 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
56 using CharReaderPtr = std::unique_ptr<CharReader>;
57 #else
58 using CharReaderPtr = std::auto_ptr<CharReader>;
59 #endif
60 
61 // Implementation of class Features
62 // ////////////////////////////////
63 
64 Features::Features() = default;
65 
66 Features Features::all() { return {}; }
67 
69  Features features;
70  features.allowComments_ = false;
71  features.strictRoot_ = true;
72  features.allowDroppedNullPlaceholders_ = false;
73  features.allowNumericKeys_ = false;
74  return features;
75 }
76 
77 // Implementation of class Reader
78 // ////////////////////////////////
79 
80 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
81  return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
82 }
83 
84 // Class Reader
85 // //////////////////////////////////////////////////////////////////
86 
87 Reader::Reader() : features_(Features::all()) {}
88 
89 Reader::Reader(const Features& features) : features_(features) {}
90 
91 bool Reader::parse(const std::string& document, Value& root,
92  bool collectComments) {
93  document_.assign(document.begin(), document.end());
94  const char* begin = document_.c_str();
95  const char* end = begin + document_.length();
96  return parse(begin, end, root, collectComments);
97 }
98 
99 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
100  // std::istream_iterator<char> begin(is);
101  // std::istream_iterator<char> end;
102  // Those would allow streamed input from a file, if parse() were a
103  // template function.
104 
105  // Since String is reference-counted, this at least does not
106  // create an extra copy.
107  String doc;
108  std::getline(is, doc, static_cast<char> EOF);
109  return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
110 }
111 
112 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
113  bool collectComments) {
114  if (!features_.allowComments_) {
115  collectComments = false;
116  }
117 
118  begin_ = beginDoc;
119  end_ = endDoc;
120  collectComments_ = collectComments;
121  current_ = begin_;
122  lastValueEnd_ = nullptr;
123  lastValue_ = nullptr;
124  commentsBefore_.clear();
125  errors_.clear();
126  while (!nodes_.empty())
127  nodes_.pop();
128  nodes_.push(&root);
129 
130  bool successful = readValue();
131  Token token;
132  skipCommentTokens(token);
133  if (collectComments_ && !commentsBefore_.empty())
134  root.setComment(commentsBefore_, commentAfter);
135  if (features_.strictRoot_) {
136  if (!root.isArray() && !root.isObject()) {
137  // Set error location to start of doc, ideally should be first token found
138  // in doc
139  token.type_ = tokenError;
140  token.start_ = beginDoc;
141  token.end_ = endDoc;
142  addError(
143  "A valid JSON document must be either an array or an object value.",
144  token);
145  return false;
146  }
147  }
148  return successful;
149 }
150 
151 bool Reader::readValue() {
152  // readValue() may call itself only if it calls readObject() or ReadArray().
153  // These methods execute nodes_.push() just before and nodes_.pop)() just
154  // after calling readValue(). parse() executes one nodes_.push(), so > instead
155  // of >=.
156  if (nodes_.size() > stackLimit_g)
157  throwRuntimeError("Exceeded stackLimit in readValue().");
158 
159  Token token;
160  skipCommentTokens(token);
161  bool successful = true;
162 
163  if (collectComments_ && !commentsBefore_.empty()) {
164  currentValue().setComment(commentsBefore_, commentBefore);
165  commentsBefore_.clear();
166  }
167 
168  switch (token.type_) {
169  case tokenObjectBegin:
170  successful = readObject(token);
171  currentValue().setOffsetLimit(current_ - begin_);
172  break;
173  case tokenArrayBegin:
174  successful = readArray(token);
175  currentValue().setOffsetLimit(current_ - begin_);
176  break;
177  case tokenNumber:
178  successful = decodeNumber(token);
179  break;
180  case tokenString:
181  successful = decodeString(token);
182  break;
183  case tokenTrue: {
184  Value v(true);
185  currentValue().swapPayload(v);
186  currentValue().setOffsetStart(token.start_ - begin_);
187  currentValue().setOffsetLimit(token.end_ - begin_);
188  } break;
189  case tokenFalse: {
190  Value v(false);
191  currentValue().swapPayload(v);
192  currentValue().setOffsetStart(token.start_ - begin_);
193  currentValue().setOffsetLimit(token.end_ - begin_);
194  } break;
195  case tokenNull: {
196  Value v;
197  currentValue().swapPayload(v);
198  currentValue().setOffsetStart(token.start_ - begin_);
199  currentValue().setOffsetLimit(token.end_ - begin_);
200  } break;
201  case tokenArraySeparator:
202  case tokenObjectEnd:
203  case tokenArrayEnd:
204  if (features_.allowDroppedNullPlaceholders_) {
205  // "Un-read" the current token and mark the current value as a null
206  // token.
207  current_--;
208  Value v;
209  currentValue().swapPayload(v);
210  currentValue().setOffsetStart(current_ - begin_ - 1);
211  currentValue().setOffsetLimit(current_ - begin_);
212  break;
213  } // Else, fall through...
214  default:
215  currentValue().setOffsetStart(token.start_ - begin_);
216  currentValue().setOffsetLimit(token.end_ - begin_);
217  return addError("Syntax error: value, object or array expected.", token);
218  }
219 
220  if (collectComments_) {
221  lastValueEnd_ = current_;
222  lastValue_ = &currentValue();
223  }
224 
225  return successful;
226 }
227 
228 void Reader::skipCommentTokens(Token& token) {
229  if (features_.allowComments_) {
230  do {
231  readToken(token);
232  } while (token.type_ == tokenComment);
233  } else {
234  readToken(token);
235  }
236 }
237 
238 bool Reader::readToken(Token& token) {
239  skipSpaces();
240  token.start_ = current_;
241  Char c = getNextChar();
242  bool ok = true;
243  switch (c) {
244  case '{':
245  token.type_ = tokenObjectBegin;
246  break;
247  case '}':
248  token.type_ = tokenObjectEnd;
249  break;
250  case '[':
251  token.type_ = tokenArrayBegin;
252  break;
253  case ']':
254  token.type_ = tokenArrayEnd;
255  break;
256  case '"':
257  token.type_ = tokenString;
258  ok = readString();
259  break;
260  case '/':
261  token.type_ = tokenComment;
262  ok = readComment();
263  break;
264  case '0':
265  case '1':
266  case '2':
267  case '3':
268  case '4':
269  case '5':
270  case '6':
271  case '7':
272  case '8':
273  case '9':
274  case '-':
275  token.type_ = tokenNumber;
276  readNumber();
277  break;
278  case 't':
279  token.type_ = tokenTrue;
280  ok = match("rue", 3);
281  break;
282  case 'f':
283  token.type_ = tokenFalse;
284  ok = match("alse", 4);
285  break;
286  case 'n':
287  token.type_ = tokenNull;
288  ok = match("ull", 3);
289  break;
290  case ',':
291  token.type_ = tokenArraySeparator;
292  break;
293  case ':':
294  token.type_ = tokenMemberSeparator;
295  break;
296  case 0:
297  token.type_ = tokenEndOfStream;
298  break;
299  default:
300  ok = false;
301  break;
302  }
303  if (!ok)
304  token.type_ = tokenError;
305  token.end_ = current_;
306  return ok;
307 }
308 
309 void Reader::skipSpaces() {
310  while (current_ != end_) {
311  Char c = *current_;
312  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
313  ++current_;
314  else
315  break;
316  }
317 }
318 
319 bool Reader::match(const Char* pattern, int patternLength) {
320  if (end_ - current_ < patternLength)
321  return false;
322  int index = patternLength;
323  while (index--)
324  if (current_[index] != pattern[index])
325  return false;
326  current_ += patternLength;
327  return true;
328 }
329 
330 bool Reader::readComment() {
331  Location commentBegin = current_ - 1;
332  Char c = getNextChar();
333  bool successful = false;
334  if (c == '*')
335  successful = readCStyleComment();
336  else if (c == '/')
337  successful = readCppStyleComment();
338  if (!successful)
339  return false;
340 
341  if (collectComments_) {
342  CommentPlacement placement = commentBefore;
343  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
344  if (c != '*' || !containsNewLine(commentBegin, current_))
345  placement = commentAfterOnSameLine;
346  }
347 
348  addComment(commentBegin, current_, placement);
349  }
350  return true;
351 }
352 
353 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
354  String normalized;
355  normalized.reserve(static_cast<size_t>(end - begin));
356  Reader::Location current = begin;
357  while (current != end) {
358  char c = *current++;
359  if (c == '\r') {
360  if (current != end && *current == '\n')
361  // convert dos EOL
362  ++current;
363  // convert Mac EOL
364  normalized += '\n';
365  } else {
366  normalized += c;
367  }
368  }
369  return normalized;
370 }
371 
372 void Reader::addComment(Location begin, Location end,
373  CommentPlacement placement) {
374  assert(collectComments_);
375  const String& normalized = normalizeEOL(begin, end);
376  if (placement == commentAfterOnSameLine) {
377  assert(lastValue_ != nullptr);
378  lastValue_->setComment(normalized, placement);
379  } else {
380  commentsBefore_ += normalized;
381  }
382 }
383 
384 bool Reader::readCStyleComment() {
385  while ((current_ + 1) < end_) {
386  Char c = getNextChar();
387  if (c == '*' && *current_ == '/')
388  break;
389  }
390  return getNextChar() == '/';
391 }
392 
393 bool Reader::readCppStyleComment() {
394  while (current_ != end_) {
395  Char c = getNextChar();
396  if (c == '\n')
397  break;
398  if (c == '\r') {
399  // Consume DOS EOL. It will be normalized in addComment.
400  if (current_ != end_ && *current_ == '\n')
401  getNextChar();
402  // Break on Moc OS 9 EOL.
403  break;
404  }
405  }
406  return true;
407 }
408 
409 void Reader::readNumber() {
410  Location p = current_;
411  char c = '0'; // stopgap for already consumed character
412  // integral part
413  while (c >= '0' && c <= '9')
414  c = (current_ = p) < end_ ? *p++ : '\0';
415  // fractional part
416  if (c == '.') {
417  c = (current_ = p) < end_ ? *p++ : '\0';
418  while (c >= '0' && c <= '9')
419  c = (current_ = p) < end_ ? *p++ : '\0';
420  }
421  // exponential part
422  if (c == 'e' || c == 'E') {
423  c = (current_ = p) < end_ ? *p++ : '\0';
424  if (c == '+' || c == '-')
425  c = (current_ = p) < end_ ? *p++ : '\0';
426  while (c >= '0' && c <= '9')
427  c = (current_ = p) < end_ ? *p++ : '\0';
428  }
429 }
430 
431 bool Reader::readString() {
432  Char c = '\0';
433  while (current_ != end_) {
434  c = getNextChar();
435  if (c == '\\')
436  getNextChar();
437  else if (c == '"')
438  break;
439  }
440  return c == '"';
441 }
442 
443 bool Reader::readObject(Token& token) {
444  Token tokenName;
445  String name;
446  Value init(objectValue);
447  currentValue().swapPayload(init);
448  currentValue().setOffsetStart(token.start_ - begin_);
449  while (readToken(tokenName)) {
450  bool initialTokenOk = true;
451  while (tokenName.type_ == tokenComment && initialTokenOk)
452  initialTokenOk = readToken(tokenName);
453  if (!initialTokenOk)
454  break;
455  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
456  return true;
457  name.clear();
458  if (tokenName.type_ == tokenString) {
459  if (!decodeString(tokenName, name))
460  return recoverFromError(tokenObjectEnd);
461  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
462  Value numberName;
463  if (!decodeNumber(tokenName, numberName))
464  return recoverFromError(tokenObjectEnd);
465  name = numberName.asString();
466  } else {
467  break;
468  }
469 
470  Token colon;
471  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
472  return addErrorAndRecover("Missing ':' after object member name", colon,
473  tokenObjectEnd);
474  }
475  Value& value = currentValue()[name];
476  nodes_.push(&value);
477  bool ok = readValue();
478  nodes_.pop();
479  if (!ok) // error already set
480  return recoverFromError(tokenObjectEnd);
481 
482  Token comma;
483  if (!readToken(comma) ||
484  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
485  comma.type_ != tokenComment)) {
486  return addErrorAndRecover("Missing ',' or '}' in object declaration",
487  comma, tokenObjectEnd);
488  }
489  bool finalizeTokenOk = true;
490  while (comma.type_ == tokenComment && finalizeTokenOk)
491  finalizeTokenOk = readToken(comma);
492  if (comma.type_ == tokenObjectEnd)
493  return true;
494  }
495  return addErrorAndRecover("Missing '}' or object member name", tokenName,
496  tokenObjectEnd);
497 }
498 
499 bool Reader::readArray(Token& token) {
500  Value init(arrayValue);
501  currentValue().swapPayload(init);
502  currentValue().setOffsetStart(token.start_ - begin_);
503  skipSpaces();
504  if (current_ != end_ && *current_ == ']') // empty array
505  {
506  Token endArray;
507  readToken(endArray);
508  return true;
509  }
510  int index = 0;
511  for (;;) {
512  Value& value = currentValue()[index++];
513  nodes_.push(&value);
514  bool ok = readValue();
515  nodes_.pop();
516  if (!ok) // error already set
517  return recoverFromError(tokenArrayEnd);
518 
519  Token currentToken;
520  // Accept Comment after last item in the array.
521  ok = readToken(currentToken);
522  while (currentToken.type_ == tokenComment && ok) {
523  ok = readToken(currentToken);
524  }
525  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
526  currentToken.type_ != tokenArrayEnd);
527  if (!ok || badTokenType) {
528  return addErrorAndRecover("Missing ',' or ']' in array declaration",
529  currentToken, tokenArrayEnd);
530  }
531  if (currentToken.type_ == tokenArrayEnd)
532  break;
533  }
534  return true;
535 }
536 
537 bool Reader::decodeNumber(Token& token) {
538  Value decoded;
539  if (!decodeNumber(token, decoded))
540  return false;
541  currentValue().swapPayload(decoded);
542  currentValue().setOffsetStart(token.start_ - begin_);
543  currentValue().setOffsetLimit(token.end_ - begin_);
544  return true;
545 }
546 
547 bool Reader::decodeNumber(Token& token, Value& decoded) {
548  // Attempts to parse the number as an integer. If the number is
549  // larger than the maximum supported value of an integer then
550  // we decode the number as a double.
551  Location current = token.start_;
552  bool isNegative = *current == '-';
553  if (isNegative)
554  ++current;
555  // TODO: Help the compiler do the div and mod at compile time or get rid of
556  // them.
557  Value::LargestUInt maxIntegerValue =
558  isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
560  Value::LargestUInt threshold = maxIntegerValue / 10;
561  Value::LargestUInt value = 0;
562  while (current < token.end_) {
563  Char c = *current++;
564  if (c < '0' || c > '9')
565  return decodeDouble(token, decoded);
566  auto digit(static_cast<Value::UInt>(c - '0'));
567  if (value >= threshold) {
568  // We've hit or exceeded the max value divided by 10 (rounded down). If
569  // a) we've only just touched the limit, b) this is the last digit, and
570  // c) it's small enough to fit in that rounding delta, we're okay.
571  // Otherwise treat this number as a double to avoid overflow.
572  if (value > threshold || current != token.end_ ||
573  digit > maxIntegerValue % 10) {
574  return decodeDouble(token, decoded);
575  }
576  }
577  value = value * 10 + digit;
578  }
579  if (isNegative && value == maxIntegerValue)
580  decoded = Value::minLargestInt;
581  else if (isNegative)
582  decoded = -Value::LargestInt(value);
583  else if (value <= Value::LargestUInt(Value::maxInt))
584  decoded = Value::LargestInt(value);
585  else
586  decoded = value;
587  return true;
588 }
589 
590 bool Reader::decodeDouble(Token& token) {
591  Value decoded;
592  if (!decodeDouble(token, decoded))
593  return false;
594  currentValue().swapPayload(decoded);
595  currentValue().setOffsetStart(token.start_ - begin_);
596  currentValue().setOffsetLimit(token.end_ - begin_);
597  return true;
598 }
599 
600 bool Reader::decodeDouble(Token& token, Value& decoded) {
601  double value = 0;
602  String buffer(token.start_, token.end_);
603  IStringStream is(buffer);
604  if (!(is >> value))
605  return addError(
606  "'" + String(token.start_, token.end_) + "' is not a number.", token);
607  decoded = value;
608  return true;
609 }
610 
611 bool Reader::decodeString(Token& token) {
612  String decoded_string;
613  if (!decodeString(token, decoded_string))
614  return false;
615  Value decoded(decoded_string);
616  currentValue().swapPayload(decoded);
617  currentValue().setOffsetStart(token.start_ - begin_);
618  currentValue().setOffsetLimit(token.end_ - begin_);
619  return true;
620 }
621 
622 bool Reader::decodeString(Token& token, String& decoded) {
623  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
624  Location current = token.start_ + 1; // skip '"'
625  Location end = token.end_ - 1; // do not include '"'
626  while (current != end) {
627  Char c = *current++;
628  if (c == '"')
629  break;
630  if (c == '\\') {
631  if (current == end)
632  return addError("Empty escape sequence in string", token, current);
633  Char escape = *current++;
634  switch (escape) {
635  case '"':
636  decoded += '"';
637  break;
638  case '/':
639  decoded += '/';
640  break;
641  case '\\':
642  decoded += '\\';
643  break;
644  case 'b':
645  decoded += '\b';
646  break;
647  case 'f':
648  decoded += '\f';
649  break;
650  case 'n':
651  decoded += '\n';
652  break;
653  case 'r':
654  decoded += '\r';
655  break;
656  case 't':
657  decoded += '\t';
658  break;
659  case 'u': {
660  unsigned int unicode;
661  if (!decodeUnicodeCodePoint(token, current, end, unicode))
662  return false;
663  decoded += codePointToUTF8(unicode);
664  } break;
665  default:
666  return addError("Bad escape sequence in string", token, current);
667  }
668  } else {
669  decoded += c;
670  }
671  }
672  return true;
673 }
674 
675 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
676  Location end, unsigned int& unicode) {
677 
678  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
679  return false;
680  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
681  // surrogate pairs
682  if (end - current < 6)
683  return addError(
684  "additional six characters expected to parse unicode surrogate pair.",
685  token, current);
686  if (*(current++) == '\\' && *(current++) == 'u') {
687  unsigned int surrogatePair;
688  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
689  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
690  } else
691  return false;
692  } else
693  return addError("expecting another \\u token to begin the second half of "
694  "a unicode surrogate pair",
695  token, current);
696  }
697  return true;
698 }
699 
700 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
701  Location end,
702  unsigned int& ret_unicode) {
703  if (end - current < 4)
704  return addError(
705  "Bad unicode escape sequence in string: four digits expected.", token,
706  current);
707  int unicode = 0;
708  for (int index = 0; index < 4; ++index) {
709  Char c = *current++;
710  unicode *= 16;
711  if (c >= '0' && c <= '9')
712  unicode += c - '0';
713  else if (c >= 'a' && c <= 'f')
714  unicode += c - 'a' + 10;
715  else if (c >= 'A' && c <= 'F')
716  unicode += c - 'A' + 10;
717  else
718  return addError(
719  "Bad unicode escape sequence in string: hexadecimal digit expected.",
720  token, current);
721  }
722  ret_unicode = static_cast<unsigned int>(unicode);
723  return true;
724 }
725 
726 bool Reader::addError(const String& message, Token& token, Location extra) {
727  ErrorInfo info;
728  info.token_ = token;
729  info.message_ = message;
730  info.extra_ = extra;
731  errors_.push_back(info);
732  return false;
733 }
734 
735 bool Reader::recoverFromError(TokenType skipUntilToken) {
736  size_t const errorCount = errors_.size();
737  Token skip;
738  for (;;) {
739  if (!readToken(skip))
740  errors_.resize(errorCount); // discard errors caused by recovery
741  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
742  break;
743  }
744  errors_.resize(errorCount);
745  return false;
746 }
747 
748 bool Reader::addErrorAndRecover(const String& message, Token& token,
749  TokenType skipUntilToken) {
750  addError(message, token);
751  return recoverFromError(skipUntilToken);
752 }
753 
754 Value& Reader::currentValue() { return *(nodes_.top()); }
755 
756 Reader::Char Reader::getNextChar() {
757  if (current_ == end_)
758  return 0;
759  return *current_++;
760 }
761 
762 void Reader::getLocationLineAndColumn(Location location, int& line,
763  int& column) const {
764  Location current = begin_;
765  Location lastLineStart = current;
766  line = 0;
767  while (current < location && current != end_) {
768  Char c = *current++;
769  if (c == '\r') {
770  if (*current == '\n')
771  ++current;
772  lastLineStart = current;
773  ++line;
774  } else if (c == '\n') {
775  lastLineStart = current;
776  ++line;
777  }
778  }
779  // column & line start at 1
780  column = int(location - lastLineStart) + 1;
781  ++line;
782 }
783 
784 String Reader::getLocationLineAndColumn(Location location) const {
785  int line, column;
786  getLocationLineAndColumn(location, line, column);
787  char buffer[18 + 16 + 16 + 1];
788  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
789  return buffer;
790 }
791 
792 // Deprecated. Preserved for backward compatibility
793 String Reader::getFormatedErrorMessages() const {
794  return getFormattedErrorMessages();
795 }
796 
797 String Reader::getFormattedErrorMessages() const {
798  String formattedMessage;
799  for (const auto& error : errors_) {
800  formattedMessage +=
801  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
802  formattedMessage += " " + error.message_ + "\n";
803  if (error.extra_)
804  formattedMessage +=
805  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
806  }
807  return formattedMessage;
808 }
809 
810 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
811  std::vector<Reader::StructuredError> allErrors;
812  for (const auto& error : errors_) {
813  Reader::StructuredError structured;
814  structured.offset_start = error.token_.start_ - begin_;
815  structured.offset_limit = error.token_.end_ - begin_;
816  structured.message = error.message_;
817  allErrors.push_back(structured);
818  }
819  return allErrors;
820 }
821 
822 bool Reader::pushError(const Value& value, const String& message) {
823  ptrdiff_t const length = end_ - begin_;
824  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
825  return false;
826  Token token;
827  token.type_ = tokenError;
828  token.start_ = begin_ + value.getOffsetStart();
829  token.end_ = begin_ + value.getOffsetLimit();
830  ErrorInfo info;
831  info.token_ = token;
832  info.message_ = message;
833  info.extra_ = nullptr;
834  errors_.push_back(info);
835  return true;
836 }
837 
838 bool Reader::pushError(const Value& value, const String& message,
839  const Value& extra) {
840  ptrdiff_t const length = end_ - begin_;
841  if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
842  extra.getOffsetLimit() > length)
843  return false;
844  Token token;
845  token.type_ = tokenError;
846  token.start_ = begin_ + value.getOffsetStart();
847  token.end_ = begin_ + value.getOffsetLimit();
848  ErrorInfo info;
849  info.token_ = token;
850  info.message_ = message;
851  info.extra_ = begin_ + extra.getOffsetStart();
852  errors_.push_back(info);
853  return true;
854 }
855 
856 bool Reader::good() const { return errors_.empty(); }
857 
858 // Originally copied from the Features class (now deprecated), used internally
859 // for features implementation.
// Feature switches consulted by OurReader while parsing.
// Every member has a default member initializer, so a default-constructed
// OurFeatures is fully defined (all switches off, stack limit 0) rather than
// holding indeterminate values.
class OurFeatures {
public:
  // Returns a feature set with every switch off and stackLimit_ == 0.
  static OurFeatures all();
  bool allowComments_ = false;
  bool allowTrailingCommas_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  bool skipBom_ = false;
  size_t stackLimit_ = 0;
}; // OurFeatures

OurFeatures OurFeatures::all() { return {}; }
877 
878 // Implementation of class OurReader
879 // ////////////////////////////////
880 
881 // Originally copied from the Reader class (now deprecated), used internally
882 // for implementing JSON reading.
883 class OurReader {
884 public:
885  using Char = char;
886  using Location = const Char*;
887  struct StructuredError {
888  ptrdiff_t offset_start;
889  ptrdiff_t offset_limit;
890  String message;
891  };
892 
893  explicit OurReader(OurFeatures const& features);
894  bool parse(const char* beginDoc, const char* endDoc, Value& root,
895  bool collectComments = true);
896  String getFormattedErrorMessages() const;
897  std::vector<StructuredError> getStructuredErrors() const;
898 
899 private:
900  OurReader(OurReader const&); // no impl
901  void operator=(OurReader const&); // no impl
902 
903  enum TokenType {
904  tokenEndOfStream = 0,
905  tokenObjectBegin,
906  tokenObjectEnd,
907  tokenArrayBegin,
908  tokenArrayEnd,
909  tokenString,
910  tokenNumber,
911  tokenTrue,
912  tokenFalse,
913  tokenNull,
914  tokenNaN,
915  tokenPosInf,
916  tokenNegInf,
917  tokenArraySeparator,
918  tokenMemberSeparator,
919  tokenComment,
920  tokenError
921  };
922 
923  class Token {
924  public:
925  TokenType type_;
926  Location start_;
927  Location end_;
928  };
929 
930  class ErrorInfo {
931  public:
932  Token token_;
933  String message_;
934  Location extra_;
935  };
936 
937  using Errors = std::deque<ErrorInfo>;
938 
939  bool readToken(Token& token);
940  void skipSpaces();
941  void skipBom(bool skipBom);
942  bool match(const Char* pattern, int patternLength);
943  bool readComment();
944  bool readCStyleComment(bool* containsNewLineResult);
945  bool readCppStyleComment();
946  bool readString();
947  bool readStringSingleQuote();
948  bool readNumber(bool checkInf);
949  bool readValue();
950  bool readObject(Token& token);
951  bool readArray(Token& token);
952  bool decodeNumber(Token& token);
953  bool decodeNumber(Token& token, Value& decoded);
954  bool decodeString(Token& token);
955  bool decodeString(Token& token, String& decoded);
956  bool decodeDouble(Token& token);
957  bool decodeDouble(Token& token, Value& decoded);
958  bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
959  unsigned int& unicode);
960  bool decodeUnicodeEscapeSequence(Token& token, Location& current,
961  Location end, unsigned int& unicode);
962  bool addError(const String& message, Token& token, Location extra = nullptr);
963  bool recoverFromError(TokenType skipUntilToken);
964  bool addErrorAndRecover(const String& message, Token& token,
965  TokenType skipUntilToken);
966  void skipUntilSpace();
967  Value& currentValue();
968  Char getNextChar();
969  void getLocationLineAndColumn(Location location, int& line,
970  int& column) const;
971  String getLocationLineAndColumn(Location location) const;
972  void addComment(Location begin, Location end, CommentPlacement placement);
973  void skipCommentTokens(Token& token);
974 
975  static String normalizeEOL(Location begin, Location end);
976  static bool containsNewLine(Location begin, Location end);
977 
978  using Nodes = std::stack<Value*>;
979 
980  Nodes nodes_{};
981  Errors errors_{};
982  String document_{};
983  Location begin_ = nullptr;
984  Location end_ = nullptr;
985  Location current_ = nullptr;
986  Location lastValueEnd_ = nullptr;
987  Value* lastValue_ = nullptr;
988  bool lastValueHasAComment_ = false;
989  String commentsBefore_{};
990 
991  OurFeatures const features_;
992  bool collectComments_ = false;
993 }; // OurReader
994 
995 // complete copy of Reader impl, for OurReader
996 
997 bool OurReader::containsNewLine(OurReader::Location begin,
998  OurReader::Location end) {
999  return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
1000 }
1001 
1002 OurReader::OurReader(OurFeatures const& features) : features_(features) {}
1003 
1004 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1005  bool collectComments) {
1006  if (!features_.allowComments_) {
1007  collectComments = false;
1008  }
1009 
1010  begin_ = beginDoc;
1011  end_ = endDoc;
1012  collectComments_ = collectComments;
1013  current_ = begin_;
1014  lastValueEnd_ = nullptr;
1015  lastValue_ = nullptr;
1016  commentsBefore_.clear();
1017  errors_.clear();
1018  while (!nodes_.empty())
1019  nodes_.pop();
1020  nodes_.push(&root);
1021 
1022  // skip byte order mark if it exists at the beginning of the UTF-8 text.
1023  skipBom(features_.skipBom_);
1024  bool successful = readValue();
1025  nodes_.pop();
1026  Token token;
1027  skipCommentTokens(token);
1028  if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1029  addError("Extra non-whitespace after JSON value.", token);
1030  return false;
1031  }
1032  if (collectComments_ && !commentsBefore_.empty())
1033  root.setComment(commentsBefore_, commentAfter);
1034  if (features_.strictRoot_) {
1035  if (!root.isArray() && !root.isObject()) {
1036  // Set error location to start of doc, ideally should be first token found
1037  // in doc
1038  token.type_ = tokenError;
1039  token.start_ = beginDoc;
1040  token.end_ = endDoc;
1041  addError(
1042  "A valid JSON document must be either an array or an object value.",
1043  token);
1044  return false;
1045  }
1046  }
1047  return successful;
1048 }
1049 
1050 bool OurReader::readValue() {
1051  // To preserve the old behaviour we cast size_t to int.
1052  if (nodes_.size() > features_.stackLimit_)
1053  throwRuntimeError("Exceeded stackLimit in readValue().");
1054  Token token;
1055  skipCommentTokens(token);
1056  bool successful = true;
1057 
1058  if (collectComments_ && !commentsBefore_.empty()) {
1059  currentValue().setComment(commentsBefore_, commentBefore);
1060  commentsBefore_.clear();
1061  }
1062 
1063  switch (token.type_) {
1064  case tokenObjectBegin:
1065  successful = readObject(token);
1066  currentValue().setOffsetLimit(current_ - begin_);
1067  break;
1068  case tokenArrayBegin:
1069  successful = readArray(token);
1070  currentValue().setOffsetLimit(current_ - begin_);
1071  break;
1072  case tokenNumber:
1073  successful = decodeNumber(token);
1074  break;
1075  case tokenString:
1076  successful = decodeString(token);
1077  break;
1078  case tokenTrue: {
1079  Value v(true);
1080  currentValue().swapPayload(v);
1081  currentValue().setOffsetStart(token.start_ - begin_);
1082  currentValue().setOffsetLimit(token.end_ - begin_);
1083  } break;
1084  case tokenFalse: {
1085  Value v(false);
1086  currentValue().swapPayload(v);
1087  currentValue().setOffsetStart(token.start_ - begin_);
1088  currentValue().setOffsetLimit(token.end_ - begin_);
1089  } break;
1090  case tokenNull: {
1091  Value v;
1092  currentValue().swapPayload(v);
1093  currentValue().setOffsetStart(token.start_ - begin_);
1094  currentValue().setOffsetLimit(token.end_ - begin_);
1095  } break;
1096  case tokenNaN: {
1097  Value v(std::numeric_limits<double>::quiet_NaN());
1098  currentValue().swapPayload(v);
1099  currentValue().setOffsetStart(token.start_ - begin_);
1100  currentValue().setOffsetLimit(token.end_ - begin_);
1101  } break;
1102  case tokenPosInf: {
1103  Value v(std::numeric_limits<double>::infinity());
1104  currentValue().swapPayload(v);
1105  currentValue().setOffsetStart(token.start_ - begin_);
1106  currentValue().setOffsetLimit(token.end_ - begin_);
1107  } break;
1108  case tokenNegInf: {
1109  Value v(-std::numeric_limits<double>::infinity());
1110  currentValue().swapPayload(v);
1111  currentValue().setOffsetStart(token.start_ - begin_);
1112  currentValue().setOffsetLimit(token.end_ - begin_);
1113  } break;
1114  case tokenArraySeparator:
1115  case tokenObjectEnd:
1116  case tokenArrayEnd:
1117  if (features_.allowDroppedNullPlaceholders_) {
1118  // "Un-read" the current token and mark the current value as a null
1119  // token.
1120  current_--;
1121  Value v;
1122  currentValue().swapPayload(v);
1123  currentValue().setOffsetStart(current_ - begin_ - 1);
1124  currentValue().setOffsetLimit(current_ - begin_);
1125  break;
1126  } // else, fall through ...
1127  default:
1128  currentValue().setOffsetStart(token.start_ - begin_);
1129  currentValue().setOffsetLimit(token.end_ - begin_);
1130  return addError("Syntax error: value, object or array expected.", token);
1131  }
1132 
1133  if (collectComments_) {
1134  lastValueEnd_ = current_;
1135  lastValueHasAComment_ = false;
1136  lastValue_ = &currentValue();
1137  }
1138 
1139  return successful;
1140 }
1141 
1142 void OurReader::skipCommentTokens(Token& token) {
1143  if (features_.allowComments_) {
1144  do {
1145  readToken(token);
1146  } while (token.type_ == tokenComment);
1147  } else {
1148  readToken(token);
1149  }
1150 }
1151 
1152 bool OurReader::readToken(Token& token) {
1153  skipSpaces();
1154  token.start_ = current_;
1155  Char c = getNextChar();
1156  bool ok = true;
1157  switch (c) {
1158  case '{':
1159  token.type_ = tokenObjectBegin;
1160  break;
1161  case '}':
1162  token.type_ = tokenObjectEnd;
1163  break;
1164  case '[':
1165  token.type_ = tokenArrayBegin;
1166  break;
1167  case ']':
1168  token.type_ = tokenArrayEnd;
1169  break;
1170  case '"':
1171  token.type_ = tokenString;
1172  ok = readString();
1173  break;
1174  case '\'':
1175  if (features_.allowSingleQuotes_) {
1176  token.type_ = tokenString;
1177  ok = readStringSingleQuote();
1178  break;
1179  } // else fall through
1180  case '/':
1181  token.type_ = tokenComment;
1182  ok = readComment();
1183  break;
1184  case '0':
1185  case '1':
1186  case '2':
1187  case '3':
1188  case '4':
1189  case '5':
1190  case '6':
1191  case '7':
1192  case '8':
1193  case '9':
1194  token.type_ = tokenNumber;
1195  readNumber(false);
1196  break;
1197  case '-':
1198  if (readNumber(true)) {
1199  token.type_ = tokenNumber;
1200  } else {
1201  token.type_ = tokenNegInf;
1202  ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1203  }
1204  break;
1205  case '+':
1206  if (readNumber(true)) {
1207  token.type_ = tokenNumber;
1208  } else {
1209  token.type_ = tokenPosInf;
1210  ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1211  }
1212  break;
1213  case 't':
1214  token.type_ = tokenTrue;
1215  ok = match("rue", 3);
1216  break;
1217  case 'f':
1218  token.type_ = tokenFalse;
1219  ok = match("alse", 4);
1220  break;
1221  case 'n':
1222  token.type_ = tokenNull;
1223  ok = match("ull", 3);
1224  break;
1225  case 'N':
1226  if (features_.allowSpecialFloats_) {
1227  token.type_ = tokenNaN;
1228  ok = match("aN", 2);
1229  } else {
1230  ok = false;
1231  }
1232  break;
1233  case 'I':
1234  if (features_.allowSpecialFloats_) {
1235  token.type_ = tokenPosInf;
1236  ok = match("nfinity", 7);
1237  } else {
1238  ok = false;
1239  }
1240  break;
1241  case ',':
1242  token.type_ = tokenArraySeparator;
1243  break;
1244  case ':':
1245  token.type_ = tokenMemberSeparator;
1246  break;
1247  case 0:
1248  token.type_ = tokenEndOfStream;
1249  break;
1250  default:
1251  ok = false;
1252  break;
1253  }
1254  if (!ok)
1255  token.type_ = tokenError;
1256  token.end_ = current_;
1257  return ok;
1258 }
1259 
1260 void OurReader::skipSpaces() {
1261  while (current_ != end_) {
1262  Char c = *current_;
1263  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1264  ++current_;
1265  else
1266  break;
1267  }
1268 }
1269 
1270 void OurReader::skipBom(bool skipBom) {
1271  // The default behavior is to skip BOM.
1272  if (skipBom) {
1273  if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1274  begin_ += 3;
1275  current_ = begin_;
1276  }
1277  }
1278 }
1279 
1280 bool OurReader::match(const Char* pattern, int patternLength) {
1281  if (end_ - current_ < patternLength)
1282  return false;
1283  int index = patternLength;
1284  while (index--)
1285  if (current_[index] != pattern[index])
1286  return false;
1287  current_ += patternLength;
1288  return true;
1289 }
1290 
1291 bool OurReader::readComment() {
1292  const Location commentBegin = current_ - 1;
1293  const Char c = getNextChar();
1294  bool successful = false;
1295  bool cStyleWithEmbeddedNewline = false;
1296 
1297  const bool isCStyleComment = (c == '*');
1298  const bool isCppStyleComment = (c == '/');
1299  if (isCStyleComment) {
1300  successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1301  } else if (isCppStyleComment) {
1302  successful = readCppStyleComment();
1303  }
1304 
1305  if (!successful)
1306  return false;
1307 
1308  if (collectComments_) {
1309  CommentPlacement placement = commentBefore;
1310 
1311  if (!lastValueHasAComment_) {
1312  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1313  if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1314  placement = commentAfterOnSameLine;
1315  lastValueHasAComment_ = true;
1316  }
1317  }
1318  }
1319 
1320  addComment(commentBegin, current_, placement);
1321  }
1322  return true;
1323 }
1324 
1325 String OurReader::normalizeEOL(OurReader::Location begin,
1326  OurReader::Location end) {
1327  String normalized;
1328  normalized.reserve(static_cast<size_t>(end - begin));
1329  OurReader::Location current = begin;
1330  while (current != end) {
1331  char c = *current++;
1332  if (c == '\r') {
1333  if (current != end && *current == '\n')
1334  // convert dos EOL
1335  ++current;
1336  // convert Mac EOL
1337  normalized += '\n';
1338  } else {
1339  normalized += c;
1340  }
1341  }
1342  return normalized;
1343 }
1344 
1345 void OurReader::addComment(Location begin, Location end,
1346  CommentPlacement placement) {
1347  assert(collectComments_);
1348  const String& normalized = normalizeEOL(begin, end);
1349  if (placement == commentAfterOnSameLine) {
1350  assert(lastValue_ != nullptr);
1351  lastValue_->setComment(normalized, placement);
1352  } else {
1353  commentsBefore_ += normalized;
1354  }
1355 }
1356 
1357 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1358  *containsNewLineResult = false;
1359 
1360  while ((current_ + 1) < end_) {
1361  Char c = getNextChar();
1362  if (c == '*' && *current_ == '/')
1363  break;
1364  if (c == '\n')
1365  *containsNewLineResult = true;
1366  }
1367 
1368  return getNextChar() == '/';
1369 }
1370 
1371 bool OurReader::readCppStyleComment() {
1372  while (current_ != end_) {
1373  Char c = getNextChar();
1374  if (c == '\n')
1375  break;
1376  if (c == '\r') {
1377  // Consume DOS EOL. It will be normalized in addComment.
1378  if (current_ != end_ && *current_ == '\n')
1379  getNextChar();
1380  // Break on Moc OS 9 EOL.
1381  break;
1382  }
1383  }
1384  return true;
1385 }
1386 
1387 bool OurReader::readNumber(bool checkInf) {
1388  Location p = current_;
1389  if (checkInf && p != end_ && *p == 'I') {
1390  current_ = ++p;
1391  return false;
1392  }
1393  char c = '0'; // stopgap for already consumed character
1394  // integral part
1395  while (c >= '0' && c <= '9')
1396  c = (current_ = p) < end_ ? *p++ : '\0';
1397  // fractional part
1398  if (c == '.') {
1399  c = (current_ = p) < end_ ? *p++ : '\0';
1400  while (c >= '0' && c <= '9')
1401  c = (current_ = p) < end_ ? *p++ : '\0';
1402  }
1403  // exponential part
1404  if (c == 'e' || c == 'E') {
1405  c = (current_ = p) < end_ ? *p++ : '\0';
1406  if (c == '+' || c == '-')
1407  c = (current_ = p) < end_ ? *p++ : '\0';
1408  while (c >= '0' && c <= '9')
1409  c = (current_ = p) < end_ ? *p++ : '\0';
1410  }
1411  return true;
1412 }
1413 bool OurReader::readString() {
1414  Char c = 0;
1415  while (current_ != end_) {
1416  c = getNextChar();
1417  if (c == '\\')
1418  getNextChar();
1419  else if (c == '"')
1420  break;
1421  }
1422  return c == '"';
1423 }
1424 
1425 bool OurReader::readStringSingleQuote() {
1426  Char c = 0;
1427  while (current_ != end_) {
1428  c = getNextChar();
1429  if (c == '\\')
1430  getNextChar();
1431  else if (c == '\'')
1432  break;
1433  }
1434  return c == '\'';
1435 }
1436 
1437 bool OurReader::readObject(Token& token) {
1438  Token tokenName;
1439  String name;
1440  Value init(objectValue);
1441  currentValue().swapPayload(init);
1442  currentValue().setOffsetStart(token.start_ - begin_);
1443  while (readToken(tokenName)) {
1444  bool initialTokenOk = true;
1445  while (tokenName.type_ == tokenComment && initialTokenOk)
1446  initialTokenOk = readToken(tokenName);
1447  if (!initialTokenOk)
1448  break;
1449  if (tokenName.type_ == tokenObjectEnd &&
1450  (name.empty() ||
1451  features_.allowTrailingCommas_)) // empty object or trailing comma
1452  return true;
1453  name.clear();
1454  if (tokenName.type_ == tokenString) {
1455  if (!decodeString(tokenName, name))
1456  return recoverFromError(tokenObjectEnd);
1457  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1458  Value numberName;
1459  if (!decodeNumber(tokenName, numberName))
1460  return recoverFromError(tokenObjectEnd);
1461  name = numberName.asString();
1462  } else {
1463  break;
1464  }
1465  if (name.length() >= (1U << 30))
1466  throwRuntimeError("keylength >= 2^30");
1467  if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1468  String msg = "Duplicate key: '" + name + "'";
1469  return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1470  }
1471 
1472  Token colon;
1473  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1474  return addErrorAndRecover("Missing ':' after object member name", colon,
1475  tokenObjectEnd);
1476  }
1477  Value& value = currentValue()[name];
1478  nodes_.push(&value);
1479  bool ok = readValue();
1480  nodes_.pop();
1481  if (!ok) // error already set
1482  return recoverFromError(tokenObjectEnd);
1483 
1484  Token comma;
1485  if (!readToken(comma) ||
1486  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1487  comma.type_ != tokenComment)) {
1488  return addErrorAndRecover("Missing ',' or '}' in object declaration",
1489  comma, tokenObjectEnd);
1490  }
1491  bool finalizeTokenOk = true;
1492  while (comma.type_ == tokenComment && finalizeTokenOk)
1493  finalizeTokenOk = readToken(comma);
1494  if (comma.type_ == tokenObjectEnd)
1495  return true;
1496  }
1497  return addErrorAndRecover("Missing '}' or object member name", tokenName,
1498  tokenObjectEnd);
1499 }
1500 
1501 bool OurReader::readArray(Token& token) {
1502  Value init(arrayValue);
1503  currentValue().swapPayload(init);
1504  currentValue().setOffsetStart(token.start_ - begin_);
1505  int index = 0;
1506  for (;;) {
1507  skipSpaces();
1508  if (current_ != end_ && *current_ == ']' &&
1509  (index == 0 ||
1510  (features_.allowTrailingCommas_ &&
1511  !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1512  // comma
1513  {
1514  Token endArray;
1515  readToken(endArray);
1516  return true;
1517  }
1518  Value& value = currentValue()[index++];
1519  nodes_.push(&value);
1520  bool ok = readValue();
1521  nodes_.pop();
1522  if (!ok) // error already set
1523  return recoverFromError(tokenArrayEnd);
1524 
1525  Token currentToken;
1526  // Accept Comment after last item in the array.
1527  ok = readToken(currentToken);
1528  while (currentToken.type_ == tokenComment && ok) {
1529  ok = readToken(currentToken);
1530  }
1531  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1532  currentToken.type_ != tokenArrayEnd);
1533  if (!ok || badTokenType) {
1534  return addErrorAndRecover("Missing ',' or ']' in array declaration",
1535  currentToken, tokenArrayEnd);
1536  }
1537  if (currentToken.type_ == tokenArrayEnd)
1538  break;
1539  }
1540  return true;
1541 }
1542 
1543 bool OurReader::decodeNumber(Token& token) {
1544  Value decoded;
1545  if (!decodeNumber(token, decoded))
1546  return false;
1547  currentValue().swapPayload(decoded);
1548  currentValue().setOffsetStart(token.start_ - begin_);
1549  currentValue().setOffsetLimit(token.end_ - begin_);
1550  return true;
1551 }
1552 
1553 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1554  // Attempts to parse the number as an integer. If the number is
1555  // larger than the maximum supported value of an integer then
1556  // we decode the number as a double.
1557  Location current = token.start_;
1558  const bool isNegative = *current == '-';
1559  if (isNegative) {
1560  ++current;
1561  }
1562 
1563  // We assume we can represent the largest and smallest integer types as
1564  // unsigned integers with separate sign. This is only true if they can fit
1565  // into an unsigned integer.
1567  "Int must be smaller than UInt");
1568 
1569  // We need to convert minLargestInt into a positive number. The easiest way
1570  // to do this conversion is to assume our "threshold" value of minLargestInt
1571  // divided by 10 can fit in maxLargestInt when absolute valued. This should
1572  // be a safe assumption.
1573  static_assert(Value::minLargestInt <= -Value::maxLargestInt,
1574  "The absolute value of minLargestInt must be greater than or "
1575  "equal to maxLargestInt");
1576  static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1577  "The absolute value of minLargestInt must be only 1 magnitude "
1578  "larger than maxLargest Int");
1579 
1580  static constexpr Value::LargestUInt positive_threshold =
1581  Value::maxLargestUInt / 10;
1582  static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1583 
1584  // For the negative values, we have to be more careful. Since typically
1585  // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1586  // then take the inverse. This assumes that minLargestInt is only a single
1587  // power of 10 different in magnitude, which we check above. For the last
1588  // digit, we take the modulus before negating for the same reason.
1589  static constexpr auto negative_threshold =
1591  static constexpr auto negative_last_digit =
1593 
1594  const Value::LargestUInt threshold =
1595  isNegative ? negative_threshold : positive_threshold;
1596  const Value::UInt max_last_digit =
1597  isNegative ? negative_last_digit : positive_last_digit;
1598 
1599  Value::LargestUInt value = 0;
1600  while (current < token.end_) {
1601  Char c = *current++;
1602  if (c < '0' || c > '9')
1603  return decodeDouble(token, decoded);
1604 
1605  const auto digit(static_cast<Value::UInt>(c - '0'));
1606  if (value >= threshold) {
1607  // We've hit or exceeded the max value divided by 10 (rounded down). If
1608  // a) we've only just touched the limit, meaing value == threshold,
1609  // b) this is the last digit, or
1610  // c) it's small enough to fit in that rounding delta, we're okay.
1611  // Otherwise treat this number as a double to avoid overflow.
1612  if (value > threshold || current != token.end_ ||
1613  digit > max_last_digit) {
1614  return decodeDouble(token, decoded);
1615  }
1616  }
1617  value = value * 10 + digit;
1618  }
1619 
1620  if (isNegative) {
1621  // We use the same magnitude assumption here, just in case.
1622  const auto last_digit = static_cast<Value::UInt>(value % 10);
1623  decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1624  } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1625  decoded = Value::LargestInt(value);
1626  } else {
1627  decoded = value;
1628  }
1629 
1630  return true;
1631 }
1632 
1633 bool OurReader::decodeDouble(Token& token) {
1634  Value decoded;
1635  if (!decodeDouble(token, decoded))
1636  return false;
1637  currentValue().swapPayload(decoded);
1638  currentValue().setOffsetStart(token.start_ - begin_);
1639  currentValue().setOffsetLimit(token.end_ - begin_);
1640  return true;
1641 }
1642 
1643 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1644  double value = 0;
1645  const String buffer(token.start_, token.end_);
1646  IStringStream is(buffer);
1647  if (!(is >> value)) {
1648  return addError(
1649  "'" + String(token.start_, token.end_) + "' is not a number.", token);
1650  }
1651  decoded = value;
1652  return true;
1653 }
1654 
1655 bool OurReader::decodeString(Token& token) {
1656  String decoded_string;
1657  if (!decodeString(token, decoded_string))
1658  return false;
1659  Value decoded(decoded_string);
1660  currentValue().swapPayload(decoded);
1661  currentValue().setOffsetStart(token.start_ - begin_);
1662  currentValue().setOffsetLimit(token.end_ - begin_);
1663  return true;
1664 }
1665 
1666 bool OurReader::decodeString(Token& token, String& decoded) {
1667  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1668  Location current = token.start_ + 1; // skip '"'
1669  Location end = token.end_ - 1; // do not include '"'
1670  while (current != end) {
1671  Char c = *current++;
1672  if (c == '"')
1673  break;
1674  if (c == '\\') {
1675  if (current == end)
1676  return addError("Empty escape sequence in string", token, current);
1677  Char escape = *current++;
1678  switch (escape) {
1679  case '"':
1680  decoded += '"';
1681  break;
1682  case '/':
1683  decoded += '/';
1684  break;
1685  case '\\':
1686  decoded += '\\';
1687  break;
1688  case 'b':
1689  decoded += '\b';
1690  break;
1691  case 'f':
1692  decoded += '\f';
1693  break;
1694  case 'n':
1695  decoded += '\n';
1696  break;
1697  case 'r':
1698  decoded += '\r';
1699  break;
1700  case 't':
1701  decoded += '\t';
1702  break;
1703  case 'u': {
1704  unsigned int unicode;
1705  if (!decodeUnicodeCodePoint(token, current, end, unicode))
1706  return false;
1707  decoded += codePointToUTF8(unicode);
1708  } break;
1709  default:
1710  return addError("Bad escape sequence in string", token, current);
1711  }
1712  } else {
1713  decoded += c;
1714  }
1715  }
1716  return true;
1717 }
1718 
1719 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1720  Location end, unsigned int& unicode) {
1721 
1722  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1723  return false;
1724  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1725  // surrogate pairs
1726  if (end - current < 6)
1727  return addError(
1728  "additional six characters expected to parse unicode surrogate pair.",
1729  token, current);
1730  if (*(current++) == '\\' && *(current++) == 'u') {
1731  unsigned int surrogatePair;
1732  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1733  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1734  } else
1735  return false;
1736  } else
1737  return addError("expecting another \\u token to begin the second half of "
1738  "a unicode surrogate pair",
1739  token, current);
1740  }
1741  return true;
1742 }
1743 
1744 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1745  Location end,
1746  unsigned int& ret_unicode) {
1747  if (end - current < 4)
1748  return addError(
1749  "Bad unicode escape sequence in string: four digits expected.", token,
1750  current);
1751  int unicode = 0;
1752  for (int index = 0; index < 4; ++index) {
1753  Char c = *current++;
1754  unicode *= 16;
1755  if (c >= '0' && c <= '9')
1756  unicode += c - '0';
1757  else if (c >= 'a' && c <= 'f')
1758  unicode += c - 'a' + 10;
1759  else if (c >= 'A' && c <= 'F')
1760  unicode += c - 'A' + 10;
1761  else
1762  return addError(
1763  "Bad unicode escape sequence in string: hexadecimal digit expected.",
1764  token, current);
1765  }
1766  ret_unicode = static_cast<unsigned int>(unicode);
1767  return true;
1768 }
1769 
1770 bool OurReader::addError(const String& message, Token& token, Location extra) {
1771  ErrorInfo info;
1772  info.token_ = token;
1773  info.message_ = message;
1774  info.extra_ = extra;
1775  errors_.push_back(info);
1776  return false;
1777 }
1778 
1779 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1780  size_t errorCount = errors_.size();
1781  Token skip;
1782  for (;;) {
1783  if (!readToken(skip))
1784  errors_.resize(errorCount); // discard errors caused by recovery
1785  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1786  break;
1787  }
1788  errors_.resize(errorCount);
1789  return false;
1790 }
1791 
1792 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1793  TokenType skipUntilToken) {
1794  addError(message, token);
1795  return recoverFromError(skipUntilToken);
1796 }
1797 
1798 Value& OurReader::currentValue() { return *(nodes_.top()); }
1799 
1800 OurReader::Char OurReader::getNextChar() {
1801  if (current_ == end_)
1802  return 0;
1803  return *current_++;
1804 }
1805 
1806 void OurReader::getLocationLineAndColumn(Location location, int& line,
1807  int& column) const {
1808  Location current = begin_;
1809  Location lastLineStart = current;
1810  line = 0;
1811  while (current < location && current != end_) {
1812  Char c = *current++;
1813  if (c == '\r') {
1814  if (*current == '\n')
1815  ++current;
1816  lastLineStart = current;
1817  ++line;
1818  } else if (c == '\n') {
1819  lastLineStart = current;
1820  ++line;
1821  }
1822  }
1823  // column & line start at 1
1824  column = int(location - lastLineStart) + 1;
1825  ++line;
1826 }
1827 
1828 String OurReader::getLocationLineAndColumn(Location location) const {
1829  int line, column;
1830  getLocationLineAndColumn(location, line, column);
1831  char buffer[18 + 16 + 16 + 1];
1832  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1833  return buffer;
1834 }
1835 
1836 String OurReader::getFormattedErrorMessages() const {
1837  String formattedMessage;
1838  for (const auto& error : errors_) {
1839  formattedMessage +=
1840  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1841  formattedMessage += " " + error.message_ + "\n";
1842  if (error.extra_)
1843  formattedMessage +=
1844  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1845  }
1846  return formattedMessage;
1847 }
1848 
1849 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1850  std::vector<OurReader::StructuredError> allErrors;
1851  for (const auto& error : errors_) {
1852  OurReader::StructuredError structured;
1853  structured.offset_start = error.token_.start_ - begin_;
1854  structured.offset_limit = error.token_.end_ - begin_;
1855  structured.message = error.message_;
1856  allErrors.push_back(structured);
1857  }
1858  return allErrors;
1859 }
1860 
1861 class OurCharReader : public CharReader {
1862  bool const collectComments_;
1863  OurReader reader_;
1864 
1865 public:
1866  OurCharReader(bool collectComments, OurFeatures const& features)
1867  : collectComments_(collectComments), reader_(features) {}
1868  bool parse(char const* beginDoc, char const* endDoc, Value* root,
1869  String* errs) override {
1870  bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1871  if (errs) {
1872  *errs = reader_.getFormattedErrorMessages();
1873  }
1874  return ok;
1875  }
1876 };
1877 
1881  bool collectComments = settings_["collectComments"].asBool();
1882  OurFeatures features = OurFeatures::all();
1883  features.allowComments_ = settings_["allowComments"].asBool();
1884  features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1885  features.strictRoot_ = settings_["strictRoot"].asBool();
1886  features.allowDroppedNullPlaceholders_ =
1887  settings_["allowDroppedNullPlaceholders"].asBool();
1888  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1889  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1890 
1891  // Stack limit is always a size_t, so we get this as an unsigned int
1892  // regardless of it we have 64-bit integer support enabled.
1893  features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1894  features.failIfExtra_ = settings_["failIfExtra"].asBool();
1895  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1896  features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1897  features.skipBom_ = settings_["skipBom"].asBool();
1898  return new OurCharReader(collectComments, features);
1899 }
1900 
1902  static const auto& valid_keys = *new std::set<String>{
1903  "collectComments",
1904  "allowComments",
1905  "allowTrailingCommas",
1906  "strictRoot",
1907  "allowDroppedNullPlaceholders",
1908  "allowNumericKeys",
1909  "allowSingleQuotes",
1910  "stackLimit",
1911  "failIfExtra",
1912  "rejectDupKeys",
1913  "allowSpecialFloats",
1914  "skipBom",
1915  };
1916  for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1917  auto key = si.name();
1918  if (valid_keys.count(key))
1919  continue;
1920  if (invalid)
1921  (*invalid)[std::move(key)] = *si;
1922  else
1923  return false;
1924  }
1925  return invalid ? invalid->empty() : true;
1926 }
1927 
1929  return settings_[key];
1930 }
1931 // static
1934  (*settings)["allowComments"] = false;
1935  (*settings)["allowTrailingCommas"] = false;
1936  (*settings)["strictRoot"] = true;
1937  (*settings)["allowDroppedNullPlaceholders"] = false;
1938  (*settings)["allowNumericKeys"] = false;
1939  (*settings)["allowSingleQuotes"] = false;
1940  (*settings)["stackLimit"] = 1000;
1941  (*settings)["failIfExtra"] = true;
1942  (*settings)["rejectDupKeys"] = true;
1943  (*settings)["allowSpecialFloats"] = false;
1944  (*settings)["skipBom"] = true;
1946 }
1947 // static
1950  (*settings)["collectComments"] = true;
1951  (*settings)["allowComments"] = true;
1952  (*settings)["allowTrailingCommas"] = true;
1953  (*settings)["strictRoot"] = false;
1954  (*settings)["allowDroppedNullPlaceholders"] = false;
1955  (*settings)["allowNumericKeys"] = false;
1956  (*settings)["allowSingleQuotes"] = false;
1957  (*settings)["stackLimit"] = 1000;
1958  (*settings)["failIfExtra"] = false;
1959  (*settings)["rejectDupKeys"] = false;
1960  (*settings)["allowSpecialFloats"] = false;
1961  (*settings)["skipBom"] = true;
1963 }
1964 
1966 // global functions
1967 
1968 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1969  String* errs) {
1970  OStringStream ssin;
1971  ssin << sin.rdbuf();
1972  String doc = ssin.str();
1973  char const* begin = doc.data();
1974  char const* end = begin + doc.size();
1975  // Note that we do not actually need a null-terminator.
1976  CharReaderPtr const reader(fact.newCharReader());
1977  return reader->parse(begin, end, root, errs);
1978 }
1979 
1982  String errs;
1983  bool ok = parseFromStream(b, sin, &root, &errs);
1984  if (!ok) {
1985  throwRuntimeError(errs);
1986  }
1987  return sin;
1988 }
1989 
1990 } // namespace Json
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition: json_tool.h:39
static constexpr LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition: value.h:223
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
array value (ordered list)
Definition: value.h:114
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition: config.h:135
bool asBool() const
Definition: json_value.cpp:804
Json::LargestUInt LargestUInt
Definition: value.h:207
Json::Value settings_
Configuration of this builder.
Definition: reader.h:332
Json::UInt UInt
Definition: value.h:200
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
Definition: json_value.cpp:882
object value (collection of name/value pairs).
Definition: value.h:115
std::auto_ptr< CharReader > CharReaderPtr
Definition: json_reader.cpp:58
static constexpr Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition: value.h:233
CharReader * newCharReader() const override
Allocate a CharReader via operator new().
Features()
Initialize the configuration like JsonConfig::allFeatures;.
static constexpr LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition: value.h:228
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool allowComments_
true if comments are allowed. Default: true.
Definition: json_features.h:44
CommentPlacement
Definition: value.h:118
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition: config.h:138
bool allowNumericKeys_
true if numeric object key are allowed. Default: false.
Definition: json_features.h:54
static size_t const stackLimit_g
Definition: json_reader.cpp:50
UInt asUInt() const
Definition: json_value.cpp:676
JSON (JavaScript Object Notation).
Definition: allocator.h:14
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
Definition: json_features.h:51
#define jsoncpp_snprintf
Definition: config.h:63
static constexpr LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition: value.h:226
const_iterator begin() const
~CharReaderBuilder() override
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
bool validate(Json::Value *invalid) const
Interface for reading JSON from a char array.
Definition: reader.h:245
Json::LargestInt LargestInt
Definition: value.h:206
Represents a JSON value.
Definition: value.h:193
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:66
a comment on the line after a value (only make sense for root value)
Definition: value.h:121
std::istream IStream
Definition: config.h:139
#define JSONCPP_DEPRECATED_STACK_LIMIT
Definition: json_reader.cpp:47
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:68
bool strictRoot_
true if root must be either an array or an object value.
Definition: json_features.h:48
Build a CharReader implementation.
Definition: reader.h:289
Configuration passed to reader and writer.
Definition: json_features.h:21
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
a comment placed on the line before a value
Definition: value.h:119
a comment just after a value on the same line
Definition: value.h:120
const_iterator end() const
Value & operator[](const String &key)
A simple way to update a specific setting.
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition: config.h:132