JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstring>
#include <istream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
21 
22 #include <cstdio>
23 #if __cplusplus >= 201103L
24 
25 #if !defined(sscanf)
26 #define sscanf std::sscanf
27 #endif
28 
29 #endif //__cplusplus
30 
31 #if defined(_MSC_VER)
32 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
33 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
34 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
35 #endif //_MSC_VER
36 
37 #if defined(_MSC_VER)
38 // Disable warning about strdup being deprecated.
39 #pragma warning(disable : 4996)
40 #endif
41 
42 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
43 // time to change the stack limit
44 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
45 #define JSONCPP_DEPRECATED_STACK_LIMIT 1000
46 #endif
47 
48 static size_t const stackLimit_g =
49  JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
50 
51 namespace Json {
52 
// Smart-pointer alias for CharReader. std::unique_ptr is selected when the
// compiler reports C++11 or the library reports _CPPLIB_VER >= 520;
// otherwise the alias falls back to std::auto_ptr.
#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
typedef std::unique_ptr<CharReader> CharReaderPtr;
#else
typedef std::auto_ptr<CharReader> CharReaderPtr;
#endif
58 
59 // Implementation of class Features
60 // ////////////////////////////////
61 
// Compiler-generated default constructor; the member defaults themselves are
// set in the class definition, which is not part of this file.
Features::Features() = default;
63 
64 Features Features::all() { return {}; }
65 
67  Features features;
68  features.allowComments_ = false;
69  features.strictRoot_ = true;
70  features.allowDroppedNullPlaceholders_ = false;
71  features.allowNumericKeys_ = false;
72  return features;
73 }
74 
75 // Implementation of class Reader
76 // ////////////////////////////////
77 
78 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
79  for (; begin < end; ++begin)
80  if (*begin == '\n' || *begin == '\r')
81  return true;
82  return false;
83 }
84 
85 // Class Reader
86 // //////////////////////////////////////////////////////////////////
87 
88 Reader::Reader()
89  : errors_(), document_(), commentsBefore_(), features_(Features::all()) {}
90 
91 Reader::Reader(const Features& features)
92  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
93  lastValue_(), commentsBefore_(), features_(features), collectComments_() {
94 }
95 
96 bool Reader::parse(const std::string& document,
97  Value& root,
98  bool collectComments) {
99  document_.assign(document.begin(), document.end());
100  const char* begin = document_.c_str();
101  const char* end = begin + document_.length();
102  return parse(begin, end, root, collectComments);
103 }
104 
105 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
106  // std::istream_iterator<char> begin(is);
107  // std::istream_iterator<char> end;
108  // Those would allow streamed input from a file, if parse() were a
109  // template function.
110 
111  // Since String is reference-counted, this at least does not
112  // create an extra copy.
113  String doc;
114  std::getline(is, doc, (char)EOF);
115  return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
116 }
117 
118 bool Reader::parse(const char* beginDoc,
119  const char* endDoc,
120  Value& root,
121  bool collectComments) {
122  if (!features_.allowComments_) {
123  collectComments = false;
124  }
125 
126  begin_ = beginDoc;
127  end_ = endDoc;
128  collectComments_ = collectComments;
129  current_ = begin_;
130  lastValueEnd_ = nullptr;
131  lastValue_ = nullptr;
132  commentsBefore_.clear();
133  errors_.clear();
134  while (!nodes_.empty())
135  nodes_.pop();
136  nodes_.push(&root);
137 
138  bool successful = readValue();
139  Token token;
140  skipCommentTokens(token);
141  if (collectComments_ && !commentsBefore_.empty())
142  root.setComment(commentsBefore_, commentAfter);
143  if (features_.strictRoot_) {
144  if (!root.isArray() && !root.isObject()) {
145  // Set error location to start of doc, ideally should be first token found
146  // in doc
147  token.type_ = tokenError;
148  token.start_ = beginDoc;
149  token.end_ = endDoc;
150  addError(
151  "A valid JSON document must be either an array or an object value.",
152  token);
153  return false;
154  }
155  }
156  return successful;
157 }
158 
159 bool Reader::readValue() {
160  // readValue() may call itself only if it calls readObject() or ReadArray().
161  // These methods execute nodes_.push() just before and nodes_.pop)() just
162  // after calling readValue(). parse() executes one nodes_.push(), so > instead
163  // of >=.
164  if (nodes_.size() > stackLimit_g)
165  throwRuntimeError("Exceeded stackLimit in readValue().");
166 
167  Token token;
168  skipCommentTokens(token);
169  bool successful = true;
170 
171  if (collectComments_ && !commentsBefore_.empty()) {
172  currentValue().setComment(commentsBefore_, commentBefore);
173  commentsBefore_.clear();
174  }
175 
176  switch (token.type_) {
177  case tokenObjectBegin:
178  successful = readObject(token);
179  currentValue().setOffsetLimit(current_ - begin_);
180  break;
181  case tokenArrayBegin:
182  successful = readArray(token);
183  currentValue().setOffsetLimit(current_ - begin_);
184  break;
185  case tokenNumber:
186  successful = decodeNumber(token);
187  break;
188  case tokenString:
189  successful = decodeString(token);
190  break;
191  case tokenTrue: {
192  Value v(true);
193  currentValue().swapPayload(v);
194  currentValue().setOffsetStart(token.start_ - begin_);
195  currentValue().setOffsetLimit(token.end_ - begin_);
196  } break;
197  case tokenFalse: {
198  Value v(false);
199  currentValue().swapPayload(v);
200  currentValue().setOffsetStart(token.start_ - begin_);
201  currentValue().setOffsetLimit(token.end_ - begin_);
202  } break;
203  case tokenNull: {
204  Value v;
205  currentValue().swapPayload(v);
206  currentValue().setOffsetStart(token.start_ - begin_);
207  currentValue().setOffsetLimit(token.end_ - begin_);
208  } break;
209  case tokenArraySeparator:
210  case tokenObjectEnd:
211  case tokenArrayEnd:
212  if (features_.allowDroppedNullPlaceholders_) {
213  // "Un-read" the current token and mark the current value as a null
214  // token.
215  current_--;
216  Value v;
217  currentValue().swapPayload(v);
218  currentValue().setOffsetStart(current_ - begin_ - 1);
219  currentValue().setOffsetLimit(current_ - begin_);
220  break;
221  } // Else, fall through...
222  default:
223  currentValue().setOffsetStart(token.start_ - begin_);
224  currentValue().setOffsetLimit(token.end_ - begin_);
225  return addError("Syntax error: value, object or array expected.", token);
226  }
227 
228  if (collectComments_) {
229  lastValueEnd_ = current_;
230  lastValue_ = &currentValue();
231  }
232 
233  return successful;
234 }
235 
236 void Reader::skipCommentTokens(Token& token) {
237  if (features_.allowComments_) {
238  do {
239  readToken(token);
240  } while (token.type_ == tokenComment);
241  } else {
242  readToken(token);
243  }
244 }
245 
246 bool Reader::readToken(Token& token) {
247  skipSpaces();
248  token.start_ = current_;
249  Char c = getNextChar();
250  bool ok = true;
251  switch (c) {
252  case '{':
253  token.type_ = tokenObjectBegin;
254  break;
255  case '}':
256  token.type_ = tokenObjectEnd;
257  break;
258  case '[':
259  token.type_ = tokenArrayBegin;
260  break;
261  case ']':
262  token.type_ = tokenArrayEnd;
263  break;
264  case '"':
265  token.type_ = tokenString;
266  ok = readString();
267  break;
268  case '/':
269  token.type_ = tokenComment;
270  ok = readComment();
271  break;
272  case '0':
273  case '1':
274  case '2':
275  case '3':
276  case '4':
277  case '5':
278  case '6':
279  case '7':
280  case '8':
281  case '9':
282  case '-':
283  token.type_ = tokenNumber;
284  readNumber();
285  break;
286  case 't':
287  token.type_ = tokenTrue;
288  ok = match("rue", 3);
289  break;
290  case 'f':
291  token.type_ = tokenFalse;
292  ok = match("alse", 4);
293  break;
294  case 'n':
295  token.type_ = tokenNull;
296  ok = match("ull", 3);
297  break;
298  case ',':
299  token.type_ = tokenArraySeparator;
300  break;
301  case ':':
302  token.type_ = tokenMemberSeparator;
303  break;
304  case 0:
305  token.type_ = tokenEndOfStream;
306  break;
307  default:
308  ok = false;
309  break;
310  }
311  if (!ok)
312  token.type_ = tokenError;
313  token.end_ = current_;
314  return true;
315 }
316 
317 void Reader::skipSpaces() {
318  while (current_ != end_) {
319  Char c = *current_;
320  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
321  ++current_;
322  else
323  break;
324  }
325 }
326 
327 bool Reader::match(Location pattern, int patternLength) {
328  if (end_ - current_ < patternLength)
329  return false;
330  int index = patternLength;
331  while (index--)
332  if (current_[index] != pattern[index])
333  return false;
334  current_ += patternLength;
335  return true;
336 }
337 
338 bool Reader::readComment() {
339  Location commentBegin = current_ - 1;
340  Char c = getNextChar();
341  bool successful = false;
342  if (c == '*')
343  successful = readCStyleComment();
344  else if (c == '/')
345  successful = readCppStyleComment();
346  if (!successful)
347  return false;
348 
349  if (collectComments_) {
350  CommentPlacement placement = commentBefore;
351  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
352  if (c != '*' || !containsNewLine(commentBegin, current_))
353  placement = commentAfterOnSameLine;
354  }
355 
356  addComment(commentBegin, current_, placement);
357  }
358  return true;
359 }
360 
361 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
362  String normalized;
363  normalized.reserve(static_cast<size_t>(end - begin));
364  Reader::Location current = begin;
365  while (current != end) {
366  char c = *current++;
367  if (c == '\r') {
368  if (current != end && *current == '\n')
369  // convert dos EOL
370  ++current;
371  // convert Mac EOL
372  normalized += '\n';
373  } else {
374  normalized += c;
375  }
376  }
377  return normalized;
378 }
379 
380 void Reader::addComment(Location begin,
381  Location end,
382  CommentPlacement placement) {
383  assert(collectComments_);
384  const String& normalized = normalizeEOL(begin, end);
385  if (placement == commentAfterOnSameLine) {
386  assert(lastValue_ != nullptr);
387  lastValue_->setComment(normalized, placement);
388  } else {
389  commentsBefore_ += normalized;
390  }
391 }
392 
393 bool Reader::readCStyleComment() {
394  while ((current_ + 1) < end_) {
395  Char c = getNextChar();
396  if (c == '*' && *current_ == '/')
397  break;
398  }
399  return getNextChar() == '/';
400 }
401 
402 bool Reader::readCppStyleComment() {
403  while (current_ != end_) {
404  Char c = getNextChar();
405  if (c == '\n')
406  break;
407  if (c == '\r') {
408  // Consume DOS EOL. It will be normalized in addComment.
409  if (current_ != end_ && *current_ == '\n')
410  getNextChar();
411  // Break on Moc OS 9 EOL.
412  break;
413  }
414  }
415  return true;
416 }
417 
418 void Reader::readNumber() {
419  const char* p = current_;
420  char c = '0'; // stopgap for already consumed character
421  // integral part
422  while (c >= '0' && c <= '9')
423  c = (current_ = p) < end_ ? *p++ : '\0';
424  // fractional part
425  if (c == '.') {
426  c = (current_ = p) < end_ ? *p++ : '\0';
427  while (c >= '0' && c <= '9')
428  c = (current_ = p) < end_ ? *p++ : '\0';
429  }
430  // exponential part
431  if (c == 'e' || c == 'E') {
432  c = (current_ = p) < end_ ? *p++ : '\0';
433  if (c == '+' || c == '-')
434  c = (current_ = p) < end_ ? *p++ : '\0';
435  while (c >= '0' && c <= '9')
436  c = (current_ = p) < end_ ? *p++ : '\0';
437  }
438 }
439 
440 bool Reader::readString() {
441  Char c = '\0';
442  while (current_ != end_) {
443  c = getNextChar();
444  if (c == '\\')
445  getNextChar();
446  else if (c == '"')
447  break;
448  }
449  return c == '"';
450 }
451 
452 bool Reader::readObject(Token& token) {
453  Token tokenName;
454  String name;
455  Value init(objectValue);
456  currentValue().swapPayload(init);
457  currentValue().setOffsetStart(token.start_ - begin_);
458  while (readToken(tokenName)) {
459  bool initialTokenOk = true;
460  while (tokenName.type_ == tokenComment && initialTokenOk)
461  initialTokenOk = readToken(tokenName);
462  if (!initialTokenOk)
463  break;
464  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
465  return true;
466  name.clear();
467  if (tokenName.type_ == tokenString) {
468  if (!decodeString(tokenName, name))
469  return recoverFromError(tokenObjectEnd);
470  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
471  Value numberName;
472  if (!decodeNumber(tokenName, numberName))
473  return recoverFromError(tokenObjectEnd);
474  name = String(numberName.asCString());
475  } else {
476  break;
477  }
478 
479  Token colon;
480  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
481  return addErrorAndRecover("Missing ':' after object member name", colon,
482  tokenObjectEnd);
483  }
484  Value& value = currentValue()[name];
485  nodes_.push(&value);
486  bool ok = readValue();
487  nodes_.pop();
488  if (!ok) // error already set
489  return recoverFromError(tokenObjectEnd);
490 
491  Token comma;
492  if (!readToken(comma) ||
493  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
494  comma.type_ != tokenComment)) {
495  return addErrorAndRecover("Missing ',' or '}' in object declaration",
496  comma, tokenObjectEnd);
497  }
498  bool finalizeTokenOk = true;
499  while (comma.type_ == tokenComment && finalizeTokenOk)
500  finalizeTokenOk = readToken(comma);
501  if (comma.type_ == tokenObjectEnd)
502  return true;
503  }
504  return addErrorAndRecover("Missing '}' or object member name", tokenName,
505  tokenObjectEnd);
506 }
507 
508 bool Reader::readArray(Token& token) {
509  Value init(arrayValue);
510  currentValue().swapPayload(init);
511  currentValue().setOffsetStart(token.start_ - begin_);
512  skipSpaces();
513  if (current_ != end_ && *current_ == ']') // empty array
514  {
515  Token endArray;
516  readToken(endArray);
517  return true;
518  }
519  int index = 0;
520  for (;;) {
521  Value& value = currentValue()[index++];
522  nodes_.push(&value);
523  bool ok = readValue();
524  nodes_.pop();
525  if (!ok) // error already set
526  return recoverFromError(tokenArrayEnd);
527 
528  Token currentToken;
529  // Accept Comment after last item in the array.
530  ok = readToken(currentToken);
531  while (currentToken.type_ == tokenComment && ok) {
532  ok = readToken(currentToken);
533  }
534  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
535  currentToken.type_ != tokenArrayEnd);
536  if (!ok || badTokenType) {
537  return addErrorAndRecover("Missing ',' or ']' in array declaration",
538  currentToken, tokenArrayEnd);
539  }
540  if (currentToken.type_ == tokenArrayEnd)
541  break;
542  }
543  return true;
544 }
545 
546 bool Reader::decodeNumber(Token& token) {
547  Value decoded;
548  if (!decodeNumber(token, decoded))
549  return false;
550  currentValue().swapPayload(decoded);
551  currentValue().setOffsetStart(token.start_ - begin_);
552  currentValue().setOffsetLimit(token.end_ - begin_);
553  return true;
554 }
555 
556 bool Reader::decodeNumber(Token& token, Value& decoded) {
557  // Attempts to parse the number as an integer. If the number is
558  // larger than the maximum supported value of an integer then
559  // we decode the number as a double.
560  Location current = token.start_;
561  bool isNegative = *current == '-';
562  if (isNegative)
563  ++current;
564  // TODO: Help the compiler do the div and mod at compile time or get rid of
565  // them.
566  Value::LargestUInt maxIntegerValue =
567  isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
569  Value::LargestUInt threshold = maxIntegerValue / 10;
570  Value::LargestUInt value = 0;
571  while (current < token.end_) {
572  Char c = *current++;
573  if (c < '0' || c > '9')
574  return decodeDouble(token, decoded);
575  auto digit(static_cast<Value::UInt>(c - '0'));
576  if (value >= threshold) {
577  // We've hit or exceeded the max value divided by 10 (rounded down). If
578  // a) we've only just touched the limit, b) this is the last digit, and
579  // c) it's small enough to fit in that rounding delta, we're okay.
580  // Otherwise treat this number as a double to avoid overflow.
581  if (value > threshold || current != token.end_ ||
582  digit > maxIntegerValue % 10) {
583  return decodeDouble(token, decoded);
584  }
585  }
586  value = value * 10 + digit;
587  }
588  if (isNegative && value == maxIntegerValue)
589  decoded = Value::minLargestInt;
590  else if (isNegative)
591  decoded = -Value::LargestInt(value);
592  else if (value <= Value::LargestUInt(Value::maxInt))
593  decoded = Value::LargestInt(value);
594  else
595  decoded = value;
596  return true;
597 }
598 
599 bool Reader::decodeDouble(Token& token) {
600  Value decoded;
601  if (!decodeDouble(token, decoded))
602  return false;
603  currentValue().swapPayload(decoded);
604  currentValue().setOffsetStart(token.start_ - begin_);
605  currentValue().setOffsetLimit(token.end_ - begin_);
606  return true;
607 }
608 
609 bool Reader::decodeDouble(Token& token, Value& decoded) {
610  double value = 0;
611  String buffer(token.start_, token.end_);
612  IStringStream is(buffer);
613  if (!(is >> value))
614  return addError(
615  "'" + String(token.start_, token.end_) + "' is not a number.", token);
616  decoded = value;
617  return true;
618 }
619 
620 bool Reader::decodeString(Token& token) {
621  String decoded_string;
622  if (!decodeString(token, decoded_string))
623  return false;
624  Value decoded(decoded_string);
625  currentValue().swapPayload(decoded);
626  currentValue().setOffsetStart(token.start_ - begin_);
627  currentValue().setOffsetLimit(token.end_ - begin_);
628  return true;
629 }
630 
631 bool Reader::decodeString(Token& token, String& decoded) {
632  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
633  Location current = token.start_ + 1; // skip '"'
634  Location end = token.end_ - 1; // do not include '"'
635  while (current != end) {
636  Char c = *current++;
637  if (c == '"')
638  break;
639  else if (c == '\\') {
640  if (current == end)
641  return addError("Empty escape sequence in string", token, current);
642  Char escape = *current++;
643  switch (escape) {
644  case '"':
645  decoded += '"';
646  break;
647  case '/':
648  decoded += '/';
649  break;
650  case '\\':
651  decoded += '\\';
652  break;
653  case 'b':
654  decoded += '\b';
655  break;
656  case 'f':
657  decoded += '\f';
658  break;
659  case 'n':
660  decoded += '\n';
661  break;
662  case 'r':
663  decoded += '\r';
664  break;
665  case 't':
666  decoded += '\t';
667  break;
668  case 'u': {
669  unsigned int unicode;
670  if (!decodeUnicodeCodePoint(token, current, end, unicode))
671  return false;
672  decoded += codePointToUTF8(unicode);
673  } break;
674  default:
675  return addError("Bad escape sequence in string", token, current);
676  }
677  } else {
678  decoded += c;
679  }
680  }
681  return true;
682 }
683 
684 bool Reader::decodeUnicodeCodePoint(Token& token,
685  Location& current,
686  Location end,
687  unsigned int& unicode) {
688 
689  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
690  return false;
691  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
692  // surrogate pairs
693  if (end - current < 6)
694  return addError(
695  "additional six characters expected to parse unicode surrogate pair.",
696  token, current);
697  if (*(current++) == '\\' && *(current++) == 'u') {
698  unsigned int surrogatePair;
699  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
700  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
701  } else
702  return false;
703  } else
704  return addError("expecting another \\u token to begin the second half of "
705  "a unicode surrogate pair",
706  token, current);
707  }
708  return true;
709 }
710 
711 bool Reader::decodeUnicodeEscapeSequence(Token& token,
712  Location& current,
713  Location end,
714  unsigned int& ret_unicode) {
715  if (end - current < 4)
716  return addError(
717  "Bad unicode escape sequence in string: four digits expected.", token,
718  current);
719  int unicode = 0;
720  for (int index = 0; index < 4; ++index) {
721  Char c = *current++;
722  unicode *= 16;
723  if (c >= '0' && c <= '9')
724  unicode += c - '0';
725  else if (c >= 'a' && c <= 'f')
726  unicode += c - 'a' + 10;
727  else if (c >= 'A' && c <= 'F')
728  unicode += c - 'A' + 10;
729  else
730  return addError(
731  "Bad unicode escape sequence in string: hexadecimal digit expected.",
732  token, current);
733  }
734  ret_unicode = static_cast<unsigned int>(unicode);
735  return true;
736 }
737 
738 bool Reader::addError(const String& message, Token& token, Location extra) {
739  ErrorInfo info;
740  info.token_ = token;
741  info.message_ = message;
742  info.extra_ = extra;
743  errors_.push_back(info);
744  return false;
745 }
746 
747 bool Reader::recoverFromError(TokenType skipUntilToken) {
748  size_t const errorCount = errors_.size();
749  Token skip;
750  for (;;) {
751  if (!readToken(skip))
752  errors_.resize(errorCount); // discard errors caused by recovery
753  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
754  break;
755  }
756  errors_.resize(errorCount);
757  return false;
758 }
759 
760 bool Reader::addErrorAndRecover(const String& message,
761  Token& token,
762  TokenType skipUntilToken) {
763  addError(message, token);
764  return recoverFromError(skipUntilToken);
765 }
766 
767 Value& Reader::currentValue() { return *(nodes_.top()); }
768 
769 Reader::Char Reader::getNextChar() {
770  if (current_ == end_)
771  return 0;
772  return *current_++;
773 }
774 
775 void Reader::getLocationLineAndColumn(Location location,
776  int& line,
777  int& column) const {
778  Location current = begin_;
779  Location lastLineStart = current;
780  line = 0;
781  while (current < location && current != end_) {
782  Char c = *current++;
783  if (c == '\r') {
784  if (*current == '\n')
785  ++current;
786  lastLineStart = current;
787  ++line;
788  } else if (c == '\n') {
789  lastLineStart = current;
790  ++line;
791  }
792  }
793  // column & line start at 1
794  column = int(location - lastLineStart) + 1;
795  ++line;
796 }
797 
798 String Reader::getLocationLineAndColumn(Location location) const {
799  int line, column;
800  getLocationLineAndColumn(location, line, column);
801  char buffer[18 + 16 + 16 + 1];
802  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
803  return buffer;
804 }
805 
806 // Deprecated. Preserved for backward compatibility
807 String Reader::getFormatedErrorMessages() const {
808  return getFormattedErrorMessages();
809 }
810 
812  String formattedMessage;
813  for (const auto& error : errors_) {
814  formattedMessage +=
815  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
816  formattedMessage += " " + error.message_ + "\n";
817  if (error.extra_)
818  formattedMessage +=
819  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
820  }
821  return formattedMessage;
822 }
823 
824 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
825  std::vector<Reader::StructuredError> allErrors;
826  for (const auto& error : errors_) {
827  Reader::StructuredError structured;
828  structured.offset_start = error.token_.start_ - begin_;
829  structured.offset_limit = error.token_.end_ - begin_;
830  structured.message = error.message_;
831  allErrors.push_back(structured);
832  }
833  return allErrors;
834 }
835 
836 bool Reader::pushError(const Value& value, const String& message) {
837  ptrdiff_t const length = end_ - begin_;
838  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
839  return false;
840  Token token;
841  token.type_ = tokenError;
842  token.start_ = begin_ + value.getOffsetStart();
843  token.end_ = begin_ + value.getOffsetLimit();
844  ErrorInfo info;
845  info.token_ = token;
846  info.message_ = message;
847  info.extra_ = nullptr;
848  errors_.push_back(info);
849  return true;
850 }
851 
852 bool Reader::pushError(const Value& value,
853  const String& message,
854  const Value& extra) {
855  ptrdiff_t const length = end_ - begin_;
856  if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
857  extra.getOffsetLimit() > length)
858  return false;
859  Token token;
860  token.type_ = tokenError;
861  token.start_ = begin_ + value.getOffsetStart();
862  token.end_ = begin_ + value.getOffsetLimit();
863  ErrorInfo info;
864  info.token_ = token;
865  info.message_ = message;
866  info.extra_ = begin_ + extra.getOffsetStart();
867  errors_.push_back(info);
868  return true;
869 }
870 
871 bool Reader::good() const { return errors_.empty(); }
872 
873 // Originally copied from the Features class (now deprecated), used internally
874 // for features implementation.
// Plain bundle of parser options read by OurReader.
class OurFeatures {
public:
  // Returns a value-initialized instance (see the definition below).
  static OurFeatures all();
  // When false, OurReader::parse() forces comment collection off.
  bool allowComments_;
  // When true, OurReader::parse() rejects a root that is neither an array
  // nor an object.
  bool strictRoot_;
  // NOTE(review): presumably mirrors Features::allowDroppedNullPlaceholders_;
  // its use is not visible in this part of the file.
  bool allowDroppedNullPlaceholders_;
  // NOTE(review): presumably mirrors Features::allowNumericKeys_; its use is
  // not visible in this part of the file.
  bool allowNumericKeys_;
  // NOTE(review): use not visible here; OurReader declares
  // readStringSingleQuote(), which suggests single-quoted strings - confirm.
  bool allowSingleQuotes_;
  // When true, OurReader::parse() reports non-whitespace content that
  // follows the parsed value as an error.
  bool failIfExtra_;
  // NOTE(review): use not visible here; the name suggests duplicate object
  // keys are rejected - confirm.
  bool rejectDupKeys_;
  // NOTE(review): use not visible here; OurReader has NaN/Inf token types,
  // which this flag presumably enables - confirm.
  bool allowSpecialFloats_;
  // Nesting depth above which OurReader::readValue() throws.
  size_t stackLimit_;
}; // OurFeatures
888 
889 OurFeatures OurFeatures::all() { return {}; }
890 
// Implementation of class OurReader
892 // ////////////////////////////////
893 
894 // Originally copied from the Reader class (now deprecated), used internally
895 // for implementing JSON reading.
896 class OurReader {
897 public:
898  typedef char Char;
899  typedef const Char* Location;
900  struct StructuredError {
901  ptrdiff_t offset_start;
902  ptrdiff_t offset_limit;
903  String message;
904  };
905 
906  OurReader(OurFeatures const& features);
907  bool parse(const char* beginDoc,
908  const char* endDoc,
909  Value& root,
910  bool collectComments = true);
911  String getFormattedErrorMessages() const;
912  std::vector<StructuredError> getStructuredErrors() const;
913  bool pushError(const Value& value, const String& message);
914  bool pushError(const Value& value, const String& message, const Value& extra);
915  bool good() const;
916 
917 private:
918  OurReader(OurReader const&); // no impl
919  void operator=(OurReader const&); // no impl
920 
921  enum TokenType {
922  tokenEndOfStream = 0,
923  tokenObjectBegin,
924  tokenObjectEnd,
925  tokenArrayBegin,
926  tokenArrayEnd,
927  tokenString,
928  tokenNumber,
929  tokenTrue,
930  tokenFalse,
931  tokenNull,
932  tokenNaN,
933  tokenPosInf,
934  tokenNegInf,
935  tokenArraySeparator,
936  tokenMemberSeparator,
937  tokenComment,
938  tokenError
939  };
940 
941  class Token {
942  public:
943  TokenType type_;
944  Location start_;
945  Location end_;
946  };
947 
948  class ErrorInfo {
949  public:
950  Token token_;
951  String message_;
952  Location extra_;
953  };
954 
955  typedef std::deque<ErrorInfo> Errors;
956 
957  bool readToken(Token& token);
958  void skipSpaces();
959  bool match(Location pattern, int patternLength);
960  bool readComment();
961  bool readCStyleComment();
962  bool readCppStyleComment();
963  bool readString();
964  bool readStringSingleQuote();
965  bool readNumber(bool checkInf);
966  bool readValue();
967  bool readObject(Token& token);
968  bool readArray(Token& token);
969  bool decodeNumber(Token& token);
970  bool decodeNumber(Token& token, Value& decoded);
971  bool decodeString(Token& token);
972  bool decodeString(Token& token, String& decoded);
973  bool decodeDouble(Token& token);
974  bool decodeDouble(Token& token, Value& decoded);
975  bool decodeUnicodeCodePoint(Token& token,
976  Location& current,
977  Location end,
978  unsigned int& unicode);
979  bool decodeUnicodeEscapeSequence(Token& token,
980  Location& current,
981  Location end,
982  unsigned int& unicode);
983  bool addError(const String& message, Token& token, Location extra = nullptr);
984  bool recoverFromError(TokenType skipUntilToken);
985  bool addErrorAndRecover(const String& message,
986  Token& token,
987  TokenType skipUntilToken);
988  void skipUntilSpace();
989  Value& currentValue();
990  Char getNextChar();
991  void
992  getLocationLineAndColumn(Location location, int& line, int& column) const;
993  String getLocationLineAndColumn(Location location) const;
994  void addComment(Location begin, Location end, CommentPlacement placement);
995  void skipCommentTokens(Token& token);
996 
997  static String normalizeEOL(Location begin, Location end);
998  static bool containsNewLine(Location begin, Location end);
999 
1000  typedef std::stack<Value*> Nodes;
1001  Nodes nodes_;
1002  Errors errors_;
1003  String document_;
1004  Location begin_;
1005  Location end_;
1006  Location current_;
1007  Location lastValueEnd_;
1008  Value* lastValue_;
1009  String commentsBefore_;
1010 
1011  OurFeatures const features_;
1012  bool collectComments_;
1013 }; // OurReader
1014 
// complete copy of Reader impl, for OurReader
1016 
1017 bool OurReader::containsNewLine(OurReader::Location begin,
1018  OurReader::Location end) {
1019  for (; begin < end; ++begin)
1020  if (*begin == '\n' || *begin == '\r')
1021  return true;
1022  return false;
1023 }
1024 
1025 OurReader::OurReader(OurFeatures const& features)
1026  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
1027  lastValue_(), commentsBefore_(), features_(features), collectComments_() {
1028 }
1029 
1030 bool OurReader::parse(const char* beginDoc,
1031  const char* endDoc,
1032  Value& root,
1033  bool collectComments) {
1034  if (!features_.allowComments_) {
1035  collectComments = false;
1036  }
1037 
1038  begin_ = beginDoc;
1039  end_ = endDoc;
1040  collectComments_ = collectComments;
1041  current_ = begin_;
1042  lastValueEnd_ = nullptr;
1043  lastValue_ = nullptr;
1044  commentsBefore_.clear();
1045  errors_.clear();
1046  while (!nodes_.empty())
1047  nodes_.pop();
1048  nodes_.push(&root);
1049 
1050  bool successful = readValue();
1051  Token token;
1052  skipCommentTokens(token);
1053  if (features_.failIfExtra_) {
1054  if ((features_.strictRoot_ || token.type_ != tokenError) &&
1055  token.type_ != tokenEndOfStream) {
1056  addError("Extra non-whitespace after JSON value.", token);
1057  return false;
1058  }
1059  }
1060  if (collectComments_ && !commentsBefore_.empty())
1061  root.setComment(commentsBefore_, commentAfter);
1062  if (features_.strictRoot_) {
1063  if (!root.isArray() && !root.isObject()) {
1064  // Set error location to start of doc, ideally should be first token found
1065  // in doc
1066  token.type_ = tokenError;
1067  token.start_ = beginDoc;
1068  token.end_ = endDoc;
1069  addError(
1070  "A valid JSON document must be either an array or an object value.",
1071  token);
1072  return false;
1073  }
1074  }
1075  return successful;
1076 }
1077 
1078 bool OurReader::readValue() {
1079  // To preserve the old behaviour we cast size_t to int.
1080  if (nodes_.size() > features_.stackLimit_)
1081  throwRuntimeError("Exceeded stackLimit in readValue().");
1082  Token token;
1083  skipCommentTokens(token);
1084  bool successful = true;
1085 
1086  if (collectComments_ && !commentsBefore_.empty()) {
1087  currentValue().setComment(commentsBefore_, commentBefore);
1088  commentsBefore_.clear();
1089  }
1090 
1091  switch (token.type_) {
1092  case tokenObjectBegin:
1093  successful = readObject(token);
1094  currentValue().setOffsetLimit(current_ - begin_);
1095  break;
1096  case tokenArrayBegin:
1097  successful = readArray(token);
1098  currentValue().setOffsetLimit(current_ - begin_);
1099  break;
1100  case tokenNumber:
1101  successful = decodeNumber(token);
1102  break;
1103  case tokenString:
1104  successful = decodeString(token);
1105  break;
1106  case tokenTrue: {
1107  Value v(true);
1108  currentValue().swapPayload(v);
1109  currentValue().setOffsetStart(token.start_ - begin_);
1110  currentValue().setOffsetLimit(token.end_ - begin_);
1111  } break;
1112  case tokenFalse: {
1113  Value v(false);
1114  currentValue().swapPayload(v);
1115  currentValue().setOffsetStart(token.start_ - begin_);
1116  currentValue().setOffsetLimit(token.end_ - begin_);
1117  } break;
1118  case tokenNull: {
1119  Value v;
1120  currentValue().swapPayload(v);
1121  currentValue().setOffsetStart(token.start_ - begin_);
1122  currentValue().setOffsetLimit(token.end_ - begin_);
1123  } break;
1124  case tokenNaN: {
1125  Value v(std::numeric_limits<double>::quiet_NaN());
1126  currentValue().swapPayload(v);
1127  currentValue().setOffsetStart(token.start_ - begin_);
1128  currentValue().setOffsetLimit(token.end_ - begin_);
1129  } break;
1130  case tokenPosInf: {
1131  Value v(std::numeric_limits<double>::infinity());
1132  currentValue().swapPayload(v);
1133  currentValue().setOffsetStart(token.start_ - begin_);
1134  currentValue().setOffsetLimit(token.end_ - begin_);
1135  } break;
1136  case tokenNegInf: {
1137  Value v(-std::numeric_limits<double>::infinity());
1138  currentValue().swapPayload(v);
1139  currentValue().setOffsetStart(token.start_ - begin_);
1140  currentValue().setOffsetLimit(token.end_ - begin_);
1141  } break;
1142  case tokenArraySeparator:
1143  case tokenObjectEnd:
1144  case tokenArrayEnd:
1145  if (features_.allowDroppedNullPlaceholders_) {
1146  // "Un-read" the current token and mark the current value as a null
1147  // token.
1148  current_--;
1149  Value v;
1150  currentValue().swapPayload(v);
1151  currentValue().setOffsetStart(current_ - begin_ - 1);
1152  currentValue().setOffsetLimit(current_ - begin_);
1153  break;
1154  } // else, fall through ...
1155  default:
1156  currentValue().setOffsetStart(token.start_ - begin_);
1157  currentValue().setOffsetLimit(token.end_ - begin_);
1158  return addError("Syntax error: value, object or array expected.", token);
1159  }
1160 
1161  if (collectComments_) {
1162  lastValueEnd_ = current_;
1163  lastValue_ = &currentValue();
1164  }
1165 
1166  return successful;
1167 }
1168 
1169 void OurReader::skipCommentTokens(Token& token) {
1170  if (features_.allowComments_) {
1171  do {
1172  readToken(token);
1173  } while (token.type_ == tokenComment);
1174  } else {
1175  readToken(token);
1176  }
1177 }
1178 
1179 bool OurReader::readToken(Token& token) {
1180  skipSpaces();
1181  token.start_ = current_;
1182  Char c = getNextChar();
1183  bool ok = true;
1184  switch (c) {
1185  case '{':
1186  token.type_ = tokenObjectBegin;
1187  break;
1188  case '}':
1189  token.type_ = tokenObjectEnd;
1190  break;
1191  case '[':
1192  token.type_ = tokenArrayBegin;
1193  break;
1194  case ']':
1195  token.type_ = tokenArrayEnd;
1196  break;
1197  case '"':
1198  token.type_ = tokenString;
1199  ok = readString();
1200  break;
1201  case '\'':
1202  if (features_.allowSingleQuotes_) {
1203  token.type_ = tokenString;
1204  ok = readStringSingleQuote();
1205  break;
1206  } // else fall through
1207  case '/':
1208  token.type_ = tokenComment;
1209  ok = readComment();
1210  break;
1211  case '0':
1212  case '1':
1213  case '2':
1214  case '3':
1215  case '4':
1216  case '5':
1217  case '6':
1218  case '7':
1219  case '8':
1220  case '9':
1221  token.type_ = tokenNumber;
1222  readNumber(false);
1223  break;
1224  case '-':
1225  if (readNumber(true)) {
1226  token.type_ = tokenNumber;
1227  } else {
1228  token.type_ = tokenNegInf;
1229  ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1230  }
1231  break;
1232  case 't':
1233  token.type_ = tokenTrue;
1234  ok = match("rue", 3);
1235  break;
1236  case 'f':
1237  token.type_ = tokenFalse;
1238  ok = match("alse", 4);
1239  break;
1240  case 'n':
1241  token.type_ = tokenNull;
1242  ok = match("ull", 3);
1243  break;
1244  case 'N':
1245  if (features_.allowSpecialFloats_) {
1246  token.type_ = tokenNaN;
1247  ok = match("aN", 2);
1248  } else {
1249  ok = false;
1250  }
1251  break;
1252  case 'I':
1253  if (features_.allowSpecialFloats_) {
1254  token.type_ = tokenPosInf;
1255  ok = match("nfinity", 7);
1256  } else {
1257  ok = false;
1258  }
1259  break;
1260  case ',':
1261  token.type_ = tokenArraySeparator;
1262  break;
1263  case ':':
1264  token.type_ = tokenMemberSeparator;
1265  break;
1266  case 0:
1267  token.type_ = tokenEndOfStream;
1268  break;
1269  default:
1270  ok = false;
1271  break;
1272  }
1273  if (!ok)
1274  token.type_ = tokenError;
1275  token.end_ = current_;
1276  return true;
1277 }
1278 
1279 void OurReader::skipSpaces() {
1280  while (current_ != end_) {
1281  Char c = *current_;
1282  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1283  ++current_;
1284  else
1285  break;
1286  }
1287 }
1288 
1289 bool OurReader::match(Location pattern, int patternLength) {
1290  if (end_ - current_ < patternLength)
1291  return false;
1292  int index = patternLength;
1293  while (index--)
1294  if (current_[index] != pattern[index])
1295  return false;
1296  current_ += patternLength;
1297  return true;
1298 }
1299 
1300 bool OurReader::readComment() {
1301  Location commentBegin = current_ - 1;
1302  Char c = getNextChar();
1303  bool successful = false;
1304  if (c == '*')
1305  successful = readCStyleComment();
1306  else if (c == '/')
1307  successful = readCppStyleComment();
1308  if (!successful)
1309  return false;
1310 
1311  if (collectComments_) {
1312  CommentPlacement placement = commentBefore;
1313  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1314  if (c != '*' || !containsNewLine(commentBegin, current_))
1315  placement = commentAfterOnSameLine;
1316  }
1317 
1318  addComment(commentBegin, current_, placement);
1319  }
1320  return true;
1321 }
1322 
1323 String OurReader::normalizeEOL(OurReader::Location begin,
1324  OurReader::Location end) {
1325  String normalized;
1326  normalized.reserve(static_cast<size_t>(end - begin));
1327  OurReader::Location current = begin;
1328  while (current != end) {
1329  char c = *current++;
1330  if (c == '\r') {
1331  if (current != end && *current == '\n')
1332  // convert dos EOL
1333  ++current;
1334  // convert Mac EOL
1335  normalized += '\n';
1336  } else {
1337  normalized += c;
1338  }
1339  }
1340  return normalized;
1341 }
1342 
1343 void OurReader::addComment(Location begin,
1344  Location end,
1345  CommentPlacement placement) {
1346  assert(collectComments_);
1347  const String& normalized = normalizeEOL(begin, end);
1348  if (placement == commentAfterOnSameLine) {
1349  assert(lastValue_ != nullptr);
1350  lastValue_->setComment(normalized, placement);
1351  } else {
1352  commentsBefore_ += normalized;
1353  }
1354 }
1355 
1356 bool OurReader::readCStyleComment() {
1357  while ((current_ + 1) < end_) {
1358  Char c = getNextChar();
1359  if (c == '*' && *current_ == '/')
1360  break;
1361  }
1362  return getNextChar() == '/';
1363 }
1364 
1365 bool OurReader::readCppStyleComment() {
1366  while (current_ != end_) {
1367  Char c = getNextChar();
1368  if (c == '\n')
1369  break;
1370  if (c == '\r') {
1371  // Consume DOS EOL. It will be normalized in addComment.
1372  if (current_ != end_ && *current_ == '\n')
1373  getNextChar();
1374  // Break on Moc OS 9 EOL.
1375  break;
1376  }
1377  }
1378  return true;
1379 }
1380 
1381 bool OurReader::readNumber(bool checkInf) {
1382  const char* p = current_;
1383  if (checkInf && p != end_ && *p == 'I') {
1384  current_ = ++p;
1385  return false;
1386  }
1387  char c = '0'; // stopgap for already consumed character
1388  // integral part
1389  while (c >= '0' && c <= '9')
1390  c = (current_ = p) < end_ ? *p++ : '\0';
1391  // fractional part
1392  if (c == '.') {
1393  c = (current_ = p) < end_ ? *p++ : '\0';
1394  while (c >= '0' && c <= '9')
1395  c = (current_ = p) < end_ ? *p++ : '\0';
1396  }
1397  // exponential part
1398  if (c == 'e' || c == 'E') {
1399  c = (current_ = p) < end_ ? *p++ : '\0';
1400  if (c == '+' || c == '-')
1401  c = (current_ = p) < end_ ? *p++ : '\0';
1402  while (c >= '0' && c <= '9')
1403  c = (current_ = p) < end_ ? *p++ : '\0';
1404  }
1405  return true;
1406 }
1407 bool OurReader::readString() {
1408  Char c = 0;
1409  while (current_ != end_) {
1410  c = getNextChar();
1411  if (c == '\\')
1412  getNextChar();
1413  else if (c == '"')
1414  break;
1415  }
1416  return c == '"';
1417 }
1418 
1419 bool OurReader::readStringSingleQuote() {
1420  Char c = 0;
1421  while (current_ != end_) {
1422  c = getNextChar();
1423  if (c == '\\')
1424  getNextChar();
1425  else if (c == '\'')
1426  break;
1427  }
1428  return c == '\'';
1429 }
1430 
1431 bool OurReader::readObject(Token& token) {
1432  Token tokenName;
1433  String name;
1434  Value init(objectValue);
1435  currentValue().swapPayload(init);
1436  currentValue().setOffsetStart(token.start_ - begin_);
1437  while (readToken(tokenName)) {
1438  bool initialTokenOk = true;
1439  while (tokenName.type_ == tokenComment && initialTokenOk)
1440  initialTokenOk = readToken(tokenName);
1441  if (!initialTokenOk)
1442  break;
1443  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
1444  return true;
1445  name.clear();
1446  if (tokenName.type_ == tokenString) {
1447  if (!decodeString(tokenName, name))
1448  return recoverFromError(tokenObjectEnd);
1449  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1450  Value numberName;
1451  if (!decodeNumber(tokenName, numberName))
1452  return recoverFromError(tokenObjectEnd);
1453  name = numberName.asString();
1454  } else {
1455  break;
1456  }
1457  if (name.length() >= (1U << 30))
1458  throwRuntimeError("keylength >= 2^30");
1459  if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1460  String msg = "Duplicate key: '" + name + "'";
1461  return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1462  }
1463 
1464  Token colon;
1465  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1466  return addErrorAndRecover("Missing ':' after object member name", colon,
1467  tokenObjectEnd);
1468  }
1469  Value& value = currentValue()[name];
1470  nodes_.push(&value);
1471  bool ok = readValue();
1472  nodes_.pop();
1473  if (!ok) // error already set
1474  return recoverFromError(tokenObjectEnd);
1475 
1476  Token comma;
1477  if (!readToken(comma) ||
1478  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1479  comma.type_ != tokenComment)) {
1480  return addErrorAndRecover("Missing ',' or '}' in object declaration",
1481  comma, tokenObjectEnd);
1482  }
1483  bool finalizeTokenOk = true;
1484  while (comma.type_ == tokenComment && finalizeTokenOk)
1485  finalizeTokenOk = readToken(comma);
1486  if (comma.type_ == tokenObjectEnd)
1487  return true;
1488  }
1489  return addErrorAndRecover("Missing '}' or object member name", tokenName,
1490  tokenObjectEnd);
1491 }
1492 
1493 bool OurReader::readArray(Token& token) {
1494  Value init(arrayValue);
1495  currentValue().swapPayload(init);
1496  currentValue().setOffsetStart(token.start_ - begin_);
1497  skipSpaces();
1498  if (current_ != end_ && *current_ == ']') // empty array
1499  {
1500  Token endArray;
1501  readToken(endArray);
1502  return true;
1503  }
1504  int index = 0;
1505  for (;;) {
1506  Value& value = currentValue()[index++];
1507  nodes_.push(&value);
1508  bool ok = readValue();
1509  nodes_.pop();
1510  if (!ok) // error already set
1511  return recoverFromError(tokenArrayEnd);
1512 
1513  Token currentToken;
1514  // Accept Comment after last item in the array.
1515  ok = readToken(currentToken);
1516  while (currentToken.type_ == tokenComment && ok) {
1517  ok = readToken(currentToken);
1518  }
1519  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1520  currentToken.type_ != tokenArrayEnd);
1521  if (!ok || badTokenType) {
1522  return addErrorAndRecover("Missing ',' or ']' in array declaration",
1523  currentToken, tokenArrayEnd);
1524  }
1525  if (currentToken.type_ == tokenArrayEnd)
1526  break;
1527  }
1528  return true;
1529 }
1530 
1531 bool OurReader::decodeNumber(Token& token) {
1532  Value decoded;
1533  if (!decodeNumber(token, decoded))
1534  return false;
1535  currentValue().swapPayload(decoded);
1536  currentValue().setOffsetStart(token.start_ - begin_);
1537  currentValue().setOffsetLimit(token.end_ - begin_);
1538  return true;
1539 }
1540 
1541 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1542  // Attempts to parse the number as an integer. If the number is
1543  // larger than the maximum supported value of an integer then
1544  // we decode the number as a double.
1545  Location current = token.start_;
1546  bool isNegative = *current == '-';
1547  if (isNegative)
1548  ++current;
1549 
1550  // TODO(issue #960): Change to constexpr
1551  static const auto positive_threshold = Value::maxLargestUInt / 10;
1552  static const auto positive_last_digit = Value::maxLargestUInt % 10;
1553  static const auto negative_threshold =
1555  static const auto negative_last_digit =
1557 
1558  const auto threshold = isNegative ? negative_threshold : positive_threshold;
1559  const auto last_digit =
1560  isNegative ? negative_last_digit : positive_last_digit;
1561 
1562  Value::LargestUInt value = 0;
1563  while (current < token.end_) {
1564  Char c = *current++;
1565  if (c < '0' || c > '9')
1566  return decodeDouble(token, decoded);
1567 
1568  const auto digit(static_cast<Value::UInt>(c - '0'));
1569  if (value >= threshold) {
1570  // We've hit or exceeded the max value divided by 10 (rounded down). If
1571  // a) we've only just touched the limit, meaing value == threshold,
1572  // b) this is the last digit, or
1573  // c) it's small enough to fit in that rounding delta, we're okay.
1574  // Otherwise treat this number as a double to avoid overflow.
1575  if (value > threshold || current != token.end_ || digit > last_digit) {
1576  return decodeDouble(token, decoded);
1577  }
1578  }
1579  value = value * 10 + digit;
1580  }
1581 
1582  if (isNegative)
1583  decoded = -Value::LargestInt(value);
1584  else if (value <= Value::LargestUInt(Value::maxLargestInt))
1585  decoded = Value::LargestInt(value);
1586  else
1587  decoded = value;
1588 
1589  return true;
1590 }
1591 
1592 bool OurReader::decodeDouble(Token& token) {
1593  Value decoded;
1594  if (!decodeDouble(token, decoded))
1595  return false;
1596  currentValue().swapPayload(decoded);
1597  currentValue().setOffsetStart(token.start_ - begin_);
1598  currentValue().setOffsetLimit(token.end_ - begin_);
1599  return true;
1600 }
1601 
1602 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1603  double value = 0;
1604  const int bufferSize = 32;
1605  int count;
1606  ptrdiff_t const length = token.end_ - token.start_;
1607 
1608  // Sanity check to avoid buffer overflow exploits.
1609  if (length < 0) {
1610  return addError("Unable to parse token length", token);
1611  }
1612  auto const ulength = static_cast<size_t>(length);
1613 
1614  // Avoid using a string constant for the format control string given to
1615  // sscanf, as this can cause hard to debug crashes on OS X. See here for more
1616  // info:
1617  //
1618  // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
1619  char format[] = "%lf";
1620 
1621  if (length <= bufferSize) {
1622  Char buffer[bufferSize + 1];
1623  memcpy(buffer, token.start_, ulength);
1624  buffer[length] = 0;
1625  fixNumericLocaleInput(buffer, buffer + length);
1626  count = sscanf(buffer, format, &value);
1627  } else {
1628  String buffer(token.start_, token.end_);
1629  count = sscanf(buffer.c_str(), format, &value);
1630  }
1631 
1632  if (count != 1)
1633  return addError(
1634  "'" + String(token.start_, token.end_) + "' is not a number.", token);
1635  decoded = value;
1636  return true;
1637 }
1638 
1639 bool OurReader::decodeString(Token& token) {
1640  String decoded_string;
1641  if (!decodeString(token, decoded_string))
1642  return false;
1643  Value decoded(decoded_string);
1644  currentValue().swapPayload(decoded);
1645  currentValue().setOffsetStart(token.start_ - begin_);
1646  currentValue().setOffsetLimit(token.end_ - begin_);
1647  return true;
1648 }
1649 
1650 bool OurReader::decodeString(Token& token, String& decoded) {
1651  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1652  Location current = token.start_ + 1; // skip '"'
1653  Location end = token.end_ - 1; // do not include '"'
1654  while (current != end) {
1655  Char c = *current++;
1656  if (c == '"')
1657  break;
1658  else if (c == '\\') {
1659  if (current == end)
1660  return addError("Empty escape sequence in string", token, current);
1661  Char escape = *current++;
1662  switch (escape) {
1663  case '"':
1664  decoded += '"';
1665  break;
1666  case '/':
1667  decoded += '/';
1668  break;
1669  case '\\':
1670  decoded += '\\';
1671  break;
1672  case 'b':
1673  decoded += '\b';
1674  break;
1675  case 'f':
1676  decoded += '\f';
1677  break;
1678  case 'n':
1679  decoded += '\n';
1680  break;
1681  case 'r':
1682  decoded += '\r';
1683  break;
1684  case 't':
1685  decoded += '\t';
1686  break;
1687  case 'u': {
1688  unsigned int unicode;
1689  if (!decodeUnicodeCodePoint(token, current, end, unicode))
1690  return false;
1691  decoded += codePointToUTF8(unicode);
1692  } break;
1693  default:
1694  return addError("Bad escape sequence in string", token, current);
1695  }
1696  } else {
1697  decoded += c;
1698  }
1699  }
1700  return true;
1701 }
1702 
1703 bool OurReader::decodeUnicodeCodePoint(Token& token,
1704  Location& current,
1705  Location end,
1706  unsigned int& unicode) {
1707 
1708  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1709  return false;
1710  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1711  // surrogate pairs
1712  if (end - current < 6)
1713  return addError(
1714  "additional six characters expected to parse unicode surrogate pair.",
1715  token, current);
1716  if (*(current++) == '\\' && *(current++) == 'u') {
1717  unsigned int surrogatePair;
1718  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1719  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1720  } else
1721  return false;
1722  } else
1723  return addError("expecting another \\u token to begin the second half of "
1724  "a unicode surrogate pair",
1725  token, current);
1726  }
1727  return true;
1728 }
1729 
1730 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
1731  Location& current,
1732  Location end,
1733  unsigned int& ret_unicode) {
1734  if (end - current < 4)
1735  return addError(
1736  "Bad unicode escape sequence in string: four digits expected.", token,
1737  current);
1738  int unicode = 0;
1739  for (int index = 0; index < 4; ++index) {
1740  Char c = *current++;
1741  unicode *= 16;
1742  if (c >= '0' && c <= '9')
1743  unicode += c - '0';
1744  else if (c >= 'a' && c <= 'f')
1745  unicode += c - 'a' + 10;
1746  else if (c >= 'A' && c <= 'F')
1747  unicode += c - 'A' + 10;
1748  else
1749  return addError(
1750  "Bad unicode escape sequence in string: hexadecimal digit expected.",
1751  token, current);
1752  }
1753  ret_unicode = static_cast<unsigned int>(unicode);
1754  return true;
1755 }
1756 
1757 bool OurReader::addError(const String& message, Token& token, Location extra) {
1758  ErrorInfo info;
1759  info.token_ = token;
1760  info.message_ = message;
1761  info.extra_ = extra;
1762  errors_.push_back(info);
1763  return false;
1764 }
1765 
1766 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1767  size_t errorCount = errors_.size();
1768  Token skip;
1769  for (;;) {
1770  if (!readToken(skip))
1771  errors_.resize(errorCount); // discard errors caused by recovery
1772  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1773  break;
1774  }
1775  errors_.resize(errorCount);
1776  return false;
1777 }
1778 
1779 bool OurReader::addErrorAndRecover(const String& message,
1780  Token& token,
1781  TokenType skipUntilToken) {
1782  addError(message, token);
1783  return recoverFromError(skipUntilToken);
1784 }
1785 
1786 Value& OurReader::currentValue() { return *(nodes_.top()); }
1787 
1788 OurReader::Char OurReader::getNextChar() {
1789  if (current_ == end_)
1790  return 0;
1791  return *current_++;
1792 }
1793 
1794 void OurReader::getLocationLineAndColumn(Location location,
1795  int& line,
1796  int& column) const {
1797  Location current = begin_;
1798  Location lastLineStart = current;
1799  line = 0;
1800  while (current < location && current != end_) {
1801  Char c = *current++;
1802  if (c == '\r') {
1803  if (*current == '\n')
1804  ++current;
1805  lastLineStart = current;
1806  ++line;
1807  } else if (c == '\n') {
1808  lastLineStart = current;
1809  ++line;
1810  }
1811  }
1812  // column & line start at 1
1813  column = int(location - lastLineStart) + 1;
1814  ++line;
1815 }
1816 
1817 String OurReader::getLocationLineAndColumn(Location location) const {
1818  int line, column;
1819  getLocationLineAndColumn(location, line, column);
1820  char buffer[18 + 16 + 16 + 1];
1821  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1822  return buffer;
1823 }
1824 
1825 String OurReader::getFormattedErrorMessages() const {
1826  String formattedMessage;
1827  for (const auto& error : errors_) {
1828  formattedMessage +=
1829  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1830  formattedMessage += " " + error.message_ + "\n";
1831  if (error.extra_)
1832  formattedMessage +=
1833  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1834  }
1835  return formattedMessage;
1836 }
1837 
1838 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1839  std::vector<OurReader::StructuredError> allErrors;
1840  for (const auto& error : errors_) {
1841  OurReader::StructuredError structured;
1842  structured.offset_start = error.token_.start_ - begin_;
1843  structured.offset_limit = error.token_.end_ - begin_;
1844  structured.message = error.message_;
1845  allErrors.push_back(structured);
1846  }
1847  return allErrors;
1848 }
1849 
1850 bool OurReader::pushError(const Value& value, const String& message) {
1851  ptrdiff_t length = end_ - begin_;
1852  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
1853  return false;
1854  Token token;
1855  token.type_ = tokenError;
1856  token.start_ = begin_ + value.getOffsetStart();
1857  token.end_ = begin_ + value.getOffsetLimit();
1858  ErrorInfo info;
1859  info.token_ = token;
1860  info.message_ = message;
1861  info.extra_ = nullptr;
1862  errors_.push_back(info);
1863  return true;
1864 }
1865 
1866 bool OurReader::pushError(const Value& value,
1867  const String& message,
1868  const Value& extra) {
1869  ptrdiff_t length = end_ - begin_;
1870  if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
1871  extra.getOffsetLimit() > length)
1872  return false;
1873  Token token;
1874  token.type_ = tokenError;
1875  token.start_ = begin_ + value.getOffsetStart();
1876  token.end_ = begin_ + value.getOffsetLimit();
1877  ErrorInfo info;
1878  info.token_ = token;
1879  info.message_ = message;
1880  info.extra_ = begin_ + extra.getOffsetStart();
1881  errors_.push_back(info);
1882  return true;
1883 }
1884 
1885 bool OurReader::good() const { return errors_.empty(); }
1886 
1887 class OurCharReader : public CharReader {
1888  bool const collectComments_;
1889  OurReader reader_;
1890 
1891 public:
1892  OurCharReader(bool collectComments, OurFeatures const& features)
1893  : collectComments_(collectComments), reader_(features) {}
1894  bool parse(char const* beginDoc,
1895  char const* endDoc,
1896  Value* root,
1897  String* errs) override {
1898  bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1899  if (errs) {
1900  *errs = reader_.getFormattedErrorMessages();
1901  }
1902  return ok;
1903  }
1904 };
1905 
1909  bool collectComments = settings_["collectComments"].asBool();
1910  OurFeatures features = OurFeatures::all();
1911  features.allowComments_ = settings_["allowComments"].asBool();
1912  features.strictRoot_ = settings_["strictRoot"].asBool();
1913  features.allowDroppedNullPlaceholders_ =
1914  settings_["allowDroppedNullPlaceholders"].asBool();
1915  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1916  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1917 
1918  // Stack limit is always a size_t, so we get this as an unsigned int
1919  // regardless of it we have 64-bit integer support enabled.
1920  features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1921  features.failIfExtra_ = settings_["failIfExtra"].asBool();
1922  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1923  features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1924  return new OurCharReader(collectComments, features);
1925 }
1926 static void getValidReaderKeys(std::set<String>* valid_keys) {
1927  valid_keys->clear();
1928  valid_keys->insert("collectComments");
1929  valid_keys->insert("allowComments");
1930  valid_keys->insert("strictRoot");
1931  valid_keys->insert("allowDroppedNullPlaceholders");
1932  valid_keys->insert("allowNumericKeys");
1933  valid_keys->insert("allowSingleQuotes");
1934  valid_keys->insert("stackLimit");
1935  valid_keys->insert("failIfExtra");
1936  valid_keys->insert("rejectDupKeys");
1937  valid_keys->insert("allowSpecialFloats");
1938 }
1940  Json::Value my_invalid;
1941  if (!invalid)
1942  invalid = &my_invalid; // so we do not need to test for NULL
1943  Json::Value& inv = *invalid;
1944  std::set<String> valid_keys;
1945  getValidReaderKeys(&valid_keys);
1947  size_t n = keys.size();
1948  for (size_t i = 0; i < n; ++i) {
1949  String const& key = keys[i];
1950  if (valid_keys.find(key) == valid_keys.end()) {
1951  inv[key] = settings_[key];
1952  }
1953  }
1954  return inv.empty();
1955 }
1957  return settings_[key];
1958 }
1959 // static
1962  (*settings)["allowComments"] = false;
1963  (*settings)["strictRoot"] = true;
1964  (*settings)["allowDroppedNullPlaceholders"] = false;
1965  (*settings)["allowNumericKeys"] = false;
1966  (*settings)["allowSingleQuotes"] = false;
1967  (*settings)["stackLimit"] = 1000;
1968  (*settings)["failIfExtra"] = true;
1969  (*settings)["rejectDupKeys"] = true;
1970  (*settings)["allowSpecialFloats"] = false;
1972 }
1973 // static
1976  (*settings)["collectComments"] = true;
1977  (*settings)["allowComments"] = true;
1978  (*settings)["strictRoot"] = false;
1979  (*settings)["allowDroppedNullPlaceholders"] = false;
1980  (*settings)["allowNumericKeys"] = false;
1981  (*settings)["allowSingleQuotes"] = false;
1982  (*settings)["stackLimit"] = 1000;
1983  (*settings)["failIfExtra"] = false;
1984  (*settings)["rejectDupKeys"] = false;
1985  (*settings)["allowSpecialFloats"] = false;
1987 }
1988 
1990 // global functions
1991 
1993  IStream& sin,
1994  Value* root,
1995  String* errs) {
1996  OStringStream ssin;
1997  ssin << sin.rdbuf();
1998  String doc = ssin.str();
1999  char const* begin = doc.data();
2000  char const* end = begin + doc.size();
2001  // Note that we do not actually need a null-terminator.
2002  CharReaderPtr const reader(fact.newCharReader());
2003  return reader->parse(begin, end, root, errs);
2004 }
2005 
2008  String errs;
2009  bool ok = parseFromStream(b, sin, &root, &errs);
2010  if (!ok) {
2011  throwRuntimeError(errs);
2012  }
2013  return sin;
2014 }
2015 
2016 } // namespace Json
std::vector< String > Members
Definition: value.h:193
bool isArray() const
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition: json_tool.h:39
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
void fixNumericLocaleInput(Iter begin, Iter end)
Definition: json_tool.h:103
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
array value (ordered list)
Definition: value.h:105
std::auto_ptr< CharReader > CharReaderPtr
Definition: json_reader.cpp:56
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition: config.h:160
bool asBool() const
Definition: json_value.cpp:845
Json::Value settings_
Configuration of this builder.
Definition: reader.h:344
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
Definition: json_value.cpp:923
Members getMemberNames() const
Return a list of the member names.
void setComment(const char *comment, size_t len, CommentPlacement placement)
Comments must be //... or /* ... */.
Definition: value.h:590
object value (collection of name/value pairs).
Definition: value.h:106
char Char
Definition: reader.h:37
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
Definition: json_value.cpp:486
static const Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition: value.h:225
ptrdiff_t getOffsetStart() const
Json::LargestUInt LargestUInt
Definition: value.h:203
CharReader * newCharReader() const override
Allocate a CharReader via operator new().
Features()
Initialize the configuration like JsonConfig::allFeatures;.
An error tagged with where in the JSON text it was encountered.
Definition: reader.h:46
static const LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition: value.h:216
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool allowComments_
true if comments are allowed. Default: true.
Definition: features.h:44
CommentPlacement
Definition: value.h:109
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition: config.h:163
const Char * Location
Definition: reader.h:38
bool allowNumericKeys_
true if numeric object keys are allowed. Default: false.
Definition: features.h:54
static size_t const stackLimit_g
Definition: json_reader.cpp:48
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
Definition: json_reader.cpp:96
UInt asUInt() const
Definition: json_value.cpp:717
JSON (JavaScript Object Notation).
Definition: allocator.h:14
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
Definition: features.h:51
#define jsoncpp_snprintf
Definition: config.h:74
bool pushError(const Value &value, const String &message)
Add a semantic error message.
Json::LargestInt LargestInt
Definition: value.h:202
ptrdiff_t getOffsetLimit() const
~CharReaderBuilder() override
bool good() const
Return whether there are any errors.
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
bool validate(Json::Value *invalid) const
Interface for reading JSON from a char array.
Definition: reader.h:254
Represents a JSON value.
Definition: value.h:189
void setOffsetStart(ptrdiff_t start)
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:64
a comment on the line after a value (only make sense for
Definition: value.h:112
static void getValidReaderKeys(std::set< String > *valid_keys)
std::istream IStream
Definition: config.h:164
#define JSONCPP_DEPRECATED_STACK_LIMIT
Definition: json_reader.cpp:45
void setOffsetLimit(ptrdiff_t limit)
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:66
bool strictRoot_
true if root must be either an array or an object value.
Definition: features.h:48
Build a CharReader implementation.
Definition: reader.h:302
bool isObject() const
Configuration passed to reader and writer.
Definition: features.h:21
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
a comment placed on the line before a value
Definition: value.h:110
a comment just after a value on the same line
Definition: value.h:111
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, std::string *errs)
Consume entire stream and use its begin/end.
Value & operator[](const String &key)
A simple way to update a specific setting.
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition: config.h:157
String getFormattedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
static const LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition: value.h:218
static const LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition: value.h:220