JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstring>
#include <istream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
21 
22 #include <cstdio>
23 #if __cplusplus >= 201103L
24 
25 #if !defined(sscanf)
26 #define sscanf std::sscanf
27 #endif
28 
29 #endif //__cplusplus
30 
31 #if defined(_MSC_VER)
32 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
33 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
34 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
35 #endif //_MSC_VER
36 
37 #if defined(_MSC_VER)
38 // Disable warning about strdup being deprecated.
39 #pragma warning(disable : 4996)
40 #endif
41 
42 // Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
43 // time to change the stack limit
44 #if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
45 #define JSONCPP_DEPRECATED_STACK_LIMIT 1000
46 #endif
47 
48 static size_t const stackLimit_g =
49  JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
50 
51 namespace Json {
52 
53 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
54 typedef std::unique_ptr<CharReader> CharReaderPtr;
55 #else
56 typedef std::auto_ptr<CharReader> CharReaderPtr;
57 #endif
58 
59 // Implementation of class Features
60 // ////////////////////////////////
61 
62 Features::Features() = default;
63 
64 Features Features::all() { return {}; }
65 
67  Features features;
68  features.allowComments_ = false;
69  features.strictRoot_ = true;
70  features.allowDroppedNullPlaceholders_ = false;
71  features.allowNumericKeys_ = false;
72  return features;
73 }
74 
75 // Implementation of class Reader
76 // ////////////////////////////////
77 
78 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
79  for (; begin < end; ++begin)
80  if (*begin == '\n' || *begin == '\r')
81  return true;
82  return false;
83 }
84 
85 // Class Reader
86 // //////////////////////////////////////////////////////////////////
87 
88 Reader::Reader()
89  : errors_(), document_(), commentsBefore_(), features_(Features::all()) {}
90 
91 Reader::Reader(const Features& features)
92  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
93  lastValue_(), commentsBefore_(), features_(features), collectComments_() {
94 }
95 
96 bool Reader::parse(const std::string& document,
97  Value& root,
98  bool collectComments) {
99  document_.assign(document.begin(), document.end());
100  const char* begin = document_.c_str();
101  const char* end = begin + document_.length();
102  return parse(begin, end, root, collectComments);
103 }
104 
105 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
106  // std::istream_iterator<char> begin(is);
107  // std::istream_iterator<char> end;
108  // Those would allow streamed input from a file, if parse() were a
109  // template function.
110 
111  // Since String is reference-counted, this at least does not
112  // create an extra copy.
113  String doc;
114  std::getline(is, doc, (char)EOF);
115  return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
116 }
117 
118 bool Reader::parse(const char* beginDoc,
119  const char* endDoc,
120  Value& root,
121  bool collectComments) {
122  if (!features_.allowComments_) {
123  collectComments = false;
124  }
125 
126  begin_ = beginDoc;
127  end_ = endDoc;
128  collectComments_ = collectComments;
129  current_ = begin_;
130  lastValueEnd_ = nullptr;
131  lastValue_ = nullptr;
132  commentsBefore_.clear();
133  errors_.clear();
134  while (!nodes_.empty())
135  nodes_.pop();
136  nodes_.push(&root);
137 
138  bool successful = readValue();
139  Token token;
140  skipCommentTokens(token);
141  if (collectComments_ && !commentsBefore_.empty())
142  root.setComment(commentsBefore_, commentAfter);
143  if (features_.strictRoot_) {
144  if (!root.isArray() && !root.isObject()) {
145  // Set error location to start of doc, ideally should be first token found
146  // in doc
147  token.type_ = tokenError;
148  token.start_ = beginDoc;
149  token.end_ = endDoc;
150  addError(
151  "A valid JSON document must be either an array or an object value.",
152  token);
153  return false;
154  }
155  }
156  return successful;
157 }
158 
159 bool Reader::readValue() {
160  // readValue() may call itself only if it calls readObject() or ReadArray().
161  // These methods execute nodes_.push() just before and nodes_.pop)() just
162  // after calling readValue(). parse() executes one nodes_.push(), so > instead
163  // of >=.
164  if (nodes_.size() > stackLimit_g)
165  throwRuntimeError("Exceeded stackLimit in readValue().");
166 
167  Token token;
168  skipCommentTokens(token);
169  bool successful = true;
170 
171  if (collectComments_ && !commentsBefore_.empty()) {
172  currentValue().setComment(commentsBefore_, commentBefore);
173  commentsBefore_.clear();
174  }
175 
176  switch (token.type_) {
177  case tokenObjectBegin:
178  successful = readObject(token);
179  currentValue().setOffsetLimit(current_ - begin_);
180  break;
181  case tokenArrayBegin:
182  successful = readArray(token);
183  currentValue().setOffsetLimit(current_ - begin_);
184  break;
185  case tokenNumber:
186  successful = decodeNumber(token);
187  break;
188  case tokenString:
189  successful = decodeString(token);
190  break;
191  case tokenTrue: {
192  Value v(true);
193  currentValue().swapPayload(v);
194  currentValue().setOffsetStart(token.start_ - begin_);
195  currentValue().setOffsetLimit(token.end_ - begin_);
196  } break;
197  case tokenFalse: {
198  Value v(false);
199  currentValue().swapPayload(v);
200  currentValue().setOffsetStart(token.start_ - begin_);
201  currentValue().setOffsetLimit(token.end_ - begin_);
202  } break;
203  case tokenNull: {
204  Value v;
205  currentValue().swapPayload(v);
206  currentValue().setOffsetStart(token.start_ - begin_);
207  currentValue().setOffsetLimit(token.end_ - begin_);
208  } break;
209  case tokenArraySeparator:
210  case tokenObjectEnd:
211  case tokenArrayEnd:
212  if (features_.allowDroppedNullPlaceholders_) {
213  // "Un-read" the current token and mark the current value as a null
214  // token.
215  current_--;
216  Value v;
217  currentValue().swapPayload(v);
218  currentValue().setOffsetStart(current_ - begin_ - 1);
219  currentValue().setOffsetLimit(current_ - begin_);
220  break;
221  } // Else, fall through...
222  default:
223  currentValue().setOffsetStart(token.start_ - begin_);
224  currentValue().setOffsetLimit(token.end_ - begin_);
225  return addError("Syntax error: value, object or array expected.", token);
226  }
227 
228  if (collectComments_) {
229  lastValueEnd_ = current_;
230  lastValue_ = &currentValue();
231  }
232 
233  return successful;
234 }
235 
236 void Reader::skipCommentTokens(Token& token) {
237  if (features_.allowComments_) {
238  do {
239  readToken(token);
240  } while (token.type_ == tokenComment);
241  } else {
242  readToken(token);
243  }
244 }
245 
246 bool Reader::readToken(Token& token) {
247  skipSpaces();
248  token.start_ = current_;
249  Char c = getNextChar();
250  bool ok = true;
251  switch (c) {
252  case '{':
253  token.type_ = tokenObjectBegin;
254  break;
255  case '}':
256  token.type_ = tokenObjectEnd;
257  break;
258  case '[':
259  token.type_ = tokenArrayBegin;
260  break;
261  case ']':
262  token.type_ = tokenArrayEnd;
263  break;
264  case '"':
265  token.type_ = tokenString;
266  ok = readString();
267  break;
268  case '/':
269  token.type_ = tokenComment;
270  ok = readComment();
271  break;
272  case '0':
273  case '1':
274  case '2':
275  case '3':
276  case '4':
277  case '5':
278  case '6':
279  case '7':
280  case '8':
281  case '9':
282  case '-':
283  token.type_ = tokenNumber;
284  readNumber();
285  break;
286  case 't':
287  token.type_ = tokenTrue;
288  ok = match("rue", 3);
289  break;
290  case 'f':
291  token.type_ = tokenFalse;
292  ok = match("alse", 4);
293  break;
294  case 'n':
295  token.type_ = tokenNull;
296  ok = match("ull", 3);
297  break;
298  case ',':
299  token.type_ = tokenArraySeparator;
300  break;
301  case ':':
302  token.type_ = tokenMemberSeparator;
303  break;
304  case 0:
305  token.type_ = tokenEndOfStream;
306  break;
307  default:
308  ok = false;
309  break;
310  }
311  if (!ok)
312  token.type_ = tokenError;
313  token.end_ = current_;
314  return true;
315 }
316 
317 void Reader::skipSpaces() {
318  while (current_ != end_) {
319  Char c = *current_;
320  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
321  ++current_;
322  else
323  break;
324  }
325 }
326 
327 bool Reader::match(Location pattern, int patternLength) {
328  if (end_ - current_ < patternLength)
329  return false;
330  int index = patternLength;
331  while (index--)
332  if (current_[index] != pattern[index])
333  return false;
334  current_ += patternLength;
335  return true;
336 }
337 
338 bool Reader::readComment() {
339  Location commentBegin = current_ - 1;
340  Char c = getNextChar();
341  bool successful = false;
342  if (c == '*')
343  successful = readCStyleComment();
344  else if (c == '/')
345  successful = readCppStyleComment();
346  if (!successful)
347  return false;
348 
349  if (collectComments_) {
350  CommentPlacement placement = commentBefore;
351  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
352  if (c != '*' || !containsNewLine(commentBegin, current_))
353  placement = commentAfterOnSameLine;
354  }
355 
356  addComment(commentBegin, current_, placement);
357  }
358  return true;
359 }
360 
361 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
362  String normalized;
363  normalized.reserve(static_cast<size_t>(end - begin));
364  Reader::Location current = begin;
365  while (current != end) {
366  char c = *current++;
367  if (c == '\r') {
368  if (current != end && *current == '\n')
369  // convert dos EOL
370  ++current;
371  // convert Mac EOL
372  normalized += '\n';
373  } else {
374  normalized += c;
375  }
376  }
377  return normalized;
378 }
379 
380 void Reader::addComment(Location begin,
381  Location end,
382  CommentPlacement placement) {
383  assert(collectComments_);
384  const String& normalized = normalizeEOL(begin, end);
385  if (placement == commentAfterOnSameLine) {
386  assert(lastValue_ != nullptr);
387  lastValue_->setComment(normalized, placement);
388  } else {
389  commentsBefore_ += normalized;
390  }
391 }
392 
393 bool Reader::readCStyleComment() {
394  while ((current_ + 1) < end_) {
395  Char c = getNextChar();
396  if (c == '*' && *current_ == '/')
397  break;
398  }
399  return getNextChar() == '/';
400 }
401 
402 bool Reader::readCppStyleComment() {
403  while (current_ != end_) {
404  Char c = getNextChar();
405  if (c == '\n')
406  break;
407  if (c == '\r') {
408  // Consume DOS EOL. It will be normalized in addComment.
409  if (current_ != end_ && *current_ == '\n')
410  getNextChar();
411  // Break on Moc OS 9 EOL.
412  break;
413  }
414  }
415  return true;
416 }
417 
418 void Reader::readNumber() {
419  const char* p = current_;
420  char c = '0'; // stopgap for already consumed character
421  // integral part
422  while (c >= '0' && c <= '9')
423  c = (current_ = p) < end_ ? *p++ : '\0';
424  // fractional part
425  if (c == '.') {
426  c = (current_ = p) < end_ ? *p++ : '\0';
427  while (c >= '0' && c <= '9')
428  c = (current_ = p) < end_ ? *p++ : '\0';
429  }
430  // exponential part
431  if (c == 'e' || c == 'E') {
432  c = (current_ = p) < end_ ? *p++ : '\0';
433  if (c == '+' || c == '-')
434  c = (current_ = p) < end_ ? *p++ : '\0';
435  while (c >= '0' && c <= '9')
436  c = (current_ = p) < end_ ? *p++ : '\0';
437  }
438 }
439 
440 bool Reader::readString() {
441  Char c = '\0';
442  while (current_ != end_) {
443  c = getNextChar();
444  if (c == '\\')
445  getNextChar();
446  else if (c == '"')
447  break;
448  }
449  return c == '"';
450 }
451 
452 bool Reader::readObject(Token& token) {
453  Token tokenName;
454  String name;
455  Value init(objectValue);
456  currentValue().swapPayload(init);
457  currentValue().setOffsetStart(token.start_ - begin_);
458  while (readToken(tokenName)) {
459  bool initialTokenOk = true;
460  while (tokenName.type_ == tokenComment && initialTokenOk)
461  initialTokenOk = readToken(tokenName);
462  if (!initialTokenOk)
463  break;
464  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
465  return true;
466  name.clear();
467  if (tokenName.type_ == tokenString) {
468  if (!decodeString(tokenName, name))
469  return recoverFromError(tokenObjectEnd);
470  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
471  Value numberName;
472  if (!decodeNumber(tokenName, numberName))
473  return recoverFromError(tokenObjectEnd);
474  name = String(numberName.asCString());
475  } else {
476  break;
477  }
478 
479  Token colon;
480  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
481  return addErrorAndRecover("Missing ':' after object member name", colon,
482  tokenObjectEnd);
483  }
484  Value& value = currentValue()[name];
485  nodes_.push(&value);
486  bool ok = readValue();
487  nodes_.pop();
488  if (!ok) // error already set
489  return recoverFromError(tokenObjectEnd);
490 
491  Token comma;
492  if (!readToken(comma) ||
493  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
494  comma.type_ != tokenComment)) {
495  return addErrorAndRecover("Missing ',' or '}' in object declaration",
496  comma, tokenObjectEnd);
497  }
498  bool finalizeTokenOk = true;
499  while (comma.type_ == tokenComment && finalizeTokenOk)
500  finalizeTokenOk = readToken(comma);
501  if (comma.type_ == tokenObjectEnd)
502  return true;
503  }
504  return addErrorAndRecover("Missing '}' or object member name", tokenName,
505  tokenObjectEnd);
506 }
507 
508 bool Reader::readArray(Token& token) {
509  Value init(arrayValue);
510  currentValue().swapPayload(init);
511  currentValue().setOffsetStart(token.start_ - begin_);
512  skipSpaces();
513  if (current_ != end_ && *current_ == ']') // empty array
514  {
515  Token endArray;
516  readToken(endArray);
517  return true;
518  }
519  int index = 0;
520  for (;;) {
521  Value& value = currentValue()[index++];
522  nodes_.push(&value);
523  bool ok = readValue();
524  nodes_.pop();
525  if (!ok) // error already set
526  return recoverFromError(tokenArrayEnd);
527 
528  Token currentToken;
529  // Accept Comment after last item in the array.
530  ok = readToken(currentToken);
531  while (currentToken.type_ == tokenComment && ok) {
532  ok = readToken(currentToken);
533  }
534  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
535  currentToken.type_ != tokenArrayEnd);
536  if (!ok || badTokenType) {
537  return addErrorAndRecover("Missing ',' or ']' in array declaration",
538  currentToken, tokenArrayEnd);
539  }
540  if (currentToken.type_ == tokenArrayEnd)
541  break;
542  }
543  return true;
544 }
545 
546 bool Reader::decodeNumber(Token& token) {
547  Value decoded;
548  if (!decodeNumber(token, decoded))
549  return false;
550  currentValue().swapPayload(decoded);
551  currentValue().setOffsetStart(token.start_ - begin_);
552  currentValue().setOffsetLimit(token.end_ - begin_);
553  return true;
554 }
555 
556 bool Reader::decodeNumber(Token& token, Value& decoded) {
557  // Attempts to parse the number as an integer. If the number is
558  // larger than the maximum supported value of an integer then
559  // we decode the number as a double.
560  Location current = token.start_;
561  bool isNegative = *current == '-';
562  if (isNegative)
563  ++current;
564  // TODO: Help the compiler do the div and mod at compile time or get rid of
565  // them.
566  Value::LargestUInt maxIntegerValue =
567  isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
569  Value::LargestUInt threshold = maxIntegerValue / 10;
570  Value::LargestUInt value = 0;
571  while (current < token.end_) {
572  Char c = *current++;
573  if (c < '0' || c > '9')
574  return decodeDouble(token, decoded);
575  auto digit(static_cast<Value::UInt>(c - '0'));
576  if (value >= threshold) {
577  // We've hit or exceeded the max value divided by 10 (rounded down). If
578  // a) we've only just touched the limit, b) this is the last digit, and
579  // c) it's small enough to fit in that rounding delta, we're okay.
580  // Otherwise treat this number as a double to avoid overflow.
581  if (value > threshold || current != token.end_ ||
582  digit > maxIntegerValue % 10) {
583  return decodeDouble(token, decoded);
584  }
585  }
586  value = value * 10 + digit;
587  }
588  if (isNegative && value == maxIntegerValue)
589  decoded = Value::minLargestInt;
590  else if (isNegative)
591  decoded = -Value::LargestInt(value);
592  else if (value <= Value::LargestUInt(Value::maxInt))
593  decoded = Value::LargestInt(value);
594  else
595  decoded = value;
596  return true;
597 }
598 
599 bool Reader::decodeDouble(Token& token) {
600  Value decoded;
601  if (!decodeDouble(token, decoded))
602  return false;
603  currentValue().swapPayload(decoded);
604  currentValue().setOffsetStart(token.start_ - begin_);
605  currentValue().setOffsetLimit(token.end_ - begin_);
606  return true;
607 }
608 
609 bool Reader::decodeDouble(Token& token, Value& decoded) {
610  double value = 0;
611  String buffer(token.start_, token.end_);
612  IStringStream is(buffer);
613  if (!(is >> value))
614  return addError(
615  "'" + String(token.start_, token.end_) + "' is not a number.", token);
616  decoded = value;
617  return true;
618 }
619 
620 bool Reader::decodeString(Token& token) {
621  String decoded_string;
622  if (!decodeString(token, decoded_string))
623  return false;
624  Value decoded(decoded_string);
625  currentValue().swapPayload(decoded);
626  currentValue().setOffsetStart(token.start_ - begin_);
627  currentValue().setOffsetLimit(token.end_ - begin_);
628  return true;
629 }
630 
631 bool Reader::decodeString(Token& token, String& decoded) {
632  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
633  Location current = token.start_ + 1; // skip '"'
634  Location end = token.end_ - 1; // do not include '"'
635  while (current != end) {
636  Char c = *current++;
637  if (c == '"')
638  break;
639  else if (c == '\\') {
640  if (current == end)
641  return addError("Empty escape sequence in string", token, current);
642  Char escape = *current++;
643  switch (escape) {
644  case '"':
645  decoded += '"';
646  break;
647  case '/':
648  decoded += '/';
649  break;
650  case '\\':
651  decoded += '\\';
652  break;
653  case 'b':
654  decoded += '\b';
655  break;
656  case 'f':
657  decoded += '\f';
658  break;
659  case 'n':
660  decoded += '\n';
661  break;
662  case 'r':
663  decoded += '\r';
664  break;
665  case 't':
666  decoded += '\t';
667  break;
668  case 'u': {
669  unsigned int unicode;
670  if (!decodeUnicodeCodePoint(token, current, end, unicode))
671  return false;
672  decoded += codePointToUTF8(unicode);
673  } break;
674  default:
675  return addError("Bad escape sequence in string", token, current);
676  }
677  } else {
678  decoded += c;
679  }
680  }
681  return true;
682 }
683 
684 bool Reader::decodeUnicodeCodePoint(Token& token,
685  Location& current,
686  Location end,
687  unsigned int& unicode) {
688 
689  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
690  return false;
691  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
692  // surrogate pairs
693  if (end - current < 6)
694  return addError(
695  "additional six characters expected to parse unicode surrogate pair.",
696  token, current);
697  if (*(current++) == '\\' && *(current++) == 'u') {
698  unsigned int surrogatePair;
699  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
700  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
701  } else
702  return false;
703  } else
704  return addError("expecting another \\u token to begin the second half of "
705  "a unicode surrogate pair",
706  token, current);
707  }
708  return true;
709 }
710 
711 bool Reader::decodeUnicodeEscapeSequence(Token& token,
712  Location& current,
713  Location end,
714  unsigned int& ret_unicode) {
715  if (end - current < 4)
716  return addError(
717  "Bad unicode escape sequence in string: four digits expected.", token,
718  current);
719  int unicode = 0;
720  for (int index = 0; index < 4; ++index) {
721  Char c = *current++;
722  unicode *= 16;
723  if (c >= '0' && c <= '9')
724  unicode += c - '0';
725  else if (c >= 'a' && c <= 'f')
726  unicode += c - 'a' + 10;
727  else if (c >= 'A' && c <= 'F')
728  unicode += c - 'A' + 10;
729  else
730  return addError(
731  "Bad unicode escape sequence in string: hexadecimal digit expected.",
732  token, current);
733  }
734  ret_unicode = static_cast<unsigned int>(unicode);
735  return true;
736 }
737 
738 bool Reader::addError(const String& message, Token& token, Location extra) {
739  ErrorInfo info;
740  info.token_ = token;
741  info.message_ = message;
742  info.extra_ = extra;
743  errors_.push_back(info);
744  return false;
745 }
746 
747 bool Reader::recoverFromError(TokenType skipUntilToken) {
748  size_t const errorCount = errors_.size();
749  Token skip;
750  for (;;) {
751  if (!readToken(skip))
752  errors_.resize(errorCount); // discard errors caused by recovery
753  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
754  break;
755  }
756  errors_.resize(errorCount);
757  return false;
758 }
759 
760 bool Reader::addErrorAndRecover(const String& message,
761  Token& token,
762  TokenType skipUntilToken) {
763  addError(message, token);
764  return recoverFromError(skipUntilToken);
765 }
766 
767 Value& Reader::currentValue() { return *(nodes_.top()); }
768 
769 Reader::Char Reader::getNextChar() {
770  if (current_ == end_)
771  return 0;
772  return *current_++;
773 }
774 
775 void Reader::getLocationLineAndColumn(Location location,
776  int& line,
777  int& column) const {
778  Location current = begin_;
779  Location lastLineStart = current;
780  line = 0;
781  while (current < location && current != end_) {
782  Char c = *current++;
783  if (c == '\r') {
784  if (*current == '\n')
785  ++current;
786  lastLineStart = current;
787  ++line;
788  } else if (c == '\n') {
789  lastLineStart = current;
790  ++line;
791  }
792  }
793  // column & line start at 1
794  column = int(location - lastLineStart) + 1;
795  ++line;
796 }
797 
798 String Reader::getLocationLineAndColumn(Location location) const {
799  int line, column;
800  getLocationLineAndColumn(location, line, column);
801  char buffer[18 + 16 + 16 + 1];
802  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
803  return buffer;
804 }
805 
806 // Deprecated. Preserved for backward compatibility
807 String Reader::getFormatedErrorMessages() const {
808  return getFormattedErrorMessages();
809 }
810 
812  String formattedMessage;
813  for (const auto& error : errors_) {
814  formattedMessage +=
815  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
816  formattedMessage += " " + error.message_ + "\n";
817  if (error.extra_)
818  formattedMessage +=
819  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
820  }
821  return formattedMessage;
822 }
823 
824 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
825  std::vector<Reader::StructuredError> allErrors;
826  for (const auto& error : errors_) {
827  Reader::StructuredError structured;
828  structured.offset_start = error.token_.start_ - begin_;
829  structured.offset_limit = error.token_.end_ - begin_;
830  structured.message = error.message_;
831  allErrors.push_back(structured);
832  }
833  return allErrors;
834 }
835 
836 bool Reader::pushError(const Value& value, const String& message) {
837  ptrdiff_t const length = end_ - begin_;
838  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
839  return false;
840  Token token;
841  token.type_ = tokenError;
842  token.start_ = begin_ + value.getOffsetStart();
843  token.end_ = begin_ + value.getOffsetLimit();
844  ErrorInfo info;
845  info.token_ = token;
846  info.message_ = message;
847  info.extra_ = nullptr;
848  errors_.push_back(info);
849  return true;
850 }
851 
852 bool Reader::pushError(const Value& value,
853  const String& message,
854  const Value& extra) {
855  ptrdiff_t const length = end_ - begin_;
856  if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
857  extra.getOffsetLimit() > length)
858  return false;
859  Token token;
860  token.type_ = tokenError;
861  token.start_ = begin_ + value.getOffsetStart();
862  token.end_ = begin_ + value.getOffsetLimit();
863  ErrorInfo info;
864  info.token_ = token;
865  info.message_ = message;
866  info.extra_ = begin_ + extra.getOffsetStart();
867  errors_.push_back(info);
868  return true;
869 }
870 
871 bool Reader::good() const { return errors_.empty(); }
872 
873 // Originally copied from the Features class (now deprecated), used internally
874 // for features implementation.
// Feature switches used internally by OurReader.
// Originally copied from the Features class (now deprecated).
//
// Every member carries a default initializer so that a default-initialized
// instance is fully defined; the defaults match what value-initialization
// (as done by all()) produces.
class OurFeatures {
public:
  static OurFeatures all();
  bool allowComments_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  // Maximum nodes_ depth accepted by OurReader::readValue().
  size_t stackLimit_ = 0;
}; // OurFeatures
888 
889 OurFeatures OurFeatures::all() { return {}; }
890 
891 // Implementation of class Reader
892 // ////////////////////////////////
893 
894 // Originally copied from the Reader class (now deprecated), used internally
895 // for implementing JSON reading.
896 class OurReader {
897 public:
898  typedef char Char;
899  typedef const Char* Location;
900  struct StructuredError {
901  ptrdiff_t offset_start;
902  ptrdiff_t offset_limit;
903  String message;
904  };
905 
906  OurReader(OurFeatures const& features);
907  bool parse(const char* beginDoc,
908  const char* endDoc,
909  Value& root,
910  bool collectComments = true);
911  String getFormattedErrorMessages() const;
912  std::vector<StructuredError> getStructuredErrors() const;
913  bool pushError(const Value& value, const String& message);
914  bool pushError(const Value& value, const String& message, const Value& extra);
915  bool good() const;
916 
917 private:
918  OurReader(OurReader const&); // no impl
919  void operator=(OurReader const&); // no impl
920 
921  enum TokenType {
922  tokenEndOfStream = 0,
923  tokenObjectBegin,
924  tokenObjectEnd,
925  tokenArrayBegin,
926  tokenArrayEnd,
927  tokenString,
928  tokenNumber,
929  tokenTrue,
930  tokenFalse,
931  tokenNull,
932  tokenNaN,
933  tokenPosInf,
934  tokenNegInf,
935  tokenArraySeparator,
936  tokenMemberSeparator,
937  tokenComment,
938  tokenError
939  };
940 
941  class Token {
942  public:
943  TokenType type_;
944  Location start_;
945  Location end_;
946  };
947 
948  class ErrorInfo {
949  public:
950  Token token_;
951  String message_;
952  Location extra_;
953  };
954 
955  typedef std::deque<ErrorInfo> Errors;
956 
957  bool readToken(Token& token);
958  void skipSpaces();
959  bool match(Location pattern, int patternLength);
960  bool readComment();
961  bool readCStyleComment();
962  bool readCppStyleComment();
963  bool readString();
964  bool readStringSingleQuote();
965  bool readNumber(bool checkInf);
966  bool readValue();
967  bool readObject(Token& token);
968  bool readArray(Token& token);
969  bool decodeNumber(Token& token);
970  bool decodeNumber(Token& token, Value& decoded);
971  bool decodeString(Token& token);
972  bool decodeString(Token& token, String& decoded);
973  bool decodeDouble(Token& token);
974  bool decodeDouble(Token& token, Value& decoded);
975  bool decodeUnicodeCodePoint(Token& token,
976  Location& current,
977  Location end,
978  unsigned int& unicode);
979  bool decodeUnicodeEscapeSequence(Token& token,
980  Location& current,
981  Location end,
982  unsigned int& unicode);
983  bool addError(const String& message, Token& token, Location extra = nullptr);
984  bool recoverFromError(TokenType skipUntilToken);
985  bool addErrorAndRecover(const String& message,
986  Token& token,
987  TokenType skipUntilToken);
988  void skipUntilSpace();
989  Value& currentValue();
990  Char getNextChar();
991  void
992  getLocationLineAndColumn(Location location, int& line, int& column) const;
993  String getLocationLineAndColumn(Location location) const;
994  void addComment(Location begin, Location end, CommentPlacement placement);
995  void skipCommentTokens(Token& token);
996 
997  static String normalizeEOL(Location begin, Location end);
998  static bool containsNewLine(Location begin, Location end);
999 
1000  typedef std::stack<Value*> Nodes;
1001  Nodes nodes_;
1002  Errors errors_;
1003  String document_;
1004  Location begin_;
1005  Location end_;
1006  Location current_;
1007  Location lastValueEnd_;
1008  Value* lastValue_;
1009  String commentsBefore_;
1010 
1011  OurFeatures const features_;
1012  bool collectComments_;
1013 }; // OurReader
1014 
1015 // complete copy of Read impl, for OurReader
1016 
1017 bool OurReader::containsNewLine(OurReader::Location begin,
1018  OurReader::Location end) {
1019  for (; begin < end; ++begin)
1020  if (*begin == '\n' || *begin == '\r')
1021  return true;
1022  return false;
1023 }
1024 
1025 OurReader::OurReader(OurFeatures const& features)
1026  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
1027  lastValue_(), commentsBefore_(), features_(features), collectComments_() {
1028 }
1029 
1030 bool OurReader::parse(const char* beginDoc,
1031  const char* endDoc,
1032  Value& root,
1033  bool collectComments) {
1034  if (!features_.allowComments_) {
1035  collectComments = false;
1036  }
1037 
1038  begin_ = beginDoc;
1039  end_ = endDoc;
1040  collectComments_ = collectComments;
1041  current_ = begin_;
1042  lastValueEnd_ = nullptr;
1043  lastValue_ = nullptr;
1044  commentsBefore_.clear();
1045  errors_.clear();
1046  while (!nodes_.empty())
1047  nodes_.pop();
1048  nodes_.push(&root);
1049 
1050  bool successful = readValue();
1051  nodes_.pop();
1052  Token token;
1053  skipCommentTokens(token);
1054  if (features_.failIfExtra_) {
1055  if ((features_.strictRoot_ || token.type_ != tokenError) &&
1056  token.type_ != tokenEndOfStream) {
1057  addError("Extra non-whitespace after JSON value.", token);
1058  return false;
1059  }
1060  }
1061  if (collectComments_ && !commentsBefore_.empty())
1062  root.setComment(commentsBefore_, commentAfter);
1063  if (features_.strictRoot_) {
1064  if (!root.isArray() && !root.isObject()) {
1065  // Set error location to start of doc, ideally should be first token found
1066  // in doc
1067  token.type_ = tokenError;
1068  token.start_ = beginDoc;
1069  token.end_ = endDoc;
1070  addError(
1071  "A valid JSON document must be either an array or an object value.",
1072  token);
1073  return false;
1074  }
1075  }
1076  return successful;
1077 }
1078 
1079 bool OurReader::readValue() {
1080  // To preserve the old behaviour we cast size_t to int.
1081  if (nodes_.size() > features_.stackLimit_)
1082  throwRuntimeError("Exceeded stackLimit in readValue().");
1083  Token token;
1084  skipCommentTokens(token);
1085  bool successful = true;
1086 
1087  if (collectComments_ && !commentsBefore_.empty()) {
1088  currentValue().setComment(commentsBefore_, commentBefore);
1089  commentsBefore_.clear();
1090  }
1091 
1092  switch (token.type_) {
1093  case tokenObjectBegin:
1094  successful = readObject(token);
1095  currentValue().setOffsetLimit(current_ - begin_);
1096  break;
1097  case tokenArrayBegin:
1098  successful = readArray(token);
1099  currentValue().setOffsetLimit(current_ - begin_);
1100  break;
1101  case tokenNumber:
1102  successful = decodeNumber(token);
1103  break;
1104  case tokenString:
1105  successful = decodeString(token);
1106  break;
1107  case tokenTrue: {
1108  Value v(true);
1109  currentValue().swapPayload(v);
1110  currentValue().setOffsetStart(token.start_ - begin_);
1111  currentValue().setOffsetLimit(token.end_ - begin_);
1112  } break;
1113  case tokenFalse: {
1114  Value v(false);
1115  currentValue().swapPayload(v);
1116  currentValue().setOffsetStart(token.start_ - begin_);
1117  currentValue().setOffsetLimit(token.end_ - begin_);
1118  } break;
1119  case tokenNull: {
1120  Value v;
1121  currentValue().swapPayload(v);
1122  currentValue().setOffsetStart(token.start_ - begin_);
1123  currentValue().setOffsetLimit(token.end_ - begin_);
1124  } break;
1125  case tokenNaN: {
1126  Value v(std::numeric_limits<double>::quiet_NaN());
1127  currentValue().swapPayload(v);
1128  currentValue().setOffsetStart(token.start_ - begin_);
1129  currentValue().setOffsetLimit(token.end_ - begin_);
1130  } break;
1131  case tokenPosInf: {
1132  Value v(std::numeric_limits<double>::infinity());
1133  currentValue().swapPayload(v);
1134  currentValue().setOffsetStart(token.start_ - begin_);
1135  currentValue().setOffsetLimit(token.end_ - begin_);
1136  } break;
1137  case tokenNegInf: {
1138  Value v(-std::numeric_limits<double>::infinity());
1139  currentValue().swapPayload(v);
1140  currentValue().setOffsetStart(token.start_ - begin_);
1141  currentValue().setOffsetLimit(token.end_ - begin_);
1142  } break;
1143  case tokenArraySeparator:
1144  case tokenObjectEnd:
1145  case tokenArrayEnd:
1146  if (features_.allowDroppedNullPlaceholders_) {
1147  // "Un-read" the current token and mark the current value as a null
1148  // token.
1149  current_--;
1150  Value v;
1151  currentValue().swapPayload(v);
1152  currentValue().setOffsetStart(current_ - begin_ - 1);
1153  currentValue().setOffsetLimit(current_ - begin_);
1154  break;
1155  } // else, fall through ...
1156  default:
1157  currentValue().setOffsetStart(token.start_ - begin_);
1158  currentValue().setOffsetLimit(token.end_ - begin_);
1159  return addError("Syntax error: value, object or array expected.", token);
1160  }
1161 
1162  if (collectComments_) {
1163  lastValueEnd_ = current_;
1164  lastValue_ = &currentValue();
1165  }
1166 
1167  return successful;
1168 }
1169 
1170 void OurReader::skipCommentTokens(Token& token) {
1171  if (features_.allowComments_) {
1172  do {
1173  readToken(token);
1174  } while (token.type_ == tokenComment);
1175  } else {
1176  readToken(token);
1177  }
1178 }
1179 
1180 bool OurReader::readToken(Token& token) {
1181  skipSpaces();
1182  token.start_ = current_;
1183  Char c = getNextChar();
1184  bool ok = true;
1185  switch (c) {
1186  case '{':
1187  token.type_ = tokenObjectBegin;
1188  break;
1189  case '}':
1190  token.type_ = tokenObjectEnd;
1191  break;
1192  case '[':
1193  token.type_ = tokenArrayBegin;
1194  break;
1195  case ']':
1196  token.type_ = tokenArrayEnd;
1197  break;
1198  case '"':
1199  token.type_ = tokenString;
1200  ok = readString();
1201  break;
1202  case '\'':
1203  if (features_.allowSingleQuotes_) {
1204  token.type_ = tokenString;
1205  ok = readStringSingleQuote();
1206  break;
1207  } // else fall through
1208  case '/':
1209  token.type_ = tokenComment;
1210  ok = readComment();
1211  break;
1212  case '0':
1213  case '1':
1214  case '2':
1215  case '3':
1216  case '4':
1217  case '5':
1218  case '6':
1219  case '7':
1220  case '8':
1221  case '9':
1222  token.type_ = tokenNumber;
1223  readNumber(false);
1224  break;
1225  case '-':
1226  if (readNumber(true)) {
1227  token.type_ = tokenNumber;
1228  } else {
1229  token.type_ = tokenNegInf;
1230  ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1231  }
1232  break;
1233  case 't':
1234  token.type_ = tokenTrue;
1235  ok = match("rue", 3);
1236  break;
1237  case 'f':
1238  token.type_ = tokenFalse;
1239  ok = match("alse", 4);
1240  break;
1241  case 'n':
1242  token.type_ = tokenNull;
1243  ok = match("ull", 3);
1244  break;
1245  case 'N':
1246  if (features_.allowSpecialFloats_) {
1247  token.type_ = tokenNaN;
1248  ok = match("aN", 2);
1249  } else {
1250  ok = false;
1251  }
1252  break;
1253  case 'I':
1254  if (features_.allowSpecialFloats_) {
1255  token.type_ = tokenPosInf;
1256  ok = match("nfinity", 7);
1257  } else {
1258  ok = false;
1259  }
1260  break;
1261  case ',':
1262  token.type_ = tokenArraySeparator;
1263  break;
1264  case ':':
1265  token.type_ = tokenMemberSeparator;
1266  break;
1267  case 0:
1268  token.type_ = tokenEndOfStream;
1269  break;
1270  default:
1271  ok = false;
1272  break;
1273  }
1274  if (!ok)
1275  token.type_ = tokenError;
1276  token.end_ = current_;
1277  return true;
1278 }
1279 
1280 void OurReader::skipSpaces() {
1281  while (current_ != end_) {
1282  Char c = *current_;
1283  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1284  ++current_;
1285  else
1286  break;
1287  }
1288 }
1289 
1290 bool OurReader::match(Location pattern, int patternLength) {
1291  if (end_ - current_ < patternLength)
1292  return false;
1293  int index = patternLength;
1294  while (index--)
1295  if (current_[index] != pattern[index])
1296  return false;
1297  current_ += patternLength;
1298  return true;
1299 }
1300 
1301 bool OurReader::readComment() {
1302  Location commentBegin = current_ - 1;
1303  Char c = getNextChar();
1304  bool successful = false;
1305  if (c == '*')
1306  successful = readCStyleComment();
1307  else if (c == '/')
1308  successful = readCppStyleComment();
1309  if (!successful)
1310  return false;
1311 
1312  if (collectComments_) {
1313  CommentPlacement placement = commentBefore;
1314  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1315  if (c != '*' || !containsNewLine(commentBegin, current_))
1316  placement = commentAfterOnSameLine;
1317  }
1318 
1319  addComment(commentBegin, current_, placement);
1320  }
1321  return true;
1322 }
1323 
1324 String OurReader::normalizeEOL(OurReader::Location begin,
1325  OurReader::Location end) {
1326  String normalized;
1327  normalized.reserve(static_cast<size_t>(end - begin));
1328  OurReader::Location current = begin;
1329  while (current != end) {
1330  char c = *current++;
1331  if (c == '\r') {
1332  if (current != end && *current == '\n')
1333  // convert dos EOL
1334  ++current;
1335  // convert Mac EOL
1336  normalized += '\n';
1337  } else {
1338  normalized += c;
1339  }
1340  }
1341  return normalized;
1342 }
1343 
1344 void OurReader::addComment(Location begin,
1345  Location end,
1346  CommentPlacement placement) {
1347  assert(collectComments_);
1348  const String& normalized = normalizeEOL(begin, end);
1349  if (placement == commentAfterOnSameLine) {
1350  assert(lastValue_ != nullptr);
1351  lastValue_->setComment(normalized, placement);
1352  } else {
1353  commentsBefore_ += normalized;
1354  }
1355 }
1356 
1357 bool OurReader::readCStyleComment() {
1358  while ((current_ + 1) < end_) {
1359  Char c = getNextChar();
1360  if (c == '*' && *current_ == '/')
1361  break;
1362  }
1363  return getNextChar() == '/';
1364 }
1365 
1366 bool OurReader::readCppStyleComment() {
1367  while (current_ != end_) {
1368  Char c = getNextChar();
1369  if (c == '\n')
1370  break;
1371  if (c == '\r') {
1372  // Consume DOS EOL. It will be normalized in addComment.
1373  if (current_ != end_ && *current_ == '\n')
1374  getNextChar();
1375  // Break on Moc OS 9 EOL.
1376  break;
1377  }
1378  }
1379  return true;
1380 }
1381 
1382 bool OurReader::readNumber(bool checkInf) {
1383  const char* p = current_;
1384  if (checkInf && p != end_ && *p == 'I') {
1385  current_ = ++p;
1386  return false;
1387  }
1388  char c = '0'; // stopgap for already consumed character
1389  // integral part
1390  while (c >= '0' && c <= '9')
1391  c = (current_ = p) < end_ ? *p++ : '\0';
1392  // fractional part
1393  if (c == '.') {
1394  c = (current_ = p) < end_ ? *p++ : '\0';
1395  while (c >= '0' && c <= '9')
1396  c = (current_ = p) < end_ ? *p++ : '\0';
1397  }
1398  // exponential part
1399  if (c == 'e' || c == 'E') {
1400  c = (current_ = p) < end_ ? *p++ : '\0';
1401  if (c == '+' || c == '-')
1402  c = (current_ = p) < end_ ? *p++ : '\0';
1403  while (c >= '0' && c <= '9')
1404  c = (current_ = p) < end_ ? *p++ : '\0';
1405  }
1406  return true;
1407 }
1408 bool OurReader::readString() {
1409  Char c = 0;
1410  while (current_ != end_) {
1411  c = getNextChar();
1412  if (c == '\\')
1413  getNextChar();
1414  else if (c == '"')
1415  break;
1416  }
1417  return c == '"';
1418 }
1419 
1420 bool OurReader::readStringSingleQuote() {
1421  Char c = 0;
1422  while (current_ != end_) {
1423  c = getNextChar();
1424  if (c == '\\')
1425  getNextChar();
1426  else if (c == '\'')
1427  break;
1428  }
1429  return c == '\'';
1430 }
1431 
1432 bool OurReader::readObject(Token& token) {
1433  Token tokenName;
1434  String name;
1435  Value init(objectValue);
1436  currentValue().swapPayload(init);
1437  currentValue().setOffsetStart(token.start_ - begin_);
1438  while (readToken(tokenName)) {
1439  bool initialTokenOk = true;
1440  while (tokenName.type_ == tokenComment && initialTokenOk)
1441  initialTokenOk = readToken(tokenName);
1442  if (!initialTokenOk)
1443  break;
1444  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
1445  return true;
1446  name.clear();
1447  if (tokenName.type_ == tokenString) {
1448  if (!decodeString(tokenName, name))
1449  return recoverFromError(tokenObjectEnd);
1450  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1451  Value numberName;
1452  if (!decodeNumber(tokenName, numberName))
1453  return recoverFromError(tokenObjectEnd);
1454  name = numberName.asString();
1455  } else {
1456  break;
1457  }
1458  if (name.length() >= (1U << 30))
1459  throwRuntimeError("keylength >= 2^30");
1460  if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1461  String msg = "Duplicate key: '" + name + "'";
1462  return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1463  }
1464 
1465  Token colon;
1466  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1467  return addErrorAndRecover("Missing ':' after object member name", colon,
1468  tokenObjectEnd);
1469  }
1470  Value& value = currentValue()[name];
1471  nodes_.push(&value);
1472  bool ok = readValue();
1473  nodes_.pop();
1474  if (!ok) // error already set
1475  return recoverFromError(tokenObjectEnd);
1476 
1477  Token comma;
1478  if (!readToken(comma) ||
1479  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1480  comma.type_ != tokenComment)) {
1481  return addErrorAndRecover("Missing ',' or '}' in object declaration",
1482  comma, tokenObjectEnd);
1483  }
1484  bool finalizeTokenOk = true;
1485  while (comma.type_ == tokenComment && finalizeTokenOk)
1486  finalizeTokenOk = readToken(comma);
1487  if (comma.type_ == tokenObjectEnd)
1488  return true;
1489  }
1490  return addErrorAndRecover("Missing '}' or object member name", tokenName,
1491  tokenObjectEnd);
1492 }
1493 
1494 bool OurReader::readArray(Token& token) {
1495  Value init(arrayValue);
1496  currentValue().swapPayload(init);
1497  currentValue().setOffsetStart(token.start_ - begin_);
1498  skipSpaces();
1499  if (current_ != end_ && *current_ == ']') // empty array
1500  {
1501  Token endArray;
1502  readToken(endArray);
1503  return true;
1504  }
1505  int index = 0;
1506  for (;;) {
1507  Value& value = currentValue()[index++];
1508  nodes_.push(&value);
1509  bool ok = readValue();
1510  nodes_.pop();
1511  if (!ok) // error already set
1512  return recoverFromError(tokenArrayEnd);
1513 
1514  Token currentToken;
1515  // Accept Comment after last item in the array.
1516  ok = readToken(currentToken);
1517  while (currentToken.type_ == tokenComment && ok) {
1518  ok = readToken(currentToken);
1519  }
1520  bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1521  currentToken.type_ != tokenArrayEnd);
1522  if (!ok || badTokenType) {
1523  return addErrorAndRecover("Missing ',' or ']' in array declaration",
1524  currentToken, tokenArrayEnd);
1525  }
1526  if (currentToken.type_ == tokenArrayEnd)
1527  break;
1528  }
1529  return true;
1530 }
1531 
1532 bool OurReader::decodeNumber(Token& token) {
1533  Value decoded;
1534  if (!decodeNumber(token, decoded))
1535  return false;
1536  currentValue().swapPayload(decoded);
1537  currentValue().setOffsetStart(token.start_ - begin_);
1538  currentValue().setOffsetLimit(token.end_ - begin_);
1539  return true;
1540 }
1541 
1542 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1543  // Attempts to parse the number as an integer. If the number is
1544  // larger than the maximum supported value of an integer then
1545  // we decode the number as a double.
1546  Location current = token.start_;
1547  bool isNegative = *current == '-';
1548  if (isNegative)
1549  ++current;
1550 
1551  // TODO(issue #960): Change to constexpr
1552  static const auto positive_threshold = Value::maxLargestUInt / 10;
1553  static const auto positive_last_digit = Value::maxLargestUInt % 10;
1554  static const auto negative_threshold =
1556  static const auto negative_last_digit =
1558 
1559  const auto threshold = isNegative ? negative_threshold : positive_threshold;
1560  const auto last_digit =
1561  isNegative ? negative_last_digit : positive_last_digit;
1562 
1563  Value::LargestUInt value = 0;
1564  while (current < token.end_) {
1565  Char c = *current++;
1566  if (c < '0' || c > '9')
1567  return decodeDouble(token, decoded);
1568 
1569  const auto digit(static_cast<Value::UInt>(c - '0'));
1570  if (value >= threshold) {
1571  // We've hit or exceeded the max value divided by 10 (rounded down). If
1572  // a) we've only just touched the limit, meaning value == threshold,
1573  // b) this is the last digit, or
1574  // c) it's small enough to fit in that rounding delta, we're okay.
1575  // Otherwise treat this number as a double to avoid overflow.
1576  if (value > threshold || current != token.end_ || digit > last_digit) {
1577  return decodeDouble(token, decoded);
1578  }
1579  }
1580  value = value * 10 + digit;
1581  }
1582 
1583  if (isNegative)
1584  decoded = -Value::LargestInt(value);
1585  else if (value <= Value::LargestUInt(Value::maxLargestInt))
1586  decoded = Value::LargestInt(value);
1587  else
1588  decoded = value;
1589 
1590  return true;
1591 }
1592 
1593 bool OurReader::decodeDouble(Token& token) {
1594  Value decoded;
1595  if (!decodeDouble(token, decoded))
1596  return false;
1597  currentValue().swapPayload(decoded);
1598  currentValue().setOffsetStart(token.start_ - begin_);
1599  currentValue().setOffsetLimit(token.end_ - begin_);
1600  return true;
1601 }
1602 
1603 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1604  double value = 0;
1605  const int bufferSize = 32;
1606  int count;
1607  ptrdiff_t const length = token.end_ - token.start_;
1608 
1609  // Sanity check to avoid buffer overflow exploits.
1610  if (length < 0) {
1611  return addError("Unable to parse token length", token);
1612  }
1613  auto const ulength = static_cast<size_t>(length);
1614 
1615  // Avoid using a string constant for the format control string given to
1616  // sscanf, as this can cause hard to debug crashes on OS X. See here for more
1617  // info:
1618  //
1619  // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
1620  char format[] = "%lf";
1621 
1622  if (length <= bufferSize) {
1623  Char buffer[bufferSize + 1];
1624  memcpy(buffer, token.start_, ulength);
1625  buffer[length] = 0;
1626  fixNumericLocaleInput(buffer, buffer + length);
1627  count = sscanf(buffer, format, &value);
1628  } else {
1629  String buffer(token.start_, token.end_);
1630  count = sscanf(buffer.c_str(), format, &value);
1631  }
1632 
1633  if (count != 1)
1634  return addError(
1635  "'" + String(token.start_, token.end_) + "' is not a number.", token);
1636  decoded = value;
1637  return true;
1638 }
1639 
1640 bool OurReader::decodeString(Token& token) {
1641  String decoded_string;
1642  if (!decodeString(token, decoded_string))
1643  return false;
1644  Value decoded(decoded_string);
1645  currentValue().swapPayload(decoded);
1646  currentValue().setOffsetStart(token.start_ - begin_);
1647  currentValue().setOffsetLimit(token.end_ - begin_);
1648  return true;
1649 }
1650 
1651 bool OurReader::decodeString(Token& token, String& decoded) {
1652  decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1653  Location current = token.start_ + 1; // skip '"'
1654  Location end = token.end_ - 1; // do not include '"'
1655  while (current != end) {
1656  Char c = *current++;
1657  if (c == '"')
1658  break;
1659  else if (c == '\\') {
1660  if (current == end)
1661  return addError("Empty escape sequence in string", token, current);
1662  Char escape = *current++;
1663  switch (escape) {
1664  case '"':
1665  decoded += '"';
1666  break;
1667  case '/':
1668  decoded += '/';
1669  break;
1670  case '\\':
1671  decoded += '\\';
1672  break;
1673  case 'b':
1674  decoded += '\b';
1675  break;
1676  case 'f':
1677  decoded += '\f';
1678  break;
1679  case 'n':
1680  decoded += '\n';
1681  break;
1682  case 'r':
1683  decoded += '\r';
1684  break;
1685  case 't':
1686  decoded += '\t';
1687  break;
1688  case 'u': {
1689  unsigned int unicode;
1690  if (!decodeUnicodeCodePoint(token, current, end, unicode))
1691  return false;
1692  decoded += codePointToUTF8(unicode);
1693  } break;
1694  default:
1695  return addError("Bad escape sequence in string", token, current);
1696  }
1697  } else {
1698  decoded += c;
1699  }
1700  }
1701  return true;
1702 }
1703 
1704 bool OurReader::decodeUnicodeCodePoint(Token& token,
1705  Location& current,
1706  Location end,
1707  unsigned int& unicode) {
1708 
1709  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1710  return false;
1711  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1712  // surrogate pairs
1713  if (end - current < 6)
1714  return addError(
1715  "additional six characters expected to parse unicode surrogate pair.",
1716  token, current);
1717  if (*(current++) == '\\' && *(current++) == 'u') {
1718  unsigned int surrogatePair;
1719  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1720  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1721  } else
1722  return false;
1723  } else
1724  return addError("expecting another \\u token to begin the second half of "
1725  "a unicode surrogate pair",
1726  token, current);
1727  }
1728  return true;
1729 }
1730 
1731 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
1732  Location& current,
1733  Location end,
1734  unsigned int& ret_unicode) {
1735  if (end - current < 4)
1736  return addError(
1737  "Bad unicode escape sequence in string: four digits expected.", token,
1738  current);
1739  int unicode = 0;
1740  for (int index = 0; index < 4; ++index) {
1741  Char c = *current++;
1742  unicode *= 16;
1743  if (c >= '0' && c <= '9')
1744  unicode += c - '0';
1745  else if (c >= 'a' && c <= 'f')
1746  unicode += c - 'a' + 10;
1747  else if (c >= 'A' && c <= 'F')
1748  unicode += c - 'A' + 10;
1749  else
1750  return addError(
1751  "Bad unicode escape sequence in string: hexadecimal digit expected.",
1752  token, current);
1753  }
1754  ret_unicode = static_cast<unsigned int>(unicode);
1755  return true;
1756 }
1757 
1758 bool OurReader::addError(const String& message, Token& token, Location extra) {
1759  ErrorInfo info;
1760  info.token_ = token;
1761  info.message_ = message;
1762  info.extra_ = extra;
1763  errors_.push_back(info);
1764  return false;
1765 }
1766 
1767 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1768  size_t errorCount = errors_.size();
1769  Token skip;
1770  for (;;) {
1771  if (!readToken(skip))
1772  errors_.resize(errorCount); // discard errors caused by recovery
1773  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1774  break;
1775  }
1776  errors_.resize(errorCount);
1777  return false;
1778 }
1779 
1780 bool OurReader::addErrorAndRecover(const String& message,
1781  Token& token,
1782  TokenType skipUntilToken) {
1783  addError(message, token);
1784  return recoverFromError(skipUntilToken);
1785 }
1786 
1787 Value& OurReader::currentValue() { return *(nodes_.top()); }
1788 
1789 OurReader::Char OurReader::getNextChar() {
1790  if (current_ == end_)
1791  return 0;
1792  return *current_++;
1793 }
1794 
1795 void OurReader::getLocationLineAndColumn(Location location,
1796  int& line,
1797  int& column) const {
1798  Location current = begin_;
1799  Location lastLineStart = current;
1800  line = 0;
1801  while (current < location && current != end_) {
1802  Char c = *current++;
1803  if (c == '\r') {
1804  if (*current == '\n')
1805  ++current;
1806  lastLineStart = current;
1807  ++line;
1808  } else if (c == '\n') {
1809  lastLineStart = current;
1810  ++line;
1811  }
1812  }
1813  // column & line start at 1
1814  column = int(location - lastLineStart) + 1;
1815  ++line;
1816 }
1817 
1818 String OurReader::getLocationLineAndColumn(Location location) const {
1819  int line, column;
1820  getLocationLineAndColumn(location, line, column);
1821  char buffer[18 + 16 + 16 + 1];
1822  jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1823  return buffer;
1824 }
1825 
1826 String OurReader::getFormattedErrorMessages() const {
1827  String formattedMessage;
1828  for (const auto& error : errors_) {
1829  formattedMessage +=
1830  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1831  formattedMessage += " " + error.message_ + "\n";
1832  if (error.extra_)
1833  formattedMessage +=
1834  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1835  }
1836  return formattedMessage;
1837 }
1838 
1839 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1840  std::vector<OurReader::StructuredError> allErrors;
1841  for (const auto& error : errors_) {
1842  OurReader::StructuredError structured;
1843  structured.offset_start = error.token_.start_ - begin_;
1844  structured.offset_limit = error.token_.end_ - begin_;
1845  structured.message = error.message_;
1846  allErrors.push_back(structured);
1847  }
1848  return allErrors;
1849 }
1850 
1851 bool OurReader::pushError(const Value& value, const String& message) {
1852  ptrdiff_t length = end_ - begin_;
1853  if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
1854  return false;
1855  Token token;
1856  token.type_ = tokenError;
1857  token.start_ = begin_ + value.getOffsetStart();
1858  token.end_ = begin_ + value.getOffsetLimit();
1859  ErrorInfo info;
1860  info.token_ = token;
1861  info.message_ = message;
1862  info.extra_ = nullptr;
1863  errors_.push_back(info);
1864  return true;
1865 }
1866 
1867 bool OurReader::pushError(const Value& value,
1868  const String& message,
1869  const Value& extra) {
1870  ptrdiff_t length = end_ - begin_;
1871  if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
1872  extra.getOffsetLimit() > length)
1873  return false;
1874  Token token;
1875  token.type_ = tokenError;
1876  token.start_ = begin_ + value.getOffsetStart();
1877  token.end_ = begin_ + value.getOffsetLimit();
1878  ErrorInfo info;
1879  info.token_ = token;
1880  info.message_ = message;
1881  info.extra_ = begin_ + extra.getOffsetStart();
1882  errors_.push_back(info);
1883  return true;
1884 }
1885 
1886 bool OurReader::good() const { return errors_.empty(); }
1887 
1888 class OurCharReader : public CharReader {
1889  bool const collectComments_;
1890  OurReader reader_;
1891 
1892 public:
1893  OurCharReader(bool collectComments, OurFeatures const& features)
1894  : collectComments_(collectComments), reader_(features) {}
1895  bool parse(char const* beginDoc,
1896  char const* endDoc,
1897  Value* root,
1898  String* errs) override {
1899  bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1900  if (errs) {
1901  *errs = reader_.getFormattedErrorMessages();
1902  }
1903  return ok;
1904  }
1905 };
1906 
1910  bool collectComments = settings_["collectComments"].asBool();
1911  OurFeatures features = OurFeatures::all();
1912  features.allowComments_ = settings_["allowComments"].asBool();
1913  features.strictRoot_ = settings_["strictRoot"].asBool();
1914  features.allowDroppedNullPlaceholders_ =
1915  settings_["allowDroppedNullPlaceholders"].asBool();
1916  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1917  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1918 
1919  // Stack limit is always a size_t, so we get this as an unsigned int
1920  // regardless of it we have 64-bit integer support enabled.
1921  features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1922  features.failIfExtra_ = settings_["failIfExtra"].asBool();
1923  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1924  features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1925  return new OurCharReader(collectComments, features);
1926 }
1927 static void getValidReaderKeys(std::set<String>* valid_keys) {
1928  valid_keys->clear();
1929  valid_keys->insert("collectComments");
1930  valid_keys->insert("allowComments");
1931  valid_keys->insert("strictRoot");
1932  valid_keys->insert("allowDroppedNullPlaceholders");
1933  valid_keys->insert("allowNumericKeys");
1934  valid_keys->insert("allowSingleQuotes");
1935  valid_keys->insert("stackLimit");
1936  valid_keys->insert("failIfExtra");
1937  valid_keys->insert("rejectDupKeys");
1938  valid_keys->insert("allowSpecialFloats");
1939 }
1941  Json::Value my_invalid;
1942  if (!invalid)
1943  invalid = &my_invalid; // so we do not need to test for NULL
1944  Json::Value& inv = *invalid;
1945  std::set<String> valid_keys;
1946  getValidReaderKeys(&valid_keys);
1948  size_t n = keys.size();
1949  for (size_t i = 0; i < n; ++i) {
1950  String const& key = keys[i];
1951  if (valid_keys.find(key) == valid_keys.end()) {
1952  inv[key] = settings_[key];
1953  }
1954  }
1955  return inv.empty();
1956 }
1958  return settings_[key];
1959 }
1960 // static
1963  (*settings)["allowComments"] = false;
1964  (*settings)["strictRoot"] = true;
1965  (*settings)["allowDroppedNullPlaceholders"] = false;
1966  (*settings)["allowNumericKeys"] = false;
1967  (*settings)["allowSingleQuotes"] = false;
1968  (*settings)["stackLimit"] = 1000;
1969  (*settings)["failIfExtra"] = true;
1970  (*settings)["rejectDupKeys"] = true;
1971  (*settings)["allowSpecialFloats"] = false;
1973 }
1974 // static
1977  (*settings)["collectComments"] = true;
1978  (*settings)["allowComments"] = true;
1979  (*settings)["strictRoot"] = false;
1980  (*settings)["allowDroppedNullPlaceholders"] = false;
1981  (*settings)["allowNumericKeys"] = false;
1982  (*settings)["allowSingleQuotes"] = false;
1983  (*settings)["stackLimit"] = 1000;
1984  (*settings)["failIfExtra"] = false;
1985  (*settings)["rejectDupKeys"] = false;
1986  (*settings)["allowSpecialFloats"] = false;
1988 }
1989 
1991 // global functions
1992 
1994  IStream& sin,
1995  Value* root,
1996  String* errs) {
1997  OStringStream ssin;
1998  ssin << sin.rdbuf();
1999  String doc = ssin.str();
2000  char const* begin = doc.data();
2001  char const* end = begin + doc.size();
2002  // Note that we do not actually need a null-terminator.
2003  CharReaderPtr const reader(fact.newCharReader());
2004  return reader->parse(begin, end, root, errs);
2005 }
2006 
2009  String errs;
2010  bool ok = parseFromStream(b, sin, &root, &errs);
2011  if (!ok) {
2012  throwRuntimeError(errs);
2013  }
2014  return sin;
2015 }
2016 
2017 } // namespace Json
std::vector< String > Members
Definition: value.h:180
bool isArray() const
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition: json_tool.h:39
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
void fixNumericLocaleInput(Iter begin, Iter end)
Definition: json_tool.h:103
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
array value (ordered list)
Definition: value.h:92
std::auto_ptr< CharReader > CharReaderPtr
Definition: json_reader.cpp:56
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition: config.h:165
bool asBool() const
Definition: json_value.cpp:845
Json::Value settings_
Configuration of this builder.
Definition: reader.h:344
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
Definition: json_value.cpp:923
Members getMemberNames() const
Return a list of the member names.
void setComment(const char *comment, size_t len, CommentPlacement placement)
Comments must be //... or /* ... */.
Definition: value.h:580
object value (collection of name/value pairs).
Definition: value.h:93
char Char
Definition: reader.h:37
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
Definition: json_value.cpp:486
static const Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition: value.h:215
ptrdiff_t getOffsetStart() const
Json::LargestUInt LargestUInt
Definition: value.h:190
CharReader * newCharReader() const override
Allocate a CharReader via operator new().
Features()
Initialize the configuration like JsonConfig::allFeatures;.
An error tagged with where in the JSON text it was encountered.
Definition: reader.h:46
static const LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition: value.h:206
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool allowComments_
true if comments are allowed. Default: true.
Definition: features.h:44
CommentPlacement
Definition: value.h:96
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition: config.h:168
const Char * Location
Definition: reader.h:38
bool allowNumericKeys_
true if numeric object keys are allowed. Default: false.
Definition: features.h:54
static size_t const stackLimit_g
Definition: json_reader.cpp:48
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
Definition: json_reader.cpp:96
JSON (JavaScript Object Notation).
Definition: allocator.h:14
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
Definition: features.h:51
#define jsoncpp_snprintf
Definition: config.h:79
bool pushError(const Value &value, const String &message)
Add a semantic error message.
Json::LargestInt LargestInt
Definition: value.h:189
ptrdiff_t getOffsetLimit() const
~CharReaderBuilder() override
bool good() const
Return whether there are any errors.
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
bool validate(Json::Value *invalid) const
Interface for reading JSON from a char array.
Definition: reader.h:254
Represents a JSON value.
Definition: value.h:176
void setOffsetStart(ptrdiff_t start)
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:64
a comment on the line after a value (only make sense for
Definition: value.h:99
static void getValidReaderKeys(std::set< String > *valid_keys)
void throwRuntimeError(String const &msg)
used internally
Definition: json_value.cpp:235
std::istream IStream
Definition: config.h:169
#define JSONCPP_DEPRECATED_STACK_LIMIT
Definition: json_reader.cpp:45
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
void setOffsetLimit(ptrdiff_t limit)
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:66
bool strictRoot_
true if root must be either an array or an object value.
Definition: features.h:48
Build a CharReader implementation.
Definition: reader.h:302
bool isObject() const
Configuration passed to reader and writer.
Definition: features.h:21
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
a comment placed on the line before a value
Definition: value.h:97
a comment just after a value on the same line
Definition: value.h:98
Value & operator[](const String &key)
A simple way to update a specific setting.
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition: config.h:162
String getFormattedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
static const LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition: value.h:208
static const LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition: value.h:210