GCC Code Coverage Report


Directory: ./
File: submodules/json-c/json_tokener.c
Date: 2023-09-29 04:53:15
Exec Total Coverage
Lines: 0 549 0.0%
Branches: 0 521 0.0%

Line Branch Exec Source
1 /*
2 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
3 *
4 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5 * Michael Clark <michael@metaparadigm.com>
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
9 *
10 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (https://www.opensource.org/licenses/mit-license.php)
14 */
15
16 #include "config.h"
17
18 #include "math_compat.h"
19 #include <assert.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <math.h>
23 #include <stddef.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "debug.h"
29 #include "json_inttypes.h"
30 #include "json_object.h"
31 #include "json_object_private.h"
32 #include "json_tokener.h"
33 #include "json_util.h"
34 #include "printbuf.h"
35 #include "strdup_compat.h"
36
37 #ifdef HAVE_LOCALE_H
38 #include <locale.h>
39 #endif /* HAVE_LOCALE_H */
40 #ifdef HAVE_XLOCALE_H
41 #include <xlocale.h>
42 #endif
43 #ifdef HAVE_STRINGS_H
44 #include <strings.h>
45 #endif /* HAVE_STRINGS_H */
46
/* Numeric value (0-15) of an ASCII hex digit. The argument is evaluated more
 * than once and is expected to have been checked as a hex digit already:
 * letters map through their low three bits ('a'/'A' -> 1, so 1 + 9 == 10).
 */
#define jt_hexdigit(x) (((x) > '9') ? (((x)&7) + 9) : ((x) - '0'))
48
49 #if !HAVE_STRNCASECMP && defined(_MSC_VER)
50 /* MSC has the version as _strnicmp */
51 #define strncasecmp _strnicmp
52 #elif !HAVE_STRNCASECMP
53 #error You do not have strncasecmp on your system.
54 #endif /* HAVE_STRNCASECMP */
55
56 #if defined(_MSC_VER) && (_MSC_VER <= 1800)
57 /* VS2013 doesn't know about "inline" */
58 #define inline __inline
59 #elif defined(AIX_CC)
60 #define inline
61 #endif
62
63 /* The following helper functions are used to speed up parsing. They
64 * are faster than their ctype counterparts because they assume that
65 * the input is in ASCII and that the locale is set to "C". The
66 * compiler will also inline these functions, providing an additional
67 * speedup by saving on function calls.
68 */
/* Return 1 if c is a space, tab, line feed or carriage return; 0 otherwise. */
static inline int is_ws_char(char c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\n':
	case '\r': return 1;
	default: return 0;
	}
}
76
/* Return 1 if c is one of 0-9, a-f or A-F; 0 otherwise. */
static inline int is_hex_char(char c)
{
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'a' && c <= 'f')
		return 1;
	if (c >= 'A' && c <= 'F')
		return 1;
	return 0;
}
83
84 /* Use C99 NAN by default; if not available, nan("") should work too. */
85 #ifndef NAN
86 #define NAN nan("")
87 #endif /* !NAN */
88
/* Literal spellings matched by the tokener. */
static const char json_null_str[] = "null";
static const char json_inf_str[] = "Infinity";
/* Swapped case "Infinity" to avoid need to call tolower() on input chars: */
static const char json_inf_str_invert[] = "iNFINITY";
static const char json_nan_str[] = "NaN";
static const char json_true_str[] = "true";
static const char json_false_str[] = "false";

/* Lengths of the literals above, not counting the terminating NUL. */
static const int json_null_str_len = (int)(sizeof(json_null_str) - 1);
static const unsigned int json_inf_str_len = (unsigned int)(sizeof(json_inf_str) - 1);
static const int json_nan_str_len = (int)(sizeof(json_nan_str) - 1);
static const int json_true_str_len = (int)(sizeof(json_true_str) - 1);
static const int json_false_str_len = (int)(sizeof(json_false_str) - 1);
101
/* Human-readable error descriptions; json_tokener_error_desc() indexes this
 * table with the numeric value of the error code.
 */
/* clang-format off */
static const char *json_tokener_errors[] = {
	/*  0 */ "success",
	/*  1 */ "continue",
	/*  2 */ "nesting too deep",
	/*  3 */ "unexpected end of data",
	/*  4 */ "unexpected character",
	/*  5 */ "null expected",
	/*  6 */ "boolean expected",
	/*  7 */ "number expected",
	/*  8 */ "array value separator ',' expected",
	/*  9 */ "quoted object property name expected",
	/* 10 */ "object property name separator ':' expected",
	/* 11 */ "object value separator ',' expected",
	/* 12 */ "invalid string sequence",
	/* 13 */ "expected comment",
	/* 14 */ "invalid utf-8 string",
	/* 15 */ "buffer size overflow",
};
/* clang-format on */
122
123 /**
124 * Validate the UTF-8 string in strict mode.
125 * If the input is not valid UTF-8, return an error.
126 */
127 static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);
128
129 static int json_tokener_parse_double(const char *buf, int len, double *retval);
130
131 const char *json_tokener_error_desc(enum json_tokener_error jerr)
132 {
133 int jerr_int = (int)jerr;
134 if (jerr_int < 0 ||
135 jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
136 return "Unknown error, "
137 "invalid json_tokener_error value passed to json_tokener_error_desc()";
138 return json_tokener_errors[jerr];
139 }
140
141 enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
142 {
143 return tok->err;
144 }
145
/* Stuff for decoding unicode sequences */
/*
 * The surrogate tests compare every bit above the low ten, not just bits
 * 10-15. The tokener re-tests the code point produced by
 * DECODE_SURROGATE_PAIR(), which is >= 0x10000; with a 16-bit-only mask a
 * value such as 0x1D800 or 0x1DC00 would be mistaken for a lone surrogate.
 */
#define IS_HIGH_SURROGATE(uc) (((uc)&0xFFFFFC00u) == 0xD800u)
#define IS_LOW_SURROGATE(uc) (((uc)&0xFFFFFC00u) == 0xDC00u)
/* Combine a high/low surrogate pair into the code point it encodes. */
#define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000)
/* UTF-8 encoding of U+FFFD, emitted in place of unpaired surrogates. */
static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};
151
152 struct json_tokener *json_tokener_new_ex(int depth)
153 {
154 struct json_tokener *tok;
155
156 tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener));
157 if (!tok)
158 return NULL;
159 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
160 if (!tok->stack)
161 {
162 free(tok);
163 return NULL;
164 }
165 tok->pb = printbuf_new();
166 if (!tok->pb)
167 {
168 free(tok->stack);
169 free(tok);
170 return NULL;
171 }
172 tok->max_depth = depth;
173 json_tokener_reset(tok);
174 return tok;
175 }
176
177 struct json_tokener *json_tokener_new(void)
178 {
179 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
180 }
181
182 void json_tokener_free(struct json_tokener *tok)
183 {
184 json_tokener_reset(tok);
185 if (tok->pb)
186 printbuf_free(tok->pb);
187 free(tok->stack);
188 free(tok);
189 }
190
191 static void json_tokener_reset_level(struct json_tokener *tok, int depth)
192 {
193 tok->stack[depth].state = json_tokener_state_eatws;
194 tok->stack[depth].saved_state = json_tokener_state_start;
195 json_object_put(tok->stack[depth].current);
196 tok->stack[depth].current = NULL;
197 free(tok->stack[depth].obj_field_name);
198 tok->stack[depth].obj_field_name = NULL;
199 }
200
201 void json_tokener_reset(struct json_tokener *tok)
202 {
203 int i;
204 if (!tok)
205 return;
206
207 for (i = tok->depth; i >= 0; i--)
208 json_tokener_reset_level(tok, i);
209 tok->depth = 0;
210 tok->err = json_tokener_success;
211 }
212
213 struct json_object *json_tokener_parse(const char *str)
214 {
215 enum json_tokener_error jerr_ignored;
216 struct json_object *obj;
217 obj = json_tokener_parse_verbose(str, &jerr_ignored);
218 return obj;
219 }
220
221 struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
222 {
223 struct json_tokener *tok;
224 struct json_object *obj;
225
226 tok = json_tokener_new();
227 if (!tok)
228 return NULL;
229 obj = json_tokener_parse_ex(tok, str, -1);
230 *error = tok->err;
231 if (tok->err != json_tokener_success
232 #if 0
233 /* This would be a more sensible default, and cause parsing
234 * things like "null123" to fail when the caller can't know
235 * where the parsing left off, but starting to fail would
236 * be a notable behaviour change. Save for a 1.0 release.
237 */
238 || json_tokener_get_parse_end(tok) != strlen(str)
239 #endif
240 )
241
242 {
243 if (obj != NULL)
244 json_object_put(obj);
245 obj = NULL;
246 }
247
248 json_tokener_free(tok);
249 return obj;
250 }
251
/* Shorthand for the fields of the stack record at the current depth. Each
 * expands to an lvalue and requires a variable named `tok` to be in scope.
 */
#define state (tok->stack[tok->depth].state)
#define saved_state (tok->stack[tok->depth].saved_state)
#define current (tok->stack[tok->depth].current)
#define obj_field_name (tok->stack[tok->depth].obj_field_name)
256
257 /* Optimization:
258 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
259 * iterating character-by character. A large performance boost is
260 * achieved by using tighter loops to locally handle units such as
261 * comments and strings. Loops that handle an entire token within
262 * their scope also gather entire strings and pass them to
263 * printbuf_memappend() in a single call, rather than calling
264 * printbuf_memappend() one char at a time.
265 *
266 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
267 * common to both the main loop and the tighter loops.
268 */
269
/* PEEK_CHAR(dest, tok) macro:
 *   Peeks at the current char and stores it in dest.
 *   Returns 1 on success.
 *   Returns 0 with tok->err set when no chars are left: json_tokener_success
 *   if the tokener is at depth 0 waiting in eatws with a saved state of
 *   finish, json_tokener_continue otherwise.
 *   Returns 0 with tok->err set to json_tokener_error_parse_utf8_string when
 *   JSON_TOKENER_VALIDATE_UTF8 is set and the current char fails validation.
 *   Implicit inputs:  str, len, nBytesp vars; the state/saved_state macros
 *   additionally require the tokener variable to be named `tok`.
 */
#define PEEK_CHAR(dest, tok) \
	(((tok)->char_offset == len) \
	     ? (((tok)->depth == 0 && state == json_tokener_state_eatws && \
	         saved_state == json_tokener_state_finish) \
	            ? (((tok)->err = json_tokener_success), 0) \
	            : (((tok)->err = json_tokener_continue), 0)) \
	     : ((((tok)->flags & JSON_TOKENER_VALIDATE_UTF8) && \
	         (!json_tokener_validate_utf8(*str, nBytesp))) \
	            ? (((tok)->err = json_tokener_error_parse_utf8_string), 0) \
	            : (((dest) = *str), 1)))
285
/* ADVANCE_CHAR(str, tok) macro:
 *   Steps str forward by one char and bumps tok->char_offset by one.
 *   Evaluates to the value of c as it was before advancing (0 on eof), which
 *   lets callers use it directly in a condition.
 *   Implicit inputs:  c var
 */
#define ADVANCE_CHAR(str, tok) ((str)++, ++((tok)->char_offset), c)
292
293 /* End optimization macro defs */
294
295 struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len)
296 {
297 struct json_object *obj = NULL;
298 char c = '\1';
299 unsigned int nBytes = 0;
300 unsigned int *nBytesp = &nBytes;
301
302 #ifdef HAVE_USELOCALE
303 locale_t oldlocale = uselocale(NULL);
304 locale_t newloc;
305 #elif defined(HAVE_SETLOCALE)
306 char *oldlocale = NULL;
307 #endif
308
309 tok->char_offset = 0;
310 tok->err = json_tokener_success;
311
312 /* this interface is presently not 64-bit clean due to the int len argument
313 * and the internal printbuf interface that takes 32-bit int len arguments
314 * so the function limits the maximum string size to INT32_MAX (2GB).
315 * If the function is called with len == -1 then strlen is called to check
316 * the string length is less than INT32_MAX (2GB)
317 */
318 if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX))
319 {
320 tok->err = json_tokener_error_size;
321 return NULL;
322 }
323
324 #ifdef HAVE_USELOCALE
325 {
326 locale_t duploc = duplocale(oldlocale);
327 newloc = newlocale(LC_NUMERIC_MASK, "C", duploc);
328 if (newloc == NULL)
329 {
330 freelocale(duploc);
331 return NULL;
332 }
333 uselocale(newloc);
334 }
335 #elif defined(HAVE_SETLOCALE)
336 {
337 char *tmplocale;
338 tmplocale = setlocale(LC_NUMERIC, NULL);
339 if (tmplocale)
340 oldlocale = strdup(tmplocale);
341 setlocale(LC_NUMERIC, "C");
342 }
343 #endif
344
345 while (PEEK_CHAR(c, tok)) // Note: c might be '\0' !
346 {
347
348 redo_char:
349 switch (state)
350 {
351
352 case json_tokener_state_eatws:
353 /* Advance until we change state */
354 while (is_ws_char(c))
355 {
356 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
357 goto out;
358 }
359 if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT))
360 {
361 printbuf_reset(tok->pb);
362 printbuf_memappend_fast(tok->pb, &c, 1);
363 state = json_tokener_state_comment_start;
364 }
365 else
366 {
367 state = saved_state;
368 goto redo_char;
369 }
370 break;
371
372 case json_tokener_state_start:
373 switch (c)
374 {
375 case '{':
376 state = json_tokener_state_eatws;
377 saved_state = json_tokener_state_object_field_start;
378 current = json_object_new_object();
379 if (current == NULL)
380 goto out;
381 break;
382 case '[':
383 state = json_tokener_state_eatws;
384 saved_state = json_tokener_state_array;
385 current = json_object_new_array();
386 if (current == NULL)
387 goto out;
388 break;
389 case 'I':
390 case 'i':
391 state = json_tokener_state_inf;
392 printbuf_reset(tok->pb);
393 tok->st_pos = 0;
394 goto redo_char;
395 case 'N':
396 case 'n':
397 state = json_tokener_state_null; // or NaN
398 printbuf_reset(tok->pb);
399 tok->st_pos = 0;
400 goto redo_char;
401 case '\'':
402 if (tok->flags & JSON_TOKENER_STRICT)
403 {
404 /* in STRICT mode only double-quote are allowed */
405 tok->err = json_tokener_error_parse_unexpected;
406 goto out;
407 }
408 /* FALLTHRU */
409 case '"':
410 state = json_tokener_state_string;
411 printbuf_reset(tok->pb);
412 tok->quote_char = c;
413 break;
414 case 'T':
415 case 't':
416 case 'F':
417 case 'f':
418 state = json_tokener_state_boolean;
419 printbuf_reset(tok->pb);
420 tok->st_pos = 0;
421 goto redo_char;
422 case '0':
423 case '1':
424 case '2':
425 case '3':
426 case '4':
427 case '5':
428 case '6':
429 case '7':
430 case '8':
431 case '9':
432 case '-':
433 state = json_tokener_state_number;
434 printbuf_reset(tok->pb);
435 tok->is_double = 0;
436 goto redo_char;
437 default: tok->err = json_tokener_error_parse_unexpected; goto out;
438 }
439 break;
440
441 case json_tokener_state_finish:
442 if (tok->depth == 0)
443 goto out;
444 obj = json_object_get(current);
445 json_tokener_reset_level(tok, tok->depth);
446 tok->depth--;
447 goto redo_char;
448
449 case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
450 {
451 /* If we were guaranteed to have len set, then we could (usually) handle
452 * the entire "Infinity" check in a single strncmp (strncasecmp), but
453 * since len might be -1 (i.e. "read until \0"), we need to check it
454 * a character at a time.
455 * Trying to handle it both ways would make this code considerably more
456 * complicated with likely little performance benefit.
457 */
458 int is_negative = 0;
459
460 /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
461 while (tok->st_pos < (int)json_inf_str_len)
462 {
463 char inf_char = *str;
464 if (inf_char != json_inf_str[tok->st_pos] &&
465 ((tok->flags & JSON_TOKENER_STRICT) ||
466 inf_char != json_inf_str_invert[tok->st_pos])
467 )
468 {
469 tok->err = json_tokener_error_parse_unexpected;
470 goto out;
471 }
472 tok->st_pos++;
473 (void)ADVANCE_CHAR(str, tok);
474 if (!PEEK_CHAR(c, tok))
475 {
476 /* out of input chars, for now at least */
477 goto out;
478 }
479 }
480 /* We checked the full length of "Infinity", so create the object.
481 * When handling -Infinity, the number parsing code will have dropped
482 * the "-" into tok->pb for us, so check it now.
483 */
484 if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
485 {
486 is_negative = 1;
487 }
488 current = json_object_new_double(is_negative ? -INFINITY : INFINITY);
489 if (current == NULL)
490 goto out;
491 saved_state = json_tokener_state_finish;
492 state = json_tokener_state_eatws;
493 goto redo_char;
494 }
495 break;
496 case json_tokener_state_null: /* aka starts with 'n' */
497 {
498 int size;
499 int size_nan;
500 printbuf_memappend_fast(tok->pb, &c, 1);
501 size = json_min(tok->st_pos + 1, json_null_str_len);
502 size_nan = json_min(tok->st_pos + 1, json_nan_str_len);
503 if ((!(tok->flags & JSON_TOKENER_STRICT) &&
504 strncasecmp(json_null_str, tok->pb->buf, size) == 0) ||
505 (strncmp(json_null_str, tok->pb->buf, size) == 0))
506 {
507 if (tok->st_pos == json_null_str_len)
508 {
509 current = NULL;
510 saved_state = json_tokener_state_finish;
511 state = json_tokener_state_eatws;
512 goto redo_char;
513 }
514 }
515 else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
516 strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
517 (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0))
518 {
519 if (tok->st_pos == json_nan_str_len)
520 {
521 current = json_object_new_double(NAN);
522 if (current == NULL)
523 goto out;
524 saved_state = json_tokener_state_finish;
525 state = json_tokener_state_eatws;
526 goto redo_char;
527 }
528 }
529 else
530 {
531 tok->err = json_tokener_error_parse_null;
532 goto out;
533 }
534 tok->st_pos++;
535 }
536 break;
537
538 case json_tokener_state_comment_start:
539 if (c == '*')
540 {
541 state = json_tokener_state_comment;
542 }
543 else if (c == '/')
544 {
545 state = json_tokener_state_comment_eol;
546 }
547 else
548 {
549 tok->err = json_tokener_error_parse_comment;
550 goto out;
551 }
552 printbuf_memappend_fast(tok->pb, &c, 1);
553 break;
554
555 case json_tokener_state_comment:
556 {
557 /* Advance until we change state */
558 const char *case_start = str;
559 while (c != '*')
560 {
561 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
562 {
563 printbuf_memappend_fast(tok->pb, case_start,
564 str - case_start);
565 goto out;
566 }
567 }
568 printbuf_memappend_fast(tok->pb, case_start, 1 + str - case_start);
569 state = json_tokener_state_comment_end;
570 }
571 break;
572
573 case json_tokener_state_comment_eol:
574 {
575 /* Advance until we change state */
576 const char *case_start = str;
577 while (c != '\n')
578 {
579 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
580 {
581 printbuf_memappend_fast(tok->pb, case_start,
582 str - case_start);
583 goto out;
584 }
585 }
586 printbuf_memappend_fast(tok->pb, case_start, str - case_start);
587 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
588 state = json_tokener_state_eatws;
589 }
590 break;
591
592 case json_tokener_state_comment_end:
593 printbuf_memappend_fast(tok->pb, &c, 1);
594 if (c == '/')
595 {
596 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
597 state = json_tokener_state_eatws;
598 }
599 else
600 {
601 state = json_tokener_state_comment;
602 }
603 break;
604
605 case json_tokener_state_string:
606 {
607 /* Advance until we change state */
608 const char *case_start = str;
609 while (1)
610 {
611 if (c == tok->quote_char)
612 {
613 printbuf_memappend_fast(tok->pb, case_start,
614 str - case_start);
615 current =
616 json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
617 if (current == NULL)
618 goto out;
619 saved_state = json_tokener_state_finish;
620 state = json_tokener_state_eatws;
621 break;
622 }
623 else if (c == '\\')
624 {
625 printbuf_memappend_fast(tok->pb, case_start,
626 str - case_start);
627 saved_state = json_tokener_state_string;
628 state = json_tokener_state_string_escape;
629 break;
630 }
631 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
632 {
633 printbuf_memappend_fast(tok->pb, case_start,
634 str - case_start);
635 goto out;
636 }
637 }
638 }
639 break;
640
641 case json_tokener_state_string_escape:
642 switch (c)
643 {
644 case '"':
645 case '\\':
646 case '/':
647 printbuf_memappend_fast(tok->pb, &c, 1);
648 state = saved_state;
649 break;
650 case 'b':
651 case 'n':
652 case 'r':
653 case 't':
654 case 'f':
655 if (c == 'b')
656 printbuf_memappend_fast(tok->pb, "\b", 1);
657 else if (c == 'n')
658 printbuf_memappend_fast(tok->pb, "\n", 1);
659 else if (c == 'r')
660 printbuf_memappend_fast(tok->pb, "\r", 1);
661 else if (c == 't')
662 printbuf_memappend_fast(tok->pb, "\t", 1);
663 else if (c == 'f')
664 printbuf_memappend_fast(tok->pb, "\f", 1);
665 state = saved_state;
666 break;
667 case 'u':
668 tok->ucs_char = 0;
669 tok->st_pos = 0;
670 state = json_tokener_state_escape_unicode;
671 break;
672 default: tok->err = json_tokener_error_parse_string; goto out;
673 }
674 break;
675
676 // ===================================================
677
678 case json_tokener_state_escape_unicode:
679 {
680 /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */
681 while (1)
682 {
683 if (!c || !is_hex_char(c))
684 {
685 tok->err = json_tokener_error_parse_string;
686 goto out;
687 }
688 tok->ucs_char |=
689 ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4));
690 tok->st_pos++;
691 if (tok->st_pos >= 4)
692 break;
693
694 (void)ADVANCE_CHAR(str, tok);
695 if (!PEEK_CHAR(c, tok))
696 {
697 /*
698 * We're out of characters in the current call to
699 * json_tokener_parse(), but a subsequent call might
700 * provide us with more, so leave our current state
701 * as-is (including tok->high_surrogate) and return.
702 */
703 goto out;
704 }
705 }
706 tok->st_pos = 0;
707
708 /* Now, we have a full \uNNNN sequence in tok->ucs_char */
709
710 /* If the *previous* sequence was a high surrogate ... */
711 if (tok->high_surrogate)
712 {
713 if (IS_LOW_SURROGATE(tok->ucs_char))
714 {
715 /* Recalculate the ucs_char, then fall thru to process normally */
716 tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate,
717 tok->ucs_char);
718 }
719 else
720 {
721 /* High surrogate was not followed by a low surrogate
722 * Replace the high and process the rest normally
723 */
724 printbuf_memappend_fast(tok->pb,
725 (char *)utf8_replacement_char, 3);
726 }
727 tok->high_surrogate = 0;
728 }
729
730 if (tok->ucs_char < 0x80)
731 {
732 unsigned char unescaped_utf[1];
733 unescaped_utf[0] = tok->ucs_char;
734 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 1);
735 }
736 else if (tok->ucs_char < 0x800)
737 {
738 unsigned char unescaped_utf[2];
739 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
740 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
741 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 2);
742 }
743 else if (IS_HIGH_SURROGATE(tok->ucs_char))
744 {
745 /*
746 * The next two characters should be \u, HOWEVER,
747 * we can't simply peek ahead here, because the
748 * characters we need might not be passed to us
749 * until a subsequent call to json_tokener_parse.
750 * Instead, transition through a couple of states.
751 * (now):
752 * _escape_unicode => _unicode_need_escape
753 * (see a '\\' char):
754 * _unicode_need_escape => _unicode_need_u
755 * (see a 'u' char):
756 * _unicode_need_u => _escape_unicode
757 * ...and we'll end up back around here.
758 */
759 tok->high_surrogate = tok->ucs_char;
760 tok->ucs_char = 0;
761 state = json_tokener_state_escape_unicode_need_escape;
762 break;
763 }
764 else if (IS_LOW_SURROGATE(tok->ucs_char))
765 {
766 /* Got a low surrogate not preceded by a high */
767 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
768 }
769 else if (tok->ucs_char < 0x10000)
770 {
771 unsigned char unescaped_utf[3];
772 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
773 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
774 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
775 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 3);
776 }
777 else if (tok->ucs_char < 0x110000)
778 {
779 unsigned char unescaped_utf[4];
780 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
781 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
782 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
783 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
784 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 4);
785 }
786 else
787 {
788 /* Don't know what we got--insert the replacement char */
789 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
790 }
791 state = saved_state; // i.e. _state_string or _state_object_field
792 }
793 break;
794
795 case json_tokener_state_escape_unicode_need_escape:
796 // We get here after processing a high_surrogate
797 // require a '\\' char
798 if (!c || c != '\\')
799 {
800 /* Got a high surrogate without another sequence following
801 * it. Put a replacement char in for the high surrogate
802 * and pop back up to _state_string or _state_object_field.
803 */
804 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
805 tok->high_surrogate = 0;
806 tok->ucs_char = 0;
807 tok->st_pos = 0;
808 state = saved_state;
809 goto redo_char;
810 }
811 state = json_tokener_state_escape_unicode_need_u;
812 break;
813
814 case json_tokener_state_escape_unicode_need_u:
815 /* We already had a \ char, check that it's \u */
816 if (!c || c != 'u')
817 {
818 /* Got a high surrogate with some non-unicode escape
819 * sequence following it.
820 * Put a replacement char in for the high surrogate
821 * and handle the escape sequence normally.
822 */
823 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
824 tok->high_surrogate = 0;
825 tok->ucs_char = 0;
826 tok->st_pos = 0;
827 state = json_tokener_state_string_escape;
828 goto redo_char;
829 }
830 state = json_tokener_state_escape_unicode;
831 break;
832
833 // ===================================================
834
835 case json_tokener_state_boolean:
836 {
837 int size1, size2;
838 printbuf_memappend_fast(tok->pb, &c, 1);
839 size1 = json_min(tok->st_pos + 1, json_true_str_len);
840 size2 = json_min(tok->st_pos + 1, json_false_str_len);
841 if ((!(tok->flags & JSON_TOKENER_STRICT) &&
842 strncasecmp(json_true_str, tok->pb->buf, size1) == 0) ||
843 (strncmp(json_true_str, tok->pb->buf, size1) == 0))
844 {
845 if (tok->st_pos == json_true_str_len)
846 {
847 current = json_object_new_boolean(1);
848 if (current == NULL)
849 goto out;
850 saved_state = json_tokener_state_finish;
851 state = json_tokener_state_eatws;
852 goto redo_char;
853 }
854 }
855 else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
856 strncasecmp(json_false_str, tok->pb->buf, size2) == 0) ||
857 (strncmp(json_false_str, tok->pb->buf, size2) == 0))
858 {
859 if (tok->st_pos == json_false_str_len)
860 {
861 current = json_object_new_boolean(0);
862 if (current == NULL)
863 goto out;
864 saved_state = json_tokener_state_finish;
865 state = json_tokener_state_eatws;
866 goto redo_char;
867 }
868 }
869 else
870 {
871 tok->err = json_tokener_error_parse_boolean;
872 goto out;
873 }
874 tok->st_pos++;
875 }
876 break;
877
878 case json_tokener_state_number:
879 {
880 /* Advance until we change state */
881 const char *case_start = str;
882 int case_len = 0;
883 int is_exponent = 0;
884 int neg_sign_ok = 1;
885 int pos_sign_ok = 0;
886 if (printbuf_length(tok->pb) > 0)
887 {
888 /* We don't save all state from the previous incremental parse
889 so we need to re-generate it based on the saved string so far.
890 */
891 char *e_loc = strchr(tok->pb->buf, 'e');
892 if (!e_loc)
893 e_loc = strchr(tok->pb->buf, 'E');
894 if (e_loc)
895 {
896 char *last_saved_char =
897 &tok->pb->buf[printbuf_length(tok->pb) - 1];
898 is_exponent = 1;
899 pos_sign_ok = neg_sign_ok = 1;
900 /* If the "e" isn't at the end, we can't start with a '-' */
901 if (e_loc != last_saved_char)
902 {
903 neg_sign_ok = 0;
904 pos_sign_ok = 0;
905 }
906 // else leave it set to 1, i.e. start of the new input
907 }
908 }
909
910 while (c && ((c >= '0' && c <= '9') ||
911 (!is_exponent && (c == 'e' || c == 'E')) ||
912 (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') ||
913 (!tok->is_double && c == '.')))
914 {
915 pos_sign_ok = neg_sign_ok = 0;
916 ++case_len;
917
918 /* non-digit characters checks */
919 /* note: since the main loop condition to get here was
920 * an input starting with 0-9 or '-', we are
921 * protected from input starting with '.' or
922 * e/E.
923 */
924 switch (c)
925 {
926 case '.':
927 tok->is_double = 1;
928 pos_sign_ok = 1;
929 neg_sign_ok = 1;
930 break;
931 case 'e': /* FALLTHRU */
932 case 'E':
933 is_exponent = 1;
934 tok->is_double = 1;
935 /* the exponent part can begin with a negative sign */
936 pos_sign_ok = neg_sign_ok = 1;
937 break;
938 default: break;
939 }
940
941 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
942 {
943 printbuf_memappend_fast(tok->pb, case_start, case_len);
944 goto out;
945 }
946 }
947 /*
948 Now we know c isn't a valid number char, but check whether
949 it might have been intended to be, and return a potentially
950 more understandable error right away.
951 However, if we're at the top-level, use the number as-is
952 because c can be part of a new object to parse on the
953 next call to json_tokener_parse().
954 */
955 if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' &&
956 c != 'I' && c != 'i' && !is_ws_char(c))
957 {
958 tok->err = json_tokener_error_parse_number;
959 goto out;
960 }
961 if (case_len > 0)
962 printbuf_memappend_fast(tok->pb, case_start, case_len);
963
964 // Check for -Infinity
965 if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I'))
966 {
967 state = json_tokener_state_inf;
968 tok->st_pos = 0;
969 goto redo_char;
970 }
971 if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT))
972 {
973 /* Trim some chars off the end, to allow things
974 like "123e+" to parse ok. */
975 while (printbuf_length(tok->pb) > 1)
976 {
977 char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1];
978 if (last_char != 'e' && last_char != 'E' &&
979 last_char != '-' && last_char != '+')
980 {
981 break;
982 }
983 tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0';
984 printbuf_length(tok->pb)--;
985 }
986 }
987 }
988 {
989 int64_t num64;
990 uint64_t numuint64;
991 double numd;
992 if (!tok->is_double && tok->pb->buf[0] == '-' &&
993 json_parse_int64(tok->pb->buf, &num64) == 0)
994 {
995 if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT))
996 {
997 tok->err = json_tokener_error_parse_number;
998 goto out;
999 }
1000 current = json_object_new_int64(num64);
1001 if (current == NULL)
1002 goto out;
1003 }
1004 else if (!tok->is_double && tok->pb->buf[0] != '-' &&
1005 json_parse_uint64(tok->pb->buf, &numuint64) == 0)
1006 {
1007 if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT))
1008 {
1009 tok->err = json_tokener_error_parse_number;
1010 goto out;
1011 }
1012 if (numuint64 && tok->pb->buf[0] == '0' &&
1013 (tok->flags & JSON_TOKENER_STRICT))
1014 {
1015 tok->err = json_tokener_error_parse_number;
1016 goto out;
1017 }
1018 if (numuint64 <= INT64_MAX)
1019 {
1020 num64 = (uint64_t)numuint64;
1021 current = json_object_new_int64(num64);
1022 if (current == NULL)
1023 goto out;
1024 }
1025 else
1026 {
1027 current = json_object_new_uint64(numuint64);
1028 if (current == NULL)
1029 goto out;
1030 }
1031 }
1032 else if (tok->is_double &&
1033 json_tokener_parse_double(
1034 tok->pb->buf, printbuf_length(tok->pb), &numd) == 0)
1035 {
1036 current = json_object_new_double_s(numd, tok->pb->buf);
1037 if (current == NULL)
1038 goto out;
1039 }
1040 else
1041 {
1042 tok->err = json_tokener_error_parse_number;
1043 goto out;
1044 }
1045 saved_state = json_tokener_state_finish;
1046 state = json_tokener_state_eatws;
1047 goto redo_char;
1048 }
1049 break;
1050
1051 case json_tokener_state_array_after_sep:
1052 case json_tokener_state_array:
1053 if (c == ']')
1054 {
1055 // Minimize memory usage; assume parsed objs are unlikely to be changed
1056 json_object_array_shrink(current, 0);
1057
1058 if (state == json_tokener_state_array_after_sep &&
1059 (tok->flags & JSON_TOKENER_STRICT))
1060 {
1061 tok->err = json_tokener_error_parse_unexpected;
1062 goto out;
1063 }
1064 saved_state = json_tokener_state_finish;
1065 state = json_tokener_state_eatws;
1066 }
1067 else
1068 {
1069 if (tok->depth >= tok->max_depth - 1)
1070 {
1071 tok->err = json_tokener_error_depth;
1072 goto out;
1073 }
1074 state = json_tokener_state_array_add;
1075 tok->depth++;
1076 json_tokener_reset_level(tok, tok->depth);
1077 goto redo_char;
1078 }
1079 break;
1080
1081 case json_tokener_state_array_add:
1082 if (json_object_array_add(current, obj) != 0)
1083 goto out;
1084 saved_state = json_tokener_state_array_sep;
1085 state = json_tokener_state_eatws;
1086 goto redo_char;
1087
1088 case json_tokener_state_array_sep:
1089 if (c == ']')
1090 {
1091 // Minimize memory usage; assume parsed objs are unlikely to be changed
1092 json_object_array_shrink(current, 0);
1093
1094 saved_state = json_tokener_state_finish;
1095 state = json_tokener_state_eatws;
1096 }
1097 else if (c == ',')
1098 {
1099 saved_state = json_tokener_state_array_after_sep;
1100 state = json_tokener_state_eatws;
1101 }
1102 else
1103 {
1104 tok->err = json_tokener_error_parse_array;
1105 goto out;
1106 }
1107 break;
1108
1109 case json_tokener_state_object_field_start:
1110 case json_tokener_state_object_field_start_after_sep:
1111 if (c == '}')
1112 {
1113 if (state == json_tokener_state_object_field_start_after_sep &&
1114 (tok->flags & JSON_TOKENER_STRICT))
1115 {
1116 tok->err = json_tokener_error_parse_unexpected;
1117 goto out;
1118 }
1119 saved_state = json_tokener_state_finish;
1120 state = json_tokener_state_eatws;
1121 }
1122 else if (c == '"' || c == '\'')
1123 {
1124 tok->quote_char = c;
1125 printbuf_reset(tok->pb);
1126 state = json_tokener_state_object_field;
1127 }
1128 else
1129 {
1130 tok->err = json_tokener_error_parse_object_key_name;
1131 goto out;
1132 }
1133 break;
1134
1135 case json_tokener_state_object_field:
1136 {
1137 /* Advance until we change state */
1138 const char *case_start = str;
1139 while (1)
1140 {
1141 if (c == tok->quote_char)
1142 {
1143 printbuf_memappend_fast(tok->pb, case_start,
1144 str - case_start);
1145 obj_field_name = strdup(tok->pb->buf);
1146 saved_state = json_tokener_state_object_field_end;
1147 state = json_tokener_state_eatws;
1148 break;
1149 }
1150 else if (c == '\\')
1151 {
1152 printbuf_memappend_fast(tok->pb, case_start,
1153 str - case_start);
1154 saved_state = json_tokener_state_object_field;
1155 state = json_tokener_state_string_escape;
1156 break;
1157 }
1158 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
1159 {
1160 printbuf_memappend_fast(tok->pb, case_start,
1161 str - case_start);
1162 goto out;
1163 }
1164 }
1165 }
1166 break;
1167
1168 case json_tokener_state_object_field_end:
1169 if (c == ':')
1170 {
1171 saved_state = json_tokener_state_object_value;
1172 state = json_tokener_state_eatws;
1173 }
1174 else
1175 {
1176 tok->err = json_tokener_error_parse_object_key_sep;
1177 goto out;
1178 }
1179 break;
1180
1181 case json_tokener_state_object_value:
1182 if (tok->depth >= tok->max_depth - 1)
1183 {
1184 tok->err = json_tokener_error_depth;
1185 goto out;
1186 }
1187 state = json_tokener_state_object_value_add;
1188 tok->depth++;
1189 json_tokener_reset_level(tok, tok->depth);
1190 goto redo_char;
1191
1192 case json_tokener_state_object_value_add:
1193 json_object_object_add(current, obj_field_name, obj);
1194 free(obj_field_name);
1195 obj_field_name = NULL;
1196 saved_state = json_tokener_state_object_sep;
1197 state = json_tokener_state_eatws;
1198 goto redo_char;
1199
1200 case json_tokener_state_object_sep:
1201 /* { */
1202 if (c == '}')
1203 {
1204 saved_state = json_tokener_state_finish;
1205 state = json_tokener_state_eatws;
1206 }
1207 else if (c == ',')
1208 {
1209 saved_state = json_tokener_state_object_field_start_after_sep;
1210 state = json_tokener_state_eatws;
1211 }
1212 else
1213 {
1214 tok->err = json_tokener_error_parse_object_value_sep;
1215 goto out;
1216 }
1217 break;
1218 }
1219 (void)ADVANCE_CHAR(str, tok);
1220 if (!c) // This is the char *before* advancing
1221 break;
1222 } /* while(PEEK_CHAR) */
1223
1224 out:
1225 if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0))
1226 {
1227 tok->err = json_tokener_error_parse_utf8_string;
1228 }
1229 if (c && (state == json_tokener_state_finish) && (tok->depth == 0) &&
1230 (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) ==
1231 JSON_TOKENER_STRICT)
1232 {
1233 /* unexpected char after JSON data */
1234 tok->err = json_tokener_error_parse_unexpected;
1235 }
1236 if (!c)
1237 {
1238 /* We hit an eof char (0) */
1239 if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish)
1240 tok->err = json_tokener_error_parse_eof;
1241 }
1242
1243 #ifdef HAVE_USELOCALE
1244 uselocale(oldlocale);
1245 freelocale(newloc);
1246 #elif defined(HAVE_SETLOCALE)
1247 setlocale(LC_NUMERIC, oldlocale);
1248 free(oldlocale);
1249 #endif
1250
1251 if (tok->err == json_tokener_success)
1252 {
1253 json_object *ret = json_object_get(current);
1254 int ii;
1255
1256 /* Partially reset, so we parse additional objects on subsequent calls. */
1257 for (ii = tok->depth; ii >= 0; ii--)
1258 json_tokener_reset_level(tok, ii);
1259 return ret;
1260 }
1261
1262 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err],
1263 tok->char_offset);
1264 return NULL;
1265 }
1266
1267 static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
1268 {
1269 unsigned char chr = c;
1270 if (*nBytes == 0)
1271 {
1272 if (chr >= 0x80)
1273 {
1274 if ((chr & 0xe0) == 0xc0)
1275 *nBytes = 1;
1276 else if ((chr & 0xf0) == 0xe0)
1277 *nBytes = 2;
1278 else if ((chr & 0xf8) == 0xf0)
1279 *nBytes = 3;
1280 else
1281 return 0;
1282 }
1283 }
1284 else
1285 {
1286 if ((chr & 0xC0) != 0x80)
1287 return 0;
1288 (*nBytes)--;
1289 }
1290 return 1;
1291 }
1292
1293 void json_tokener_set_flags(struct json_tokener *tok, int flags)
1294 {
1295 tok->flags = flags;
1296 }
1297
1298 size_t json_tokener_get_parse_end(struct json_tokener *tok)
1299 {
1300 assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */
1301 return (size_t)tok->char_offset;
1302 }
1303
/*
 * Convert the text in buf to a double using strtod().
 *
 * buf:    NUL-terminated text to convert.
 * len:    number of characters of buf that must make up the number.
 * retval: receives the strtod() result; it is written on every call,
 *         including failed ones.
 *
 * Returns 0 when strtod() consumed exactly len characters, 1 otherwise.
 * A length of zero or less is always a failure: strtod() converts nothing
 * from an empty prefix and leaves its end pointer at buf, which would
 * otherwise compare equal to buf + len and be reported as success.
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
	char *end;

	*retval = strtod(buf, &end);

	/* Checked before forming buf + len so a negative len is never used
	 * in pointer arithmetic. */
	if (len <= 0)
		return 1;

	return (end == buf + len) ? 0 : 1;
}
1312