GCC Code Coverage Report

Directory:	./
File:	submodules/json-c/json_tokener.c
Date:	2023-09-29 04:53:15

	Exec	Total	Coverage
Lines:	0	549	0.0%
Branches:	0	521	0.0%

Line	Exec	Source
1		/*
2		* $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
3		*
4		* Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5		* Michael Clark <michael@metaparadigm.com>
6		*
7		* This library is free software; you can redistribute it and/or modify
8		* it under the terms of the MIT license. See COPYING for details.
9		*
10		*
11		* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12		* The copyrights to the contents of this file are licensed under the MIT License
13		* (https://www.opensource.org/licenses/mit-license.php)
14		*/
15
16		#include "config.h"
17
18		#include "math_compat.h"
19		#include <assert.h>
20		#include <errno.h>
21		#include <limits.h>
22		#include <math.h>
23		#include <stddef.h>
24		#include <stdio.h>
25		#include <stdlib.h>
26		#include <string.h>
27
28		#include "debug.h"
29		#include "json_inttypes.h"
30		#include "json_object.h"
31		#include "json_object_private.h"
32		#include "json_tokener.h"
33		#include "json_util.h"
34		#include "printbuf.h"
35		#include "strdup_compat.h"
36
37		#ifdef HAVE_LOCALE_H
38		#include <locale.h>
39		#endif /* HAVE_LOCALE_H */
40		#ifdef HAVE_XLOCALE_H
41		#include <xlocale.h>
42		#endif
43		#ifdef HAVE_STRINGS_H
44		#include <strings.h>
45		#endif /* HAVE_STRINGS_H */
46
47		#define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x)&7) + 9)
48
49		#if !HAVE_STRNCASECMP && defined(_MSC_VER)
50		/* MSC has the version as _strnicmp */
51		#define strncasecmp _strnicmp
52		#elif !HAVE_STRNCASECMP
53		#error You do not have strncasecmp on your system.
54		#endif /* HAVE_STRNCASECMP */
55
56		#if defined(_MSC_VER) && (_MSC_VER <= 1800)
57		/* VS2013 doesn't know about "inline" */
58		#define inline __inline
59		#elif defined(AIX_CC)
60		#define inline
61		#endif
62
63		/* The following helper functions are used to speed up parsing. They
64		* are faster than their ctype counterparts because they assume that
65		* the input is in ASCII and that the locale is set to "C". The
66		* compiler will also inline these functions, providing an additional
67		* speedup by saving on function calls.
68		*/
69	✗	static inline int is_ws_char(char c)
70		{
71	✗	return c == ' '
72	✗	|| c == '\t'
73		|| c == '\n'
74	✗	|| c == '\r';
75		}
76
77	✗	static inline int is_hex_char(char c)
78		{
79	✗	return (c >= '0' && c <= '9')
80	✗	|| (c >= 'A' && c <= 'F')
81	✗	|| (c >= 'a' && c <= 'f');
82		}
83
84		/* Use C99 NAN by default; if not available, nan("") should work too. */
85		#ifndef NAN
86		#define NAN nan("")
87		#endif /* !NAN */
88
89		static const char json_null_str[] = "null";
90		static const int json_null_str_len = sizeof(json_null_str) - 1;
91		static const char json_inf_str[] = "Infinity";
92		/* Swapped case "Infinity" to avoid need to call tolower() on input chars: */
93		static const char json_inf_str_invert[] = "iNFINITY";
94		static const unsigned int json_inf_str_len = sizeof(json_inf_str) - 1;
95		static const char json_nan_str[] = "NaN";
96		static const int json_nan_str_len = sizeof(json_nan_str) - 1;
97		static const char json_true_str[] = "true";
98		static const int json_true_str_len = sizeof(json_true_str) - 1;
99		static const char json_false_str[] = "false";
100		static const int json_false_str_len = sizeof(json_false_str) - 1;
101
102		/* clang-format off */
103		static const char *json_tokener_errors[] = {
104		"success",
105		"continue",
106		"nesting too deep",
107		"unexpected end of data",
108		"unexpected character",
109		"null expected",
110		"boolean expected",
111		"number expected",
112		"array value separator ',' expected",
113		"quoted object property name expected",
114		"object property name separator ':' expected",
115		"object value separator ',' expected",
116		"invalid string sequence",
117		"expected comment",
118		"invalid utf-8 string",
119		"buffer size overflow"
120		};
121		/* clang-format on */
122
123		/**
124		* validete the utf-8 string in strict model.
125		* if not utf-8 format, return err.
126		*/
127		static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);
128
129		static int json_tokener_parse_double(const char *buf, int len, double *retval);
130
131	✗	const char *json_tokener_error_desc(enum json_tokener_error jerr)
132		{
133		int jerr_int = (int)jerr;
134	✗	if (jerr_int < 0 ||
135		jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
136		return "Unknown error, "
137		"invalid json_tokener_error value passed to json_tokener_error_desc()";
138	✗	return json_tokener_errors[jerr];
139		}
140
141	✗	enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
142		{
143	✗	return tok->err;
144		}
145
146		/* Stuff for decoding unicode sequences */
147		#define IS_HIGH_SURROGATE(uc) (((uc)&0xFC00) == 0xD800)
148		#define IS_LOW_SURROGATE(uc) (((uc)&0xFC00) == 0xDC00)
149		#define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000)
150		static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};
151
152	✗	struct json_tokener *json_tokener_new_ex(int depth)
153		{
154		struct json_tokener *tok;
155
156	✗	tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener));
157	✗	if (!tok)
158		return NULL;
159	✗	tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
160	✗	if (!tok->stack)
161		{
162	✗	free(tok);
163	✗	return NULL;
164		}
165	✗	tok->pb = printbuf_new();
166	✗	if (!tok->pb)
167		{
168	✗	free(tok->stack);
169	✗	free(tok);
170	✗	return NULL;
171		}
172	✗	tok->max_depth = depth;
173	✗	json_tokener_reset(tok);
174	✗	return tok;
175		}
176
177	✗	struct json_tokener *json_tokener_new(void)
178		{
179	✗	return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
180		}
181
182	✗	void json_tokener_free(struct json_tokener *tok)
183		{
184	✗	json_tokener_reset(tok);
185	✗	if (tok->pb)
186	✗	printbuf_free(tok->pb);
187	✗	free(tok->stack);
188	✗	free(tok);
189		}
190
191	✗	static void json_tokener_reset_level(struct json_tokener *tok, int depth)
192		{
193	✗	tok->stack[depth].state = json_tokener_state_eatws;
194	✗	tok->stack[depth].saved_state = json_tokener_state_start;
195	✗	json_object_put(tok->stack[depth].current);
196	✗	tok->stack[depth].current = NULL;
197	✗	free(tok->stack[depth].obj_field_name);
198	✗	tok->stack[depth].obj_field_name = NULL;
199		}
200
201	✗	void json_tokener_reset(struct json_tokener *tok)
202		{
203		int i;
204	✗	if (!tok)
205		return;
206
207	✗	for (i = tok->depth; i >= 0; i--)
208	✗	json_tokener_reset_level(tok, i);
209	✗	tok->depth = 0;
210	✗	tok->err = json_tokener_success;
211		}
212
213	✗	struct json_object *json_tokener_parse(const char *str)
214		{
215		enum json_tokener_error jerr_ignored;
216		struct json_object *obj;
217	✗	obj = json_tokener_parse_verbose(str, &jerr_ignored);
218	✗	return obj;
219		}
220
221	✗	struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
222		{
223		struct json_tokener *tok;
224		struct json_object *obj;
225
226	✗	tok = json_tokener_new();
227	✗	if (!tok)
228		return NULL;
229	✗	obj = json_tokener_parse_ex(tok, str, -1);
230	✗	*error = tok->err;
231	✗	if (tok->err != json_tokener_success
232		#if 0
233		/* This would be a more sensible default, and cause parsing
234		* things like "null123" to fail when the caller can't know
235		* where the parsing left off, but starting to fail would
236		* be a notable behaviour change. Save for a 1.0 release.
237		*/
238		|| json_tokener_get_parse_end(tok) != strlen(str)
239		#endif
240		)
241
242		{
243	✗	if (obj != NULL)
244	✗	json_object_put(obj);
245		obj = NULL;
246		}
247
248	✗	json_tokener_free(tok);
249	✗	return obj;
250		}
251
252		#define state tok->stack[tok->depth].state
253		#define saved_state tok->stack[tok->depth].saved_state
254		#define current tok->stack[tok->depth].current
255		#define obj_field_name tok->stack[tok->depth].obj_field_name
256
257		/* Optimization:
258		* json_tokener_parse_ex() consumed a lot of CPU in its main loop,
259		* iterating character-by character. A large performance boost is
260		* achieved by using tighter loops to locally handle units such as
261		* comments and strings. Loops that handle an entire token within
262		* their scope also gather entire strings and pass them to
263		* printbuf_memappend() in a single call, rather than calling
264		* printbuf_memappend() one char at a time.
265		*
266		* PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
267		* common to both the main loop and the tighter loops.
268		*/
269
270		/* PEEK_CHAR(dest, tok) macro:
271		* Peeks at the current char and stores it in dest.
272		* Returns 1 on success, sets tok->err and returns 0 if no more chars.
273		* Implicit inputs: str, len, nBytesp vars
274		*/
275		#define PEEK_CHAR(dest, tok) \
276		(((tok)->char_offset == len) \
277		? (((tok)->depth == 0 && state == json_tokener_state_eatws && \
278		saved_state == json_tokener_state_finish) \
279		? (((tok)->err = json_tokener_success), 0) \
280		: (((tok)->err = json_tokener_continue), 0)) \
281		: (((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && \
282		(!json_tokener_validate_utf8(*str, nBytesp))) \
283		? ((tok->err = json_tokener_error_parse_utf8_string), 0) \
284		: (((dest) = *str), 1)))
285
286		/* ADVANCE_CHAR() macro:
287		* Increments str & tok->char_offset.
288		* For convenience of existing conditionals, returns the old value of c (0 on eof)
289		* Implicit inputs: c var
290		*/
291		#define ADVANCE_CHAR(str, tok) (++(str), ((tok)->char_offset)++, c)
292
293		/* End optimization macro defs */
294
295	✗	struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len)
296		{
297		struct json_object *obj = NULL;
298	✗	char c = '\1';
299	✗	unsigned int nBytes = 0;
300		unsigned int *nBytesp = &nBytes;
301
302		#ifdef HAVE_USELOCALE
303	✗	locale_t oldlocale = uselocale(NULL);
304		locale_t newloc;
305		#elif defined(HAVE_SETLOCALE)
306		char *oldlocale = NULL;
307		#endif
308
309	✗	tok->char_offset = 0;
310	✗	tok->err = json_tokener_success;
311
312		/* this interface is presently not 64-bit clean due to the int len argument
313		* and the internal printbuf interface that takes 32-bit int len arguments
314		* so the function limits the maximum string size to INT32_MAX (2GB).
315		* If the function is called with len == -1 then strlen is called to check
316		* the string length is less than INT32_MAX (2GB)
317		*/
318	✗	if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX))
319		{
320	✗	tok->err = json_tokener_error_size;
321	✗	return NULL;
322		}
323
324		#ifdef HAVE_USELOCALE
325		{
326	✗	locale_t duploc = duplocale(oldlocale);
327	✗	newloc = newlocale(LC_NUMERIC_MASK, "C", duploc);
328	✗	if (newloc == NULL)
329		{
330	✗	freelocale(duploc);
331	✗	return NULL;
332		}
333	✗	uselocale(newloc);
334		}
335		#elif defined(HAVE_SETLOCALE)
336		{
337		char *tmplocale;
338		tmplocale = setlocale(LC_NUMERIC, NULL);
339		if (tmplocale)
340		oldlocale = strdup(tmplocale);
341		setlocale(LC_NUMERIC, "C");
342		}
343		#endif
344
345	✗	while (PEEK_CHAR(c, tok)) // Note: c might be '\0' !
346		{
347
348		redo_char:
349	✗	switch (state)
350		{
351
352		case json_tokener_state_eatws:
353		/* Advance until we change state */
354	✗	while (is_ws_char(c))
355		{
356	✗	if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
357	✗	goto out;
358		}
359	✗	if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT))
360		{
361	✗	printbuf_reset(tok->pb);
362	✗	printbuf_memappend_fast(tok->pb, &c, 1);
363	✗	state = json_tokener_state_comment_start;
364		}
365		else
366		{
367	✗	state = saved_state;
368	✗	goto redo_char;
369		}
370	✗	break;
371
372	✗	case json_tokener_state_start:
373	✗	switch (c)
374		{
375	✗	case '{':
376	✗	state = json_tokener_state_eatws;
377	✗	saved_state = json_tokener_state_object_field_start;
378	✗	current = json_object_new_object();
379	✗	if (current == NULL)
380	✗	goto out;
381		break;
382	✗	case '[':
383	✗	state = json_tokener_state_eatws;
384	✗	saved_state = json_tokener_state_array;
385	✗	current = json_object_new_array();
386	✗	if (current == NULL)
387	✗	goto out;
388		break;
389	✗	case 'I':
390		case 'i':
391	✗	state = json_tokener_state_inf;
392	✗	printbuf_reset(tok->pb);
393	✗	tok->st_pos = 0;
394	✗	goto redo_char;
395	✗	case 'N':
396		case 'n':
397	✗	state = json_tokener_state_null; // or NaN
398	✗	printbuf_reset(tok->pb);
399	✗	tok->st_pos = 0;
400	✗	goto redo_char;
401	✗	case '\'':
402	✗	if (tok->flags & JSON_TOKENER_STRICT)
403		{
404		/* in STRICT mode only double-quote are allowed */
405	✗	tok->err = json_tokener_error_parse_unexpected;
406	✗	goto out;
407		}
408		/* FALLTHRU */
409		case '"':
410	✗	state = json_tokener_state_string;
411	✗	printbuf_reset(tok->pb);
412	✗	tok->quote_char = c;
413	✗	break;
414	✗	case 'T':
415		case 't':
416		case 'F':
417		case 'f':
418	✗	state = json_tokener_state_boolean;
419	✗	printbuf_reset(tok->pb);
420	✗	tok->st_pos = 0;
421	✗	goto redo_char;
422	✗	case '0':
423		case '1':
424		case '2':
425		case '3':
426		case '4':
427		case '5':
428		case '6':
429		case '7':
430		case '8':
431		case '9':
432		case '-':
433	✗	state = json_tokener_state_number;
434	✗	printbuf_reset(tok->pb);
435	✗	tok->is_double = 0;
436	✗	goto redo_char;
437	✗	default: tok->err = json_tokener_error_parse_unexpected; goto out;
438		}
439		break;
440
441	✗	case json_tokener_state_finish:
442	✗	if (tok->depth == 0)
443	✗	goto out;
444	✗	obj = json_object_get(current);
445	✗	json_tokener_reset_level(tok, tok->depth);
446	✗	tok->depth--;
447	✗	goto redo_char;
448
449		case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
450		{
451		/* If we were guaranteed to have len set, then we could (usually) handle
452		* the entire "Infinity" check in a single strncmp (strncasecmp), but
453		* since len might be -1 (i.e. "read until \0"), we need to check it
454		* a character at a time.
455		* Trying to handle it both ways would make this code considerably more
456		* complicated with likely little performance benefit.
457		*/
458		int is_negative = 0;
459
460		/* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
461	✗	while (tok->st_pos < (int)json_inf_str_len)
462		{
463	✗	char inf_char = *str;
464	✗	if (inf_char != json_inf_str[tok->st_pos] &&
465	✗	((tok->flags & JSON_TOKENER_STRICT) ||
466	✗	inf_char != json_inf_str_invert[tok->st_pos])
467		)
468		{
469	✗	tok->err = json_tokener_error_parse_unexpected;
470	✗	goto out;
471		}
472	✗	tok->st_pos++;
473	✗	(void)ADVANCE_CHAR(str, tok);
474	✗	if (!PEEK_CHAR(c, tok))
475		{
476		/* out of input chars, for now at least */
477	✗	goto out;
478		}
479		}
480		/* We checked the full length of "Infinity", so create the object.
481		* When handling -Infinity, the number parsing code will have dropped
482		* the "-" into tok->pb for us, so check it now.
483		*/
484	✗	if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
485		{
486		is_negative = 1;
487		}
488	✗	current = json_object_new_double(is_negative ? -INFINITY : INFINITY);
489	✗	if (current == NULL)
490	✗	goto out;
491	✗	saved_state = json_tokener_state_finish;
492	✗	state = json_tokener_state_eatws;
493	✗	goto redo_char;
494		}
495		break;
496	✗	case json_tokener_state_null: /* aka starts with 'n' */
497		{
498		int size;
499		int size_nan;
500	✗	printbuf_memappend_fast(tok->pb, &c, 1);

1

/*

2

* $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $

3

*

4

5

* Michael Clark <michael@metaparadigm.com>

6

*

7

* This library is free software; you can redistribute it and/or modify

8

* it under the terms of the MIT license. See COPYING for details.

*

*

* The copyrights to the contents of this file are licensed under the MIT License

13

* (https://www.opensource.org/licenses/mit-license.php)

*/

#include "config.h"

#include "math_compat.h"

#include <assert.h>

#include <errno.h>

#include <limits.h>

#include <math.h>

#include <stddef.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include "debug.h"

#include "json_inttypes.h"

30

#include "json_object.h"

31

#include "json_object_private.h"

32

#include "json_tokener.h"

33

#include "json_util.h"

34

#include "printbuf.h"

35

#include "strdup_compat.h"

#ifdef HAVE_LOCALE_H

#include <locale.h>

#endif /* HAVE_LOCALE_H */

40

#ifdef HAVE_XLOCALE_H

41

#include <xlocale.h>

42

#endif

43

#ifdef HAVE_STRINGS_H

44

#include <strings.h>

45

#endif /* HAVE_STRINGS_H */

46

47

#define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x)&7) + 9)

48

49

#if !HAVE_STRNCASECMP && defined(_MSC_VER)

50

/* MSC has the version as _strnicmp */

51

#define strncasecmp _strnicmp

52

#elif !HAVE_STRNCASECMP

53

#error You do not have strncasecmp on your system.

54

#endif /* HAVE_STRNCASECMP */

55

56

#if defined(_MSC_VER) && (_MSC_VER <= 1800)

57

/* VS2013 doesn't know about "inline" */

58

#define inline __inline

59

#elif defined(AIX_CC)

#define inline

#endif

/* The following helper functions are used to speed up parsing. They

64

* are faster than their ctype counterparts because they assume that

65

* the input is in ASCII and that the locale is set to "C". The

66

* compiler will also inline these functions, providing an additional

67

* speedup by saving on function calls.

68

*/

69

✗

static inline int is_ws_char(char c)

70

{

71

✗

return c == ' '

72

✗

|| c == '\t'

73

|| c == '\n'

74

✗

|| c == '\r';

75

}

76

77

✗

static inline int is_hex_char(char c)

78

{

79

✗

return (c >= '0' && c <= '9')

80

✗

|| (c >= 'A' && c <= 'F')

81

✗

|| (c >= 'a' && c <= 'f');

82

}

83

84

/* Use C99 NAN by default; if not available, nan("") should work too. */

#ifndef NAN

#define NAN nan("")

#endif /* !NAN */

static const char json_null_str[] = "null";

90

static const int json_null_str_len = sizeof(json_null_str) - 1;

91

static const char json_inf_str[] = "Infinity";

92

/* Swapped case "Infinity" to avoid need to call tolower() on input chars: */

93

static const char json_inf_str_invert[] = "iNFINITY";

94

static const unsigned int json_inf_str_len = sizeof(json_inf_str) - 1;

95

static const char json_nan_str[] = "NaN";

96

static const int json_nan_str_len = sizeof(json_nan_str) - 1;

97

static const char json_true_str[] = "true";

98

static const int json_true_str_len = sizeof(json_true_str) - 1;

99

static const char json_false_str[] = "false";

100

static const int json_false_str_len = sizeof(json_false_str) - 1;

101

102

/* clang-format off */

103

static const char *json_tokener_errors[] = {

"success",

"continue",

"nesting too deep",

"unexpected end of data",

108

"unexpected character",

"null expected",

"boolean expected",

"number expected",

"array value separator ',' expected",

113

"quoted object property name expected",

114

"object property name separator ':' expected",

115

"object value separator ',' expected",

116

"invalid string sequence",

117

"expected comment",

118

"invalid utf-8 string",

119

"buffer size overflow"

120

};

121

/* clang-format on */

122

123

/**

124

* validete the utf-8 string in strict model.

125

* if not utf-8 format, return err.

126

*/

127

static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);

128

129

static int json_tokener_parse_double(const char *buf, int len, double *retval);

130

131

✗

const char *json_tokener_error_desc(enum json_tokener_error jerr)

132

{

133

int jerr_int = (int)jerr;

134

✗

if (jerr_int < 0 ||

135

jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))

136

return "Unknown error, "

137

"invalid json_tokener_error value passed to json_tokener_error_desc()";

138

✗

return json_tokener_errors[jerr];

139

}

140

141

✗

enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)

142

{

143

✗

return tok->err;

144

}

145

146

/* Stuff for decoding unicode sequences */

147

#define IS_HIGH_SURROGATE(uc) (((uc)&0xFC00) == 0xD800)

148

#define IS_LOW_SURROGATE(uc) (((uc)&0xFC00) == 0xDC00)

149

#define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000)

150

static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};

151

152

✗

struct json_tokener *json_tokener_new_ex(int depth)

153

{

154

struct json_tokener *tok;

155

156

✗

tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener));

157

✗

if (!tok)

158

return NULL;

159

✗

tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));

160

✗

if (!tok->stack)

161

{

162

✗

free(tok);

163

✗

return NULL;

164

}

165

✗

tok->pb = printbuf_new();

166

✗

if (!tok->pb)

167

{

168

✗

free(tok->stack);

169

✗

free(tok);

170

✗

return NULL;

171

}

172

✗

tok->max_depth = depth;

173

✗

json_tokener_reset(tok);

174

✗

return tok;

175

}

176

177

✗

struct json_tokener *json_tokener_new(void)

178

{

179

✗

return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);

180

}

181

182

✗

void json_tokener_free(struct json_tokener *tok)

183

{

184

✗

json_tokener_reset(tok);

185

✗

if (tok->pb)

186

✗

printbuf_free(tok->pb);

187

✗

free(tok->stack);

188

✗

free(tok);

189

}

190

191

✗

static void json_tokener_reset_level(struct json_tokener *tok, int depth)

192

{

193

✗

tok->stack[depth].state = json_tokener_state_eatws;

194

✗

tok->stack[depth].saved_state = json_tokener_state_start;

195

✗

json_object_put(tok->stack[depth].current);

196

✗

tok->stack[depth].current = NULL;

197

✗

free(tok->stack[depth].obj_field_name);

198

✗

tok->stack[depth].obj_field_name = NULL;

199

}

200

201

✗

void json_tokener_reset(struct json_tokener *tok)

202

{

203

int i;

204

✗

if (!tok)

205

return;

206

207

✗

for (i = tok->depth; i >= 0; i--)

208

✗

json_tokener_reset_level(tok, i);

209

✗

tok->depth = 0;

210

✗

tok->err = json_tokener_success;

211

}

212

213

✗

struct json_object *json_tokener_parse(const char *str)

214

{

215

enum json_tokener_error jerr_ignored;

216

struct json_object *obj;

217

✗

obj = json_tokener_parse_verbose(str, &jerr_ignored);

218

✗

return obj;

219

}

220

221

✗

struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)

222

{

223

struct json_tokener *tok;

224

struct json_object *obj;

225

226

✗

tok = json_tokener_new();

227

✗

if (!tok)

228

return NULL;

229

✗

obj = json_tokener_parse_ex(tok, str, -1);

230

✗

*error = tok->err;

231

✗

if (tok->err != json_tokener_success

232

#if 0

233

/* This would be a more sensible default, and cause parsing

234

* things like "null123" to fail when the caller can't know

235

* where the parsing left off, but starting to fail would

236

* be a notable behaviour change. Save for a 1.0 release.

237

*/

238

|| json_tokener_get_parse_end(tok) != strlen(str)

#endif

)

{

✗

if (obj != NULL)

244

✗

json_object_put(obj);

obj = NULL;

}

✗

json_tokener_free(tok);

249

✗

return obj;

250

}

251

252

#define state tok->stack[tok->depth].state

253

#define saved_state tok->stack[tok->depth].saved_state

254

#define current tok->stack[tok->depth].current

255

#define obj_field_name tok->stack[tok->depth].obj_field_name

256

257

/* Optimization:

258

* json_tokener_parse_ex() consumed a lot of CPU in its main loop,

259

* iterating character-by character. A large performance boost is

260

* achieved by using tighter loops to locally handle units such as

261

* comments and strings. Loops that handle an entire token within

262

* their scope also gather entire strings and pass them to

263

* printbuf_memappend() in a single call, rather than calling

264

* printbuf_memappend() one char at a time.

265

*

266

* PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is

267

* common to both the main loop and the tighter loops.

268

*/

269

270

/* PEEK_CHAR(dest, tok) macro:

271

* Peeks at the current char and stores it in dest.

272

* Returns 1 on success, sets tok->err and returns 0 if no more chars.

273

* Implicit inputs: str, len, nBytesp vars

274

*/

275

#define PEEK_CHAR(dest, tok) \

276

(((tok)->char_offset == len) \

277

? (((tok)->depth == 0 && state == json_tokener_state_eatws && \

278

saved_state == json_tokener_state_finish) \

279

? (((tok)->err = json_tokener_success), 0) \

280

: (((tok)->err = json_tokener_continue), 0)) \

281

: (((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && \

282

(!json_tokener_validate_utf8(*str, nBytesp))) \

283

? ((tok->err = json_tokener_error_parse_utf8_string), 0) \

284

: (((dest) = *str), 1)))

285

286

/* ADVANCE_CHAR() macro:

287

* Increments str & tok->char_offset.

288

* For convenience of existing conditionals, returns the old value of c (0 on eof)

289

* Implicit inputs: c var

290

*/

291

#define ADVANCE_CHAR(str, tok) (++(str), ((tok)->char_offset)++, c)

292

293

/* End optimization macro defs */

294

295

✗

struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len)

296

{

297

struct json_object *obj = NULL;

298

✗

char c = '\1';

299

✗

unsigned int nBytes = 0;

300

unsigned int *nBytesp = &nBytes;

301

302

#ifdef HAVE_USELOCALE

303

✗

locale_t oldlocale = uselocale(NULL);

304

locale_t newloc;

305

#elif defined(HAVE_SETLOCALE)

306

char *oldlocale = NULL;

307

#endif

308

309

✗

tok->char_offset = 0;

310

✗

tok->err = json_tokener_success;

311

312

/* this interface is presently not 64-bit clean due to the int len argument

313

* and the internal printbuf interface that takes 32-bit int len arguments

314

* so the function limits the maximum string size to INT32_MAX (2GB).

315

* If the function is called with len == -1 then strlen is called to check

316

* the string length is less than INT32_MAX (2GB)

317

*/

318

✗

if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX))

319

{

320

✗

tok->err = json_tokener_error_size;

321

✗

return NULL;

322

}

323

324

#ifdef HAVE_USELOCALE

325

{

326

✗

locale_t duploc = duplocale(oldlocale);

327

✗

newloc = newlocale(LC_NUMERIC_MASK, "C", duploc);

328

✗

if (newloc == NULL)

329

{

330

✗

freelocale(duploc);

331

✗

return NULL;

332

}

333

✗

uselocale(newloc);

334

}

335

#elif defined(HAVE_SETLOCALE)

336

{

337

char *tmplocale;

338

tmplocale = setlocale(LC_NUMERIC, NULL);

339

if (tmplocale)

340

oldlocale = strdup(tmplocale);

341

setlocale(LC_NUMERIC, "C");

}

#endif

✗

while (PEEK_CHAR(c, tok)) // Note: c might be '\0' !

{

redo_char:

✗

switch (state)

350

{

351

352

case json_tokener_state_eatws:

353

/* Advance until we change state */

354

✗

while (is_ws_char(c))

355

{

356

✗

if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))

357

✗

goto out;

358

}

359

✗

if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT))

360

{

361

✗

printbuf_reset(tok->pb);

362

✗

printbuf_memappend_fast(tok->pb, &c, 1);

363

✗

state = json_tokener_state_comment_start;

}

else

{

✗

state = saved_state;

368

✗

goto redo_char;

369

}

370

✗

break;

371

372

✗

case json_tokener_state_start:

373

✗

switch (c)

374

{

375

✗

case '{':

376

✗

state = json_tokener_state_eatws;

377

✗

saved_state = json_tokener_state_object_field_start;

378

✗

current = json_object_new_object();

379

✗

if (current == NULL)

380

✗

goto out;

381

break;

382

✗

case '[':

383

✗

state = json_tokener_state_eatws;

384

✗

saved_state = json_tokener_state_array;

385

✗

current = json_object_new_array();

386

✗

if (current == NULL)

387

✗

goto out;

388

break;

389

✗

case 'I':

390

case 'i':

391

✗

state = json_tokener_state_inf;

392

✗

printbuf_reset(tok->pb);

393

✗

tok->st_pos = 0;

394

✗

goto redo_char;

395

✗

case 'N':

396

case 'n':

397

✗

state = json_tokener_state_null; // or NaN

398

✗

printbuf_reset(tok->pb);

399

✗

tok->st_pos = 0;

400

✗

goto redo_char;

401

✗

case '\'':

402

✗

if (tok->flags & JSON_TOKENER_STRICT)

403

{

404

/* in STRICT mode only double-quote are allowed */

405

✗

tok->err = json_tokener_error_parse_unexpected;

406

✗

goto out;

}

/* FALLTHRU */

case '"':

✗

state = json_tokener_state_string;

411

✗

printbuf_reset(tok->pb);

412

✗

tok->quote_char = c;

413

✗

break;

414

✗

case 'T':

case 't':

case 'F':

case 'f':

✗

state = json_tokener_state_boolean;

419

✗

printbuf_reset(tok->pb);

420

✗

tok->st_pos = 0;

421

✗

goto redo_char;

422

✗

case '0':

case '1':

case '2':

case '3':

case '4':

case '5':

case '6':

case '7':

case '8':

case '9':

case '-':

✗

state = json_tokener_state_number;

434

✗

printbuf_reset(tok->pb);

435

✗

tok->is_double = 0;

436

✗

goto redo_char;

437

✗

default: tok->err = json_tokener_error_parse_unexpected; goto out;

}

break;

✗

case json_tokener_state_finish:

442

✗

if (tok->depth == 0)

443

✗

goto out;

444

✗

obj = json_object_get(current);

445

✗

json_tokener_reset_level(tok, tok->depth);

446

✗

tok->depth--;

447

✗

goto redo_char;

448

449

case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */

450

{

451

/* If we were guaranteed to have len set, then we could (usually) handle

452

* the entire "Infinity" check in a single strncmp (strncasecmp), but

453

* since len might be -1 (i.e. "read until \0"), we need to check it

454

* a character at a time.

455

* Trying to handle it both ways would make this code considerably more

456

* complicated with likely little performance benefit.

*/

int is_negative = 0;

/* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */

461

✗

while (tok->st_pos < (int)json_inf_str_len)

462

{

463

✗

char inf_char = *str;

464

✗

if (inf_char != json_inf_str[tok->st_pos] &&

465

✗

((tok->flags & JSON_TOKENER_STRICT) ||

466

✗

inf_char != json_inf_str_invert[tok->st_pos])

467

)

468

{

469

✗

tok->err = json_tokener_error_parse_unexpected;

470

✗

goto out;

471

}

472

✗

tok->st_pos++;

473

✗

(void)ADVANCE_CHAR(str, tok);

474

✗

if (!PEEK_CHAR(c, tok))

475

{

476

/* out of input chars, for now at least */

477

✗

goto out;

478

}

479

}

480

/* We checked the full length of "Infinity", so create the object.

481

* When handling -Infinity, the number parsing code will have dropped

482

* the "-" into tok->pb for us, so check it now.

483

*/

484

✗

if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')

{

is_negative = 1;

}

✗

current = json_object_new_double(is_negative ? -INFINITY : INFINITY);

489

✗

if (current == NULL)

490

✗

goto out;

491

✗

saved_state = json_tokener_state_finish;

492

✗

state = json_tokener_state_eatws;

493

✗

goto redo_char;

494

}

495

break;

496

✗

case json_tokener_state_null: /* aka starts with 'n' */

{

int size;

int size_nan;

✗

printbuf_memappend_fast(tok->pb, &c, 1);

501

✗

size = json_min(tok->st_pos + 1, json_null_str_len);

502

✗

size_nan = json_min(tok->st_pos + 1, json_nan_str_len);

503

✗

if ((!(tok->flags & JSON_TOKENER_STRICT) &&

504

✗

strncasecmp(json_null_str, tok->pb->buf, size) == 0) ||

505

✗

(strncmp(json_null_str, tok->pb->buf, size) == 0))

506

{

507

✗

if (tok->st_pos == json_null_str_len)

508

{

509

✗

current = NULL;

510

✗

saved_state = json_tokener_state_finish;

511

✗

state = json_tokener_state_eatws;

512

✗

goto redo_char;

513

}

514

}

515

✗

else if ((!(tok->flags & JSON_TOKENER_STRICT) &&

516

✗

strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||

517

✗

(strncmp(json_nan_str, tok->pb->buf, size_nan) == 0))

518

{

519

✗

if (tok->st_pos == json_nan_str_len)

520

{

521

✗

current = json_object_new_double(NAN);

522

✗

if (current == NULL)

523

✗

goto out;

524

✗

saved_state = json_tokener_state_finish;

525

✗

state = json_tokener_state_eatws;

526

✗

goto redo_char;

}

}

else

{

✗

tok->err = json_tokener_error_parse_null;

532

✗

goto out;

533

}

534

✗

tok->st_pos++;

535

}

536

✗

break;

537

538

✗

case json_tokener_state_comment_start:

539

✗

if (c == '*')

540

{

541

✗

state = json_tokener_state_comment;

542

}

543

✗

else if (c == '/')

544

{

545

✗

state = json_tokener_state_comment_eol;

}

else

{

✗

tok->err = json_tokener_error_parse_comment;

550

✗

goto out;

551

}

552

✗

printbuf_memappend_fast(tok->pb, &c, 1);

553

break;

554

555

case json_tokener_state_comment:

556

{

557

/* Advance until we change state */

558

const char *case_start = str;

559

✗

while (c != '*')

560

{

561

✗

if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))

562

{

563

✗

printbuf_memappend_fast(tok->pb, case_start,

564

str - case_start);

565

✗

goto out;

566

}

567

}

568

✗

printbuf_memappend_fast(tok->pb, case_start, 1 + str - case_start);

569

✗

state = json_tokener_state_comment_end;

570

}

571

✗

break;

572

573

case json_tokener_state_comment_eol:

574

{

575

/* Advance until we change state */

576

const char *case_start = str;

577

✗

while (c != '\n')

578

{

579

✗

if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))

580

{

581

✗

printbuf_memappend_fast(tok->pb, case_start,

582

str - case_start);

583

✗

goto out;

584

}

585

}

586

✗

printbuf_memappend_fast(tok->pb, case_start, str - case_start);

587

MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);

588

✗

state = json_tokener_state_eatws;

589

}

590

✗

break;

591

592

✗

case json_tokener_state_comment_end:

593

✗

printbuf_memappend_fast(tok->pb, &c, 1);

594

✗

if (c == '/')

595

{

596

MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);

597

✗

state = json_tokener_state_eatws;

}

else

{

✗

state = json_tokener_state_comment;

}

break;

case json_tokener_state_string:

606

{

607

/* Advance until we change state */

608

const char *case_start = str;

609

while (1)

610

{

611

✗

if (c == tok->quote_char)

612

{

613

✗

printbuf_memappend_fast(tok->pb, case_start,

614

str - case_start);

615

✗

current =

616

✗

json_object_new_string_len(tok->pb->buf, tok->pb->bpos);

617

✗

if (current == NULL)

618

✗

goto out;

619

✗

saved_state = json_tokener_state_finish;

620

✗

state = json_tokener_state_eatws;

621

✗

break;

622

}

623

✗

else if (c == '\\')

624

{

625

✗

printbuf_memappend_fast(tok->pb, case_start,

626

str - case_start);

627

✗

saved_state = json_tokener_state_string;

628

✗

state = json_tokener_state_string_escape;

629

✗

break;

630

}

631

✗

if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))

632

{

633

✗

printbuf_memappend_fast(tok->pb, case_start,

634

str - case_start);

635

✗

goto out;

}

}

}

break;

✗

case json_tokener_state_string_escape:

642

✗

switch (c)

643

{

644

✗

case '"':

645

case '\\':

646

case '/':

647

✗

printbuf_memappend_fast(tok->pb, &c, 1);

648

✗

state = saved_state;

649

✗

break;

650

✗

case 'b':

case 'n':

case 'r':

case 't':

case 'f':

✗

if (c == 'b')

656

✗

printbuf_memappend_fast(tok->pb, "\b", 1);

657

✗

else if (c == 'n')

658

✗

printbuf_memappend_fast(tok->pb, "\n", 1);

659

✗

else if (c == 'r')

660

✗

printbuf_memappend_fast(tok->pb, "\r", 1);

661

✗

else if (c == 't')

662

✗

printbuf_memappend_fast(tok->pb, "\t", 1);

663

else if (c == 'f')

664

✗

printbuf_memappend_fast(tok->pb, "\f", 1);

665

✗

state = saved_state;

666

✗

break;

667

✗

case 'u':

668

✗

tok->ucs_char = 0;

669

✗

tok->st_pos = 0;

670

✗

state = json_tokener_state_escape_unicode;

671

✗

break;

672

✗

default: tok->err = json_tokener_error_parse_string; goto out;

}

break;

// ===================================================

677

678

✗

case json_tokener_state_escape_unicode:

679

{

680

/* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */

681

while (1)

682

{

683

✗

if (!c || !is_hex_char(c))

684

{

685

✗

tok->err = json_tokener_error_parse_string;

686

✗

goto out;

687

}

688

✗

tok->ucs_char |=

689

✗

((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4));

690

✗

tok->st_pos++;

691

✗

if (tok->st_pos >= 4)

692

break;

693

694

✗

(void)ADVANCE_CHAR(str, tok);

695

✗

if (!PEEK_CHAR(c, tok))

696

{

697

/*

698

* We're out of characters in the current call to

699

* json_tokener_parse(), but a subsequent call might

700

* provide us with more, so leave our current state

701

* as-is (including tok->high_surrogate) and return.

702

*/

703

✗

goto out;

704

}

705

}

706

✗

tok->st_pos = 0;

707

708

/* Now, we have a full \uNNNN sequence in tok->ucs_char */

709

710

/* If the *previous* sequence was a high surrogate ... */

711

✗

if (tok->high_surrogate)

712

{

713

✗

if (IS_LOW_SURROGATE(tok->ucs_char))

714

{

715

/* Recalculate the ucs_char, then fall thru to process normally */

716

✗

tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate,

tok->ucs_char);

}

else

{

/* High surrogate was not followed by a low surrogate

722

* Replace the high and process the rest normally

723

*/

724

✗

printbuf_memappend_fast(tok->pb,

725

(char *)utf8_replacement_char, 3);

726

}

727

✗

tok->high_surrogate = 0;

728

}

729

730

✗

if (tok->ucs_char < 0x80)

731

{

732

unsigned char unescaped_utf[1];

733

✗

unescaped_utf[0] = tok->ucs_char;

734

✗

printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 1);

735

}

736

✗

else if (tok->ucs_char < 0x800)

737

{

738

unsigned char unescaped_utf[2];

739

✗

unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);

740

✗

unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);

741

✗

printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 2);

742

}

743

✗

else if (IS_HIGH_SURROGATE(tok->ucs_char))

744

{

745

/*

746

* The next two characters should be \u, HOWEVER,

747

* we can't simply peek ahead here, because the

748

* characters we need might not be passed to us

749

* until a subsequent call to json_tokener_parse.

750

* Instead, transition through a couple of states.

751

* (now):

752

* _escape_unicode => _unicode_need_escape

753

* (see a '\\' char):

754

* _unicode_need_escape => _unicode_need_u

755

* (see a 'u' char):

756

* _unicode_need_u => _escape_unicode

757

* ...and we'll end up back around here.

758

*/

759

✗

tok->high_surrogate = tok->ucs_char;

760

✗

tok->ucs_char = 0;

761

✗

state = json_tokener_state_escape_unicode_need_escape;

762

✗

break;

763

}

764

✗

else if (IS_LOW_SURROGATE(tok->ucs_char))

765

{

766

/* Got a low surrogate not preceded by a high */

767

✗

printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);

768

}

769

✗

else if (tok->ucs_char < 0x10000)

770

{

771

unsigned char unescaped_utf[3];

772

✗

unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);

773

✗

unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);

774

✗

unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);

775

✗

printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 3);

776

}

777

✗

else if (tok->ucs_char < 0x110000)

778

{

779

unsigned char unescaped_utf[4];

780

✗

unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);

781

✗

unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);

782

✗

unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);

783

✗

unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);

784

✗

printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 4);

}

else

{

/* Don't know what we got--insert the replacement char */

789

✗

printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);

790

}

791

✗

state = saved_state; // i.e. _state_string or _state_object_field

792

}

793

✗

break;

794

795

✗

case json_tokener_state_escape_unicode_need_escape:

796

// We get here after processing a high_surrogate

797

// require a '\\' char

798

✗

if (!c || c != '\\')

799

{

800

/* Got a high surrogate without another sequence following

801

* it. Put a replacement char in for the high surrogate

802

* and pop back up to _state_string or _state_object_field.

803

*/

804

✗

printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);

805

✗

tok->high_surrogate = 0;

806

✗

tok->ucs_char = 0;

807

✗

tok->st_pos = 0;

808

✗

state = saved_state;

809

✗

goto redo_char;

810

}

811

✗

state = json_tokener_state_escape_unicode_need_u;

812

✗

break;

813

814

✗

case json_tokener_state_escape_unicode_need_u:

815

/* We already had a \ char, check that it's \u */

816

✗

if (!c || c != 'u')

817

{

818

/* Got a high surrogate with some non-unicode escape

819

* sequence following it.

820

* Put a replacement char in for the high surrogate

821

* and handle the escape sequence normally.

822

*/

823

✗

printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);

824

✗

tok->high_surrogate = 0;

825

✗

tok->ucs_char = 0;

826

✗

tok->st_pos = 0;

827

✗

state = json_tokener_state_string_escape;

828

✗

goto redo_char;

829

}

830

✗

state = json_tokener_state_escape_unicode;

831

✗

break;

832

833

// ===================================================

834

835

✗

case json_tokener_state_boolean:

836

{

837

int size1, size2;

838

✗

printbuf_memappend_fast(tok->pb, &c, 1);

839

✗

size1 = json_min(tok->st_pos + 1, json_true_str_len);

840

✗

size2 = json_min(tok->st_pos + 1, json_false_str_len);

841

✗

if ((!(tok->flags & JSON_TOKENER_STRICT) &&

842

✗

strncasecmp(json_true_str, tok->pb->buf, size1) == 0) ||

843

✗

(strncmp(json_true_str, tok->pb->buf, size1) == 0))

844

{

845

✗

if (tok->st_pos == json_true_str_len)

846

{

847

✗

current = json_object_new_boolean(1);

848

✗

if (current == NULL)

849

✗

goto out;

850

✗

saved_state = json_tokener_state_finish;

851

✗

state = json_tokener_state_eatws;

852

✗

goto redo_char;

853

}

854

}

855

✗

else if ((!(tok->flags & JSON_TOKENER_STRICT) &&

856

✗

strncasecmp(json_false_str, tok->pb->buf, size2) == 0) ||

857

✗

(strncmp(json_false_str, tok->pb->buf, size2) == 0))

858

{

859

✗

if (tok->st_pos == json_false_str_len)

860

{

861

✗

current = json_object_new_boolean(0);

862

✗

if (current == NULL)

863

✗

goto out;

864

✗

saved_state = json_tokener_state_finish;

865

✗

state = json_tokener_state_eatws;

866

✗

goto redo_char;

}

}

else

{

✗

tok->err = json_tokener_error_parse_boolean;

872

✗

goto out;

873

}

874

✗

tok->st_pos++;

875

}

876

✗

break;

877

878

✗

case json_tokener_state_number:

879

{

880

/* Advance until we change state */

881

const char *case_start = str;

int case_len = 0;

int is_exponent = 0;

int neg_sign_ok = 1;

int pos_sign_ok = 0;

✗

if (printbuf_length(tok->pb) > 0)

887

{

888

/* We don't save all state from the previous incremental parse

889

so we need to re-generate it based on the saved string so far.

890

*/

891

✗

char *e_loc = strchr(tok->pb->buf, 'e');

892

✗

if (!e_loc)

893

✗

e_loc = strchr(tok->pb->buf, 'E');

894

✗

if (e_loc)

895

{

896

✗

char *last_saved_char =

897

✗

&tok->pb->buf[printbuf_length(tok->pb) - 1];

898

is_exponent = 1;

899

pos_sign_ok = neg_sign_ok = 1;

900

/* If the "e" isn't at the end, we can't start with a '-' */

901

✗

if (e_loc != last_saved_char)

{

neg_sign_ok = 0;

pos_sign_ok = 0;

}

// else leave it set to 1, i.e. start of the new input

}

}

✗

while (c && ((c >= '0' && c <= '9') ||

911

✗

(!is_exponent && (c == 'e' || c == 'E')) ||

912

✗

(neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') ||

913

✗

(!tok->is_double && c == '.')))

914

{

915

pos_sign_ok = neg_sign_ok = 0;

916

✗

++case_len;

917

918

/* non-digit characters checks */

919

/* note: since the main loop condition to get here was

920

* an input starting with 0-9 or '-', we are

921

* protected from input starting with '.' or

* e/E.

*/

switch (c)

{

✗

case '.':

927

✗

tok->is_double = 1;

928

pos_sign_ok = 1;

929

neg_sign_ok = 1;

930

✗

break;

931

✗

case 'e': /* FALLTHRU */

932

case 'E':

933

is_exponent = 1;

934

✗

tok->is_double = 1;

935

/* the exponent part can begin with a negative sign */

936

pos_sign_ok = neg_sign_ok = 1;

937

✗

break;

default: break;

}

✗

if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))

942

{

943

✗

printbuf_memappend_fast(tok->pb, case_start, case_len);

944

✗

goto out;

}

}

/*

Now we know c isn't a valid number char, but check whether

949

it might have been intended to be, and return a potentially

950

more understandable error right away.

951

However, if we're at the top-level, use the number as-is

952

because c can be part of a new object to parse on the

953

next call to json_tokener_parse().

954

*/

955

✗

if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' &&

956

✗

c != 'I' && c != 'i' && !is_ws_char(c))

957

{

958

✗

tok->err = json_tokener_error_parse_number;

959

✗

goto out;

960

}

961

✗

if (case_len > 0)

962

✗

printbuf_memappend_fast(tok->pb, case_start, case_len);

963

964

// Check for -Infinity

965

✗

if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I'))

966

{

967

✗

state = json_tokener_state_inf;

968

✗

tok->st_pos = 0;

969

✗

goto redo_char;

970

}

971

✗

if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT))

972

{

973

/* Trim some chars off the end, to allow things

974

like "123e+" to parse ok. */

975

✗

while (printbuf_length(tok->pb) > 1)

976

{

977

✗

char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1];

978

✗

if (last_char != 'e' && last_char != 'E' &&

979

✗

last_char != '-' && last_char != '+')

{

break;

}

✗

tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0';

984

✗

printbuf_length(tok->pb)--;

}

}

}

{

int64_t num64;

uint64_t numuint64;

double numd;

✗

if (!tok->is_double && tok->pb->buf[0] == '-' &&

993

✗

json_parse_int64(tok->pb->buf, &num64) == 0)

994

{

995

✗

if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT))

996

{

997

✗

tok->err = json_tokener_error_parse_number;

998

✗

goto out;

999

}

1000

✗

current = json_object_new_int64(num64);

1001

✗

if (current == NULL)

1002

✗

goto out;

1003

}

1004

✗

else if (!tok->is_double && tok->pb->buf[0] != '-' &&

1005

✗

json_parse_uint64(tok->pb->buf, &numuint64) == 0)

1006

{

1007

✗

if (errno == ERANGE && (tok->flags & JSON_TOKENER_STRICT))

1008

{

1009

✗

tok->err = json_tokener_error_parse_number;

1010

✗

goto out;

1011

}

1012

✗

if (numuint64 && tok->pb->buf[0] == '0' &&

1013

✗

(tok->flags & JSON_TOKENER_STRICT))

1014

{

1015

✗

tok->err = json_tokener_error_parse_number;

1016

✗

goto out;

1017

}

1018

✗

if (numuint64 <= INT64_MAX)

1019

{

1020

✗

num64 = (uint64_t)numuint64;

1021

✗

current = json_object_new_int64(num64);

1022

✗

if (current == NULL)

1023

✗

goto out;

}

else

{

✗

current = json_object_new_uint64(numuint64);

1028

✗

if (current == NULL)

1029

✗

goto out;

1030

}

1031

}

1032

✗

else if (tok->is_double &&

1033

✗

json_tokener_parse_double(

1034

✗

tok->pb->buf, printbuf_length(tok->pb), &numd) == 0)

1035

{

1036

✗

current = json_object_new_double_s(numd, tok->pb->buf);

1037

✗

if (current == NULL)

1038

✗

goto out;

}

else

{

✗

tok->err = json_tokener_error_parse_number;

1043

✗

goto out;

1044

}

1045

✗

saved_state = json_tokener_state_finish;

1046

✗

state = json_tokener_state_eatws;

1047

✗

goto redo_char;

}

break;

✗

case json_tokener_state_array_after_sep:

1052

case json_tokener_state_array:

1053

✗

if (c == ']')

1054

{

1055

// Minimize memory usage; assume parsed objs are unlikely to be changed

1056

✗

json_object_array_shrink(current, 0);

1057

1058

✗

if (state == json_tokener_state_array_after_sep &&

1059

✗

(tok->flags & JSON_TOKENER_STRICT))

1060

{

1061

✗

tok->err = json_tokener_error_parse_unexpected;

1062

✗

goto out;

1063

}

1064

✗

saved_state = json_tokener_state_finish;

1065

✗

state = json_tokener_state_eatws;

}

else

{

✗

if (tok->depth >= tok->max_depth - 1)

1070

{

1071

✗

tok->err = json_tokener_error_depth;

1072

✗

goto out;

1073

}

1074

✗

state = json_tokener_state_array_add;

1075

✗

tok->depth++;

1076

✗

json_tokener_reset_level(tok, tok->depth);

1077

✗

goto redo_char;

1078

}

1079

✗

break;

1080

1081

✗

case json_tokener_state_array_add:

1082

✗

if (json_object_array_add(current, obj) != 0)

1083

✗

goto out;

1084

✗

saved_state = json_tokener_state_array_sep;

1085

✗

state = json_tokener_state_eatws;

1086

✗

goto redo_char;

1087

1088

✗

case json_tokener_state_array_sep:

1089

✗

if (c == ']')

1090

{

1091

// Minimize memory usage; assume parsed objs are unlikely to be changed

1092

✗

json_object_array_shrink(current, 0);

1093

1094

✗

saved_state = json_tokener_state_finish;

1095

✗

state = json_tokener_state_eatws;

1096

}

1097

✗

else if (c == ',')

1098

{

1099

✗

saved_state = json_tokener_state_array_after_sep;

1100

✗

state = json_tokener_state_eatws;

}

else

{

✗

tok->err = json_tokener_error_parse_array;

1105

✗

goto out;

}

break;

✗

case json_tokener_state_object_field_start:

1110

case json_tokener_state_object_field_start_after_sep:

1111

✗

if (c == '}')

1112

{

1113

✗

if (state == json_tokener_state_object_field_start_after_sep &&

1114

✗

(tok->flags & JSON_TOKENER_STRICT))

1115

{

1116

✗

tok->err = json_tokener_error_parse_unexpected;

1117

✗

goto out;

1118

}

1119

✗

saved_state = json_tokener_state_finish;

1120

✗

state = json_tokener_state_eatws;

1121

}

1122

✗

else if (c == '"' || c == '\'')

1123

{

1124

✗

tok->quote_char = c;

1125

✗

printbuf_reset(tok->pb);

1126

✗

state = json_tokener_state_object_field;

}

else

{

✗

tok->err = json_tokener_error_parse_object_key_name;

1131

✗

goto out;

}

break;

case json_tokener_state_object_field:

1136

{

1137

/* Advance until we change state */

1138

const char *case_start = str;

1139

while (1)

1140

{

1141

✗

if (c == tok->quote_char)

1142

{

1143

✗

printbuf_memappend_fast(tok->pb, case_start,

1144

str - case_start);

1145

✗

obj_field_name = strdup(tok->pb->buf);

1146

✗

saved_state = json_tokener_state_object_field_end;

1147

✗

state = json_tokener_state_eatws;

1148

✗

break;

1149

}

1150

✗

else if (c == '\\')

1151

{

1152

✗

printbuf_memappend_fast(tok->pb, case_start,

1153

str - case_start);

1154

✗

saved_state = json_tokener_state_object_field;

1155

✗

state = json_tokener_state_string_escape;

1156

✗

break;

1157

}

1158

✗

if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))

1159

{

1160

✗

printbuf_memappend_fast(tok->pb, case_start,

1161

str - case_start);

1162

✗

goto out;

}

}

}

break;

✗

case json_tokener_state_object_field_end:

1169

✗

if (c == ':')

1170

{

1171

✗

saved_state = json_tokener_state_object_value;

1172

✗

state = json_tokener_state_eatws;

}

else

{

✗

tok->err = json_tokener_error_parse_object_key_sep;

1177

✗

goto out;

1178

}

1179

✗

break;

1180

1181

✗

case json_tokener_state_object_value:

1182

✗

if (tok->depth >= tok->max_depth - 1)

1183

{

1184

✗

tok->err = json_tokener_error_depth;

1185

✗

goto out;

1186

}

1187

✗

state = json_tokener_state_object_value_add;

1188

✗

tok->depth++;

1189

✗

json_tokener_reset_level(tok, tok->depth);

1190

✗

goto redo_char;

1191

1192

✗

case json_tokener_state_object_value_add:

1193

✗

json_object_object_add(current, obj_field_name, obj);

1194

✗

free(obj_field_name);

1195

✗

obj_field_name = NULL;

1196

✗

saved_state = json_tokener_state_object_sep;

1197

✗

state = json_tokener_state_eatws;

1198

✗

goto redo_char;

1199

1200

✗

case json_tokener_state_object_sep:

1201

/* { */

1202

✗

if (c == '}')

1203

{

1204

✗

saved_state = json_tokener_state_finish;

1205

✗

state = json_tokener_state_eatws;

1206

}

1207

✗

else if (c == ',')

1208

{

1209

✗

saved_state = json_tokener_state_object_field_start_after_sep;

1210

✗

state = json_tokener_state_eatws;

}

else

{

✗

tok->err = json_tokener_error_parse_object_value_sep;

1215

✗

goto out;

}

break;

}

✗

(void)ADVANCE_CHAR(str, tok);

1220

✗

if (!c) // This is the char *before* advancing

1221

break;

1222

} /* while(PEEK_CHAR) */

1223

1224

✗

out:

1225

✗

if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0))

1226

{

1227

✗

tok->err = json_tokener_error_parse_utf8_string;

1228

}

1229

✗

if (c && (state == json_tokener_state_finish) && (tok->depth == 0) &&

1230

✗

(tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) ==

1231

JSON_TOKENER_STRICT)

1232

{

1233

/* unexpected char after JSON data */

1234

✗

tok->err = json_tokener_error_parse_unexpected;

1235

}

1236

✗

if (!c)

1237

{

1238

/* We hit an eof char (0) */

1239

✗

if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish)

1240

✗

tok->err = json_tokener_error_parse_eof;

1241

}

1242

1243

#ifdef HAVE_USELOCALE

1244

✗

uselocale(oldlocale);

1245

✗

freelocale(newloc);

1246

#elif defined(HAVE_SETLOCALE)

1247

setlocale(LC_NUMERIC, oldlocale);

free(oldlocale);

#endif

✗

if (tok->err == json_tokener_success)

1252

{

1253

✗

json_object *ret = json_object_get(current);

1254

int ii;

1255

1256

/* Partially reset, so we parse additional objects on subsequent calls. */

1257

✗

for (ii = tok->depth; ii >= 0; ii--)

1258

✗

json_tokener_reset_level(tok, ii);

return ret;

}

MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err],

tok->char_offset);

return NULL;

}

✗

/*
 * Incremental, one-byte-at-a-time structural check of a UTF-8 stream.
 *
 * c       - the next input byte.
 * nBytes  - in/out: number of continuation bytes still expected for the
 *           sequence currently being read; 0 means "at a sequence boundary".
 *
 * Returns 1 if the byte is acceptable at this position, 0 otherwise.
 * On return *nBytes is updated: set from a lead byte, or decremented by a
 * continuation byte.  A non-zero *nBytes once the input is exhausted means
 * the data stopped in the middle of a sequence; json_tokener_parse_ex()
 * reports that case itself by testing nBytes on exit.
 *
 * Lead bytes 0xC0/0xC1 (overlong encodings) and 0xF5 and above (beyond
 * U+10FFFF) are rejected, per RFC 3629.  This is still only a structural
 * check: second-byte range restrictions (overlong 0xE0/0xF0 forms, encoded
 * surrogates, 0xF4 0x90+) are not enforced here.
 */
static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
{
	const unsigned char chr = (unsigned char)c;

	if (*nBytes != 0)
	{
		/* Inside a multi-byte sequence: only 10xxxxxx may follow. */
		if ((chr & 0xC0) != 0x80)
			return 0;
		(*nBytes)--;
		return 1;
	}

	/* At a sequence boundary: plain ASCII needs no further bytes. */
	if (chr < 0x80)
		return 1;

	if (chr >= 0xc2 && chr <= 0xdf)
		*nBytes = 1; /* 110xxxxx, excluding overlong 0xC0/0xC1 */
	else if ((chr & 0xf0) == 0xe0)
		*nBytes = 2; /* 1110xxxx */
	else if (chr >= 0xf0 && chr <= 0xf4)
		*nBytes = 3; /* 11110xxx, capped at the U+10FFFF lead byte */
	else
		return 0; /* stray continuation byte or invalid lead byte */

	return 1;
}

✗

void json_tokener_set_flags(struct json_tokener *tok, int flags)

1294

{

1295

✗

tok->flags = flags;

1296

}

1297

1298

✗

size_t json_tokener_get_parse_end(struct json_tokener *tok)

1299

{

1300

assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */

1301

✗

return (size_t)tok->char_offset;

1302

}

1303

1304

✗

/*
 * Convert the NUL-terminated text in buf to a double using strtod().
 *
 * buf    - NUL-terminated candidate number text.
 * len    - number of bytes of buf that must make up the number.
 * retval - out: always receives strtod()'s result, even on failure.
 *
 * Returns 0 when strtod() converted something and stopped exactly at
 * buf + len, i.e. the whole len-byte span is one number; returns 1 otherwise.
 * Empty input (len <= 0, or nothing converted) is a failure: without the
 * explicit check, "" with len 0 would satisfy buf + len == end and be
 * reported as a successfully parsed 0.0.
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
	char *end;

	*retval = strtod(buf, &end);

	/* Also keeps a negative len from forming a pointer before buf. */
	if (len <= 0 || end == buf)
		return 1;

	return (buf + len == end) ? 0 : 1;
}

1312