2 #include "structmember.h"
/* Fallback for interpreters older than Python 2.7 (PY_VERSION_HEX below
   0x02070000) that do not already provide PyOS_string_to_double: the name is
   redirected to a local json_PyOS_string_to_double that parses with
   PyOS_ascii_atof. The fallback asserts that endptr and overflow_exception
   are both NULL, so it only supports callers that pass NULL for them.
   The statement handed to PyFPE_START_PROTECT is return -1.0, which matches
   the d == -1.0 && PyErr_Occurred() test used by the number parser. */
3 #if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double)
4 #define PyOS_string_to_double json_PyOS_string_to_double
6 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
8 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) {
10 assert(endptr == NULL);
11 assert(overflow_exception == NULL);
12 PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
13 x = PyOS_ascii_atof(s);
/* Backfills for older CPython headers. Py_TYPE and Py_SIZE are supplied when
   building below 2.6 without them. Below 2.5 (detected by the absence of
   PY_SSIZE_T_MIN) Py_ssize_t becomes a plain int with INT_MAX / INT_MIN as
   its limits, and the PyInt Ssize_t conversions are mapped onto the
   long-based PyInt_FromLong / PyInt_AsLong. */
18 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
19 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
21 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE)
22 #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
24 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
25 typedef int Py_ssize_t;
26 #define PY_SSIZE_T_MAX INT_MAX
27 #define PY_SSIZE_T_MIN INT_MIN
28 #define PyInt_FromSsize_t PyInt_FromLong
29 #define PyInt_AsSsize_t PyInt_AsLong
/* Module-wide helper macros and the static type objects they refer to.
   - Py_IS_FINITE(X): X is neither infinite nor NaN.
   - UNUSED: GCC-style attribute placed after a parameter name that the
     function does not use (see the self parameters of the module functions).
   - DEFAULT_ENCODING: encoding py_scanstring falls back to when the caller
     supplies none.
   - The *_Check macros accept the type or anything PyObject_TypeCheck
     accepts for it; the *_CheckExact macros compare Py_TYPE directly.
   - Decimal_Check tests against the type DecimalTypePtr points to; where
     that pointer is assigned is not visible in this part of the file. */
32 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
36 #define UNUSED __attribute__((__unused__))
41 #define DEFAULT_ENCODING "utf-8"
43 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
44 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
45 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
46 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
47 #define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr))
49 static PyTypeObject PyScannerType;
50 static PyTypeObject PyEncoderType;
51 static PyTypeObject *DecimalTypePtr;
/* Instance layout of the scanner (decoder) type. Only some fields appear in
   this listing; scanner_members, scanner_traverse and scanner_clear also
   reference encoding, strict, pairs_hook and parse_int, and the object
   parsers use a memo dictionary. */
53 typedef struct _PyScannerObject {
57 PyObject *object_hook;
59 PyObject *parse_float;
61 PyObject *parse_constant;
/* Attributes of scanner objects exposed to Python. Every entry is a T_OBJECT
   slot flagged READONLY. Note that the Python-level name object_pairs_hook
   maps to the C field pairs_hook. */
65 static PyMemberDef scanner_members[] = {
66 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
67 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
68 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
69 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
70 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
71 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
72 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
/* Instance layout of the encoder type. Only the two separator fields appear
   in this listing; encoder_members additionally references markers,
   defaultfn, encoder, indent, sort_keys, skipkeys and key_memo. */
76 typedef struct _PyEncoderObject {
82 PyObject *key_separator;
83 PyObject *item_separator;
/* Attributes of encoder objects exposed to Python. Every entry is a T_OBJECT
   slot flagged READONLY. Note that the Python-level name default maps to the
   C field defaultfn. */
92 static PyMemberDef encoder_members[] = {
93 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
94 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
95 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
96 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
97 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
98 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
99 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
100 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
101 {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
/* Forward declarations for the helpers of this module: string escaping,
   the str and unicode scanners, the scanner and encoder type slots, the
   encoder workers, error reporting and the Py_ssize_t converters.
   NOTE(review): apart from init_speedups, the return type of each prototype
   is not visible in this listing; presumably it sits on the preceding line. */
106 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
108 ascii_escape_unicode(PyObject *pystr);
110 ascii_escape_str(PyObject *pystr);
112 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
113 void init_speedups(void);
115 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
117 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
119 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
121 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
123 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
125 scanner_dealloc(PyObject *self);
127 scanner_clear(PyObject *self);
129 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
131 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
133 encoder_dealloc(PyObject *self);
135 encoder_clear(PyObject *self);
137 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
139 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
141 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
143 _encoded_const(PyObject *obj);
145 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
147 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
149 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
151 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
153 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
/* S_CHAR(c): true when c is a printable ASCII character (' ' through '~')
   that can be copied into a JSON string unescaped, i.e. anything in that
   range except backslash and double quote.
   Every use of the argument is parenthesized so that a compound expression
   (for example a conditional expression) expands with the intended
   precedence. The argument is still evaluated several times, so it must not
   have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true when c is one of the four JSON whitespace
   characters: space, tab, line feed or carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
/* Output bytes one input character can expand to when escaped.
   MIN_EXPANSION (6) is the length of a single \uXXXX group. When
   Py_UNICODE_WIDE is defined MAX_EXPANSION is twice that, which covers an
   escaped surrogate pair; the other definition makes it equal to
   MIN_EXPANSION. */
158 #define MIN_EXPANSION 6
159 #ifdef Py_UNICODE_WIDE
160 #define MAX_EXPANSION (2 * MIN_EXPANSION)
162 #define MAX_EXPANSION MIN_EXPANSION
/* Converter with the (object, address) shape that py_scanstring passes to
   PyArg_ParseTuple for its O& argument: stores PyInt_AsSsize_t(o) into
   *size_ptr. A stored -1 is only treated specially when an exception is
   pending as well, since -1 on its own is a legitimate value. */
166 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
168 /* PyObject to Py_ssize_t converter */
169 *size_ptr = PyInt_AsSsize_t(o);
170 if (*size_ptr == -1 && PyErr_Occurred())
/* Converter in the opposite direction: builds a Python integer from the
   Py_ssize_t that size_ptr points to. raise_errmsg passes it, together with
   the address of the value, for the O& item of its call format. */
176 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
178 /* Py_ssize_t to PyObject converter */
179 return PyInt_FromSsize_t(*size_ptr);
/* Append the JSON escape sequence for c to output, starting at offset chars.
   A backslash is always written first. The visible cases then emit the short
   forms for backslash, double quote, \b, \f, \n, \r and \t; the remaining
   path writes u followed by four lowercase hexadecimal digits. In the
   Py_UNICODE_WIDE section, c - 0x10000 is split into a high surrogate
   (0xd800 | upper ten bits) and a low surrogate (0xdc00 | lower ten bits),
   which yields two \uXXXX groups.
   Callers assign the result back to their write offset
   (chars = ascii_escape_char(c, output, chars)). No bounds check is done
   here; the caller must leave enough room in output. */
183 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
185 /* Escape unicode code point c to ASCII escape sequences
186 in char *output. output must have at least 12 bytes unused to
187 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
188 output[chars++] = '\\';
190 case '\\': output[chars++] = (char)c; break;
191 case '"': output[chars++] = (char)c; break;
192 case '\b': output[chars++] = 'b'; break;
193 case '\f': output[chars++] = 'f'; break;
194 case '\n': output[chars++] = 'n'; break;
195 case '\r': output[chars++] = 'r'; break;
196 case '\t': output[chars++] = 't'; break;
198 #ifdef Py_UNICODE_WIDE
200 /* UTF-16 surrogate pair */
201 Py_UNICODE v = c - 0x10000;
202 c = 0xd800 | ((v >> 10) & 0x3ff);
203 output[chars++] = 'u';
204 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
205 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
206 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
207 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
208 c = 0xdc00 | (v & 0x3ff);
209 output[chars++] = '\\';
212 output[chars++] = 'u';
213 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
214 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
215 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
216 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
/* Build the ASCII-only JSON string literal for the PyUnicode pystr.
   The output starts and ends with a double quote. Characters are either
   copied through as a single byte or expanded by ascii_escape_char.
   Buffer management: the initial size is 2 (the two quotes) plus
   4 * MIN_EXPANSION plus the input length. After each character, if fewer
   than 1 + MAX_EXPANSION bytes remain, the size is doubled but never beyond
   max_output_size = 2 + input_chars * MAX_EXPANSION, and the resize is
   skipped when the size would not change. The buffer pointer is fetched
   again after a resize because rval is passed by address to
   _PyString_Resize. The string is finally resized to the chars actually
   written. */
222 ascii_escape_unicode(PyObject *pystr)
224 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
226 Py_ssize_t input_chars;
227 Py_ssize_t output_size;
228 Py_ssize_t max_output_size;
232 Py_UNICODE *input_unicode;
234 input_chars = PyUnicode_GET_SIZE(pystr);
235 input_unicode = PyUnicode_AS_UNICODE(pystr);
237 /* One char input can be up to 6 chars output, estimate 4 of these */
238 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
239 max_output_size = 2 + (input_chars * MAX_EXPANSION);
240 rval = PyString_FromStringAndSize(NULL, output_size);
244 output = PyString_AS_STRING(rval);
246 output[chars++] = '"';
247 for (i = 0; i < input_chars; i++) {
248 Py_UNICODE c = input_unicode[i];
250 output[chars++] = (char)c;
253 chars = ascii_escape_char(c, output, chars);
255 if (output_size - chars < (1 + MAX_EXPANSION)) {
256 /* There's more than four, so let's resize by a lot */
257 Py_ssize_t new_output_size = output_size * 2;
258 /* This is an upper bound */
259 if (new_output_size > max_output_size) {
260 new_output_size = max_output_size;
262 /* Make sure that the output size changed before resizing */
263 if (new_output_size != output_size) {
264 output_size = new_output_size;
265 if (_PyString_Resize(&rval, output_size) == -1) {
268 output = PyString_AS_STRING(rval);
272 output[chars++] = '"';
273 if (_PyString_Resize(&rval, chars) == -1) {
/* Build the ASCII-only JSON string literal for the byte string pystr.
   Phase 1 scans the bytes. Per the comments below, once a byte needs
   escaping the rest of the input is checked for non-ASCII bytes; if one is
   found the whole input is decoded as strict UTF-8 and the work is handed
   to ascii_escape_unicode.
   Phase 2 handles pure ASCII input. If the scan reached the end, the output
   is exactly the input plus two quotes. Otherwise the buffer gets the same
   initial head-room as in ascii_escape_unicode, the already-checked prefix
   of i bytes is copied with memcpy, and the remaining bytes are copied or
   escaped one at a time. Because ASCII never needs a surrogate pair, the
   head-room test and the size cap use MIN_EXPANSION. The result is finally
   resized to the chars actually written. */
280 ascii_escape_str(PyObject *pystr)
282 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
284 Py_ssize_t input_chars;
285 Py_ssize_t output_size;
291 input_chars = PyString_GET_SIZE(pystr);
292 input_str = PyString_AS_STRING(pystr);
294 /* Fast path for a string that's already ASCII */
295 for (i = 0; i < input_chars; i++) {
296 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
298 /* If we have to escape something, scan the string for unicode */
300 for (j = i; j < input_chars; j++) {
301 c = (Py_UNICODE)(unsigned char)input_str[j];
303 /* We hit a non-ASCII character, bail to unicode mode */
305 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
309 rval = ascii_escape_unicode(uni);
318 if (i == input_chars) {
319 /* Input is already ASCII */
320 output_size = 2 + input_chars;
323 /* One char input can be up to 6 chars output, estimate 4 of these */
324 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
326 rval = PyString_FromStringAndSize(NULL, output_size);
330 output = PyString_AS_STRING(rval);
333 /* We know that everything up to i is ASCII already */
335 memcpy(&output[1], input_str, i);
337 for (; i < input_chars; i++) {
338 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
340 output[chars++] = (char)c;
343 chars = ascii_escape_char(c, output, chars);
345 /* An ASCII char can't possibly expand to a surrogate! */
346 if (output_size - chars < (1 + MIN_EXPANSION)) {
347 /* There's more than four, so let's resize by a lot */
349 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
350 output_size = 2 + (input_chars * MIN_EXPANSION);
352 if (_PyString_Resize(&rval, output_size) == -1) {
355 output = PyString_AS_STRING(rval);
358 output[chars++] = '"';
359 if (_PyString_Resize(&rval, chars) == -1) {
/* Set a simplejson.decoder.JSONDecodeError as the current exception.
   msg is the message text, s the document being parsed and end the index
   the error refers to. The exception class is imported on first use and
   cached in a function-local static. The instance is built by calling the
   class with (msg, s, end), where end is converted to a Python integer by
   _convertPyInt_FromSsize_t, and is then installed with PyErr_SetObject. */
366 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
368 /* Raise simplejson.decoder.JSONDecodeError, constructed from
369 (msg, s, end), as the current Python exception */
370 static PyObject *JSONDecodeError = NULL;
372 if (JSONDecodeError == NULL) {
373 PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
376 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
378 if (JSONDecodeError == NULL)
381 exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
383 PyErr_SetObject(JSONDecodeError, exc);
/* Concatenate the list lst of unicode chunks into one unicode object.
   The bound join method of an empty unicode string is looked up on the
   first call and kept in a function-local static, so later calls only pay
   for the method call itself. */
389 join_list_unicode(PyObject *lst)
391 /* return u''.join(lst) */
392 static PyObject *joinfn = NULL;
393 if (joinfn == NULL) {
394 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
398 joinfn = PyObject_GetAttrString(ustr, "join");
403 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
/* Byte-string counterpart of join_list_unicode: concatenates the chunks in
   lst through the join method of an empty PyString, which is looked up once
   and cached in a function-local static. */
407 join_list_string(PyObject *lst)
409 /* return ''.join(lst) */
410 static PyObject *joinfn = NULL;
411 if (joinfn == NULL) {
412 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
416 joinfn = PyObject_GetAttrString(ustr, "join");
421 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
/* Package a parse result and the index that follows it as the 2-tuple
   (rval, idx). idx is converted with PyInt_FromSsize_t. Both items are
   stored with PyTuple_SET_ITEM, which takes over the references, so the
   caller gives up its reference to rval. */
425 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
426 /* return (rval, idx) tuple, stealing reference to rval */
430 steal a reference to rval, returns (rval, idx)
435 pyidx = PyInt_FromSsize_t(idx);
440 tpl = PyTuple_New(2);
446 PyTuple_SET_ITEM(tpl, 0, rval);
447 PyTuple_SET_ITEM(tpl, 1, pyidx);
/* Statement macro that relies on locals named chunk and chunks in the
   function that expands it (both scanstring functions declare them).
   When chunk is set, the chunks list is created on first use and chunk is
   appended to it.
   NOTE(review): the tail of the macro, including its failure handling, is
   not visible in this listing. */
451 #define APPEND_OLD_CHUNK \
452 if (chunk != NULL) { \
453 if (chunks == NULL) { \
454 chunks = PyList_New(0); \
455 if (chunks == NULL) { \
459 if (PyList_Append(chunks, chunk)) { \
/* Decode one JSON string literal from the byte string pystr.
   Parameters: end is the index just past the opening quote, so begin = end - 1
   is the quote itself and is used in the error messages; encoding names the
   codec used to decode chunks; strict != 0 rejects raw control characters
   (values up to 0x1f); next_end_ptr receives the index after the string.
   Outline of the visible code:
   - an end outside [0, len) raises ValueError "end is out of bounds";
   - the input is walked in runs that stop at a double quote or a backslash;
     a run is turned into a PyString and decoded with
     PyUnicode_FromEncodedObject;
   - simple backslash escapes map to their control characters, anything else
     reports "Invalid \escape";
   - a u escape reads four hexadecimal digits; on Py_UNICODE_WIDE builds a
     high surrogate must be followed by an escaped low surrogate and both are
     merged as 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)), while an
     unpaired high or low surrogate is reported as an error;
   - escaped characters become one-character unicode or string chunks;
   - when no chunk list was built the result is an empty PyString, otherwise
     the chunks are concatenated with join_list_string. */
466 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
468 /* Read the JSON string from PyString pystr.
469 end is the index of the first character after the quote.
470 encoding is the encoding of pystr (must be an ASCII superset)
471 if strict is zero then literal control characters are allowed
472 *next_end_ptr is a return-by-reference index of the character
475 Return value is a new PyString (if ASCII-only) or PyUnicode
478 Py_ssize_t len = PyString_GET_SIZE(pystr);
479 Py_ssize_t begin = end - 1;
480 Py_ssize_t next = begin;
482 char *buf = PyString_AS_STRING(pystr);
483 PyObject *chunks = NULL;
484 PyObject *chunk = NULL;
486 if (end < 0 || len <= end) {
487 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
491 /* Find the end of the string or the next escape */
493 for (next = end; next < len; next++) {
494 c = (unsigned char)buf[next];
495 if (c == '"' || c == '\\') {
498 else if (strict && c <= 0x1f) {
499 raise_errmsg("Invalid control character at", pystr, next);
506 if (!(c == '"' || c == '\\')) {
507 raise_errmsg("Unterminated string starting at", pystr, begin);
510 /* Pick up this chunk if it's not zero length */
514 strchunk = PyString_FromStringAndSize(&buf[end], next - end);
515 if (strchunk == NULL) {
519 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
535 raise_errmsg("Unterminated string starting at", pystr, begin);
540 /* Non-unicode backslash escapes */
546 case 'b': c = '\b'; break;
547 case 'f': c = '\f'; break;
548 case 'n': c = '\n'; break;
549 case 'r': c = '\r'; break;
550 case 't': c = '\t'; break;
554 raise_errmsg("Invalid \\escape", pystr, end - 2);
563 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
566 /* Decode 4 hex digits */
567 for (; next < end; next++) {
568 Py_UNICODE digit = buf[next];
571 case '0': case '1': case '2': case '3': case '4':
572 case '5': case '6': case '7': case '8': case '9':
573 c |= (digit - '0'); break;
574 case 'a': case 'b': case 'c': case 'd': case 'e':
576 c |= (digit - 'a' + 10); break;
577 case 'A': case 'B': case 'C': case 'D': case 'E':
579 c |= (digit - 'A' + 10); break;
581 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
585 #ifdef Py_UNICODE_WIDE
587 if ((c & 0xfc00) == 0xd800) {
589 if (end + 6 >= len) {
590 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
593 if (buf[next++] != '\\' || buf[next++] != 'u') {
594 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
598 /* Decode 4 hex digits */
599 for (; next < end; next++) {
601 Py_UNICODE digit = buf[next];
603 case '0': case '1': case '2': case '3': case '4':
604 case '5': case '6': case '7': case '8': case '9':
605 c2 |= (digit - '0'); break;
606 case 'a': case 'b': case 'c': case 'd': case 'e':
608 c2 |= (digit - 'a' + 10); break;
609 case 'A': case 'B': case 'C': case 'D': case 'E':
611 c2 |= (digit - 'A' + 10); break;
613 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
617 if ((c2 & 0xfc00) != 0xdc00) {
618 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
621 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
623 else if ((c & 0xfc00) == 0xdc00) {
624 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
634 chunk = PyUnicode_FromUnicode(&c, 1);
640 char c_char = Py_CHARMASK(c);
641 chunk = PyString_FromStringAndSize(&c_char, 1);
648 if (chunks == NULL) {
652 rval = PyString_FromStringAndSize("", 0);
656 rval = join_list_string(chunks);
/* Decode one JSON string literal from the unicode object pystr.
   This mirrors scanstring_str but works on Py_UNICODE data, so no encoding
   is needed: runs between escapes are copied with PyUnicode_FromUnicode and
   every escaped character becomes a one-character unicode chunk.
   Parameters: end is the index just past the opening quote (begin = end - 1
   is used in error messages); strict != 0 rejects raw control characters
   (values up to 0x1f); next_end_ptr receives the index after the string.
   An end outside [0, len) raises ValueError "end is out of bounds".
   Surrogate handling on Py_UNICODE_WIDE builds matches scanstring_str: a
   high surrogate must be followed by an escaped low surrogate and the pair
   is merged into a single code point, otherwise an unpaired surrogate error
   is reported. When no chunk list was built the result is an empty unicode
   object, otherwise the chunks are concatenated with join_list_unicode. */
674 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
676 /* Read the JSON string from PyUnicode pystr.
677 end is the index of the first character after the quote.
678 if strict is zero then literal control characters are allowed
679 *next_end_ptr is a return-by-reference index of the character
682 Return value is a new PyUnicode
685 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
686 Py_ssize_t begin = end - 1;
687 Py_ssize_t next = begin;
688 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
689 PyObject *chunks = NULL;
690 PyObject *chunk = NULL;
692 if (end < 0 || len <= end) {
693 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
697 /* Find the end of the string or the next escape */
699 for (next = end; next < len; next++) {
701 if (c == '"' || c == '\\') {
704 else if (strict && c <= 0x1f) {
705 raise_errmsg("Invalid control character at", pystr, next);
709 if (!(c == '"' || c == '\\')) {
710 raise_errmsg("Unterminated string starting at", pystr, begin);
713 /* Pick up this chunk if it's not zero length */
716 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
727 raise_errmsg("Unterminated string starting at", pystr, begin);
732 /* Non-unicode backslash escapes */
738 case 'b': c = '\b'; break;
739 case 'f': c = '\f'; break;
740 case 'n': c = '\n'; break;
741 case 'r': c = '\r'; break;
742 case 't': c = '\t'; break;
746 raise_errmsg("Invalid \\escape", pystr, end - 2);
755 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
758 /* Decode 4 hex digits */
759 for (; next < end; next++) {
760 Py_UNICODE digit = buf[next];
763 case '0': case '1': case '2': case '3': case '4':
764 case '5': case '6': case '7': case '8': case '9':
765 c |= (digit - '0'); break;
766 case 'a': case 'b': case 'c': case 'd': case 'e':
768 c |= (digit - 'a' + 10); break;
769 case 'A': case 'B': case 'C': case 'D': case 'E':
771 c |= (digit - 'A' + 10); break;
773 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
777 #ifdef Py_UNICODE_WIDE
779 if ((c & 0xfc00) == 0xd800) {
781 if (end + 6 >= len) {
782 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
785 if (buf[next++] != '\\' || buf[next++] != 'u') {
786 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
790 /* Decode 4 hex digits */
791 for (; next < end; next++) {
793 Py_UNICODE digit = buf[next];
795 case '0': case '1': case '2': case '3': case '4':
796 case '5': case '6': case '7': case '8': case '9':
797 c2 |= (digit - '0'); break;
798 case 'a': case 'b': case 'c': case 'd': case 'e':
800 c2 |= (digit - 'a' + 10); break;
801 case 'A': case 'B': case 'C': case 'D': case 'E':
803 c2 |= (digit - 'A' + 10); break;
805 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
809 if ((c2 & 0xfc00) != 0xdc00) {
810 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
813 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
815 else if ((c & 0xfc00) == 0xdc00) {
816 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
822 chunk = PyUnicode_FromUnicode(&c, 1);
828 if (chunks == NULL) {
832 rval = PyUnicode_FromUnicode(NULL, 0);
836 rval = join_list_unicode(chunks);
/* Python-level docstring for the scanstring() module function.
   NOTE(review): the signature line shows encoding as a required argument,
   but the parse format used by py_scanstring places it after the optional
   marker together with strict. The text is runtime data, so it is left
   unchanged here. */
851 PyDoc_STRVAR(pydoc_scanstring,
852 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
854 "Scan the string s for a JSON string. End is the index of the\n"
855 "character in s after the quote that started the JSON string.\n"
856 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
857 "on attempt to decode an invalid string. If strict is False then literal\n"
858 "control characters are allowed in the string.\n"
860 "Returns a tuple of the decoded string and the index of the character in s\n"
861 "after the end quote."
/* Module function scanstring(pystr, end[, encoding[, strict]]).
   The arguments are unpacked with PyArg_ParseTuple: pystr as an object, end
   through the _convertPyInt_AsSsize_t converter, then an optional encoding
   string and an optional strict int. A NULL encoding is replaced by
   DEFAULT_ENCODING. A PyString input goes to scanstring_str and a unicode
   input to scanstring_unicode; any other type raises TypeError naming the
   offending type. The decoded string and next_end are returned together
   through _build_rval_index_tuple. self is unused. */
865 py_scanstring(PyObject* self UNUSED, PyObject *args)
870 Py_ssize_t next_end = -1;
871 char *encoding = NULL;
873 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
876 if (encoding == NULL) {
877 encoding = DEFAULT_ENCODING;
879 if (PyString_Check(pystr)) {
880 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
882 else if (PyUnicode_Check(pystr)) {
883 rval = scanstring_unicode(pystr, end, strict, &next_end);
886 PyErr_Format(PyExc_TypeError,
887 "first argument must be a string, not %.80s",
888 Py_TYPE(pystr)->tp_name);
891 return _build_rval_index_tuple(rval, next_end);
/* Python-level docstring for the encode_basestring_ascii() module function. */
894 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
895 "encode_basestring_ascii(basestring) -> str\n"
897 "Return an ASCII-only JSON representation of a Python string"
/* Module function encode_basestring_ascii(pystr): dispatches on the type of
   its single argument. A PyString goes to ascii_escape_str and a unicode
   object to ascii_escape_unicode; any other type raises TypeError naming the
   offending type. self is unused. */
901 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
903 /* Return an ASCII-only JSON representation of a Python string */
905 if (PyString_Check(pystr)) {
906 return ascii_escape_str(pystr);
908 else if (PyUnicode_Check(pystr)) {
909 return ascii_escape_unicode(pystr);
912 PyErr_Format(PyExc_TypeError,
913 "first argument must be a string, not %.80s",
914 Py_TYPE(pystr)->tp_name);
/* Destructor for scanner objects. The visible step returns the object's
   memory through the tp_free slot of its type.
   NOTE(review): any step before that is not visible in this listing. */
920 scanner_dealloc(PyObject *self)
922 /* Deallocate scanner object */
924 Py_TYPE(self)->tp_free(self);
/* Garbage-collector traversal for scanner objects: reports the object
   references held by the scanner (encoding, the hooks and the parse_*
   callables) to the visit callback through Py_VISIT. The assert documents
   that self must be a scanner instance. */
928 scanner_traverse(PyObject *self, visitproc visit, void *arg)
931 assert(PyScanner_Check(self));
932 s = (PyScannerObject *)self;
933 Py_VISIT(s->encoding);
935 Py_VISIT(s->object_hook);
936 Py_VISIT(s->pairs_hook);
937 Py_VISIT(s->parse_float);
938 Py_VISIT(s->parse_int);
939 Py_VISIT(s->parse_constant);
/* Drops the references held by a scanner object. Each field is released
   with Py_CLEAR, which also resets the field to NULL. The set of fields
   matches the one visited in scanner_traverse. */
945 scanner_clear(PyObject *self)
948 assert(PyScanner_Check(self));
949 s = (PyScannerObject *)self;
950 Py_CLEAR(s->encoding);
952 Py_CLEAR(s->object_hook);
953 Py_CLEAR(s->pairs_hook);
954 Py_CLEAR(s->parse_float);
955 Py_CLEAR(s->parse_int);
956 Py_CLEAR(s->parse_constant);
/* Parse the body of a JSON object from the byte string pystr, starting at
   idx, the position just after the opening curly brace.
   Keys are read with scanstring_str and looked up in s->memo, where a new
   key is stored under itself (presumably so that equal keys seen later can
   reuse the same object; the reuse branch is not visible here). Values are
   read with scan_once_str. With a pairs hook set, each (key, val) tuple is
   appended to the pairs list; otherwise the pair is stored in the rval dict.
   After the closing brace, pairs_hook(pairs) is called when pairs_hook is
   not None; otherwise object_hook(rval) is called when object_hook is not
   None. *next_idx_ptr is set to the index after the closing brace.
   Syntax errors go through raise_errmsg with the messages
   "Expecting property name", "Expecting : delimiter",
   "Expecting , delimiter" and "Expecting object". */
962 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
963 /* Read a JSON object from PyString pystr.
964 idx is the index of the first character after the opening curly brace.
965 *next_idx_ptr is a return-by-reference index to the first character after
966 the closing curly brace.
968 Returns a new PyObject (usually a dict, but object_hook or
969 object_pairs_hook can change that)
971 char *str = PyString_AS_STRING(pystr);
972 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
973 PyObject *rval = NULL;
974 PyObject *pairs = NULL;
976 PyObject *key = NULL;
977 PyObject *val = NULL;
978 char *encoding = PyString_AS_STRING(s->encoding);
979 int strict = PyObject_IsTrue(s->strict);
980 int has_pairs_hook = (s->pairs_hook != Py_None);
982 if (has_pairs_hook) {
983 pairs = PyList_New(0);
993 /* skip whitespace after { */
994 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
996 /* only loop if the object is non-empty */
997 if (idx <= end_idx && str[idx] != '}') {
998 while (idx <= end_idx) {
1002 if (str[idx] != '"') {
1003 raise_errmsg("Expecting property name", pystr, idx);
1006 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
1009 memokey = PyDict_GetItem(s->memo, key);
1010 if (memokey != NULL) {
1016 if (PyDict_SetItem(s->memo, key, key) < 0)
1021 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1022 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1023 if (idx > end_idx || str[idx] != ':') {
1024 raise_errmsg("Expecting : delimiter", pystr, idx);
1028 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1030 /* read any JSON data type */
1031 val = scan_once_str(s, pystr, idx, &next_idx);
1035 if (has_pairs_hook) {
1036 item = PyTuple_Pack(2, key, val);
1041 if (PyList_Append(pairs, item) == -1) {
1048 if (PyDict_SetItem(rval, key, val) < 0)
1055 /* skip whitespace before } or , */
1056 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1058 /* bail if the object is closed or we didn't get the , delimiter */
1059 if (idx > end_idx) break;
1060 if (str[idx] == '}') {
1063 else if (str[idx] != ',') {
1064 raise_errmsg("Expecting , delimiter", pystr, idx);
1069 /* skip whitespace after , delimiter */
1070 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1073 /* verify that idx <= end_idx, str[idx] should be '}' */
1074 if (idx > end_idx || str[idx] != '}') {
1075 raise_errmsg("Expecting object", pystr, end_idx);
1079 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1080 if (s->pairs_hook != Py_None) {
1081 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1085 *next_idx_ptr = idx + 1;
1089 /* if object_hook is not None: rval = object_hook(rval) */
1090 if (s->object_hook != Py_None) {
1091 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1098 *next_idx_ptr = idx + 1;
/* Parse the body of a JSON object from the unicode object pystr, starting
   at idx, the position just after the opening curly brace.
   This is the Py_UNICODE counterpart of _parse_object_str: keys are read
   with scanstring_unicode and stored in s->memo under themselves when new,
   and values are read with scan_once_unicode. With a pairs hook set, each
   (key, val) tuple is appended to the pairs list; otherwise the pair is
   stored in the rval dict. After the closing brace, pairs_hook(pairs) is
   called when pairs_hook is not None; otherwise object_hook(rval) is called
   when object_hook is not None. *next_idx_ptr is set to the index after the
   closing brace. Syntax errors go through raise_errmsg with the messages
   "Expecting property name", "Expecting : delimiter",
   "Expecting , delimiter" and "Expecting object". */
1109 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1110 /* Read a JSON object from PyUnicode pystr.
1111 idx is the index of the first character after the opening curly brace.
1112 *next_idx_ptr is a return-by-reference index to the first character after
1113 the closing curly brace.
1115 Returns a new PyObject (usually a dict, but object_hook can change that)
1117 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1118 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1119 PyObject *rval = NULL;
1120 PyObject *pairs = NULL;
1122 PyObject *key = NULL;
1123 PyObject *val = NULL;
1124 int strict = PyObject_IsTrue(s->strict);
1125 int has_pairs_hook = (s->pairs_hook != Py_None);
1126 Py_ssize_t next_idx;
1128 if (has_pairs_hook) {
1129 pairs = PyList_New(0);
1134 rval = PyDict_New();
1139 /* skip whitespace after { */
1140 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1142 /* only loop if the object is non-empty */
1143 if (idx <= end_idx && str[idx] != '}') {
1144 while (idx <= end_idx) {
1148 if (str[idx] != '"') {
1149 raise_errmsg("Expecting property name", pystr, idx);
1152 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1155 memokey = PyDict_GetItem(s->memo, key);
1156 if (memokey != NULL) {
1162 if (PyDict_SetItem(s->memo, key, key) < 0)
1167 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1168 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1169 if (idx > end_idx || str[idx] != ':') {
1170 raise_errmsg("Expecting : delimiter", pystr, idx);
1174 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1176 /* read any JSON term */
1177 val = scan_once_unicode(s, pystr, idx, &next_idx);
1181 if (has_pairs_hook) {
1182 item = PyTuple_Pack(2, key, val);
1187 if (PyList_Append(pairs, item) == -1) {
1194 if (PyDict_SetItem(rval, key, val) < 0)
1201 /* skip whitespace before } or , */
1202 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1204 /* bail if the object is closed or we didn't get the , delimiter */
1205 if (idx > end_idx) break;
1206 if (str[idx] == '}') {
1209 else if (str[idx] != ',') {
1210 raise_errmsg("Expecting , delimiter", pystr, idx);
1215 /* skip whitespace after , delimiter */
1216 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1220 /* verify that idx <= end_idx, str[idx] should be '}' */
1221 if (idx > end_idx || str[idx] != '}') {
1222 raise_errmsg("Expecting object", pystr, end_idx);
1226 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1227 if (s->pairs_hook != Py_None) {
1228 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1232 *next_idx_ptr = idx + 1;
1236 /* if object_hook is not None: rval = object_hook(rval) */
1237 if (s->object_hook != Py_None) {
1238 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1245 *next_idx_ptr = idx + 1;
/* Parse the body of a JSON array from the byte string pystr, starting at
   idx, the position just after the opening square bracket.
   Elements are read with scan_once_str and appended to a new list. A
   StopIteration raised while reading an element is reported as
   "Expecting object" at that index. After every element the code expects
   either the closing bracket or a comma ("Expecting , delimiter"
   otherwise). Running off the end of the input before the closing bracket
   reports "Expecting object" at end_idx. *next_idx_ptr is set to the index
   after the closing bracket. */
1256 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1257 /* Read a JSON array from PyString pystr.
1258 idx is the index of the first character after the opening brace.
1259 *next_idx_ptr is a return-by-reference index to the first character after
1262 Returns a new PyList
1264 char *str = PyString_AS_STRING(pystr);
1265 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1266 PyObject *val = NULL;
1267 PyObject *rval = PyList_New(0);
1268 Py_ssize_t next_idx;
1272 /* skip whitespace after [ */
1273 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1275 /* only loop if the array is non-empty */
1276 if (idx <= end_idx && str[idx] != ']') {
1277 while (idx <= end_idx) {
1279 /* read any JSON term and de-tuplefy the (rval, idx) */
1280 val = scan_once_str(s, pystr, idx, &next_idx);
1282 if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1284 raise_errmsg("Expecting object", pystr, idx);
1289 if (PyList_Append(rval, val) == -1)
1295 /* skip whitespace between term and , */
1296 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1298 /* bail if the array is closed or we didn't get the , delimiter */
1299 if (idx > end_idx) break;
1300 if (str[idx] == ']') {
1303 else if (str[idx] != ',') {
1304 raise_errmsg("Expecting , delimiter", pystr, idx);
1309 /* skip whitespace after , */
1310 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1314 /* verify that idx <= end_idx, str[idx] should be ']' */
1315 if (idx > end_idx || str[idx] != ']') {
1316 raise_errmsg("Expecting object", pystr, end_idx);
1319 *next_idx_ptr = idx + 1;
/* Parse the body of a JSON array from the unicode object pystr, starting at
   idx, the position just after the opening square bracket.
   This is the Py_UNICODE counterpart of _parse_array_str: elements are read
   with scan_once_unicode and appended to a new list. A StopIteration raised
   while reading an element is reported as "Expecting object" at that index.
   After every element the code expects either the closing bracket or a
   comma ("Expecting , delimiter" otherwise). Running off the end of the
   input before the closing bracket reports "Expecting object" at end_idx.
   *next_idx_ptr is set to the index after the closing bracket. */
1328 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1329 /* Read a JSON array from PyUnicode pystr.
1330 idx is the index of the first character after the opening brace.
1331 *next_idx_ptr is a return-by-reference index to the first character after
1334 Returns a new PyList
1336 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1337 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1338 PyObject *val = NULL;
1339 PyObject *rval = PyList_New(0);
1340 Py_ssize_t next_idx;
1344 /* skip whitespace after [ */
1345 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1347 /* only loop if the array is non-empty */
1348 if (idx <= end_idx && str[idx] != ']') {
1349 while (idx <= end_idx) {
1351 /* read any JSON term */
1352 val = scan_once_unicode(s, pystr, idx, &next_idx);
1354 if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1356 raise_errmsg("Expecting object", pystr, idx);
1361 if (PyList_Append(rval, val) == -1)
1367 /* skip whitespace between term and , */
1368 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1370 /* bail if the array is closed or we didn't get the , delimiter */
1371 if (idx > end_idx) break;
1372 if (str[idx] == ']') {
1375 else if (str[idx] != ',') {
1376 raise_errmsg("Expecting , delimiter", pystr, idx);
1381 /* skip whitespace after , */
1382 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1386 /* verify that idx <= end_idx, str[idx] should be ']' */
1387 if (idx > end_idx || str[idx] != ']') {
1388 raise_errmsg("Expecting object", pystr, end_idx);
1391 *next_idx_ptr = idx + 1;
/* Handle one of the non-standard JSON constants. The C string constant is
   turned into an interned PyString, s->parse_constant is called with it, and
   idx is advanced by the length of the constant before being stored in
   *next_idx_ptr. The input document itself is not read here; the caller has
   already identified which constant is present. */
1400 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1401 /* Read a JSON constant from PyString pystr.
1402 constant is the constant string that was found
1403 ("NaN", "Infinity", "-Infinity").
1404 idx is the index of the first character of the constant
1405 *next_idx_ptr is a return-by-reference index to the first character after
1408 Returns the result of parse_constant
1412 /* constant is "NaN", "Infinity", or "-Infinity" */
1413 cstr = PyString_InternFromString(constant);
1417 /* rval = parse_constant(constant) */
1418 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1419 idx += PyString_GET_SIZE(cstr);
1421 *next_idx_ptr = idx;
/* Match and convert a JSON number in the byte string pystr beginning at
   start.
   Grammar accepted by the visible code: an optional minus sign; an integer
   part that is either a single 0 or a digit 1-9 followed by more digits; an
   optional fraction, taken only when the dot is followed by a digit; and an
   optional exponent (e or E, optional sign, digits), which is dropped again
   when no digit follows it. When no number can be matched StopIteration is
   set, which the array parsers translate into "Expecting object".
   The matched text is copied into a PyString. A float is converted with
   PyOS_string_to_double and PyFloat_FromDouble when parse_float is exactly
   PyFloat_Type, otherwise by calling parse_float. An integer is converted
   with PyInt_FromString in base 10 when parse_int is exactly PyInt_Type,
   otherwise by calling parse_int. *next_idx_ptr receives the index after
   the number. */
1426 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1427 /* Read a JSON number from PyString pystr.
1428 idx is the index of the first character of the number
1429 *next_idx_ptr is a return-by-reference index to the first character after
1432 Returns a new PyObject representation of that number:
1433 PyInt, PyLong, or PyFloat.
1434 May return other types if parse_int or parse_float are set
1436 char *str = PyString_AS_STRING(pystr);
1437 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1438 Py_ssize_t idx = start;
1443 /* read a sign if it's there, make sure it's not the end of the string */
1444 if (str[idx] == '-') {
1446 if (idx > end_idx) {
1447 PyErr_SetNone(PyExc_StopIteration);
1452 /* read as many integer digits as we find as long as it doesn't start with 0 */
1453 if (str[idx] >= '1' && str[idx] <= '9') {
1455 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1457 /* if it starts with 0 we only expect one integer digit */
1458 else if (str[idx] == '0') {
1461 /* no integer digits, error */
1463 PyErr_SetNone(PyExc_StopIteration);
1467 /* if the next char is '.' followed by a digit then read all float digits */
1468 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1471 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1474 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1475 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1477 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1478 Py_ssize_t e_start = idx;
1481 /* read an exponent sign if present */
1482 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1484 /* read all digits */
1485 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1487 /* if we got a digit, then parse as float. if not, backtrack */
1488 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1496 /* copy the section we determined to be a number */
1497 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1501 /* parse as a float using a fast path if available, otherwise call user defined method */
1502 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1503 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1506 /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
1507 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1509 if (d == -1.0 && PyErr_Occurred())
1511 rval = PyFloat_FromDouble(d);
1515 /* parse as an int using a fast path if available, otherwise call user defined method */
1516 if (s->parse_int != (PyObject *)&PyInt_Type) {
1517 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1520 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1524 *next_idx_ptr = idx;
/*
 * _match_number_unicode: scan one JSON number out of the unicode object pystr.
 *
 * start         index of the first character of the number (the docstring
 *               below calls it idx; the local idx is initialised from it).
 * next_idx_ptr  receives the index of the first character after the number.
 *
 * The scanning steps mirror the byte-string variant: optional '-', integer
 * part, optional '.' fraction, optional exponent.  The matched slice is
 * copied into numstr with PyUnicode_FromUnicode.  Floats go through
 * s->parse_float, or PyFloat_FromString when parse_float is exactly
 * PyFloat_Type; integers always go through s->parse_int.  StopIteration is
 * set when the input ends right after '-' or when no integer digit is found.
 *
 * NOTE(review): the embedded numbering jumps, so some source lines
 * (declarations, returns, the backtracking itself) are not part of this
 * listing; confirm details against the complete file.
 */
1529 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1530 /* Read a JSON number from PyUnicode pystr.
1531 idx is the index of the first character of the number
1532 *next_idx_ptr is a return-by-reference index to the first character after
1535 Returns a new PyObject representation of that number:
1536 PyInt, PyLong, or PyFloat.
1537 May return other types if parse_int or parse_float are set
1539 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1540 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1541 Py_ssize_t idx = start;
1546 /* read a sign if it's there, make sure it's not the end of the string */
1547 if (str[idx] == '-') {
1549 if (idx > end_idx) {
1550 PyErr_SetNone(PyExc_StopIteration);
1555 /* read as many integer digits as we find as long as it doesn't start with 0 */
1556 if (str[idx] >= '1' && str[idx] <= '9') {
1558 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1560 /* if it starts with 0 we only expect one integer digit */
1561 else if (str[idx] == '0') {
1564 /* no integer digits, error */
1566 PyErr_SetNone(PyExc_StopIteration);
1570 /* if the next char is '.' followed by a digit then read all float digits */
/* idx < end_idx guarantees that str[idx + 1] is still inside the string */
1571 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1574 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1577 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1578 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
/* remembered so the scan can return to the 'e' if no exponent digit follows */
1579 Py_ssize_t e_start = idx;
1582 /* read an exponent sign if present */
1583 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1585 /* read all digits */
1586 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1588 /* if we got a digit, then parse as float. if not, backtrack */
1589 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1597 /* copy the section we determined to be a number */
1598 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1602 /* parse as a float using a fast path if available, otherwise call user defined method */
1603 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1604 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1607 rval = PyFloat_FromString(numstr, NULL);
1611 /* no fast path for unicode -> int, just call */
1612 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1615 *next_idx_ptr = idx;
/*
 * scan_once_str: decode the single JSON term that starts at pystr[idx]
 * (byte-string input).
 *
 * Sets StopIteration when idx is at or past the end of the string, and takes
 * a recursion guard (Py_EnterRecursiveCall / Py_LeaveRecursiveCall) around
 * the work.  The calls visible below hand off to scanstring_str,
 * _parse_object_str and _parse_array_str, match the literals null, true,
 * false, NaN, Infinity and -Infinity character by character, and otherwise
 * fall back to _match_number_str.  Every literal test first checks that the
 * last index it compares is below length, so the str[] reads stay in bounds.
 *
 * NOTE(review): the lines that select between these branches, and the
 * return statements, are not part of this listing (the embedded numbering
 * skips them); confirm against the complete file.
 */
1620 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1622 /* Read one JSON term (of any kind) from PyString pystr.
1623 idx is the index of the first character of the term
1624 *next_idx_ptr is a return-by-reference index to the first character after
1627 Returns a new PyObject representation of the term.
1629 char *str = PyString_AS_STRING(pystr);
1630 Py_ssize_t length = PyString_GET_SIZE(pystr);
1631 PyObject *rval = NULL;
1632 int fallthrough = 0;
1633 if (idx >= length) {
1634 PyErr_SetNone(PyExc_StopIteration);
1637 if (Py_EnterRecursiveCall(" while decoding a JSON document"))
1642 rval = scanstring_str(pystr, idx + 1,
1643 PyString_AS_STRING(s->encoding),
1644 PyObject_IsTrue(s->strict),
1649 rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1653 rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1657 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1659 *next_idx_ptr = idx + 4;
1667 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1669 *next_idx_ptr = idx + 4;
1677 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1678 Py_INCREF(Py_False);
1679 *next_idx_ptr = idx + 5;
1687 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1688 rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
1695 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1696 rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
1703 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1704 rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1712 /* Didn't find a string, object, array, or named constant. Look for a number. */
1714 rval = _match_number_str(s, pystr, idx, next_idx_ptr);
1715 Py_LeaveRecursiveCall();
/*
 * scan_once_unicode: decode the single JSON term that starts at pystr[idx]
 * (unicode input).
 *
 * Sets StopIteration when idx is at or past the end of the string, and takes
 * a recursion guard (Py_EnterRecursiveCall / Py_LeaveRecursiveCall) around
 * the work.  The calls visible below hand off to scanstring_unicode,
 * _parse_object_unicode and _parse_array_unicode, match the literals null,
 * true, false, NaN, Infinity and -Infinity character by character, and
 * otherwise fall back to _match_number_unicode.  Every literal test first
 * checks that the last index it compares is below length, so the str[]
 * reads stay in bounds.
 *
 * NOTE(review): the lines that select between these branches, and the
 * return statements, are not part of this listing (the embedded numbering
 * skips them); confirm against the complete file.
 */
1720 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1722 /* Read one JSON term (of any kind) from PyUnicode pystr.
1723 idx is the index of the first character of the term
1724 *next_idx_ptr is a return-by-reference index to the first character after
1727 Returns a new PyObject representation of the term.
1729 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1730 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1731 PyObject *rval = NULL;
1732 int fallthrough = 0;
1733 if (idx >= length) {
1734 PyErr_SetNone(PyExc_StopIteration);
1737 if (Py_EnterRecursiveCall(" while decoding a JSON document"))
1742 rval = scanstring_unicode(pystr, idx + 1,
1743 PyObject_IsTrue(s->strict),
1748 rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1752 rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1756 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1758 *next_idx_ptr = idx + 4;
1766 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1768 *next_idx_ptr = idx + 4;
1776 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1777 Py_INCREF(Py_False);
1778 *next_idx_ptr = idx + 5;
1786 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1787 rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
1794 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1795 rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
1802 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1803 rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1811 /* Didn't find a string, object, array, or named constant. Look for a number. */
1813 rval = _match_number_unicode(s, pystr, idx, next_idx_ptr);
1814 Py_LeaveRecursiveCall();
/*
 * scanner_call: tp_call implementation of the Scanner type.
 *
 * Accepts (string, idx), with idx converted through
 * _convertPyInt_AsSsize_t.  Byte strings are handed to scan_once_str and
 * unicode objects to scan_once_unicode; any other type raises TypeError
 * naming the offending type.  The result is packed together with next_idx
 * by _build_rval_index_tuple.
 *
 * NOTE(review): the early-return lines after the failed argument parse and
 * after the TypeError are not part of this listing.
 */
1819 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1821 /* Python callable interface to scan_once_{str,unicode} */
/* -1 is the initial value; the scan functions overwrite it through the pointer */
1825 Py_ssize_t next_idx = -1;
1826 static char *kwlist[] = {"string", "idx", NULL};
1828 assert(PyScanner_Check(self));
1829 s = (PyScannerObject *)self;
1830 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1833 if (PyString_Check(pystr)) {
1834 rval = scan_once_str(s, pystr, idx, &next_idx);
1836 else if (PyUnicode_Check(pystr)) {
1837 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1840 PyErr_Format(PyExc_TypeError,
1841 "first argument must be a string, not %.80s",
1842 Py_TYPE(pystr)->tp_name);
/* the memo dict is emptied after each scan, before the result tuple is built */
1845 PyDict_Clear(s->memo);
1846 return _build_rval_index_tuple(rval, next_idx);
/*
 * scanner_new: tp_new slot of the Scanner type.
 *
 * Allocates the instance through type->tp_alloc and sets the hook and
 * parser members shown below to NULL before returning it, so they are
 * NULL until they are assigned.
 *
 * NOTE(review): the NULL check on the allocation and the initialisation of
 * the remaining members are presumably on the lines this listing skips
 * (1854-1856, 1862); confirm against the complete file.
 */
1850 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1853 s = (PyScannerObject *)type->tp_alloc(type, 0);
1857 s->object_hook = NULL;
1858 s->pairs_hook = NULL;
1859 s->parse_float = NULL;
1860 s->parse_int = NULL;
1861 s->parse_constant = NULL;
1863 return (PyObject *)s;
/*
 * scanner_init: tp_init slot of the Scanner type.
 *
 * Takes one argument, context, and copies its attributes encoding, strict,
 * object_hook, object_pairs_hook, parse_float, parse_int and parse_constant
 * into the scanner.  PyObject_GetAttrString returns new references, so the
 * scanner owns what it stores.  A memo dict is created on first use.
 *
 * Encoding handling: None is replaced by the interned DEFAULT_ENCODING
 * string, a unicode value is converted with PyUnicode_AsEncodedString, and
 * the final value must be a byte string.
 *
 * NOTE(review): the statements executed when a check fails, the success
 * return and the label in front of the Py_CLEAR sequence are not part of
 * this listing; the Py_CLEAR run at the end is presumably the failure path.
 */
1867 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1869 /* Initialize Scanner object */
1871 static char *kwlist[] = {"context", NULL};
1874 assert(PyScanner_Check(self));
1875 s = (PyScannerObject *)self;
1877 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
/* reuse an existing memo dict when init runs again on the same object */
1880 if (s->memo == NULL) {
1881 s->memo = PyDict_New();
1882 if (s->memo == NULL)
1886 /* PyString_AS_STRING is used on encoding */
1887 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1888 if (s->encoding == NULL)
/* None selects the default encoding */
1890 if (s->encoding == Py_None) {
1892 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1894 else if (PyUnicode_Check(s->encoding)) {
1895 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1896 Py_DECREF(s->encoding);
/* covers both a failed conversion above and a non-string encoding attribute */
1899 if (s->encoding == NULL || !PyString_Check(s->encoding))
1902 /* All of these will fail "gracefully" so we don't need to verify them */
1903 s->strict = PyObject_GetAttrString(ctx, "strict");
1904 if (s->strict == NULL)
1906 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1907 if (s->object_hook == NULL)
1909 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1910 if (s->pairs_hook == NULL)
1912 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1913 if (s->parse_float == NULL)
1915 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1916 if (s->parse_int == NULL)
1918 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1919 if (s->parse_constant == NULL)
/* release every attribute reference; Py_CLEAR is a no-op for members still NULL */
1925 Py_CLEAR(s->encoding);
1926 Py_CLEAR(s->strict);
1927 Py_CLEAR(s->object_hook);
1928 Py_CLEAR(s->pairs_hook);
1929 Py_CLEAR(s->parse_float);
1930 Py_CLEAR(s->parse_int);
1931 Py_CLEAR(s->parse_constant);
1935 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
/*
 * Type object for simplejson._speedups.Scanner.
 *
 * Instances are callable (tp_call = scanner_call) and take part in cyclic
 * garbage collection: Py_TPFLAGS_HAVE_GC is set and tp_traverse / tp_clear
 * are supplied.  tp_getattro, tp_setattro, tp_alloc and tp_free are left 0;
 * the commented-out names record the generic implementations intended for
 * them (presumably filled in when the type is readied; confirm).
 *
 * NOTE(review): several slot lines are not part of this listing (the
 * embedded numbering skips them), so the initializer is not complete as
 * shown.
 */
1938 PyTypeObject PyScannerType = {
1939 PyObject_HEAD_INIT(NULL)
1940 0, /* tp_internal */
1941 "simplejson._speedups.Scanner", /* tp_name */
1942 sizeof(PyScannerObject), /* tp_basicsize */
1943 0, /* tp_itemsize */
1944 scanner_dealloc, /* tp_dealloc */
1950 0, /* tp_as_number */
1951 0, /* tp_as_sequence */
1952 0, /* tp_as_mapping */
1954 scanner_call, /* tp_call */
1956 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1957 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1958 0, /* tp_as_buffer */
1959 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1960 scanner_doc, /* tp_doc */
1961 scanner_traverse, /* tp_traverse */
1962 scanner_clear, /* tp_clear */
1963 0, /* tp_richcompare */
1964 0, /* tp_weaklistoffset */
1966 0, /* tp_iternext */
1968 scanner_members, /* tp_members */
1972 0, /* tp_descr_get */
1973 0, /* tp_descr_set */
1974 0, /* tp_dictoffset */
1975 scanner_init, /* tp_init */
1976 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1977 scanner_new, /* tp_new */
1978 0,/* PyObject_GC_Del, */ /* tp_free */
/*
 * encoder_new: tp_new slot of the Encoder type.
 *
 * Allocates the instance through type->tp_alloc and sets the members shown
 * below to NULL before returning it.
 *
 * NOTE(review): the NULL check on the allocation and the initialisation of
 * the remaining members are presumably on the lines this listing skips;
 * confirm against the complete file.
 */
1982 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1985 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1988 s->defaultfn = NULL;
1991 s->key_separator = NULL;
1992 s->item_separator = NULL;
1993 s->sort_keys = NULL;
1997 return (PyObject *)s;
/*
 * encoder_init: tp_init slot of the Encoder type.
 *
 * Parses eleven required objects (see kwlist) and stores them on the
 * encoder.  The "O" format yields borrowed references, which is why every
 * stored object is Py_INCREF'ed at the end.  allow_nan and use_decimal are
 * reduced to C truth values, and fast_encode records whether the supplied
 * encoder is the C function py_encode_basestring_ascii itself.
 *
 * NOTE(review): PyObject_IsTrue returns -1 on error and no check of that
 * result is visible here.  The assignment of s->indent (source line 2021)
 * and the return statements are not part of this listing.
 */
2001 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
2003 /* initialize Encoder object */
2004 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", NULL};
2007 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
2008 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal;
2010 assert(PyEncoder_Check(self));
2011 s = (PyEncoderObject *)self;
2013 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOO:make_encoder", kwlist,
2014 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
2015 &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal))
2018 s->markers = markers;
2019 s->defaultfn = defaultfn;
2020 s->encoder = encoder;
2022 s->key_separator = key_separator;
2023 s->item_separator = item_separator;
2024 s->sort_keys = sort_keys;
2025 s->skipkeys = skipkeys;
2026 s->key_memo = key_memo;
/* true only when the encoder argument wraps the C implementation itself */
2027 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
2028 s->allow_nan = PyObject_IsTrue(allow_nan);
2029 s->use_decimal = PyObject_IsTrue(use_decimal);
/* take ownership of the borrowed argument references stored above */
2031 Py_INCREF(s->markers);
2032 Py_INCREF(s->defaultfn);
2033 Py_INCREF(s->encoder);
2034 Py_INCREF(s->indent);
2035 Py_INCREF(s->key_separator);
2036 Py_INCREF(s->item_separator);
2037 Py_INCREF(s->sort_keys);
2038 Py_INCREF(s->skipkeys);
2039 Py_INCREF(s->key_memo);
/*
 * encoder_call: tp_call implementation of the Encoder type.
 *
 * Accepts (obj, _current_indent_level), with the indent level converted
 * through _convertPyInt_AsSsize_t, creates an empty list and lets
 * encoder_listencode_obj append the encoded chunks for obj to it.
 *
 * NOTE(review): the handling of a failed list allocation and of a non-zero
 * result from encoder_listencode_obj, as well as the final return, are not
 * part of this listing.
 */
2044 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
2046 /* Python callable interface to encode_listencode_obj */
2047 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
2050 Py_ssize_t indent_level;
2052 assert(PyEncoder_Check(self));
2053 s = (PyEncoderObject *)self;
2054 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
2055 &obj, _convertPyInt_AsSsize_t, &indent_level))
2057 rval = PyList_New(0);
/* a non-zero result from the encoder signals failure */
2060 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
/*
 * _encoded_const: map the singletons None, True and False to the JSON
 * strings "null", "true" and "false".
 *
 * Each string is interned and cached in a function-local static the first
 * time it is needed.  Any other object sets ValueError("not a const").
 *
 * NOTE(review): the return statements, and therefore the reference
 * ownership of the returned string, are not part of this listing.
 */
2068 _encoded_const(PyObject *obj)
2070 /* Return the JSON string representation of None, True, False */
2071 if (obj == Py_None) {
2072 static PyObject *s_null = NULL;
2073 if (s_null == NULL) {
2074 s_null = PyString_InternFromString("null");
2079 else if (obj == Py_True) {
2080 static PyObject *s_true = NULL;
2081 if (s_true == NULL) {
2082 s_true = PyString_InternFromString("true");
2087 else if (obj == Py_False) {
2088 static PyObject *s_false = NULL;
2089 if (s_false == NULL) {
2090 s_false = PyString_InternFromString("false");
2096 PyErr_SetString(PyExc_ValueError, "not a const");
/*
 * encoder_encode_float: produce the JSON text for a Python float.
 *
 * Finite values are rendered with PyObject_Repr.  For non-finite values the
 * function sets ValueError when s->allow_nan is false, and otherwise
 * returns one of the strings "Infinity", "-Infinity" or "NaN".
 *
 * NOTE(review): the comparisons that pick between the three non-finite
 * spellings are not part of this listing.
 */
2102 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
2104 /* Return the JSON representation of a PyFloat */
/* PyFloat_AS_DOUBLE does no type checking; the caller must pass a float */
2105 double i = PyFloat_AS_DOUBLE(obj);
2106 if (!Py_IS_FINITE(i)) {
2107 if (!s->allow_nan) {
2108 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
2112 return PyString_FromString("Infinity");
2115 return PyString_FromString("-Infinity");
2118 return PyString_FromString("NaN");
2121 /* Use a better float format here? */
2122 return PyObject_Repr(obj);
/*
 * encoder_encode_string: produce the JSON text for a string object.
 *
 * Two paths are visible: a direct call to the C function
 * py_encode_basestring_ascii, and a call to the user-supplied s->encoder.
 *
 * NOTE(review): the condition choosing between them (source line 2129) is
 * not part of this listing; presumably it is s->fast_encode, which
 * encoder_init computes.  Confirm against the complete file.
 */
2126 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
2128 /* Return the JSON representation of a string */
2130 return py_encode_basestring_ascii(NULL, obj);
2132 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
/*
 * _steal_list_append: append stolen to the list lst and, per the comment
 * below, give up the caller's reference to stolen afterwards.
 *
 * NOTE(review): the decrement and the return of rval are not part of this
 * listing; PyList_Append itself does not steal a reference.
 */
2136 _steal_list_append(PyObject *lst, PyObject *stolen)
2138 /* Append stolen and then decrement its reference count */
2139 int rval = PyList_Append(lst, stolen);
/*
 * encoder_listencode_obj: append the JSON encoding of obj to the list rval.
 *
 * The work runs inside a recursion guard.  Dispatch order as visible below:
 * the singletons None / True / False, strings, ints and longs, floats,
 * lists and tuples, dicts, Decimal instances (only when s->use_decimal is
 * set), and finally the fallback that calls s->defaultfn and encodes its
 * result recursively.  In the fallback, when s->markers is not None, the
 * object's address is used as a key in s->markers to detect circular
 * references; the key is removed again afterwards.
 *
 * NOTE(review): declarations, the error-handling statements and the return
 * are not part of this listing (the embedded numbering skips them).
 */
2145 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
2147 /* Encode Python object obj to a JSON term, rval is a PyList */
2149 if (Py_EnterRecursiveCall(" while encoding a JSON document"))
/* identity tests come first: True and False would also pass PyInt_Check */
2152 if (obj == Py_None || obj == Py_True || obj == Py_False) {
2153 PyObject *cstr = _encoded_const(obj);
2155 rv = _steal_list_append(rval, cstr);
2157 else if (PyString_Check(obj) || PyUnicode_Check(obj))
2159 PyObject *encoded = encoder_encode_string(s, obj);
2160 if (encoded != NULL)
2161 rv = _steal_list_append(rval, encoded);
2163 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2164 PyObject *encoded = PyObject_Str(obj);
2165 if (encoded != NULL)
2166 rv = _steal_list_append(rval, encoded);
2168 else if (PyFloat_Check(obj)) {
2169 PyObject *encoded = encoder_encode_float(s, obj);
2170 if (encoded != NULL)
2171 rv = _steal_list_append(rval, encoded);
2173 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2174 rv = encoder_listencode_list(s, rval, obj, indent_level);
2176 else if (PyDict_Check(obj)) {
2177 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2179 else if (s->use_decimal && Decimal_Check(obj)) {
2180 PyObject *encoded = PyObject_Str(obj);
2181 if (encoded != NULL)
2182 rv = _steal_list_append(rval, encoded);
/* fallback: unknown type, converted through s->defaultfn */
2185 PyObject *ident = NULL;
2187 if (s->markers != Py_None) {
/* the object's address serves as its identity key in markers */
2189 ident = PyLong_FromVoidPtr(obj);
2192 has_key = PyDict_Contains(s->markers, ident);
2195 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2199 if (PyDict_SetItem(s->markers, ident, obj)) {
2204 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2205 if (newobj == NULL) {
2209 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2215 else if (ident != NULL) {
2216 if (PyDict_DelItem(s->markers, ident)) {
2224 Py_LeaveRecursiveCall();
/*
 * encoder_listencode_dict: append the JSON encoding of the dict dct to the
 * list rval.
 *
 * An empty dict appends "{}" and returns immediately.  Otherwise, when
 * s->markers is not None, the dict's address is recorded there to detect
 * circular references.  The items are then collected (keys sorted first
 * when s->sort_keys is true) and each key is converted to a string: string
 * keys are used as-is, floats go through encoder_encode_float, the
 * singletons through _encoded_const, and ints through PyObject_Str.  Other
 * key types are skipped when s->skipkeys is true, or raise TypeError.
 * Encoded keys are cached in s->key_memo.  Values are encoded with
 * encoder_listencode_obj.
 *
 * NOTE(review): many lines of this function are not part of this listing
 * (the embedded numbering skips them), including error exits, reference
 * bookkeeping and the return statements; confirm details against the
 * complete file.
 */
2229 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2231 /* Encode Python dict dct to a JSON term, rval is a PyList */
2232 static PyObject *open_dict = NULL;
2233 static PyObject *close_dict = NULL;
2234 static PyObject *empty_dict = NULL;
2235 static PyObject *iteritems = NULL;
2236 PyObject *kstr = NULL;
2237 PyObject *ident = NULL;
2238 PyObject *iter = NULL;
2239 PyObject *item = NULL;
2240 PyObject *items = NULL;
2241 PyObject *encoded = NULL;
/* the interned constant strings are created on first use and kept in statics */
2245 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) {
2246 open_dict = PyString_InternFromString("{");
2247 close_dict = PyString_InternFromString("}");
2248 empty_dict = PyString_InternFromString("{}");
2249 iteritems = PyString_InternFromString("iteritems");
2250 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL)
2253 if (PyDict_Size(dct) == 0)
2254 return PyList_Append(rval, empty_dict);
2256 if (s->markers != Py_None) {
/* the dict's address serves as its identity key in markers */
2258 ident = PyLong_FromVoidPtr(dct);
2261 has_key = PyDict_Contains(s->markers, ident);
2264 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2267 if (PyDict_SetItem(s->markers, ident, dct)) {
2272 if (PyList_Append(rval, open_dict))
2275 if (s->indent != Py_None) {
2276 /* TODO: DOES NOT RUN */
/* NOTE(review): the next three lines are Python pseudo-code for the
   unimplemented indent handling; presumably they sit inside a comment whose
   delimiters are on lines this listing skips */
2279 newline_indent = '\n' + (_indent * _current_indent_level)
2280 separator = _item_separator + newline_indent
2281 buf += newline_indent
2285 if (PyObject_IsTrue(s->sort_keys)) {
2286 /* First sort the keys then replace them with (key, value) tuples. */
2287 Py_ssize_t i, nitems;
2288 if (PyDict_CheckExact(dct))
2289 items = PyDict_Keys(dct);
2291 items = PyMapping_Keys(dct);
2294 if (!PyList_Check(items)) {
2295 PyErr_SetString(PyExc_ValueError, "keys must return list");
2298 if (PyList_Sort(items) < 0)
2300 nitems = PyList_GET_SIZE(items);
2301 for (i = 0; i < nitems; i++) {
2302 PyObject *key, *value;
2303 key = PyList_GET_ITEM(items, i);
/* NOTE(review): PyDict_GetItem returns a borrowed reference, or NULL without
   an exception when the key is absent; no NULL check on value is visible
   before PyTuple_Pack, so confirm this cannot happen for dict subclasses */
2304 value = PyDict_GetItem(dct, key);
2305 item = PyTuple_Pack(2, key, value);
2308 PyList_SET_ITEM(items, i, item);
2313 if (PyDict_CheckExact(dct))
2314 items = PyDict_Items(dct);
2316 items = PyMapping_Items(dct);
2320 iter = PyObject_GetIter(items);
2325 skipkeys = PyObject_IsTrue(s->skipkeys);
2327 while ((item = PyIter_Next(iter))) {
/* NOTE(review): this loop-local encoded shadows the function-level encoded
   declared above; the Py_XDECREF(encoded) at the end of the function refers
   to the outer variable, which no visible line assigns */
2328 PyObject *encoded, *key, *value;
2329 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
2330 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
2333 key = PyTuple_GET_ITEM(item, 0);
2336 value = PyTuple_GET_ITEM(item, 1);
/* look up a previously encoded form of this key (borrowed reference) */
2340 encoded = PyDict_GetItem(s->key_memo, key);
2341 if (encoded != NULL) {
2344 else if (PyString_Check(key) || PyUnicode_Check(key)) {
2348 else if (PyFloat_Check(key)) {
2349 kstr = encoder_encode_float(s, key);
2353 else if (key == Py_True || key == Py_False || key == Py_None) {
2354 /* This must come before the PyInt_Check because
2355 True and False are also 1 and 0.*/
2356 kstr = _encoded_const(key);
2360 else if (PyInt_Check(key) || PyLong_Check(key)) {
2361 kstr = PyObject_Str(key);
2365 else if (skipkeys) {
2370 /* TODO: include repr of key */
2371 PyErr_SetString(PyExc_TypeError, "keys must be a string");
2376 if (PyList_Append(rval, s->item_separator))
/* cache miss: encode the key string and remember the result in key_memo */
2380 if (encoded == NULL) {
2381 encoded = encoder_encode_string(s, kstr);
2383 if (encoded == NULL)
2385 if (PyDict_SetItem(s->key_memo, key, encoded))
2388 if (PyList_Append(rval, encoded)) {
2392 if (PyList_Append(rval, s->key_separator))
2394 if (encoder_listencode_obj(s, rval, value, indent_level))
/* PyIter_Next returns NULL both at exhaustion and on error */
2400 if (PyErr_Occurred())
2402 if (ident != NULL) {
2403 if (PyDict_DelItem(s->markers, ident))
2407 if (s->indent != Py_None) {
2408 /* TODO: DOES NOT RUN */
2411 yield '\n' + (_indent * _current_indent_level)
2414 if (PyList_Append(rval, close_dict))
2419 Py_XDECREF(encoded);
/*
 * encoder_listencode_list: append the JSON encoding of the sequence seq to
 * the list rval.
 *
 * A sequence that tests false appends "[]" and returns immediately.
 * Otherwise, when s->markers is not None, the sequence's address is
 * recorded there to detect circular references.  The elements are then
 * iterated and each one is encoded with encoder_listencode_obj, between
 * "[" and "]".
 *
 * NOTE(review): several lines of this function are not part of this
 * listing (the embedded numbering skips them), including error exits, the
 * condition guarding the item separator, reference bookkeeping and the
 * return statements; confirm details against the complete file.
 */
2429 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2431 /* Encode Python list seq to a JSON term, rval is a PyList */
2432 static PyObject *open_array = NULL;
2433 static PyObject *close_array = NULL;
2434 static PyObject *empty_array = NULL;
2435 PyObject *ident = NULL;
2436 PyObject *iter = NULL;
2437 PyObject *obj = NULL;
/* the interned constant strings are created on first use and kept in statics */
2441 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2442 open_array = PyString_InternFromString("[");
2443 close_array = PyString_InternFromString("]");
2444 empty_array = PyString_InternFromString("[]");
2445 if (open_array == NULL || close_array == NULL || empty_array == NULL)
/* truth value doubles as the emptiness test for the sequence */
2449 is_true = PyObject_IsTrue(seq);
2452 else if (is_true == 0)
2453 return PyList_Append(rval, empty_array);
2455 if (s->markers != Py_None) {
/* the sequence's address serves as its identity key in markers */
2457 ident = PyLong_FromVoidPtr(seq);
2460 has_key = PyDict_Contains(s->markers, ident);
2463 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2466 if (PyDict_SetItem(s->markers, ident, seq)) {
2471 iter = PyObject_GetIter(seq);
2475 if (PyList_Append(rval, open_array))
2477 if (s->indent != Py_None) {
2478 /* TODO: DOES NOT RUN */
/* NOTE(review): the next three lines are Python pseudo-code for the
   unimplemented indent handling; presumably they sit inside a comment whose
   delimiters are on lines this listing skips */
2481 newline_indent = '\n' + (_indent * _current_indent_level)
2482 separator = _item_separator + newline_indent
2483 buf += newline_indent
2486 while ((obj = PyIter_Next(iter))) {
2488 if (PyList_Append(rval, s->item_separator))
2491 if (encoder_listencode_obj(s, rval, obj, indent_level))
/* PyIter_Next returns NULL both at exhaustion and on error */
2497 if (PyErr_Occurred())
2499 if (ident != NULL) {
2500 if (PyDict_DelItem(s->markers, ident))
2504 if (s->indent != Py_None) {
2505 /* TODO: DOES NOT RUN */
2508 yield '\n' + (_indent * _current_indent_level)
2511 if (PyList_Append(rval, close_array))
/*
 * encoder_dealloc: tp_dealloc slot of the Encoder type.  Releases the
 * references held by the object via encoder_clear, then frees the object's
 * memory through its type's tp_free.
 */
2523 encoder_dealloc(PyObject *self)
2525 /* Deallocate Encoder */
2526 encoder_clear(self);
2527 Py_TYPE(self)->tp_free(self);
/*
 * encoder_traverse: tp_traverse slot of the Encoder type.  Reports every
 * PyObject member held by the encoder to the garbage collector's visit
 * callback.  The members visited are the same ones encoder_clear releases.
 */
2531 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2534 assert(PyEncoder_Check(self));
2535 s = (PyEncoderObject *)self;
2536 Py_VISIT(s->markers);
2537 Py_VISIT(s->defaultfn);
2538 Py_VISIT(s->encoder);
2539 Py_VISIT(s->indent);
2540 Py_VISIT(s->key_separator);
2541 Py_VISIT(s->item_separator);
2542 Py_VISIT(s->sort_keys);
2543 Py_VISIT(s->skipkeys);
2544 Py_VISIT(s->key_memo);
/*
 * encoder_clear: tp_clear slot of the Encoder type.  Drops every PyObject
 * reference held by the encoder and resets the members to NULL (Py_CLEAR).
 * It does not free the object itself; encoder_dealloc calls this function
 * and then tp_free.
 */
2549 encoder_clear(PyObject *self)
2551 /* Clear the references held by the Encoder */
2553 assert(PyEncoder_Check(self));
2554 s = (PyEncoderObject *)self;
2555 Py_CLEAR(s->markers);
2556 Py_CLEAR(s->defaultfn);
2557 Py_CLEAR(s->encoder);
2558 Py_CLEAR(s->indent);
2559 Py_CLEAR(s->key_separator);
2560 Py_CLEAR(s->item_separator);
2561 Py_CLEAR(s->sort_keys);
2562 Py_CLEAR(s->skipkeys);
2563 Py_CLEAR(s->key_memo);
2567 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
/*
 * Type object for simplejson._speedups.Encoder.
 *
 * Instances are callable (tp_call = encoder_call) and take part in cyclic
 * garbage collection: Py_TPFLAGS_HAVE_GC is set and tp_traverse / tp_clear
 * are supplied.  Construction goes through encoder_new and encoder_init.
 *
 * NOTE(review): several slot lines are not part of this listing (the
 * embedded numbering skips them), so the initializer is not complete as
 * shown.
 */
2570 PyTypeObject PyEncoderType = {
2571 PyObject_HEAD_INIT(NULL)
2572 0, /* tp_internal */
2573 "simplejson._speedups.Encoder", /* tp_name */
2574 sizeof(PyEncoderObject), /* tp_basicsize */
2575 0, /* tp_itemsize */
2576 encoder_dealloc, /* tp_dealloc */
2582 0, /* tp_as_number */
2583 0, /* tp_as_sequence */
2584 0, /* tp_as_mapping */
2586 encoder_call, /* tp_call */
2588 0, /* tp_getattro */
2589 0, /* tp_setattro */
2590 0, /* tp_as_buffer */
2591 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2592 encoder_doc, /* tp_doc */
2593 encoder_traverse, /* tp_traverse */
2594 encoder_clear, /* tp_clear */
2595 0, /* tp_richcompare */
2596 0, /* tp_weaklistoffset */
2598 0, /* tp_iternext */
2600 encoder_members, /* tp_members */
2604 0, /* tp_descr_get */
2605 0, /* tp_descr_set */
2606 0, /* tp_dictoffset */
2607 encoder_init, /* tp_init */
2609 encoder_new, /* tp_new */
/*
 * Module-level function table, terminated by the all-NULL sentinel entry.
 * The visible entries expose py_encode_basestring_ascii (as
 * "encode_basestring_ascii") and py_scanstring.
 *
 * NOTE(review): the name, flags and doc fields of some entries are on lines
 * this listing skips.
 */
2613 static PyMethodDef speedups_methods[] = {
2614 {"encode_basestring_ascii",
2615 (PyCFunction)py_encode_basestring_ascii,
2617 pydoc_encode_basestring_ascii},
2619 (PyCFunction)py_scanstring,
2622 {NULL, NULL, 0, NULL}
/* docstring for the module */
2625 PyDoc_STRVAR(module_doc,
2626 "simplejson speedups\n");
2631 PyObject *m, *decimal;
2632 PyScannerType.tp_new = PyType_GenericNew;
2633 if (PyType_Ready(&PyScannerType) < 0)
2635 PyEncoderType.tp_new = PyType_GenericNew;
2636 if (PyType_Ready(&PyEncoderType) < 0)
2639 decimal = PyImport_ImportModule("decimal");
2640 if (decimal == NULL)
2642 DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal");
2644 if (DecimalTypePtr == NULL)
2647 m = Py_InitModule3("_speedups", speedups_methods, module_doc);
2648 Py_INCREF((PyObject*)&PyScannerType);
2649 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2650 Py_INCREF((PyObject*)&PyEncoderType);
2651 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);