2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "LiteralParser.h"
32 #include "UStringBuilder.h"
33 #include <wtf/ASCIICType.h>
// Returns true when c is one of the four characters treated as insignificant
// JSON white space: space, horizontal tab (0x9), line feed (0xA) or carriage
// return (0xD). No other Unicode white space is accepted.
38 static inline bool isJSONWhiteSpace(const UChar& c)
40 // The JSON RFC 4627 defines a list of allowed characters to be considered
41 // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
42 return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
// Tries to interpret the input as a sequence of JSONP-style statements. For each
// statement it builds a path of JSONPPathEntry items (declare / dot / lookup /
// call), parses the value with parse(StartParseExpression), and appends a
// JSONPData holding that value and path to `results`.
// The final visible return yields true only when the lexer ends on TokEnd.
// NOTE(review): the statements executed when one of the token checks below
// fails are not visible in this excerpt; presumably each one returns false.
45 bool LiteralParser::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
// A statement has to begin with an identifier.
47 if (m_lexer.next() != TokIdentifier)
50 Vector<JSONPPathEntry> path;
51 // Unguarded next to start off the lexer
// The identifier text is the [start, end) span of the current token.
52 Identifier name = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
// A leading "var" keyword: the following identifier becomes a Declare entry.
54 if (name == m_exec->globalData().propertyNames->varKeyword) {
55 if (m_lexer.next() != TokIdentifier)
57 entry.m_type = JSONPPathEntryTypeDeclare;
58 entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
// Presumably the else branch: the first identifier itself is recorded as a Dot entry.
61 entry.m_type = JSONPPathEntryTypeDot;
62 entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
// The entry name is tested against the language keywords (the action taken on a
// match is not visible here; presumably the parse is rejected).
65 if (m_exec->globalData().lexer->isKeyword(entry.m_pathEntryName))
67 TokenType tokenType = m_lexer.next();
// Consume path components until the assignment operator is reached.
68 while (tokenType != TokAssign) {
// Indexed lookup: a number token followed by TokRBracket is required.
71 entry.m_type = JSONPPathEntryTypeLookup;
72 if (m_lexer.next() != TokNumber)
74 double doubleIndex = m_lexer.currentToken().numberToken;
// NOTE(review): the conversion to int happens before the validity check on the
// next line, and converting a double outside the range of int is undefined
// behaviour in C++ - confirm whether very large indices can reach this point.
75 int index = (int)doubleIndex;
// The index must be a non-negative whole number.
76 if (index != doubleIndex || index < 0)
78 entry.m_pathIndex = index;
79 if (m_lexer.next() != TokRBracket)
// Member access: an identifier is required and becomes the entry name.
84 entry.m_type = JSONPPathEntryTypeDot;
85 if (m_lexer.next() != TokIdentifier)
87 entry.m_pathEntryName = Identifier(m_exec, m_lexer.currentToken().start, m_lexer.currentToken().end - m_lexer.currentToken().start);
// Call form: the condition tests for a last path entry that is not a Dot entry,
// or for needsFullSourceInfo being set (presumably both cases are rejected);
// the last entry is then re-tagged as a Call.
91 if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo)
93 path.last().m_type = JSONPPathEntryTypeCall;
101 tokenType = m_lexer.next();
// Parse the value expression and store it in a fresh JSONPData.
105 results.append(JSONPData());
106 results.last().m_value.set(m_exec->globalData(), parse(StartParseExpression));
// An empty value means parse() did not produce a result.
107 if (!results.last().m_value)
// Hand the accumulated path over to the result; `path` receives the new entry's (empty) path.
109 results.last().m_path.swap(path);
// For a call, the current token is checked against TokRParen.
110 if (entry.m_type == JSONPPathEntryTypeCall) {
111 if (m_lexer.currentToken().type != TokRParen)
// The current token is checked against TokSemi (statement terminator).
115 if (m_lexer.currentToken().type != TokSemi)
// Another identifier starts the next statement.
118 } while (m_lexer.currentToken().type == TokIdentifier);
// Succeeds only if the whole input has been consumed.
119 return m_lexer.currentToken().type == TokEnd;
// Builds an Identifier for the given character run, reusing cached Identifiers
// where possible. Two caches are consulted, both indexed by the first character:
// m_shortIdentifiers and m_recentIdentifiers.
// NOTE(review): the conditions choosing between the branches (presumably based
// on `length`) are not visible in this excerpt.
122 ALWAYS_INLINE const Identifier LiteralParser::makeIdentifier(const UChar* characters, size_t length)
// Presumably the zero-length case: the shared empty identifier is returned.
125 return m_exec->globalData().propertyNames->emptyIdentifier;
// First characters at or above MaximumCachableCharacter bypass both caches.
126 if (characters[0] >= MaximumCachableCharacter)
127 return Identifier(&m_exec->globalData(), characters, length);
// m_shortIdentifiers: a non-null slot is returned without comparing contents,
// so this path presumably serves single-character names only - TODO confirm.
130 if (!m_shortIdentifiers[characters[0]].isNull())
131 return m_shortIdentifiers[characters[0]];
132 m_shortIdentifiers[characters[0]] = Identifier(&m_exec->globalData(), characters, length);
133 return m_shortIdentifiers[characters[0]];
// m_recentIdentifiers: a hit requires the cached identifier to equal the
// requested characters; on a miss the slot is overwritten with the new one.
135 if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
136 return m_recentIdentifiers[characters[0]];
137 m_recentIdentifiers[characters[0]] = Identifier(&m_exec->globalData(), characters, length);
138 return m_recentIdentifiers[characters[0]];
// Scans one token starting at m_ptr, fills in `token`, and returns a token type.
// `mode` is a compile-time parser mode; it is forwarded to lexString and decides
// whether single-quoted strings are accepted.
// NOTE(review): the dispatch on the current character (switch and case labels)
// and most return statements are not visible in this excerpt, so the grouping
// comments below are inferred from the token types being assigned.
141 template <LiteralParser::ParserMode mode> LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
// Skip insignificant JSON white space.
143 while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
146 ASSERT(m_ptr <= m_end);
// End of input: the token gets an empty span located at m_ptr.
147 if (m_ptr >= m_end) {
149 token.start = token.end = m_ptr;
// Presumably the default type, replaced below once a token is recognised.
152 token.type = TokError;
// Punctuation tokens.
156 token.type = TokLBracket;
160 token.type = TokRBracket;
164 token.type = TokLParen;
168 token.type = TokRParen;
172 token.type = TokLBrace;
176 token.type = TokRBrace;
180 token.type = TokComma;
184 token.type = TokColon;
// Double-quoted string literal.
188 return lexString<mode, '"'>(token);
// Literal "true": the length is checked first, then characters 1..3
// (presumably the first character selected this branch).
190 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
192 token.type = TokTrue;
// Literal "false": same scheme with five characters.
198 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
200 token.type = TokFalse;
// Literal "null".
206 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
208 token.type = TokNull;
// Numeric literal: delegated to lexNumber.
224 return lexNumber(token);
// NOTE(review): the conditions guarding '=' and ';' are not visible; these
// tokens are used by tryJSONPParse, so they are presumably JSONP-only.
233 token.type = TokAssign;
238 token.type = TokSemi;
// Identifier: starts with an ASCII letter, '_' or '$' and continues with ASCII
// alphanumerics, '_' or '$'. The token text points directly into the source.
242 if (isASCIIAlpha(*m_ptr) || *m_ptr == '_' || *m_ptr == '$') {
243 while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$'))
245 token.stringToken = token.start;
246 token.stringLength = m_ptr - token.start;
247 token.type = TokIdentifier;
249 return TokIdentifier;
// Single-quoted string: an error message is recorded in StrictJSON mode,
// otherwise the string is lexed with '\'' as the terminator.
251 if (*m_ptr == '\'') {
252 if (mode == StrictJSON) {
253 m_lexErrorMessage = "Single quotes (\') are not allowed in JSON";
256 return lexString<mode, '\''>(token);
// Anything else is reported as an unrecognized token.
259 m_lexErrorMessage = String::format("Unrecognized token '%c'", *m_ptr).impl();
// Lexes the next token into m_currentToken and returns its type, selecting the
// lex<> instantiation that matches the lexer's runtime mode (m_mode).
// NOTE(review): the condition guarding the JSONP branch is not visible in this
// excerpt; presumably it tests m_mode == JSONP, leaving StrictJSON as fallback.
263 LiteralParser::TokenType LiteralParser::Lexer::next()
265 if (m_mode == NonStrictJSON)
266 return lex<NonStrictJSON>(m_currentToken);
268 return lex<JSONP>(m_currentToken);
269 return lex<StrictJSON>(m_currentToken);
// Returns true when c may be consumed as part of a plain run inside a string
// literal. A tab always qualifies. Any other character must be at least ' ',
// must not be a backslash or the string terminator, and - in every mode except
// StrictJSON - must not exceed 0xff.
272 template <LiteralParser::ParserMode mode, UChar terminator> static inline bool isSafeStringCharacter(UChar c)
274 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != terminator) || c == '\t';
277 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
// Lexes a string literal that ends at `terminator` and stores the result in
// `token`. A string without escapes is referenced in place (stringToken points
// into the source); once an escape is decoded the text is accumulated in a
// UStringBuilder and the token refers to the built buffer instead.
// NOTE(review): the switch over the escape character, the pointer increments and
// the return statements are not visible in this excerpt.
278 template <LiteralParser::ParserMode mode, UChar terminator> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
// Start of the current run of unescaped characters.
281 const UChar* runStart = m_ptr;
282 UStringBuilder builder;
// Walk over characters that need no special handling.
285 while (m_ptr < m_end && isSafeStringCharacter<mode, terminator>(*m_ptr))
// Once the builder is in use, every plain run has to be copied into it.
287 if (builder.length())
288 builder.append(runStart, m_ptr - runStart);
// Escape sequences are processed in every mode except NonStrictJSON.
289 if ((mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
// First escape encountered: copy the run seen so far, which the check above
// skipped because the builder was still empty.
290 if (builder.isEmpty() && runStart < m_ptr)
291 builder.append(runStart, m_ptr - runStart);
// A backslash may not be the last character of the input.
293 if (m_ptr >= m_end) {
294 m_lexErrorMessage = "Unterminated string";
// Decoded simple escapes (the matching case labels are not visible here).
303 builder.append('\\');
311 builder.append('\b');
315 builder.append('\f');
319 builder.append('\n');
323 builder.append('\r');
327 builder.append('\t');
// Unicode escape: m_ptr is at the 'u', so five characters must remain.
332 if ((m_end - m_ptr) < 5) {
333 m_lexErrorMessage = "\\u must be followed by 4 hex digits";
335 } // uNNNN == 5 characters
// Each of the four characters following the 'u' must be a hex digit.
336 for (int i = 1; i < 5; i++) {
337 if (!isASCIIHexDigit(m_ptr[i])) {
338 m_lexErrorMessage = String::format("\"\\%s\" is not a valid unicode escape", UString(m_ptr, 5).ascii().data()).impl();
342 builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
// An escaped single quote is accepted outside StrictJSON mode.
347 if (*m_ptr == '\'' && mode != StrictJSON) {
348 builder.append('\'');
// Any other escape character is reported as an error.
352 m_lexErrorMessage = String::format("Invalid escape character %c", *m_ptr).impl();
// Repeat while escapes are enabled, the pass advanced past runStart, input
// remains and the terminator has not been reached.
356 } while ((mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);
// The literal has to end on the terminator.
358 if (m_ptr >= m_end || *m_ptr != terminator) {
359 m_lexErrorMessage = "Unterminated string";
// Nothing was accumulated in the builder: reference the source characters directly.
363 if (builder.isEmpty()) {
364 token.stringBuffer = UString();
365 token.stringToken = runStart;
366 token.stringLength = m_ptr - runStart;
// Otherwise the token keeps the built string alive via stringBuffer.
368 token.stringBuffer = builder.toUString();
369 token.stringToken = token.stringBuffer.characters();
370 token.stringLength = token.stringBuffer.length();
372 token.type = TokString;
// Lexes a JSON number starting at m_ptr and stores its value in
// token.numberToken. Short integers without fraction or exponent are converted
// by a digit loop; everything else is copied to a char buffer and converted
// with WTF::strtod.
// NOTE(review): pointer increments, sign handling in the fast path, the
// assignment of token.end and the return statements are not visible in this
// excerpt.
377 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
379 // ES5 and json.org define numbers as
386 // -? digit1-9 digits?
391 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
393 if (m_ptr < m_end && *m_ptr == '-') // -?
// Integer part: a lone '0', or a non-zero digit followed by any digits.
397 if (m_ptr < m_end && *m_ptr == '0') // 0
399 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
402 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
405 m_lexErrorMessage = "Invalid number";
// Optional fraction: '.' must be followed by at least one digit.
410 if (m_ptr < m_end && *m_ptr == '.') {
413 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
414 m_lexErrorMessage = "Invalid digits after decimal point";
419 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
// Fast path: no fraction, no exponent marker ahead, and fewer than 10 characters
// consumed since token.start. Because m_ptr < m_end is required, a number that
// ends exactly at the end of the input does not take this path.
421 } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) < 10) {
423 token.type = TokNumber;
425 const UChar* digit = token.start;
// Accumulate the decimal digits into the result.
432 while (digit < m_ptr)
433 result = result * 10 + (*digit++) - '0';
435 token.numberToken = result;
439 // ([eE][+-]? [0-9]+)?
440 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
444 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
448 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
449 m_lexErrorMessage = "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number";
454 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
// Slow path: narrow the token's characters into a char buffer sized one larger
// than the token (presumably for a terminating NUL) and convert with strtod.
458 token.type = TokNumber;
460 Vector<char, 64> buffer(token.end - token.start + 1);
462 for (i = 0; i < token.end - token.start; i++) {
// Every character of a valid number must survive the narrowing to char.
463 ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
464 buffer[i] = static_cast<char>(token.start[i]);
468 token.numberToken = WTF::strtod(buffer.data(), &end);
// strtod is expected to consume exactly the characters of the token.
469 ASSERT(buffer.data() + (token.end - token.start) == end);
// Parses a value iteratively, starting in `initialState`, without recursing.
// Three explicit stacks replace the call stack: objectStack holds the arrays and
// objects under construction, stateStack the state to resume once a nested
// value is complete, and identifierStack the pending property names. The most
// recently completed value is carried in lastValue.
// NOTE(review): the enclosing loop/switch, several case labels and the
// return/break statements are not visible in this excerpt; presumably an error
// path returns an empty JSValue after m_parseErrorMessage has been set.
473 JSValue LiteralParser::parse(ParserState initialState)
475 ParserState state = initialState;
476 MarkedArgumentBuffer objectStack;
478 Vector<ParserState, 16> stateStack;
479 Vector<Identifier, 16> identifierStack;
// Array start: create the array and push it as the current container.
483 case StartParseArray: {
484 JSArray* array = constructEmptyArray(m_exec);
485 objectStack.append(array);
488 doParseArrayStartExpression:
489 case DoParseArrayStartExpression: {
// Remember the previous token so that a trailing comma before ']' is detected.
490 TokenType lastToken = m_lexer.currentToken().type;
491 if (m_lexer.next() == TokRBracket) {
492 if (lastToken == TokComma) {
493 m_parseErrorMessage = "Unexpected comma at the end of array expression";
// The array is complete: pop it and make it the current value.
497 lastValue = objectStack.last();
498 objectStack.removeLast();
// Otherwise parse an element, then resume in DoParseArrayEndExpression.
502 stateStack.append(DoParseArrayEndExpression);
503 goto startParseExpression;
505 case DoParseArrayEndExpression: {
// Append the element that was just parsed to the array on top of the stack.
506 asArray(objectStack.last())->push(m_exec, lastValue);
508 if (m_lexer.currentToken().type == TokComma)
509 goto doParseArrayStartExpression;
511 if (m_lexer.currentToken().type != TokRBracket) {
512 m_parseErrorMessage = "Expected ']'";
517 lastValue = objectStack.last();
518 objectStack.removeLast();
// Object start: create the object and push it as the current container.
522 case StartParseObject: {
523 JSObject* object = constructEmptyObject(m_exec);
524 objectStack.append(object);
526 TokenType type = m_lexer.next();
// Property names are strings; bare identifiers are also accepted outside StrictJSON.
527 if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
// Copy the name token before lexing further.
528 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
531 if (m_lexer.next() != TokColon) {
532 m_parseErrorMessage = "Expected ':' before value in object property definition";
// Save the property name and parse its value; resume in DoParseObjectEndExpression.
537 identifierStack.append(makeIdentifier(identifierToken.stringToken, identifierToken.stringLength));
538 stateStack.append(DoParseObjectEndExpression);
539 goto startParseExpression;
// No property name: only '}' (an empty object) is valid here.
541 if (type != TokRBrace) {
542 m_parseErrorMessage = "Expected '}'";
546 lastValue = objectStack.last();
547 objectStack.removeLast();
// Reached after a comma inside an object: a further property is mandatory.
550 doParseObjectStartExpression:
551 case DoParseObjectStartExpression: {
552 TokenType type = m_lexer.next();
553 if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) {
554 m_parseErrorMessage = "Property name must be a string literal";
557 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
560 if (m_lexer.next() != TokColon) {
561 m_parseErrorMessage = "Expected ':'";
566 identifierStack.append(makeIdentifier(identifierToken.stringToken, identifierToken.stringLength));
567 stateStack.append(DoParseObjectEndExpression);
568 goto startParseExpression;
570 case DoParseObjectEndExpression:
// Store the parsed value under the pending property name.
572 asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue);
573 identifierStack.removeLast();
574 if (m_lexer.currentToken().type == TokComma)
575 goto doParseObjectStartExpression;
576 if (m_lexer.currentToken().type != TokRBrace) {
577 m_parseErrorMessage = "Expected '}'";
581 lastValue = objectStack.last();
582 objectStack.removeLast();
// Value dispatch on the current token: containers jump to their start states,
// scalars set lastValue directly, everything else records an error message.
585 startParseExpression:
586 case StartParseExpression: {
587 switch (m_lexer.currentToken().type) {
589 goto startParseArray;
591 goto startParseObject;
// String value: the text goes through makeIdentifier before becoming a JS string.
593 Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
595 lastValue = jsString(m_exec, makeIdentifier(stringToken.stringToken, stringToken.stringLength).ustring());
599 Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
601 lastValue = jsNumber(numberToken.numberToken);
606 lastValue = jsNull();
611 lastValue = jsBoolean(true);
616 lastValue = jsBoolean(false);
// Tokens that cannot begin a value.
619 m_parseErrorMessage = "Unexpected token ']'";
622 m_parseErrorMessage = "Unexpected token '}'";
625 m_parseErrorMessage = String::format("Unexpected identifier \"%s\"", UString(m_lexer.currentToken().stringToken, m_lexer.currentToken().stringLength).ascii().data()).impl();
628 m_parseErrorMessage = "Unexpected token ':'";
631 m_parseErrorMessage = "Unexpected token '('";
634 m_parseErrorMessage = "Unexpected token ')'";
637 m_parseErrorMessage = "Unexpected token ','";
640 m_parseErrorMessage = "Unexpected token '.'";
643 m_parseErrorMessage = "Unexpected token '='";
646 m_parseErrorMessage = "Unexpected token ';'";
649 m_parseErrorMessage = "Unexpected EOF";
654 m_parseErrorMessage = "Could not parse value expression";
// Statement dispatch on the current token. One branch parses a plain expression;
// another first pushes StartParseStatementEndStatement so the expression's end
// is checked afterwards (presumably the parenthesised form - the case labels
// are not visible here).
659 case StartParseStatement: {
660 switch (m_lexer.currentToken().type) {
664 goto startParseExpression;
668 stateStack.append(StartParseStatementEndStatement);
669 goto startParseExpression;
// Tokens that cannot begin a statement.
672 m_parseErrorMessage = "Unexpected token ']'";
675 m_parseErrorMessage = "Unexpected token '{'";
678 m_parseErrorMessage = "Unexpected token '}'";
681 m_parseErrorMessage = "Unexpected identifier";
684 m_parseErrorMessage = "Unexpected token ':'";
687 m_parseErrorMessage = "Unexpected token ')'";
690 m_parseErrorMessage = "Unexpected token ','";
693 m_parseErrorMessage = "Unexpected token 'true'";
696 m_parseErrorMessage = "Unexpected token 'false'";
699 m_parseErrorMessage = "Unexpected token 'null'";
702 m_parseErrorMessage = "Unexpected EOF";
705 m_parseErrorMessage = "Unexpected token '.'";
708 m_parseErrorMessage = "Unexpected token '='";
711 m_parseErrorMessage = "Unexpected token ';'";
715 m_parseErrorMessage = "Could not parse statement";
// End of a statement: the current token is checked against TokRParen and the
// token after it against TokEnd; otherwise the trailing-content message is set.
719 case StartParseStatementEndStatement: {
720 ASSERT(stateStack.isEmpty());
721 if (m_lexer.currentToken().type != TokRParen)
723 if (m_lexer.next() == TokEnd)
725 m_parseErrorMessage = "Unexpected content at end of JSON literal";
729 ASSERT_NOT_REACHED();
// A value is complete: with no saved state the parse is finished (presumably
// lastValue is returned); otherwise resume the state on top of stateStack.
731 if (stateStack.isEmpty())
733 state = stateStack.last();
734 stateStack.removeLast();