2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
27 #include "JSFunction.h"
29 #include "JSGlobalObjectFunctions.h"
30 #include "Identifier.h"
37 #include <wtf/Assertions.h>
40 using namespace Unicode;
43 #include "KeywordLookup.h"
45 #include "Lexer.lut.h"
// Character classes used by the lexer: typesOfASCIICharacters maps ASCII code
// points to these values and Lexer::lex() switches on them.
// NOTE(review): the enum header and most enumerators are not visible in this excerpt.
51 // Types for the main switch
53 // The first three types are fixed, and are also used for identifying
54 // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
// isIdentPart tests `type <= CharacterNumber`, so the relative order of these
// leading enumerators must not be changed.
55 CharacterIdentifierStart,
60 CharacterLineTerminator,
61 CharacterExclamationMark,
65 CharacterCloseBracket,
89 // Other types (only one so far)
// Lookup table from ASCII code point (0-127) to its CharacterType.
// It is indexed directly by character value in isIdentStart(), isIdentPart()
// and Lexer::lex(), so callers must check isASCII() first.
// Letters, '$' and '_' are identifier starts; both quote characters share
// CharacterQuote; '#', '@', '`' and most control characters are invalid.
94 static const unsigned short typesOfASCIICharacters[128] = {
95 /* 0 - Null */ CharacterInvalid,
96 /* 1 - Start of Heading */ CharacterInvalid,
97 /* 2 - Start of Text */ CharacterInvalid,
98 /* 3 - End of Text */ CharacterInvalid,
99 /* 4 - End of Transm. */ CharacterInvalid,
100 /* 5 - Enquiry */ CharacterInvalid,
101 /* 6 - Acknowledgment */ CharacterInvalid,
102 /* 7 - Bell */ CharacterInvalid,
103 /* 8 - Back Space */ CharacterInvalid,
104 /* 9 - Horizontal Tab */ CharacterWhiteSpace,
105 /* 10 - Line Feed */ CharacterLineTerminator,
106 /* 11 - Vertical Tab */ CharacterWhiteSpace,
107 /* 12 - Form Feed */ CharacterWhiteSpace,
108 /* 13 - Carriage Return */ CharacterLineTerminator,
109 /* 14 - Shift Out */ CharacterInvalid,
110 /* 15 - Shift In */ CharacterInvalid,
111 /* 16 - Data Link Escape */ CharacterInvalid,
112 /* 17 - Device Control 1 */ CharacterInvalid,
113 /* 18 - Device Control 2 */ CharacterInvalid,
114 /* 19 - Device Control 3 */ CharacterInvalid,
115 /* 20 - Device Control 4 */ CharacterInvalid,
116 /* 21 - Negative Ack. */ CharacterInvalid,
117 /* 22 - Synchronous Idle */ CharacterInvalid,
118 /* 23 - End of Transm. Block */ CharacterInvalid,
119 /* 24 - Cancel */ CharacterInvalid,
120 /* 25 - End of Medium */ CharacterInvalid,
121 /* 26 - Substitute */ CharacterInvalid,
122 /* 27 - Escape */ CharacterInvalid,
123 /* 28 - File Separator */ CharacterInvalid,
124 /* 29 - Group Separator */ CharacterInvalid,
125 /* 30 - Record Separator */ CharacterInvalid,
126 /* 31 - Unit Separator */ CharacterInvalid,
127 /* 32 - Space */ CharacterWhiteSpace,
128 /* 33 - ! */ CharacterExclamationMark,
129 /* 34 - " */ CharacterQuote,
130 /* 35 - # */ CharacterInvalid,
131 /* 36 - $ */ CharacterIdentifierStart,
132 /* 37 - % */ CharacterModulo,
133 /* 38 - & */ CharacterAnd,
134 /* 39 - ' */ CharacterQuote,
135 /* 40 - ( */ CharacterOpenParen,
136 /* 41 - ) */ CharacterCloseParen,
137 /* 42 - * */ CharacterMultiply,
138 /* 43 - + */ CharacterAdd,
139 /* 44 - , */ CharacterComma,
140 /* 45 - - */ CharacterSub,
141 /* 46 - . */ CharacterDot,
142 /* 47 - / */ CharacterSlash,
143 /* 48 - 0 */ CharacterZero,
144 /* 49 - 1 */ CharacterNumber,
145 /* 50 - 2 */ CharacterNumber,
146 /* 51 - 3 */ CharacterNumber,
147 /* 52 - 4 */ CharacterNumber,
148 /* 53 - 5 */ CharacterNumber,
149 /* 54 - 6 */ CharacterNumber,
150 /* 55 - 7 */ CharacterNumber,
151 /* 56 - 8 */ CharacterNumber,
152 /* 57 - 9 */ CharacterNumber,
153 /* 58 - : */ CharacterColon,
154 /* 59 - ; */ CharacterSemicolon,
155 /* 60 - < */ CharacterLess,
156 /* 61 - = */ CharacterEqual,
157 /* 62 - > */ CharacterGreater,
158 /* 63 - ? */ CharacterQuestion,
159 /* 64 - @ */ CharacterInvalid,
160 /* 65 - A */ CharacterIdentifierStart,
161 /* 66 - B */ CharacterIdentifierStart,
162 /* 67 - C */ CharacterIdentifierStart,
163 /* 68 - D */ CharacterIdentifierStart,
164 /* 69 - E */ CharacterIdentifierStart,
165 /* 70 - F */ CharacterIdentifierStart,
166 /* 71 - G */ CharacterIdentifierStart,
167 /* 72 - H */ CharacterIdentifierStart,
168 /* 73 - I */ CharacterIdentifierStart,
169 /* 74 - J */ CharacterIdentifierStart,
170 /* 75 - K */ CharacterIdentifierStart,
171 /* 76 - L */ CharacterIdentifierStart,
172 /* 77 - M */ CharacterIdentifierStart,
173 /* 78 - N */ CharacterIdentifierStart,
174 /* 79 - O */ CharacterIdentifierStart,
175 /* 80 - P */ CharacterIdentifierStart,
176 /* 81 - Q */ CharacterIdentifierStart,
177 /* 82 - R */ CharacterIdentifierStart,
178 /* 83 - S */ CharacterIdentifierStart,
179 /* 84 - T */ CharacterIdentifierStart,
180 /* 85 - U */ CharacterIdentifierStart,
181 /* 86 - V */ CharacterIdentifierStart,
182 /* 87 - W */ CharacterIdentifierStart,
183 /* 88 - X */ CharacterIdentifierStart,
184 /* 89 - Y */ CharacterIdentifierStart,
185 /* 90 - Z */ CharacterIdentifierStart,
186 /* 91 - [ */ CharacterOpenBracket,
187 /* 92 - \ */ CharacterBackSlash,
188 /* 93 - ] */ CharacterCloseBracket,
189 /* 94 - ^ */ CharacterXor,
190 /* 95 - _ */ CharacterIdentifierStart,
191 /* 96 - ` */ CharacterInvalid,
192 /* 97 - a */ CharacterIdentifierStart,
193 /* 98 - b */ CharacterIdentifierStart,
194 /* 99 - c */ CharacterIdentifierStart,
195 /* 100 - d */ CharacterIdentifierStart,
196 /* 101 - e */ CharacterIdentifierStart,
197 /* 102 - f */ CharacterIdentifierStart,
198 /* 103 - g */ CharacterIdentifierStart,
199 /* 104 - h */ CharacterIdentifierStart,
200 /* 105 - i */ CharacterIdentifierStart,
201 /* 106 - j */ CharacterIdentifierStart,
202 /* 107 - k */ CharacterIdentifierStart,
203 /* 108 - l */ CharacterIdentifierStart,
204 /* 109 - m */ CharacterIdentifierStart,
205 /* 110 - n */ CharacterIdentifierStart,
206 /* 111 - o */ CharacterIdentifierStart,
207 /* 112 - p */ CharacterIdentifierStart,
208 /* 113 - q */ CharacterIdentifierStart,
209 /* 114 - r */ CharacterIdentifierStart,
210 /* 115 - s */ CharacterIdentifierStart,
211 /* 116 - t */ CharacterIdentifierStart,
212 /* 117 - u */ CharacterIdentifierStart,
213 /* 118 - v */ CharacterIdentifierStart,
214 /* 119 - w */ CharacterIdentifierStart,
215 /* 120 - x */ CharacterIdentifierStart,
216 /* 121 - y */ CharacterIdentifierStart,
217 /* 122 - z */ CharacterIdentifierStart,
218 /* 123 - { */ CharacterOpenBrace,
219 /* 124 - | */ CharacterOr,
220 /* 125 - } */ CharacterCloseBrace,
221 /* 126 - ~ */ CharacterTilde,
222 /* 127 - Delete */ CharacterInvalid,
// Constructs a lexer bound to the given JSGlobalData. The lexer starts out
// not reparsing, and its keyword table is initialized from JSC::mainTable.
225 Lexer::Lexer(JSGlobalData* globalData)
226 : m_isReparsing(false)
227 , m_globalData(globalData)
228 , m_keywordTable(JSC::mainTable)
// NOTE(review): the header of the function containing the next statement is
// not visible in this excerpt; presumably it is the destructor, releasing the
// keyword table that the constructor initialized.
234 m_keywordTable.deleteTable();
// Builds the lexer error message for the character currently in m_current.
// A handful of characters get a fixed, readable message; everything else is
// reported in \uXXXX notation.
// NOTE(review): the switch and its case labels are not visible in this excerpt.
237 UString Lexer::getInvalidCharMessage()
241 return "Invalid character: '\\0'";
243 return "Invalid character: '\\n'";
245 return "Invalid character: '\\v'";
247 return "Invalid character: '\\r'";
249 return "Invalid character: '#'";
251 return "Invalid character: '@'";
253 return "Invalid character: '`'";
// A \u escape is four hexadecimal digits, so print the code unit in hex.
// Decimal output would make e.g. U+00E9 appear as the unrelated '\u0233'.
255 return String::format("Invalid character '\\u%04x'", static_cast<unsigned>(m_current)).impl();
// Returns a pointer into the source buffer for the lexer's current position.
// The assertion checks that the cursor has not moved past the end of input.
// NOTE(review): the return statement is not visible here; presumably m_code.
259 ALWAYS_INLINE const UChar* Lexer::currentCharacter() const
261 ASSERT(m_code <= m_codeEnd);
// Returns the distance, in UChar units, from m_codeStart to the current character.
265 ALWAYS_INLINE int Lexer::currentOffset() const
267 return currentCharacter() - m_codeStart;
// Points the lexer at a new piece of source code and resets per-parse state.
//   source - supplies the character data, the start/end offsets to lex and
//            the first line number.
//   arena  - parser arena whose identifier arena is stored in m_arena.
270 void Lexer::setCode(const SourceCode& source, ParserArena& arena)
272 m_arena = &arena.identifierArena();
274 m_lineNumber = source.firstLine();
278 const UChar* data = source.provider()->data();
// The range to lex is [startOffset, endOffset) within the provider's data.
282 m_code = data + source.startOffset();
283 m_codeEnd = data + source.endOffset();
285 m_atLineStart = true;
286 m_lexErrorMessage = UString();
// Pre-size the scratch buffers; the 16-bit one reserves half the source length.
288 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
289 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
// NOTE(review): the branch bodies are not visible here; presumably m_current is
// loaded with the first character, or with -1 when the source is empty.
291 if (LIKELY(m_code < m_codeEnd))
295 ASSERT(currentOffset() == source.startOffset());
// Advances the read position by shiftAmount characters.
// With DoBoundsCheck the new position is compared against m_codeEnd; the
// other path only bumps the pointer.
// NOTE(review): the statements that refresh m_current are not visible in this excerpt.
298 template <int shiftAmount, Lexer::ShiftType shouldBoundsCheck> ALWAYS_INLINE void Lexer::internalShift()
300 if (shouldBoundsCheck == DoBoundsCheck) {
301 // Faster than an if-else sequence
// Shifting is only legal while not already at end of input (m_current == -1).
302 ASSERT(m_current != -1);
304 m_code += shiftAmount;
305 if (LIKELY(m_code < m_codeEnd))
308 m_code += shiftAmount;
// Advances the lexer by one character, with bounds checking.
313 ALWAYS_INLINE void Lexer::shift()
315 internalShift<1, DoBoundsCheck>();
// Looks ahead without consuming input.
// Returns the character `offset` positions past m_code, or -1 when that
// position is at or beyond m_codeEnd. `offset` must be between 1 and 4.
318 ALWAYS_INLINE int Lexer::peek(int offset)
320 // Only use if necessary
321 ASSERT(offset > 0 && offset < 5);
322 const UChar* code = m_code + offset;
323 return (code < m_codeEnd) ? *code : -1;
// Decodes the four hex digits of a \uXXXX escape, starting at m_current.
// Callers (parseIdentifier, parseString) treat a result of -1 as failure.
// NOTE(review): the declarations of char1..char3 and the return statements are
// not visible in this excerpt; presumably the digits are obtained via peek().
326 int Lexer::getUnicodeCharacter()
// All four characters must be hexadecimal digits.
332 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
335 int result = convertUnicode(m_current, char1, char2, char3);
// Consumes the line terminator at m_current. A CR/LF pair, in either order,
// is treated as a single terminator.
// NOTE(review): the shift() calls and line-number bookkeeping are on lines not
// visible in this excerpt.
343 void Lexer::shiftLineTerminator()
345 ASSERT(isLineTerminator(m_current));
// Remember the first terminator character. This is a plain local, so it does
// not carry the m_ prefix reserved for members such as m_current.
347 int prev = m_current;
350 // Allow both CRLF and LFCR.
// prev is '\n' or '\r' here exactly when the sum can match, so the test is
// true only for one CR followed by one LF or vice versa.
351 if (prev + m_current == '\n' + '\r')
// True when the previously returned token was CONTINUE, BREAK, RETURN or THROW.
// NOTE(review): presumably these are the "restricted" keywords of automatic
// semicolon insertion; lex() consults this after a single-line comment.
357 ALWAYS_INLINE bool Lexer::lastTokenWasRestrKeyword() const
359 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
// Slow path of isIdentStart for non-ASCII characters: true when the Unicode
// category of c is one of the letter categories (uppercase, lowercase,
// titlecase, modifier or other letter).
362 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
364 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
// True if c may begin an identifier. ASCII characters are classified through
// typesOfASCIICharacters; everything else goes through the Unicode category check.
367 static inline bool isIdentStart(int c)
369 return isASCII(c) ? typesOfASCIICharacters[c] == CharacterIdentifierStart : isNonASCIIIdentStart(c);
// Slow path of isIdentPart for non-ASCII characters: accepts the letter
// categories plus non-spacing marks, spacing combining marks, decimal digits
// and connector punctuation.
372 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
374 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
375 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
// True if c may appear inside an identifier after its first character.
378 static ALWAYS_INLINE bool isIdentPart(int c)
380 // Character types are divided into two groups depending on whether they can be part of an
381 // identifier or not. Those whose type value is less than or equal to CharacterNumber can be
382 // part of an identifier. (See the CharacterType definition for more details.)
383 return isASCII(c) ? typesOfASCIICharacters[c] <= CharacterNumber : isNonASCIIIdentPart(c);
// Helper used by Lexer::parseString, which passes it the character that
// follows a backslash and stores the result in `escape`.
// NOTE(review): the body is not visible in this excerpt; presumably it maps
// single-character escapes (such as 'n' or 't') to the character they denote.
386 static inline int singleEscape(int c)
// Appends c, narrowed to char, to the 8-bit scratch buffer m_buffer8.
412 inline void Lexer::record8(int c)
416 m_buffer8.append(static_cast<char>(c));
// Appends one UTF-16 code unit to the 16-bit scratch buffer m_buffer16.
419 inline void Lexer::record16(UChar c)
421 m_buffer16.append(c);
// int overload of record16: checks that c fits in an unsigned short, then
// forwards to the UChar overload.
424 inline void Lexer::record16(int c)
427 ASSERT(c <= USHRT_MAX);
428 record16(UChar(static_cast<unsigned short>(c)));
// Lexes an identifier or keyword starting at the current character.
//   shouldCreateIdentifier - when false no Identifier is built and
//                            tokenData->ident is set to 0.
//   tokenData  - receives the identifier in tokenData->ident.
//   lexType    - flag set; IgnoreReservedWords disables keyword recognition.
//   strictMode - RESERVED_IF_STRICT keywords are reported as IDENT unless set.
// Returns the keyword token or IDENT.
// NOTE(review): loop headers, error returns and several else branches are on
// lines not visible in this excerpt.
431 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* tokenData, unsigned lexType, bool strictMode)
433 const ptrdiff_t remaining = m_codeEnd - m_code;
// Fast path: with at least maxTokenLength characters left, try parseKeyword first.
434 if ((remaining >= maxTokenLength) && !(lexType & IgnoreReservedWords)) {
435 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
436 if (keyword != IDENT) {
437 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
438 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
// General path: scan identifier characters. identifierStart marks the start
// of the run that has not yet been copied into m_buffer16.
441 const UChar* identifierStart = currentCharacter();
442 bool bufferRequired = false;
445 if (LIKELY(isIdentPart(m_current))) {
449 if (LIKELY(m_current != '\\'))
452 // \uXXXX unicode characters.
// An escape forces use of m_buffer16: flush the pending run, then decode it.
453 bufferRequired = true;
454 if (identifierStart != currentCharacter())
455 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
457 if (UNLIKELY(m_current != 'u'))
460 int character = getUnicodeCharacter();
461 if (UNLIKELY(character == -1))
// The decoded character must be a valid identifier start if nothing has been
// buffered yet, and a valid identifier part otherwise.
463 if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character) : !isIdentStart(character)))
465 if (shouldCreateIdentifier)
467 identifierStart = currentCharacter();
470 int identifierLength;
471 const Identifier* ident = 0;
472 if (shouldCreateIdentifier) {
// Without escapes the identifier is taken straight from the source buffer;
// otherwise the remaining run is flushed and m_buffer16 is used instead.
474 identifierLength = currentCharacter() - identifierStart;
476 if (identifierStart != currentCharacter())
477 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
478 identifierStart = m_buffer16.data();
479 identifierLength = m_buffer16.size();
482 ident = makeIdentifier(identifierStart, identifierLength);
483 tokenData->ident = ident;
485 tokenData->ident = 0;
489 if (LIKELY(!bufferRequired && !(lexType & IgnoreReservedWords))) {
490 ASSERT(shouldCreateIdentifier);
491 // Keywords must not be recognized if there was an \uXXXX in the identifier.
// The table lookup is only needed when the parseKeyword fast path was skipped
// because fewer than maxTokenLength characters remained.
492 if (remaining < maxTokenLength) {
493 const HashEntry* entry = m_keywordTable.entry(m_globalData, *ident);
494 ASSERT((remaining < maxTokenLength) || !entry);
497 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
498 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
// Leave the scratch buffer empty for the next token.
503 m_buffer16.resize(0);
// True if ident has an entry in the lexer's keyword table.
507 bool Lexer::isKeyword(const Identifier& ident)
509 return m_keywordTable.entry(m_globalData, ident);
// Lexes a quoted string literal; m_current holds the opening quote on entry.
//   shouldBuildStrings - when false the literal is only validated and
//                        tokenData->ident is set to 0.
//   tokenData  - receives the decoded contents in tokenData->ident.
//   strictMode - rejects every numeric escape except a lone \0.
// On the visible failure paths m_lexErrorMessage is set.
// NOTE(review): the shift() calls and return statements are on lines not
// visible in this excerpt.
512 template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer::parseString(JSTokenData* tokenData, bool strictMode)
// Remember which quote character opened the literal; the same one must close it.
514 int stringQuoteCharacter = m_current;
// stringStart marks the start of the run of ordinary characters that has not
// yet been copied into m_buffer16.
517 const UChar* stringStart = currentCharacter();
519 while (m_current != stringQuoteCharacter) {
520 if (UNLIKELY(m_current == '\\')) {
// Flush the pending run before handling the escape sequence.
521 if (stringStart != currentCharacter() && shouldBuildStrings)
522 m_buffer16.append(stringStart, currentCharacter() - stringStart);
525 int escape = singleEscape(m_current);
527 // Most common escape sequences first
529 if (shouldBuildStrings)
// A backslash followed by a line terminator consumes the terminator.
532 } else if (UNLIKELY(isLineTerminator(m_current)))
533 shiftLineTerminator();
534 else if (m_current == 'x') {
// \xHH: two hex digits are decoded through convertHex.
536 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
537 int prev = m_current;
539 if (shouldBuildStrings)
540 record16(convertHex(prev, m_current));
542 } else if (shouldBuildStrings)
544 } else if (m_current == 'u') {
// \uXXXX: getUnicodeCharacter yields -1 when four hex digits do not follow.
546 int character = getUnicodeCharacter();
547 if (character != -1) {
548 if (shouldBuildStrings)
550 } else if (m_current == stringQuoteCharacter) {
551 if (shouldBuildStrings)
554 m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
557 } else if (strictMode && isASCIIDigit(m_current)) {
558 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
559 int character1 = m_current;
561 if (character1 != '0' || isASCIIDigit(m_current)) {
562 m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
565 if (shouldBuildStrings)
567 } else if (!strictMode && isASCIIOctalDigit(m_current)) {
568 // Octal character sequences
569 int character1 = m_current;
571 if (isASCIIOctalDigit(m_current)) {
572 // Two octal characters
573 int character2 = m_current;
// A third octal digit is only accepted when the first digit is 0-3.
575 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
576 if (shouldBuildStrings)
577 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
580 if (shouldBuildStrings)
581 record16((character1 - '0') * 8 + character2 - '0');
584 if (shouldBuildStrings)
585 record16(character1 - '0');
587 } else if (m_current != -1) {
588 if (shouldBuildStrings)
592 m_lexErrorMessage = "Unterminated string constant";
// Start a new run of ordinary characters after the escape sequence.
596 stringStart = currentCharacter();
599 // Fast check for characters that require special handling.
600 // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
601 // as possible, and lets through all common ASCII characters.
// Values in [0x0E, 0x200D] leave bit 0x2000 clear after the subtraction;
// smaller values wrap around as unsigned and set it, as do 0x2028/0x2029.
602 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
603 // New-line or end of input is not allowed
// NOTE(review): the message below is also used for a raw line terminator
// inside the literal, not only for end of input.
604 if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1)) {
605 m_lexErrorMessage = "Unexpected EOF";
608 // Anything else is just a normal character
// Closing quote reached: flush the last run and build the result.
613 if (currentCharacter() != stringStart && shouldBuildStrings)
614 m_buffer16.append(stringStart, currentCharacter() - stringStart);
615 if (shouldBuildStrings)
616 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
618 tokenData->ident = 0;
// Leave the scratch buffer empty for the next token.
620 m_buffer16.resize(0);
// Parses the digits of a hexadecimal literal and stores the value in returnValue.
// Up to eight digits are accumulated in a 32-bit integer; longer literals are
// copied into m_buffer8 and converted with parseIntOverflow.
// NOTE(review): the loop header, shift() calls and digit counter updates are
// on lines not visible in this excerpt.
624 ALWAYS_INLINE void Lexer::parseHex(double& returnValue)
626 // Optimization: most hexadecimal values fit into 4 bytes.
627 uint32_t hexValue = 0;
628 int maximumDigits = 7;
630 // Shift out the 'x' prefix.
634 hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
637 } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
// A non-negative counter means the literal ended before the fast path overflowed.
639 if (maximumDigits >= 0) {
640 returnValue = hexValue;
644 // No more place in the hexValue buffer.
645 // The values are shifted out and placed into the m_buffer8 vector.
// Emit the eight accumulated nibbles, most significant first, as ASCII hex digits.
646 for (int i = 0; i < 8; ++i) {
647 int digit = hexValue >> 28;
649 record8(digit + '0');
651 record8(digit - 10 + 'a');
// Then collect the remaining digits directly from the input.
655 while (isASCIIHexDigit(m_current)) {
660 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
// Tries to parse an octal literal into returnValue.
// Digits are accumulated in a 32-bit integer and also saved in a temporary
// array so they can be replayed into m_buffer8 when the fast path gives up.
// NOTE(review): the return statements, the `digits` declaration and the
// shift() calls are on lines not visible here. lex() calls parseDecimal when
// this returns false, and parseDecimal expects m_buffer8 may already hold digits.
663 ALWAYS_INLINE bool Lexer::parseOctal(double& returnValue)
665 // Optimization: most octal values fit into 4 bytes.
666 uint32_t octalValue = 0;
667 int maximumDigits = 9;
668 // Temporary buffer for the digits. Makes it easier
669 // to reconstruct the input characters when needed.
673 octalValue = octalValue * 8 + (m_current - '0');
674 digits[maximumDigits] = m_current;
677 } while (isASCIIOctalDigit(m_current) && maximumDigits >= 0);
// Fast path: the literal ended within the digit budget and is not followed
// by a decimal digit such as 8 or 9.
679 if (!isASCIIDigit(m_current) && maximumDigits >= 0) {
680 returnValue = octalValue;
// Slow path: walk the saved digits from index 9 downwards, i.e. in input order.
684 for (int i = 9; i > maximumDigits; --i)
687 while (isASCIIOctalDigit(m_current)) {
// A following 8 or 9 means the literal is not octal after all.
692 if (isASCIIDigit(m_current))
695 returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
// Tries to parse a plain decimal integer into returnValue.
// The fast path applies only when m_buffer8 is empty, the digits fit the
// digit budget, and no '.' or exponent indicator follows.
// NOTE(review): the return statements, the `digits` declaration and the
// shift() calls are on lines not visible here. When this returns false, lex()
// goes on to parse a fraction/exponent and converts m_buffer8 with strtod.
699 ALWAYS_INLINE bool Lexer::parseDecimal(double& returnValue)
701 // Optimization: most decimal values fit into 4 bytes.
702 uint32_t decimalValue = 0;
704 // Since parseOctal may be executed before parseDecimal,
705 // the m_buffer8 may hold ascii digits.
706 if (!m_buffer8.size()) {
707 int maximumDigits = 9;
708 // Temporary buffer for the digits. Makes it easier
709 // to reconstruct the input characters when needed.
713 decimalValue = decimalValue * 10 + (m_current - '0');
714 digits[maximumDigits] = m_current;
717 } while (isASCIIDigit(m_current) && maximumDigits >= 0);
// `m_current | 0x20` lower-cases an ASCII letter, so this matches 'e' and 'E'.
719 if (maximumDigits >= 0 && m_current != '.' && (m_current | 0x20) != 'e') {
720 returnValue = decimalValue;
// Slow path: walk the saved digits from index 9 downwards, i.e. in input order.
724 for (int i = 9; i > maximumDigits; --i)
728 while (isASCIIDigit(m_current)) {
// Handles the fractional part of a numeric literal by looping over the
// decimal digits that follow.
// NOTE(review): the loop body is not visible here; presumably it records the
// '.' and each digit into m_buffer8, which lex() later passes to strtod.
736 ALWAYS_INLINE void Lexer::parseNumberAfterDecimalPoint()
739 while (isASCIIDigit(m_current)) {
// Handles the exponent part of a numeric literal: an optional sign followed
// by at least one decimal digit. lex() reports "Non-number found after
// exponent indicator" when this returns false.
// NOTE(review): the record/shift calls and return statements are on lines not
// visible in this excerpt.
745 ALWAYS_INLINE bool Lexer::parseNumberAfterExponentIndicator()
749 if (m_current == '+' || m_current == '-') {
// At least one digit is required after the optional sign.
754 if (!isASCIIDigit(m_current))
760 } while (isASCIIDigit(m_current));
// Skips the body of a /* ... */ comment. lex() reports "Multiline comment was
// not closed properly" when this returns false.
// NOTE(review): the outer loop, shift() calls and return statements are on
// lines not visible in this excerpt.
764 ALWAYS_INLINE bool Lexer::parseMultilineComment()
// A '*' may start the closing delimiter; it only does if '/' follows.
767 while (UNLIKELY(m_current == '*')) {
769 if (m_current == '/') {
// End of input inside the comment.
775 if (UNLIKELY(m_current == -1))
// Line terminators inside the comment go through shiftLineTerminator().
778 if (isLineTerminator(m_current)) {
779 shiftLineTerminator();
// Looks ahead, without consuming input, past any whitespace and line
// terminators and reports whether the next character is ':'.
786 bool Lexer::nextTokenIsColon()
// Scan with a local cursor so the lexer's own position is left untouched.
788 const UChar* code = m_code;
789 while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
792 return code < m_codeEnd && *code == ':';
// Scans and returns the next token.
//   tokenData  - receives the token payload: doubleValue for numbers, ident
//                for identifiers and strings, intValue (source offset) for braces.
//   tokenInfo  - receives the line number and the start/end offsets of the token.
//   lexType    - flag set; DontBuildStrings and DontBuildKeywords select the
//                non-building variants of parseString / parseIdentifier.
//   strictMode - forwarded to string and identifier parsing.
// Visible error paths set m_lexErrorMessage; `token` starts out as ERRORTOK.
// NOTE(review): many case labels, shift() calls, token assignments and gotos
// are on lines not visible in this excerpt.
795 JSTokenType Lexer::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexType, bool strictMode)
// Both scratch buffers must have been left empty by the previous token.
798 ASSERT(m_buffer8.isEmpty());
799 ASSERT(m_buffer16.isEmpty());
801 JSTokenType token = ERRORTOK;
802 m_terminator = false;
// Skip leading whitespace, then remember where the token starts.
805 while (isWhiteSpace(m_current))
808 int startOffset = currentOffset();
810 if (UNLIKELY(m_current == -1))
// Classify the first character: table lookup for ASCII, Unicode checks otherwise.
816 if (LIKELY(isASCII(m_current)))
817 type = static_cast<CharacterType>(typesOfASCIICharacters[m_current]);
818 else if (isNonASCIIIdentStart(m_current))
819 type = CharacterIdentifierStart;
820 else if (isLineTerminator(m_current))
821 type = CharacterLineTerminator;
823 type = CharacterInvalid;
826 case CharacterGreater:
828 if (m_current == '>') {
830 if (m_current == '>') {
832 if (m_current == '=') {
834 token = URSHIFTEQUAL;
840 if (m_current == '=') {
848 if (m_current == '=') {
857 if (m_current == '=') {
859 if (m_current == '=') {
871 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
872 // <!-- marks the beginning of a line comment (for www usage)
873 goto inSingleLineComment;
875 if (m_current == '<') {
877 if (m_current == '=') {
885 if (m_current == '=') {
892 case CharacterExclamationMark:
894 if (m_current == '=') {
896 if (m_current == '=') {
908 if (m_current == '+') {
// The AUTO variant is produced when a line terminator preceded the operator.
910 token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
913 if (m_current == '=') {
922 if (m_current == '-') {
// "-->" at the start of a line is treated as a single-line comment.
924 if (m_atLineStart && m_current == '>') {
926 goto inSingleLineComment;
928 token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
931 if (m_current == '=') {
938 case CharacterMultiply:
940 if (m_current == '=') {
949 if (m_current == '/') {
951 goto inSingleLineComment;
953 if (m_current == '*') {
955 if (parseMultilineComment())
957 m_lexErrorMessage = "Multiline comment was not closed properly";
960 if (m_current == '=') {
969 if (m_current == '&') {
974 if (m_current == '=') {
983 if (m_current == '=') {
990 case CharacterModulo:
992 if (m_current == '=') {
1001 if (m_current == '=') {
1006 if (m_current == '|') {
1013 case CharacterOpenParen:
1017 case CharacterCloseParen:
1021 case CharacterOpenBracket:
1022 token = OPENBRACKET;
1025 case CharacterCloseBracket:
1026 token = CLOSEBRACKET;
1029 case CharacterComma:
1033 case CharacterColon:
1037 case CharacterQuestion:
1041 case CharacterTilde:
1045 case CharacterSemicolon:
// Braces record their source offset in the token data.
1050 case CharacterOpenBrace:
1051 tokenData->intValue = currentOffset();
1055 case CharacterCloseBrace:
1056 tokenData->intValue = currentOffset();
1063 if (!isASCIIDigit(m_current)) {
1067 goto inNumberAfterDecimalPoint;
// `m_current | 0x20` lower-cases an ASCII letter, so this matches 'x' and 'X'.
1070 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
1071 parseHex(tokenData->doubleValue);
1075 if (isASCIIOctalDigit(m_current)) {
1076 if (parseOctal(tokenData->doubleValue)) {
1078 m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
1085 // Fall through into CharacterNumber
1086 case CharacterNumber:
// Skipped when the hex or octal path above already produced a NUMBER.
1087 if (LIKELY(token != NUMBER)) {
1088 if (!parseDecimal(tokenData->doubleValue)) {
1089 if (m_current == '.') {
1091 inNumberAfterDecimalPoint:
1092 parseNumberAfterDecimalPoint();
1094 if ((m_current | 0x20) == 'e')
1095 if (!parseNumberAfterExponentIndicator()) {
1096 m_lexErrorMessage = "Non-number found after exponent indicator";
1099 // Null-terminate string for strtod.
1100 m_buffer8.append('\0');
1101 tokenData->doubleValue = WTF::strtod(m_buffer8.data(), 0);
1106 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
1107 if (UNLIKELY(isIdentStart(m_current))) {
// The message describes the condition actually tested above; the previous
// text spoke of a missing digit after a decimal point, which is unrelated.
1108 m_lexErrorMessage = "No identifiers allowed directly after numeric literal";
1111 m_buffer8.resize(0);
1112 m_delimited = false;
1114 case CharacterQuote:
1115 if (lexType & DontBuildStrings) {
1116 if (UNLIKELY(!parseString<false>(tokenData, strictMode)))
1119 if (UNLIKELY(!parseString<true>(tokenData, strictMode)))
1123 m_delimited = false;
1126 case CharacterIdentifierStart:
1127 ASSERT(isIdentStart(m_current));
1128 // Fall through into CharacterBackSlash.
// A backslash can start an identifier through a \uXXXX escape, which
// parseIdentifier handles.
1129 case CharacterBackSlash:
1130 if (lexType & DontBuildKeywords)
1131 token = parseIdentifier<false>(tokenData, lexType, strictMode);
1133 token = parseIdentifier<true>(tokenData, lexType, strictMode);
1135 case CharacterLineTerminator:
1136 ASSERT(isLineTerminator(m_current));
1137 shiftLineTerminator();
1138 m_atLineStart = true;
1139 m_terminator = true;
1141 case CharacterInvalid:
1142 m_lexErrorMessage = getInvalidCharMessage();
1145 ASSERT_NOT_REACHED();
1146 m_lexErrorMessage = "Internal Error";
1150 m_atLineStart = false;
// Single-line comment: skip to the end of the line (or end of input), then
// consume the terminator and record that a line break was seen.
1153 inSingleLineComment:
1154 while (!isLineTerminator(m_current)) {
1155 if (UNLIKELY(m_current == -1))
1159 shiftLineTerminator();
1160 m_atLineStart = true;
1161 m_terminator = true;
1162 if (!lastTokenWasRestrKeyword())
1167 // Fall through into returnToken.
// Record the token's position and remember it as the last token.
1170 tokenInfo->line = m_lineNumber;
1171 tokenInfo->startOffset = startOffset;
1172 tokenInfo->endOffset = currentOffset();
1173 m_lastToken = token;
// Error exit: position information is still filled in; m_lastToken is not
// updated on the visible lines.
1178 tokenInfo->line = m_lineNumber;
1179 tokenInfo->startOffset = startOffset;
1180 tokenInfo->endOffset = currentOffset();
// Scans a regular expression literal, producing its pattern and flags.
//   pattern       - receives the pattern text as an Identifier.
//   flags         - receives the trailing flag characters as an Identifier.
//   patternPrefix - if non-zero, a character placed at the front of the
//                   pattern; it must not be a line terminator, '/' or '['.
// NOTE(review): the loop header, the bracket handling, the shift() calls and
// the return statements are on lines not visible in this excerpt.
1184 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
1186 ASSERT(m_buffer16.isEmpty());
1188 bool lastWasEscape = false;
1189 bool inBrackets = false;
1191 if (patternPrefix) {
1192 ASSERT(!isLineTerminator(patternPrefix));
1193 ASSERT(patternPrefix != '/');
1194 ASSERT(patternPrefix != '[');
1195 record16(patternPrefix);
1199 int current = m_current;
// A line terminator or end of input inside the literal: discard what was buffered.
1201 if (isLineTerminator(current) || current == -1) {
1202 m_buffer16.resize(0);
// An unescaped '/' outside a bracket expression matches the end-of-pattern test.
1208 if (current == '/' && !lastWasEscape && !inBrackets)
1213 if (lastWasEscape) {
1214 lastWasEscape = false;
1226 lastWasEscape = true;
1231 pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1232 m_buffer16.resize(0);
// The flags are the run of identifier-part characters that follows the pattern.
1234 while (isIdentPart(m_current)) {
1235 record16(m_current);
1239 flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1240 m_buffer16.resize(0);
// Variant of regular-expression scanning that takes no output parameters:
// the visible lines apply the same termination tests as scanRegExp but build
// no Identifiers.
// NOTE(review): the loop header, the bracket handling, the shift() calls and
// the return statements are on lines not visible in this excerpt.
1245 bool Lexer::skipRegExp()
1247 bool lastWasEscape = false;
1248 bool inBrackets = false;
1251 int current = m_current;
// A line terminator or end of input inside the literal.
1253 if (isLineTerminator(current) || current == -1)
// An unescaped '/' outside a bracket expression matches the end-of-pattern test.
1258 if (current == '/' && !lastWasEscape && !inBrackets)
1261 if (lastWasEscape) {
1262 lastWasEscape = false;
1274 lastWasEscape = true;
// Loop over the trailing flag characters.
1279 while (isIdentPart(m_current))
// NOTE(review): the header of this function is not visible in this excerpt;
// presumably it is Lexer::clear().
// Each scratch buffer is swapped with a freshly constructed empty vector, so
// the old contents end up in the local and are destroyed with it.
1289 Vector<char> newBuffer8;
1290 m_buffer8.swap(newBuffer8);
1292 Vector<UChar> newBuffer16;
1293 m_buffer16.swap(newBuffer16);
1295 m_isReparsing = false;
// Builds a SourceCode for the range from openBrace through closeBrace
// (inclusive), using the current source's provider.
//   openBrace  - offset of a '{' in the provider's data (asserted).
//   closeBrace - offset of the matching '}' (asserted to be a '}').
//   firstLine  - first line number passed to the new SourceCode.
1298 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1300 ASSERT(m_source->provider()->data()[openBrace] == '{');
1301 ASSERT(m_source->provider()->data()[closeBrace] == '}');
// The end offset is exclusive, hence closeBrace + 1.
1302 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);