code.vuplus.com Git - vuplus_webkit/blob - Source/JavaScriptCore/parser/Lexer.h

   1 /*
   2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
   3  *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
   4  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
   5  *
   6  *  This library is free software; you can redistribute it and/or
   7  *  modify it under the terms of the GNU Library General Public
   8  *  License as published by the Free Software Foundation; either
   9  *  version 2 of the License, or (at your option) any later version.
  10  *
  11  *  This library is distributed in the hope that it will be useful,
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  *  Library General Public License for more details.
  15  *
  16  *  You should have received a copy of the GNU Library General Public License
  17  *  along with this library; see the file COPYING.LIB.  If not, write to
  18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19  *  Boston, MA 02110-1301, USA.
  20  *
  21  */
  22
  23 #ifndef Lexer_h
  24 #define Lexer_h
  25
  26 #include "JSParser.h"
  27 #include "Lookup.h"
  28 #include "ParserArena.h"
  29 #include "SourceCode.h"
  30 #include <wtf/ASCIICType.h>
  31 #include <wtf/AlwaysInline.h>
  32 #include <wtf/SegmentedVector.h>
  33 #include <wtf/Vector.h>
  34 #include <wtf/unicode/Unicode.h>
  35
  36 namespace JSC {
  37
  38     class RegExp;
  39
  40     class Lexer {
  41         WTF_MAKE_NONCOPYABLE(Lexer); WTF_MAKE_FAST_ALLOCATED;
  42     public:
  43         // Character manipulation functions.
  44         static bool isWhiteSpace(int character);
  45         static bool isLineTerminator(int character);
  46         static unsigned char convertHex(int c1, int c2);
  47         static UChar convertUnicode(int c1, int c2, int c3, int c4);
  48
  49         // Functions to set up parsing.
  50         void setCode(const SourceCode&, ParserArena&);
  51         void setIsReparsing() { m_isReparsing = true; }
  52         bool isReparsing() const { return m_isReparsing; }
  53
  54         // Functions for the parser itself.
  55         enum LexType {
  56             IgnoreReservedWords = 1,
  57             DontBuildStrings = 2,
  58             DontBuildKeywords = 4
  59         };
  60         JSTokenType lex(JSTokenData*, JSTokenInfo*, unsigned, bool strictMode);
  61         bool nextTokenIsColon();
  62         int lineNumber() const { return m_lineNumber; }
  63         void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
  64         int lastLineNumber() const { return m_lastLineNumber; }
  65         bool prevTerminator() const { return m_terminator; }
  66         SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
  67         bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
  68         bool skipRegExp();
  69
  70         // Functions for use after parsing.
  71         bool sawError() const { return m_error; }
  72         UString getErrorMessage() const { return m_lexErrorMessage; }
  73         void clear();
  74         int currentOffset() { return m_code - m_codeStart; }
  75         void setOffset(int offset)
  76         {
  77             m_error = 0;
  78             m_lexErrorMessage = UString();
  79             m_code = m_codeStart + offset;
  80             m_buffer8.resize(0);
  81             m_buffer16.resize(0);
  82             // Faster than an if-else sequence
  83             m_current = -1;
  84             if (LIKELY(m_code < m_codeEnd))
  85                 m_current = *m_code;
  86         }
  87         void setLineNumber(int line)
  88         {
  89             m_lineNumber = line;
  90         }
  91
  92         SourceProvider* sourceProvider() const { return m_source->provider(); }
  93
  94         JSTokenType lexExpectIdentifier(JSTokenData*, JSTokenInfo*, unsigned, bool strictMode);
  95
  96         bool isKeyword(const Identifier&);
  97
  98     private:
  99         friend class JSGlobalData;
 100
 101         Lexer(JSGlobalData*);
 102         ~Lexer();
 103
 104         void record8(int);
 105         void record16(int);
 106         void record16(UChar);
 107
 108         void copyCodeWithoutBOMs();
 109
 110         ALWAYS_INLINE void shift();
 111         ALWAYS_INLINE int peek(int offset);
 112         int getUnicodeCharacter();
 113         void shiftLineTerminator();
 114
 115         UString getInvalidCharMessage();
 116         ALWAYS_INLINE const UChar* currentCharacter() const;
 117         ALWAYS_INLINE int currentOffset() const;
 118
 119         ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
 120
 121         ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
 122
 123         enum ShiftType { DoBoundsCheck, DoNotBoundsCheck };
 124         template <int shiftAmount, ShiftType shouldBoundsCheck> void internalShift();
 125         template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
 126         template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned, bool strictMode);
 127         template <bool shouldBuildStrings> ALWAYS_INLINE bool parseString(JSTokenData*, bool strictMode);
 128         ALWAYS_INLINE void parseHex(double& returnValue);
 129         ALWAYS_INLINE bool parseOctal(double& returnValue);
 130         ALWAYS_INLINE bool parseDecimal(double& returnValue);
 131         ALWAYS_INLINE void parseNumberAfterDecimalPoint();
 132         ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
 133         ALWAYS_INLINE bool parseMultilineComment();
 134
 135         static const size_t initialReadBufferCapacity = 32;
 136
 137         int m_lineNumber;
 138         int m_lastLineNumber;
 139
 140         Vector<char> m_buffer8;
 141         Vector<UChar> m_buffer16;
 142         bool m_terminator;
 143         bool m_delimited; // encountered delimiter like "'" and "}" on last run
 144         int m_lastToken;
 145
 146         const SourceCode* m_source;
 147         const UChar* m_code;
 148         const UChar* m_codeStart;
 149         const UChar* m_codeEnd;
 150         bool m_isReparsing;
 151         bool m_atLineStart;
 152         bool m_error;
 153         UString m_lexErrorMessage;
 154
 155         // current and following unicode characters (int to allow for -1 for end-of-file marker)
 156         int m_current;
 157
 158         IdentifierArena* m_arena;
 159
 160         JSGlobalData* m_globalData;
 161
 162         const HashTable m_keywordTable;
 163     };
 164
 165     ALWAYS_INLINE bool Lexer::isWhiteSpace(int ch)
 166     {
 167         return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF);
 168     }
 169
 170     ALWAYS_INLINE bool Lexer::isLineTerminator(int ch)
 171     {
 172         return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
 173     }
 174
 175     inline unsigned char Lexer::convertHex(int c1, int c2)
 176     {
 177         return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
 178     }
 179
 180     inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
 181     {
 182         return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
 183     }
 184
 185     ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
 186     {
 187         return &m_arena->makeIdentifier(m_globalData, characters, length);
 188     }
 189
 190     ALWAYS_INLINE JSTokenType Lexer::lexExpectIdentifier(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexType, bool strictMode)
 191     {
 192         ASSERT((lexType & IgnoreReservedWords));
 193         const UChar* start = m_code;
 194         const UChar* ptr = start;
 195         const UChar* end = m_codeEnd;
 196         if (ptr >= end) {
 197             ASSERT(ptr == end);
 198             goto slowCase;
 199         }
 200         if (!WTF::isASCIIAlpha(*ptr))
 201             goto slowCase;
 202         ++ptr;
 203         while (ptr < end) {
 204             if (!WTF::isASCIIAlphanumeric(*ptr))
 205                 break;
 206             ++ptr;
 207         }
 208
 209         // Here's the shift
 210         if (ptr < end) {
 211             if ((!WTF::isASCII(*ptr)) || (*ptr == '\\') || (*ptr == '_') || (*ptr == '$'))
 212                 goto slowCase;
 213             m_current = *ptr;
 214         } else
 215             m_current = -1;
 216
 217         m_code = ptr;
 218
 219         // Create the identifier if needed
 220         if (lexType & DontBuildKeywords)
 221             tokenData->ident = 0;
 222         else
 223             tokenData->ident = makeIdentifier(start, ptr - start);
 224         tokenInfo->line = m_lineNumber;
 225         tokenInfo->startOffset = start - m_codeStart;
 226         tokenInfo->endOffset = currentOffset();
 227         m_lastToken = IDENT;
 228         return IDENT;
 229
 230     slowCase:
 231         return lex(tokenData, tokenInfo, lexType, strictMode);
 232     }
 233
 234 } // namespace JSC
 235
 236 #endif // Lexer_h