initial import
[vuplus_webkit] / Source / WebCore / inspector / front-end / SourceHTMLTokenizer.re2js
1 /*
2  * Copyright (C) 2009 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30
31 // Generate js file as follows:
32 //
33 // re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
34 // | sed 's|^yy\([^:]*\)*\:|case \1:|' \
35 // | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
36 // | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
37 // | sed 's|[*]cursor|this._charAt(cursor)|' \
38 // | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
39 // | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
40 // | sed 's|unsigned\ int|var|' \
41 // | sed 's|var\ yych|case 1: var yych|'
42
// Line-oriented tokenizer for HTML source. It calls the base
// WebInspector.SourceTokenizer constructor, publishes the lexer state table,
// the jump labels used by the generated state machine and the parser flag
// bits, and finally installs the initial condition.
WebInspector.SourceHTMLTokenizer = function()
{
    WebInspector.SourceTokenizer.call(this);

    // Lexer states. The order is determined by the generated code.
    var lexStates = {
        INITIAL: 0,
        COMMENT: 1,
        DOCTYPE: 2,
        TAG: 3,
        DSTRING: 4,
        SSTRING: 5
    };
    this._lexConditions = lexStates;

    // One jump label per lexer state (1000 + state index). The re2c
    // "condprefix" setting in nextToken emits them as "case this.case_<STATE>".
    var caseLabelBase = 1000;
    this.case_INITIAL = caseLabelBase + lexStates.INITIAL;
    this.case_COMMENT = caseLabelBase + lexStates.COMMENT;
    this.case_DOCTYPE = caseLabelBase + lexStates.DOCTYPE;
    this.case_TAG = caseLabelBase + lexStates.TAG;
    this.case_DSTRING = caseLabelBase + lexStates.DSTRING;
    this.case_SSTRING = caseLabelBase + lexStates.SSTRING;

    // Parser flags. Apart from INITIAL (no flags) each value is a single bit,
    // so several flags can be combined in one parseCondition number.
    this._parseConditions = {
        INITIAL: 0,
        ATTRIBUTE: 1 << 0,
        ATTRIBUTE_VALUE: 1 << 1,
        LINKIFY: 1 << 2,
        A_NODE: 1 << 3,
        SCRIPT: 1 << 4,
        STYLE: 1 << 5
    };

    this.condition = this.createInitialCondition();
};
75
76 WebInspector.SourceHTMLTokenizer.prototype = {
77     createInitialCondition: function()
78     {
79         return { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL };
80     },
81
82     set line(line) {
83         if (this._condition.internalJavaScriptTokenizerCondition) {
84             var match = /<\/script/i.exec(line);
85             if (match) {
86                 this._internalJavaScriptTokenizer.line = line.substring(0, match.index);
87             } else
88                 this._internalJavaScriptTokenizer.line = line;
89         } else if (this._condition.internalCSSTokenizerCondition) {
90             var match = /<\/style/i.exec(line);
91             if (match) {
92                 this._internalCSSTokenizer.line = line.substring(0, match.index);
93             } else
94                 this._internalCSSTokenizer.line = line;
95         }
96         this._line = line;
97     },
98
99     _isExpectingAttribute: function()
100     {
101         return this._condition.parseCondition & this._parseConditions.ATTRIBUTE;
102     },
103
104     _isExpectingAttributeValue: function()
105     {
106         return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE;
107     },
108
109     _setExpectingAttribute: function()
110     {
111         if (this._isExpectingAttributeValue())
112             this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE;
113         this._condition.parseCondition |= this._parseConditions.ATTRIBUTE;
114     },
115
116     _setExpectingAttributeValue: function()
117     {
118         if (this._isExpectingAttribute())
119             this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE;
120         this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE;
121     },
122
123     _stringToken: function(cursor, stringEnds)
124     {
125         if (!this._isExpectingAttributeValue()) {
126             this.tokenType = null;
127             return cursor;
128         }
129         this.tokenType = this._attrValueTokenType();
130         if (stringEnds)
131             this._setExpectingAttribute();
132         return cursor;
133     },
134
135     _attrValueTokenType: function()
136     {
137         if (this._condition.parseCondition & this._parseConditions.LINKIFY) {
138             if (this._condition.parseCondition & this._parseConditions.A_NODE)
139                 return "html-external-link";
140             return "html-resource-link";
141         }
142         return "html-attribute-value";
143     },
144
145     get _internalJavaScriptTokenizer()
146     {
147         return WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript");
148     },
149
150     get _internalCSSTokenizer()
151     {
152         return WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/css");
153     },
154
155     scriptStarted: function(cursor)
156     {
157         this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.createInitialCondition();
158     },
159
160     scriptEnded: function(cursor)
161     {
162     },
163
164     styleSheetStarted: function(cursor)
165     {
166         this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.createInitialCondition();
167     },
168
169     styleSheetEnded: function(cursor)
170     {
171     },
172
173     nextToken: function(cursor)
174     {
175         if (this._condition.internalJavaScriptTokenizerCondition) {
176             // Re-set line to force </script> detection first.
177             this.line = this._line;
178             if (cursor !== this._internalJavaScriptTokenizer._line.length) {
179                 // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
180                 this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition;
181                 var result = this._internalJavaScriptTokenizer.nextToken(cursor);
182                 this.tokenType = this._internalJavaScriptTokenizer.tokenType;
183                 this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition;
184                 return result;
185             } else if (cursor !== this._line.length)
186                 delete this._condition.internalJavaScriptTokenizerCondition;
187         } else if (this._condition.internalCSSTokenizerCondition) {
188             // Re-set line to force </style> detection first.
189             this.line = this._line;
190             if (cursor !== this._internalCSSTokenizer._line.length) {
191                 // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
192                 this._internalCSSTokenizer.condition = this._condition.internalCSSTokenizerCondition;
193                 var result = this._internalCSSTokenizer.nextToken(cursor);
194                 this.tokenType = this._internalCSSTokenizer.tokenType;
195                 this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.condition;
196                 return result;
197             } else if (cursor !== this._line.length)
198                 delete this._condition.internalCSSTokenizerCondition;
199         }
200
201         var cursorOnEnter = cursor;
202         var gotoCase = 1;
203         while (1) {
204             switch (gotoCase)
205             // Following comment is replaced with generated state machine.
206             /*!re2c
207                 re2c:define:YYCTYPE  = "var";
208                 re2c:define:YYCURSOR = cursor;
209                 re2c:define:YYGETCONDITION = "this.getLexCondition";
210                 re2c:define:YYSETCONDITION = "this.setLexCondition";
211                 re2c:condprefix = "case this.case_";
212                 re2c:condenumprefix = "this._lexConditions.";
213                 re2c:yyfill:enable = 0;
214                 re2c:labelprefix = "case ";
215                 re2c:indent:top = 2;
216                 re2c:indent:string = "    ";
217
218                 CommentContent = ([^-\r\n] | ("--" [^>]))*;
219                 Comment = "<!--" CommentContent "-->";
220                 CommentStart = "<!--" CommentContent [\r\n];
221                 CommentEnd = CommentContent "-->";
222
223                 DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
224                 DocTypeContent = [^\r\n>]*;
225
226                 ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
227                 ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
228
229                 StyleStart = "<" [Ss] [Tt] [Yy] [Ll] [Ee];
230                 StyleEnd = "</" [Ss] [Tt] [Yy] [Ll] [Ee];
231
232                 LT = "<" | "</";
233                 GT = ">";
234                 EqualSign = "=";
235
236                 DoubleStringContent = [^\r\n\"]*;
237                 SingleStringContent = [^\r\n\']*;
238                 StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
239                 DoubleStringStart = "\"" DoubleStringContent [\r\n];
240                 DoubleStringEnd = DoubleStringContent "\"";
241                 SingleStringStart = "'" SingleStringContent [\r\n];
242                 SingleStringEnd = SingleStringContent "'";
243
244                 Identifier = [^ \r\n"'<>\[\]=]+;
245
246                 <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
247                 <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
248                 <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
249                 <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }
250
251                 <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
252                 <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
253                 <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; }
254
255                 <INITIAL> ScriptStart => TAG
256                 {
257                     if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
258                         // Do not tokenize script tag contents, keep lexer state, even though processing "<".
259                         this.setLexCondition(this._lexConditions.INITIAL);
260                         this.tokenType = null;
261                         return cursor;
262                     }
263                     this.tokenType = "html-tag";
264                     this._condition.parseCondition = this._parseConditions.SCRIPT;
265                     this._setExpectingAttribute();
266                     return cursor;
267                 }
268
269                 <INITIAL> ScriptEnd => TAG
270                 {
271                     this.tokenType = "html-tag";
272                     this._condition.parseCondition = this._parseConditions.INITIAL;
273                     this.scriptEnded(cursor - 8);
274                     return cursor;
275                 }
276
277                 <INITIAL> StyleStart => TAG
278                 {
279                     if (this._condition.parseCondition & this._parseConditions.STYLE) {
280                         // Do not tokenize style tag contents, keep lexer state, even though processing "<".
281                         this.setLexCondition(this._lexConditions.INITIAL);
282                         this.tokenType = null;
283                         return cursor;
284                     }
285                     this.tokenType = "html-tag";
286                     this._condition.parseCondition = this._parseConditions.STYLE;
287                     this._setExpectingAttribute();
288                     return cursor;
289                 }
290
291                 <INITIAL> StyleEnd => TAG
292                 {
293                     this.tokenType = "html-tag";
294                     this._condition.parseCondition = this._parseConditions.INITIAL;
295                     this.styleSheetEnded(cursor - 7);
296                     return cursor;
297                 }
298
299                 <INITIAL> LT => TAG
300                 {
301                     if (this._condition.parseCondition & (this._parseConditions.SCRIPT | this._parseConditions.STYLE)) {
302                         // Do not tokenize script and style tag contents, keep lexer state, even though processing "<".
303                         this.setLexCondition(this._lexConditions.INITIAL);
304                         this.tokenType = null;
305                         return cursor;
306                     }
307
308                     this._condition.parseCondition = this._parseConditions.INITIAL;
309                     this.tokenType = "html-tag";
310                     return cursor;
311                 }
312
313                 <TAG> GT => INITIAL
314                 {
315                     this.tokenType = "html-tag";
316                     if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
317                         this.scriptStarted(cursor);
318                         // Do not tokenize script tag contents.
319                         return cursor;
320                     }
321
322                     if (this._condition.parseCondition & this._parseConditions.STYLE) {
323                         this.styleSheetStarted(cursor);
324                         // Do not tokenize style tag contents.
325                         return cursor;
326                     }
327
328                     this._condition.parseCondition = this._parseConditions.INITIAL;
329                     return cursor;
330                 }
331
332                 <TAG> StringLiteral { return this._stringToken(cursor, true); }
333                 <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
334                 <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
335                 <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); }
336                 <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); }
337                 <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
338                 <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); }
339
340                 <TAG> EqualSign => TAG
341                 {
342                     if (this._isExpectingAttribute())
343                         this._setExpectingAttributeValue();
344                     this.tokenType = null;
345                     return cursor;
346                 }
347
348                 <TAG> Identifier
349                 {
350                     if (this._condition.parseCondition === this._parseConditions.SCRIPT || this._condition.parseCondition === this._parseConditions.STYLE) {
351                         // Fall through if expecting attributes.
352                         this.tokenType = null;
353                         return cursor;
354                     }
355
356                     if (this._condition.parseCondition === this._parseConditions.INITIAL) {
357                         this.tokenType = "html-tag";
358                         this._setExpectingAttribute();
359                         var token = this._line.substring(cursorOnEnter, cursor);
360                         if (token === "a")
361                             this._condition.parseCondition |= this._parseConditions.A_NODE;
362                         else if (this._condition.parseCondition & this._parseConditions.A_NODE)
363                             this._condition.parseCondition ^= this._parseConditions.A_NODE;
364                     } else if (this._isExpectingAttribute()) {
365                         var token = this._line.substring(cursorOnEnter, cursor);
366                         if (token === "href" || token === "src")
367                             this._condition.parseCondition |= this._parseConditions.LINKIFY;
368                         else if (this._condition.parseCondition |= this._parseConditions.LINKIFY)
369                             this._condition.parseCondition ^= this._parseConditions.LINKIFY;
370                         this.tokenType = "html-attribute-name";
371                     } else if (this._isExpectingAttributeValue())
372                         this.tokenType = this._attrValueTokenType();
373                     else
374                         this.tokenType = null;
375                     return cursor;
376                 }
377                 <*> [^] { this.tokenType = null; return cursor; }
378             */
379         }
380     }
381 }
382
383 WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;