2 * Copyright (C) 2009 Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 // Generate js file as follows:
33 // re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
34 // | sed 's|^yy\([^:]*\)*\:|case \1:|' \
35 // | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
36 // | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
37 // | sed 's|[*]cursor|this._charAt(cursor)|' \
38 // | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
39 // | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
40 // | sed 's|unsigned\ int|var|' \
41 // | sed 's|var\ yych|case 1: var yych|'
// Constructor for the HTML source tokenizer. It runs the
// WebInspector.SourceTokenizer constructor on this instance, creates the
// lexer-condition and parse-condition tables together with the case_*
// constants, and stores a fresh initial condition.
// NOTE(review): the entries of _lexConditions and _parseConditions, and any
// case_* constant between DOCTYPE and DSTRING, are not visible in this
// excerpt; confirm them against the full file.
43 WebInspector.SourceHTMLTokenizer = function()
45 WebInspector.SourceTokenizer.call(this);
47 // The order is determined by the generated code.
48 this._lexConditions = {
// The case_* names match the re2c setting condprefix = "case this.case_",
// so the generated switch refers to these properties for its condition labels.
// Presumably the 1000+ range keeps them apart from the generated state
// numbers; TODO confirm.
56 this.case_INITIAL = 1000;
57 this.case_COMMENT = 1001;
58 this.case_DOCTYPE = 1002;
60 this.case_DSTRING = 1004;
61 this.case_SSTRING = 1005;
// Parse conditions are combined and tested with bitwise operators elsewhere
// in this file (|=, ^=, &), so they are presumably distinct bit flags; verify.
63 this._parseConditions = {
// Start in the state produced by createInitialCondition().
73 this.condition = this.createInitialCondition();
76 WebInspector.SourceHTMLTokenizer.prototype = {
77 createInitialCondition: function()
79 return { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL };
// Body of a setter-like member that receives `line`. Its header is not
// visible in this excerpt; presumably `set line(line)`, since nextToken
// assigns to this.line. Confirm against the full file.
// While an embedded script or stylesheet is being tokenized, the matching
// internal tokenizer is handed either the text that precedes a closing
// "</script" / "</style" on this line or the whole line.
// NOTE(review): the guards that choose between the truncated and the full
// line (presumably `if (match)` / `else`) and the end of the body are not
// visible here.
83 if (this._condition.internalJavaScriptTokenizerCondition) {
// Case-insensitive search for the start of a closing script tag.
84 var match = /<\/script/i.exec(line);
// Only the text before the closing tag goes to the JavaScript tokenizer.
86 this._internalJavaScriptTokenizer.line = line.substring(0, match.index);
88 this._internalJavaScriptTokenizer.line = line;
89 } else if (this._condition.internalCSSTokenizerCondition) {
// Same handling for an embedded stylesheet and its closing style tag.
90 var match = /<\/style/i.exec(line);
92 this._internalCSSTokenizer.line = line.substring(0, match.index);
94 this._internalCSSTokenizer.line = line;
99 _isExpectingAttribute: function()
101 return this._condition.parseCondition & this._parseConditions.ATTRIBUTE;
104 _isExpectingAttributeValue: function()
106 return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE;
109 _setExpectingAttribute: function()
111 if (this._isExpectingAttributeValue())
112 this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE;
113 this._condition.parseCondition |= this._parseConditions.ATTRIBUTE;
116 _setExpectingAttributeValue: function()
118 if (this._isExpectingAttribute())
119 this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE;
120 this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE;
// Assigns a token type to a quoted string (or a piece of one) found inside a tag.
//   cursor     - position supplied by the lexer rule. NOTE(review): the
//                return statements are not visible in this excerpt;
//                presumably cursor is returned, as the rules return this
//                method's result.
//   stringEnds - passed as true by the rules that consume the closing quote
//                (StringLiteral, DoubleStringEnd, SingleStringEnd) and
//                omitted by the others. NOTE(review): its use in the body is
//                not visible here; presumably it guards the
//                _setExpectingAttribute() call. Confirm.
123 _stringToken: function(cursor, stringEnds)
// A string that is not in attribute-value position gets no token type.
125 if (!this._isExpectingAttributeValue()) {
126 this.tokenType = null;
// Otherwise it is typed as a plain attribute value or as one of the link types.
129 this.tokenType = this._attrValueTokenType();
// Go back to expecting an attribute name.
131 this._setExpectingAttribute();
135 _attrValueTokenType: function()
137 if (this._condition.parseCondition & this._parseConditions.LINKIFY) {
138 if (this._condition.parseCondition & this._parseConditions.A_NODE)
139 return "html-external-link";
140 return "html-resource-link";
142 return "html-attribute-value";
145 get _internalJavaScriptTokenizer()
147 return WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript");
150 get _internalCSSTokenizer()
152 return WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/css");
155 scriptStarted: function(cursor)
157 this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.createInitialCondition();
// Hook invoked by the closing-script-tag rule. That rule passes cursor - 8,
// i.e. the offset where the 8-character "</script" match begins.
// NOTE(review): the body is not visible in this excerpt; confirm whether it
// is intentionally empty.
160 scriptEnded: function(cursor)
164 styleSheetStarted: function(cursor)
166 this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.createInitialCondition();
// Hook invoked by the closing-style-tag rule. That rule passes cursor - 7,
// i.e. the offset where the 7-character "</style" match begins.
// NOTE(review): the body is not visible in this excerpt; confirm whether it
// is intentionally empty.
169 styleSheetEnded: function(cursor)
// Produces the next token starting at `cursor` and records its type in
// this.tokenType. While an embedded script or stylesheet condition is stored
// on this._condition, tokenizing is delegated to the internal JavaScript or
// CSS tokenizer; otherwise the re2c-generated state machine that follows runs.
// NOTE(review): several lines of this method are not visible in this excerpt
// (what is done with `result`, and the loop/switch that drives the state
// machine); presumably `result` is returned. Confirm.
173 nextToken: function(cursor)
175 if (this._condition.internalJavaScriptTokenizerCondition) {
176 // Re-set line to force </script> detection first.
177 this.line = this._line;
// Delegate only while the cursor has not reached the end of the text held
// by the internal tokenizer.
178 if (cursor !== this._internalJavaScriptTokenizer._line.length) {
179 // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
180 this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition;
181 var result = this._internalJavaScriptTokenizer.nextToken(cursor);
182 this.tokenType = this._internalJavaScriptTokenizer.tokenType;
183 this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition;
// The internal text is used up but this line continues: drop the embedded
// condition (presumably so the HTML rules handle the closing tag; verify).
// At the real end of the line the condition is kept.
185 } else if (cursor !== this._line.length)
186 delete this._condition.internalJavaScriptTokenizerCondition;
187 } else if (this._condition.internalCSSTokenizerCondition) {
188 // Re-set line to force </style> detection first.
189 this.line = this._line;
// Same delegation scheme as for embedded JavaScript above.
190 if (cursor !== this._internalCSSTokenizer._line.length) {
191 // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
192 this._internalCSSTokenizer.condition = this._condition.internalCSSTokenizerCondition;
193 var result = this._internalCSSTokenizer.nextToken(cursor);
194 this.tokenType = this._internalCSSTokenizer.tokenType;
195 this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.condition;
197 } else if (cursor !== this._line.length)
198 delete this._condition.internalCSSTokenizerCondition;
// Remember where this token starts; the rule actions use it to slice the
// token text out of this._line.
201 var cursorOnEnter = cursor;
// re2c input for the HTML lexer. It consists of, in order:
//   - re2c configuration: the character type and cursor names, the
//     condition getter/setter (this.getLexCondition / this.setLexCondition),
//     and the prefixes that turn generated labels into `case` labels;
//   - named patterns for comments, doctype, script/style tags, quoted
//     strings and identifiers;
//   - rules of the form `<CONDITION> Pattern => NEXT_CONDITION { action }`;
//     every visible single-line action sets this.tokenType and returns cursor.
// NOTE(review): the patterns GT and EqualSign are used by rules below but
// their definitions are not visible in this excerpt; several rule headers
// and closing lines of actions are missing as well.
205 // Following comment is replaced with generated state machine.
207 re2c:define:YYCTYPE = "var";
208 re2c:define:YYCURSOR = cursor;
209 re2c:define:YYGETCONDITION = "this.getLexCondition";
210 re2c:define:YYSETCONDITION = "this.setLexCondition";
211 re2c:condprefix = "case this.case_";
212 re2c:condenumprefix = "this._lexConditions.";
213 re2c:yyfill:enable = 0;
214 re2c:labelprefix = "case ";
216 re2c:indent:string = " ";
218 CommentContent = ([^-\r\n] | ("--" [^>]))*;
219 Comment = "<!--" CommentContent "-->";
220 CommentStart = "<!--" CommentContent [\r\n];
221 CommentEnd = CommentContent "-->";
223 DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
224 DocTypeContent = [^\r\n>]*;
226 ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
227 ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
229 StyleStart = "<" [Ss] [Tt] [Yy] [Ll] [Ee];
230 StyleEnd = "</" [Ss] [Tt] [Yy] [Ll] [Ee];
236 DoubleStringContent = [^\r\n\"]*;
237 SingleStringContent = [^\r\n\']*;
238 StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
239 DoubleStringStart = "\"" DoubleStringContent [\r\n];
240 DoubleStringEnd = DoubleStringContent "\"";
241 SingleStringStart = "'" SingleStringContent [\r\n];
242 SingleStringEnd = SingleStringContent "'";
244 Identifier = [^ \r\n"'<>\[\]=]+;
246 <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
247 <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
248 <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
249 <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }
251 <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
252 <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
253 <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; }
255 <INITIAL> ScriptStart => TAG
257 if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
258 // Do not tokenize script tag contents, keep lexer state, even though processing "<".
259 this.setLexCondition(this._lexConditions.INITIAL);
260 this.tokenType = null;
263 this.tokenType = "html-tag";
// Plain assignment: every previous parse flag is replaced by SCRIPT before
// the attribute flag is added.
264 this._condition.parseCondition = this._parseConditions.SCRIPT;
265 this._setExpectingAttribute();
269 <INITIAL> ScriptEnd => TAG
271 this.tokenType = "html-tag";
272 this._condition.parseCondition = this._parseConditions.INITIAL;
// ScriptEnd matches 8 characters, so cursor - 8 is where the closing tag starts.
273 this.scriptEnded(cursor - 8);
277 <INITIAL> StyleStart => TAG
279 if (this._condition.parseCondition & this._parseConditions.STYLE) {
280 // Do not tokenize style tag contents, keep lexer state, even though processing "<".
281 this.setLexCondition(this._lexConditions.INITIAL);
282 this.tokenType = null;
285 this.tokenType = "html-tag";
// Plain assignment: every previous parse flag is replaced by STYLE before
// the attribute flag is added.
286 this._condition.parseCondition = this._parseConditions.STYLE;
287 this._setExpectingAttribute();
291 <INITIAL> StyleEnd => TAG
293 this.tokenType = "html-tag";
294 this._condition.parseCondition = this._parseConditions.INITIAL;
// StyleEnd matches 7 characters, so cursor - 7 is where the closing tag starts.
295 this.styleSheetEnded(cursor - 7);
301 if (this._condition.parseCondition & (this._parseConditions.SCRIPT | this._parseConditions.STYLE)) {
302 // Do not tokenize script and style tag contents, keep lexer state, even though processing "<".
303 this.setLexCondition(this._lexConditions.INITIAL);
304 this.tokenType = null;
308 this._condition.parseCondition = this._parseConditions.INITIAL;
309 this.tokenType = "html-tag";
315 this.tokenType = "html-tag";
316 if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
// The opening script tag is complete: start tracking embedded JavaScript.
317 this.scriptStarted(cursor);
318 // Do not tokenize script tag contents.
322 if (this._condition.parseCondition & this._parseConditions.STYLE) {
// The opening style tag is complete: start tracking embedded CSS.
323 this.styleSheetStarted(cursor);
324 // Do not tokenize style tag contents.
328 this._condition.parseCondition = this._parseConditions.INITIAL;
332 <TAG> StringLiteral { return this._stringToken(cursor, true); }
333 <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
334 <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
335 <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); }
336 <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); }
337 <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
338 <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); }
340 <TAG> EqualSign => TAG
// "=" after an attribute name switches the parser to expecting the value;
// the sign itself gets no token type.
342 if (this._isExpectingAttribute())
343 this._setExpectingAttributeValue();
344 this.tokenType = null;
350 if (this._condition.parseCondition === this._parseConditions.SCRIPT || this._condition.parseCondition === this._parseConditions.STYLE) {
351 // Fall through if expecting attributes.
352 this.tokenType = null;
356 if (this._condition.parseCondition === this._parseConditions.INITIAL) {
357 this.tokenType = "html-tag";
358 this._setExpectingAttribute();
359 var token = this._line.substring(cursorOnEnter, cursor);
361 this._condition.parseCondition |= this._parseConditions.A_NODE;
362 else if (this._condition.parseCondition & this._parseConditions.A_NODE)
363 this._condition.parseCondition ^= this._parseConditions.A_NODE;
364 } else if (this._isExpectingAttribute()) {
365 var token = this._line.substring(cursorOnEnter, cursor);
366 if (token === "href" || token === "src")
367 this._condition.parseCondition |= this._parseConditions.LINKIFY;
368 else if (this._condition.parseCondition |= this._parseConditions.LINKIFY)
369 this._condition.parseCondition ^= this._parseConditions.LINKIFY;
370 this.tokenType = "html-attribute-name";
371 } else if (this._isExpectingAttributeValue())
372 this.tokenType = this._attrValueTokenType();
374 this.tokenType = null;
377 <*> [^] { this.tokenType = null; return cursor; }
// Chain the prototypes so that SourceHTMLTokenizer instances inherit the
// members of WebInspector.SourceTokenizer.prototype.
383 WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;