2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef YarrInterpreter_h
27 #define YarrInterpreter_h
29 #include "YarrPattern.h"
30 #include <wtf/PassOwnPtr.h>
31 #include <wtf/unicode/Unicode.h>
34 class BumpPointerAllocator;
36 using WTF::BumpPointerAllocator;
38 namespace JSC { namespace Yarr {
40 class ByteDisjunction;
44 TypeBodyAlternativeBegin,
45 TypeBodyAlternativeDisjunction,
46 TypeBodyAlternativeEnd,
48 TypeAlternativeDisjunction,
54 TypeAssertionWordBoundary,
55 TypePatternCharacterOnce,
56 TypePatternCharacterFixed,
57 TypePatternCharacterGreedy,
58 TypePatternCharacterNonGreedy,
59 TypePatternCasedCharacterOnce,
60 TypePatternCasedCharacterFixed,
61 TypePatternCasedCharacterGreedy,
62 TypePatternCasedCharacterNonGreedy,
65 TypeParenthesesSubpattern,
66 TypeParenthesesSubpatternOnceBegin,
67 TypeParenthesesSubpatternOnceEnd,
68 TypeParenthesesSubpatternTerminalBegin,
69 TypeParenthesesSubpatternTerminalEnd,
70 TypeParentheticalAssertionBegin,
71 TypeParentheticalAssertionEnd,
79 UChar patternCharacter;
84 CharacterClass* characterClass;
85 unsigned subpatternId;
88 ByteDisjunction* parenthesesDisjunction;
89 unsigned parenthesesWidth;
91 QuantifierType quantityType;
92 unsigned quantityCount;
103 unsigned checkInputCount;
105 unsigned frameLocation;
// Constructs a pattern-character term for the character 'ch'.
// The term type is derived from quantityType:
//   - QuantifierFixedCount: TypePatternCharacterOnce when quantityCount is 1,
//     otherwise TypePatternCharacterFixed;
//   - QuantifierGreedy:     TypePatternCharacterGreedy;
//   - QuantifierNonGreedy:  TypePatternCharacterNonGreedy.
// The character, quantifier type, quantifier count and input position are
// recorded on the term, along with the supplied frameLocation.
110 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
111 : frameLocation(frameLocation)
115 switch (quantityType) {
116 case QuantifierFixedCount:
117 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
119 case QuantifierGreedy:
120 type = ByteTerm::TypePatternCharacterGreedy;
122 case QuantifierNonGreedy:
123 type = ByteTerm::TypePatternCharacterNonGreedy;
127 atom.patternCharacter = ch;
128 atom.quantityType = quantityType;
// The count arrives as a Checked<unsigned>; it is stored as a plain unsigned
// obtained through unsafeGet().
129 atom.quantityCount = quantityCount.unsafeGet();
130 inputPosition = inputPos;
// Constructs a cased-character term from the character pair (lo, hi).
// NOTE(review): lo/hi are presumably the two case variants of one character;
// confirm against the caller.
// The term type is derived from quantityType:
//   - QuantifierFixedCount: TypePatternCasedCharacterOnce when quantityCount
//     is 1, otherwise TypePatternCasedCharacterFixed;
//   - QuantifierGreedy:     TypePatternCasedCharacterGreedy;
//   - QuantifierNonGreedy:  TypePatternCasedCharacterNonGreedy.
133 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
134 : frameLocation(frameLocation)
138 switch (quantityType) {
139 case QuantifierFixedCount:
140 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
142 case QuantifierGreedy:
143 type = ByteTerm::TypePatternCasedCharacterGreedy;
145 case QuantifierNonGreedy:
146 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
150 atom.casedCharacter.lo = lo;
151 atom.casedCharacter.hi = hi;
152 atom.quantityType = quantityType;
// The count arrives as a Checked<unsigned>; it is stored as a plain unsigned
// obtained through unsafeGet().
153 atom.quantityCount = quantityCount.unsafeGet();
154 inputPosition = inputPos;
// Constructs a TypeCharacterClass term referring to 'characterClass'.
// The quantifier is initialised to a fixed count of 1 and the term is
// positioned at inputPos.
157 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
158 : type(ByteTerm::TypeCharacterClass)
162 atom.characterClass = characterClass;
163 atom.quantityType = QuantifierFixedCount;
164 atom.quantityCount = 1;
165 inputPosition = inputPos;
// Constructs a parentheses term of the given type. The subpattern id and the
// ByteDisjunction describing the parenthesised contents are recorded on the
// term; the quantifier is initialised to a fixed count of 1.
168 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
173 atom.subpatternId = subpatternId;
174 atom.parenthesesDisjunction = parenthesesInfo;
175 atom.quantityType = QuantifierFixedCount;
176 atom.quantityCount = 1;
177 inputPosition = inputPos;
// Constructs a term of the given type with a default quantifier
// (fixed count of 1). 'invert' defaults to false.
180 ByteTerm(Type type, bool invert = false)
185 atom.quantityType = QuantifierFixedCount;
186 atom.quantityCount = 1;
// Constructs a term of the given type that refers to a subpattern by id but
// carries no disjunction. The quantifier is initialised to a fixed count of 1
// and the term is positioned at inputPos.
189 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
194 atom.subpatternId = subpatternId;
195 atom.quantityType = QuantifierFixedCount;
196 atom.quantityCount = 1;
197 inputPosition = inputPos;
// Factory: builds a TypeAssertionBOL term positioned at inputPos.
200 static ByteTerm BOL(int inputPos)
202 ByteTerm term(TypeAssertionBOL);
203 term.inputPosition = inputPos;
// Factory: builds a TypeCheckInput term whose checkInputCount is 'count'.
// The Checked<unsigned> argument is stored as a plain unsigned via unsafeGet().
207 static ByteTerm CheckInput(Checked<unsigned> count)
209 ByteTerm term(TypeCheckInput);
210 term.checkInputCount = count.unsafeGet();
// Factory: builds a TypeUncheckInput term whose checkInputCount is 'count'.
// The Checked<unsigned> argument is stored as a plain unsigned via unsafeGet().
214 static ByteTerm UncheckInput(Checked<unsigned> count)
216 ByteTerm term(TypeUncheckInput);
217 term.checkInputCount = count.unsafeGet();
// Factory: builds a TypeAssertionEOL term positioned at inputPos.
221 static ByteTerm EOL(int inputPos)
223 ByteTerm term(TypeAssertionEOL);
224 term.inputPosition = inputPos;
// Factory: builds a TypeAssertionWordBoundary term positioned at inputPos.
// 'invert' is forwarded to the ByteTerm(Type, bool) constructor.
228 static ByteTerm WordBoundary(bool invert, int inputPos)
230 ByteTerm term(TypeAssertionWordBoundary, invert);
231 term.inputPosition = inputPos;
// Factory: builds a TypeBackReference term referring to 'subpatternId' at
// inputPos. Both the capture and invert arguments are passed as false.
235 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
237 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
// Factory: builds a TypeBodyAlternativeBegin term. The alternative's 'next'
// and 'end' fields start at 0 and 'onceThrough' is taken from the argument.
240 static ByteTerm BodyAlternativeBegin(bool onceThrough)
242 ByteTerm term(TypeBodyAlternativeBegin);
243 term.alternative.next = 0;
244 term.alternative.end = 0;
245 term.alternative.onceThrough = onceThrough;
// Factory: builds a TypeBodyAlternativeDisjunction term. The alternative's
// 'next' and 'end' fields start at 0 and 'onceThrough' is taken from the
// argument.
249 static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
251 ByteTerm term(TypeBodyAlternativeDisjunction);
252 term.alternative.next = 0;
253 term.alternative.end = 0;
254 term.alternative.onceThrough = onceThrough;
// Factory: builds a TypeBodyAlternativeEnd term with 'next' and 'end' set to 0
// and 'onceThrough' set to false.
258 static ByteTerm BodyAlternativeEnd()
260 ByteTerm term(TypeBodyAlternativeEnd);
261 term.alternative.next = 0;
262 term.alternative.end = 0;
263 term.alternative.onceThrough = false;
// Factory: builds a TypeAlternativeBegin term with 'next' and 'end' set to 0
// and 'onceThrough' set to false.
267 static ByteTerm AlternativeBegin()
269 ByteTerm term(TypeAlternativeBegin);
270 term.alternative.next = 0;
271 term.alternative.end = 0;
272 term.alternative.onceThrough = false;
// Factory: builds a TypeAlternativeDisjunction term with 'next' and 'end' set
// to 0 and 'onceThrough' set to false.
276 static ByteTerm AlternativeDisjunction()
278 ByteTerm term(TypeAlternativeDisjunction);
279 term.alternative.next = 0;
280 term.alternative.end = 0;
281 term.alternative.onceThrough = false;
// Factory: builds a TypeAlternativeEnd term with 'next' and 'end' set to 0
// and 'onceThrough' set to false.
285 static ByteTerm AlternativeEnd()
287 ByteTerm term(TypeAlternativeEnd);
288 term.alternative.next = 0;
289 term.alternative.end = 0;
290 term.alternative.onceThrough = false;
// Factory: returns a TypeSubpatternBegin term with the default quantifier.
294 static ByteTerm SubpatternBegin()
296 return ByteTerm(TypeSubpatternBegin);
// Factory: returns a TypeSubpatternEnd term with the default quantifier.
299 static ByteTerm SubpatternEnd()
301 return ByteTerm(TypeSubpatternEnd);
// Factory: builds a TypeDotStarEnclosure term and records on it whether the
// enclosure carries a beginning-of-line and/or end-of-line anchor.
304 static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
306 ByteTerm term(TypeDotStarEnclosure);
307 term.anchors.m_bol = bolAnchor;
308 term.anchors.m_eol = eolAnchor;
// A sequence of ByteTerms together with the subpattern count and frame size
// supplied at construction time.
323 class ByteDisjunction {
324 WTF_MAKE_FAST_ALLOCATED;
// Stores numSubpatterns and frameSize; 'terms' starts out empty.
326 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
327 : m_numSubpatterns(numSubpatterns)
328 , m_frameSize(frameSize)
// The terms making up this disjunction.
332 Vector<ByteTerm> terms;
333 unsigned m_numSubpatterns;
334 unsigned m_frameSize;
// Holds the body disjunction of a pattern together with the flags, allocator
// and character classes taken from the YarrPattern it was built from.
337 struct BytecodePattern {
338 WTF_MAKE_FAST_ALLOCATED;
// Copies the ignore-case and multiline flags and the built-in newline and
// word-character classes from 'pattern', records the allocator, and takes
// over the parentheses disjunctions and the pattern's user character classes.
340 BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator)
342 , m_ignoreCase(pattern.m_ignoreCase)
343 , m_multiline(pattern.m_multiline)
344 , m_allocator(allocator)
346 newlineCharacterClass = pattern.newlineCharacterClass();
347 wordcharCharacterClass = pattern.wordcharCharacterClass();
349 m_allParenthesesInfo.append(allParenthesesInfo);
350 m_userCharacterClasses.append(pattern.m_userCharacterClasses);
351 // 'Steal' the YarrPattern's CharacterClasses! We clear its
352 // array, so that it won't delete them on destruction. We'll
353 // take responsibility for that.
354 pattern.m_userCharacterClasses.clear();
// The pointers held in these vectors are owned by this object; delete them.
359 deleteAllValues(m_allParenthesesInfo);
360 deleteAllValues(m_userCharacterClasses);
363 OwnPtr<ByteDisjunction> m_body;
366 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
367 // with a JSGlobalData. Cache a pointer to our JSGlobalData's m_regExpAllocator.
368 BumpPointerAllocator* m_allocator;
370 CharacterClass* newlineCharacterClass;
371 CharacterClass* wordcharCharacterClass;
// Raw owning pointers; released via deleteAllValues above.
374 Vector<ByteDisjunction*> m_allParenthesesInfo;
375 Vector<CharacterClass*> m_userCharacterClasses;
378 } } // namespace JSC::Yarr
380 #endif // YarrInterpreter_h