2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "ASCIICType.h"
30 #include "LinkBuffer.h"
37 namespace JSC { namespace Yarr {
39 class YarrGenerator : private MacroAssembler {
40 friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
// Register assignments for the JIT-generated matcher: pointer to the subject
// string ('input'), current character index, string length, output match
// vector, two temporaries, and the return register.
// NOTE(review): the per-architecture #if CPU(...) / #elif guards appear to
// have been lost from this extract -- as written these are duplicate
// definitions; confirm against the upstream file before editing.
// ARM:
43 static const RegisterID input = ARMRegisters::r0;
44 static const RegisterID index = ARMRegisters::r1;
45 static const RegisterID length = ARMRegisters::r2;
46 static const RegisterID output = ARMRegisters::r4;
48 static const RegisterID regT0 = ARMRegisters::r5;
49 static const RegisterID regT1 = ARMRegisters::r6;
51 static const RegisterID returnRegister = ARMRegisters::r0;
// MIPS:
53 static const RegisterID input = MIPSRegisters::a0;
54 static const RegisterID index = MIPSRegisters::a1;
55 static const RegisterID length = MIPSRegisters::a2;
56 static const RegisterID output = MIPSRegisters::a3;
58 static const RegisterID regT0 = MIPSRegisters::t4;
59 static const RegisterID regT1 = MIPSRegisters::t5;
61 static const RegisterID returnRegister = MIPSRegisters::v0;
// SH4:
63 static const RegisterID input = SH4Registers::r4;
64 static const RegisterID index = SH4Registers::r5;
65 static const RegisterID length = SH4Registers::r6;
66 static const RegisterID output = SH4Registers::r7;
68 static const RegisterID regT0 = SH4Registers::r0;
69 static const RegisterID regT1 = SH4Registers::r1;
71 static const RegisterID returnRegister = SH4Registers::r0;
// X86 (32-bit):
73 static const RegisterID input = X86Registers::eax;
74 static const RegisterID index = X86Registers::edx;
75 static const RegisterID length = X86Registers::ecx;
76 static const RegisterID output = X86Registers::edi;
78 static const RegisterID regT0 = X86Registers::ebx;
79 static const RegisterID regT1 = X86Registers::esi;
81 static const RegisterID returnRegister = X86Registers::eax;
// X86_64 (System V argument registers edi/esi/edx/ecx):
83 static const RegisterID input = X86Registers::edi;
84 static const RegisterID index = X86Registers::esi;
85 static const RegisterID length = X86Registers::edx;
86 static const RegisterID output = X86Registers::ecx;
88 static const RegisterID regT0 = X86Registers::eax;
89 static const RegisterID regT1 = X86Registers::ebx;
91 static const RegisterID returnRegister = X86Registers::eax;
// Peephole optimization over one alternative's term list: when a fixed-count
// character class is immediately followed by a fixed-count single pattern
// character, swap the two terms (presumably because the plain character
// compare is the cheaper first test -- confirm against upstream).
// NOTE(review): braces and the early-return line are missing from this extract.
94 void optimizeAlternative(PatternAlternative* alternative)
// Nothing to do for an empty alternative.
96 if (!alternative->m_terms.size())
99 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
100 PatternTerm& term = alternative->m_terms[i];
101 PatternTerm& nextTerm = alternative->m_terms[i + 1];
// Only swap when both terms are QuantifierFixedCount, so the pair's
// positions are fixed and order of testing cannot affect the result.
103 if ((term.type == PatternTerm::TypeCharacterClass)
104 && (term.quantityType == QuantifierFixedCount)
105 && (nextTerm.type == PatternTerm::TypePatternCharacter)
106 && (nextTerm.quantityType == QuantifierFixedCount)) {
107 PatternTerm termCopy = term;
108 alternative->m_terms[i] = nextTerm;
109 alternative->m_terms[i + 1] = termCopy;
// Emit a binary search over the sorted 'ranges' array, interleaving the
// sorted singleton 'matches' so every candidate below the pivot range is
// tested before the pivot itself. Successful comparisons jump to matchDest;
// misses accumulate in 'failures'. *matchIndex tracks how many singletons
// have been consumed so far.
// NOTE(review): several structural lines (braces, else-branch markers, and
// the recursion/loop over the upper half starting at 'next') are missing
// from this extract.
114 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
117 // pick which range we're going to generate
118 int which = count >> 1;
119 char lo = ranges[which].begin;
120 char hi = ranges[which].end;
122 // check if there are any ranges or matches below lo. If not, just jl to failure -
123 // if there is anything else to check, check that first, if it falls through jmp to failure.
124 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
125 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
127 // generate code for all ranges before this one
129 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
// Emit equality tests for every singleton that sorts below this range.
131 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
132 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
135 failures.append(jump());
137 loOrAbove.link(this);
// (Alternate arm: ranges below the pivot but no singletons below lo.)
139 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
141 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
142 failures.append(jump());
144 loOrAbove.link(this);
// (Alternate arm: nothing at all below lo -- below-lo is a direct failure.)
146 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
// Singletons inside [lo, hi] are covered by the range test; skip them.
148 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
151 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
152 // fall through to here, the value is above hi.
154 // shuffle along & loop around if there are any more matches to handle.
155 unsigned next = which + 1;
// Emit code matching 'character' against a character class. Fastest path is
// a single byte-test against a precomputed table; otherwise non-ASCII
// singletons/ranges are tested first (guarded by an isAscii check), then the
// ASCII ranges and singletons.
// NOTE(review): several structural lines (braces, early return after the
// table test, 'below.link(this)', and the isAscii.link) are missing from
// this extract.
161 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
163 if (charClass->m_table) {
164 ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
// One indexed byte test; inverted tables jump to matchDest on Zero.
165 matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
169 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
170 Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));
172 if (charClass->m_matchesUnicode.size()) {
173 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
174 UChar ch = charClass->m_matchesUnicode[i];
175 matchDest.append(branch32(Equal, character, Imm32(ch)));
179 if (charClass->m_rangesUnicode.size()) {
180 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
181 UChar lo = charClass->m_rangesUnicode[i].begin;
182 UChar hi = charClass->m_rangesUnicode[i].end;
// In-range iff not below lo and not above hi.
184 Jump below = branch32(LessThan, character, Imm32(lo));
185 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
// A non-ASCII character that matched nothing above fails the class.
190 unicodeFail = jump();
194 if (charClass->m_ranges.size()) {
195 unsigned matchIndex = 0;
197 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
// Any singletons above the last range still need explicit tests.
198 while (matchIndex < charClass->m_matches.size())
199 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
202 } else if (charClass->m_matches.size()) {
203 // optimization: gather 'a','A' etc back together, can mask & test once.
204 Vector<char> matchesAZaz;
206 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
207 char ch = charClass->m_matches[i];
208 if (m_pattern.m_ignoreCase) {
209 if (isASCIILower(ch)) {
210 matchesAZaz.append(ch);
213 if (isASCIIUpper(ch))
216 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
// Case-fold once via OR 0x20, then compare the lowercase letters.
219 if (unsigned countAZaz = matchesAZaz.size()) {
220 or32(TrustedImm32(32), character);
221 for (unsigned i = 0; i < countAZaz; ++i)
222 matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
226 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
227 unicodeFail.link(this);
// --- Input-availability helpers: compare 'index' against 'length'. ---
// NOTE(review): two helper signatures (presumably checkInput() and
// atEndOfInput()) are missing from this extract; only their return
// statements survive below.
230 // Jumps if input not available; will have (incorrectly) incremented already!
231 Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
234 add32(Imm32(countToCheck), index);
235 return branch32(Above, index, length);
// Inverse check: taken when enough input remains after the increment.
238 Jump jumpIfAvailableInput(unsigned countToCheck)
240 add32(Imm32(countToCheck), index);
241 return branch32(BelowOrEqual, index, length);
// (orphaned body -- presumably checkInput(): taken when index <= length)
246 return branch32(BelowOrEqual, index, length);
// (orphaned body -- presumably atEndOfInput(): taken when index == length)
251 return branch32(Equal, index, length);
254 Jump notAtEndOfInput()
256 return branch32(NotEqual, index, length);
// Compare the character at inputPosition (relative to 'index') against 'ch',
// returning a jump taken on mismatch; 8-bit vs 16-bit load is selected by
// m_charSize.
259 Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
261 if (m_charSize == Char8)
262 return branch8(NotEqual, BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), Imm32(ch));
263 return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
// Load the character at inputPosition into 'reg' (8- or 16-bit as above).
266 void readCharacter(int inputPosition, RegisterID reg)
268 if (m_charSize == Char8)
269 load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
271 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
// --- Frame accessors: spill/reload values at stack-frame slot indices. ---
274 void storeToFrame(RegisterID reg, unsigned frameLocation)
276 poke(reg, frameLocation);
279 void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
281 poke(imm, frameLocation);
// Store a patchable pointer slot; the returned DataLabelPtr is filled in at
// link time (see BacktrackingState::linkDataLabels).
284 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
286 return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
289 void loadFromFrame(unsigned frameLocation, RegisterID reg)
291 peek(reg, frameLocation);
// Indirect jump through a return address previously stored in the frame.
294 void loadFromFrameAndJump(unsigned frameLocation)
296 jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
// Opcodes for the linearized form of the compiled pattern (one YarrOp per
// node). NOTE(review): the enum declaration line itself, and the trailing
// OpTerm / OpMatchFailed enumerators described by the last two comments,
// are missing from this extract.
300 // These nodes wrap body alternatives - those in the main disjunction,
301 // rather than subpatterns or assertions. These are chained together in
302 // a doubly linked list, with a 'begin' node for the first alternative,
303 // a 'next' node for each subsequent alternative, and an 'end' node at
304 // the end. In the case of repeating alternatives, the 'end' node also
305 // has a reference back to 'begin'.
306 OpBodyAlternativeBegin,
307 OpBodyAlternativeNext,
308 OpBodyAlternativeEnd,
309 // Similar to the body alternatives, but used for subpatterns with two
310 // or more alternatives.
311 OpNestedAlternativeBegin,
312 OpNestedAlternativeNext,
313 OpNestedAlternativeEnd,
314 // Used for alternatives in subpatterns where there is only a single
315 // alternative (backtracking is easier in these cases), or for alternatives
316 // which never need to be backtracked (those in parenthetical assertions,
317 // terminal subpatterns).
318 OpSimpleNestedAlternativeBegin,
319 OpSimpleNestedAlternativeNext,
320 OpSimpleNestedAlternativeEnd,
321 // Used to wrap 'Once' subpattern matches (quantityCount == 1).
322 OpParenthesesSubpatternOnceBegin,
323 OpParenthesesSubpatternOnceEnd,
324 // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
325 OpParenthesesSubpatternTerminalBegin,
326 OpParenthesesSubpatternTerminalEnd,
327 // Used to wrap parenthetical assertions.
328 OpParentheticalAssertionBegin,
329 OpParentheticalAssertionEnd,
330 // Wraps all simple terms (pattern characters, character classes).
332 // Where an expression contains only 'once through' body alternatives
333 // and no repeating ones, this op is used to return match failure.
// One compiled operation (see YarrOpCode above).
// NOTE(review): the enclosing struct declaration and several member
// declarations (m_op, m_term, m_nextOp/m_previousOp, m_jumps, m_reentry,
// m_checkAdjust) are missing from this extract; the constructors'
// initializer lists are visibly truncated.
337 // This structure is used to hold the compiled opcode information,
338 // including reference back to the original PatternTerm/PatternAlternatives,
339 // and JIT compilation data structures.
// Construct an OpTerm-style node wrapping a single pattern term.
341 explicit YarrOp(PatternTerm* term)
344 , m_isDeadCode(false)
// Construct a structural node (alternative/parentheses begin/next/end).
348 explicit YarrOp(YarrOpCode op)
350 , m_isDeadCode(false)
354 // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
358 // For alternatives, this holds the PatternAlternative and doubly linked
359 // references to this alternative's siblings. In the case of the
360 // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
361 // m_nextOp will reference the OpBodyAlternativeBegin node of the first
362 // repeating alternative.
363 PatternAlternative* m_alternative;
367 // Used to record a set of Jumps out of the generated code, typically
368 // used for jumps out to backtracking code, and a single reentry back
369 // into the code for a node (likely where a backtrack will trigger
374 // This flag is used to null out the second pattern character, when
375 // two are fused to match a pair together.
378 // Currently used in the case of some of the more complex management of
379 // 'm_checked', to cache the offset used in this alternative, to avoid
383 // Used by OpNestedAlternativeNext/End to hold the pointer to the
384 // value that will be pushed into the pattern's frame to return to,
385 // upon backtracking back into the disjunction.
386 DataLabelPtr m_returnAddress;
390 // This class encapsulates information about the state of code generation
391 // whilst generating the code for backtracking, when a term fails to match.
392 // Upon entry to code generation of the backtracking code for a given node,
393 // the Backtracking state will hold references to all control flow sources
394 // that are outputs in need of further backtracking from the prior node
395 // generated (which is the subsequent operation in the regular expression,
396 // and in the m_ops Vector, since we generated backtracking backwards).
397 // These references to control flow take the form of:
398 // - A jump list of jumps, to be linked to code that will backtrack them
400 // - A set of DataLabelPtr values, to be populated with values to be
401 // treated effectively as return addresses backtracking into complex
403 // - A flag indicating that the current sequence of generated code up to
404 // this point requires backtracking.
405 class BacktrackingState {
// NOTE(review): the constructor's signature line is missing from this
// extract; only its initializer list survives.
408 : m_pendingFallthrough(false)
412 // Add a jump or jumps, a return address, or set the flag indicating
413 // that the current 'fallthrough' control flow requires backtracking.
414 void append(const Jump& jump)
416 m_laterFailures.append(jump);
418 void append(JumpList& jumpList)
420 m_laterFailures.append(jumpList);
422 void append(const DataLabelPtr& returnAddress)
424 m_pendingReturns.append(returnAddress);
// fallthrough(): may only be flagged once between links, hence the ASSERT.
// NOTE(review): the fallthrough() signature line is missing from this extract.
428 ASSERT(!m_pendingFallthrough);
429 m_pendingFallthrough = true;
432 // These methods clear the backtracking state, either linking to the
433 // current location, a provided label, or copying the backtracking out
434 // to a JumpList. All actions may require code generation to take place,
435 // and as such are passed a pointer to the assembler.
// Resolve all pending jumps and return addresses to the current output
// position; pending returns become patch records resolved at link time.
436 void link(MacroAssembler* assembler)
438 if (m_pendingReturns.size()) {
439 Label here(assembler);
440 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
441 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
442 m_pendingReturns.clear();
444 m_laterFailures.link(assembler);
445 m_laterFailures.clear();
446 m_pendingFallthrough = false;
// Resolve all pending jumps/returns to an explicit label; an outstanding
// fallthrough becomes an emitted jump to that label.
448 void linkTo(Label label, MacroAssembler* assembler)
450 if (m_pendingReturns.size()) {
451 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
452 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
453 m_pendingReturns.clear();
455 if (m_pendingFallthrough)
456 assembler->jump(label);
457 m_laterFailures.linkTo(label, assembler);
458 m_laterFailures.clear();
459 m_pendingFallthrough = false;
// Drain all pending backtracks into the caller's JumpList. Pending return
// addresses are linked to a label emitted here, and m_pendingFallthrough is
// set so that the code just emitted is itself routed into the list below.
461 void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
463 if (m_pendingReturns.size()) {
464 Label here(assembler);
465 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
466 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
467 m_pendingReturns.clear();
468 m_pendingFallthrough = true;
470 if (m_pendingFallthrough)
471 jumpList.append(assembler->jump());
472 jumpList.append(m_laterFailures);
473 m_laterFailures.clear();
474 m_pendingFallthrough = false;
// True when no jumps, return addresses, or fallthrough are outstanding.
// NOTE(review): the empty() signature line is missing from this extract.
479 return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
482 // Called at the end of code generation to link all return addresses.
483 void linkDataLabels(LinkBuffer& linkBuffer)
// Patch each recorded DataLabelPtr with the final address of its target.
486 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
487 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
// Pairs a patchable pointer with the label it must resolve to at link time.
491 struct ReturnAddressRecord {
492 ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
493 : m_dataLabel(dataLabel)
494 , m_backtrackLocation(backtrackLocation)
498 DataLabelPtr m_dataLabel;
499 Label m_backtrackLocation;
// Accumulated backtracking state (see class comment above).
502 JumpList m_laterFailures;
503 bool m_pendingFallthrough;
504 Vector<DataLabelPtr, 4> m_pendingReturns;
505 Vector<ReturnAddressRecord, 4> m_backtrackRecords;
508 // Generation methods:
509 // ===================
511 // This method provides a default implementation of backtracking common
512 // to many terms; terms commonly jump out of the forwards matching path
513 // on any failed conditions, and add these jumps to the m_jumps list. If
514 // no special handling is required we can often just backtrack to m_jumps.
515 void backtrackTermDefault(size_t opIndex)
516 {
// (original line 516 may differ; brace lines are elided in this extract)
517 YarrOp& op = m_ops[opIndex];
// Route the op's recorded failure jumps into the shared backtracking state.
518 m_backtrackingState.append(op.m_jumps);
// Generate a '^' (beginning-of-line) assertion. In multiline mode it also
// matches just after a newline; otherwise it only matches at the true start
// of input (index == m_checked at this point of the checked region).
521 void generateAssertionBOL(size_t opIndex)
523 YarrOp& op = m_ops[opIndex];
524 PatternTerm* term = op.m_term;
526 if (m_pattern.m_multiline) {
527 const RegisterID character = regT0;
// Matches at the absolute start of input...
530 if (!term->inputPosition)
531 matchDest.append(branch32(Equal, index, Imm32(m_checked)));
// ...or when the preceding character is a newline.
533 readCharacter((term->inputPosition - m_checked) - 1, character);
534 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
535 op.m_jumps.append(jump());
537 matchDest.link(this);
539 // Erk, really should poison out these alternatives early. :-/
540 if (term->inputPosition)
541 op.m_jumps.append(jump());
543 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
// Default backtracking: just forward the recorded jumps.
546 void backtrackAssertionBOL(size_t opIndex)
548 backtrackTermDefault(opIndex);
// Generate a '$' (end-of-line) assertion; mirror image of BOL above.
551 void generateAssertionEOL(size_t opIndex)
553 YarrOp& op = m_ops[opIndex];
554 PatternTerm* term = op.m_term;
556 if (m_pattern.m_multiline) {
557 const RegisterID character = regT0;
// Matches at the absolute end of input...
560 if (term->inputPosition == m_checked)
561 matchDest.append(atEndOfInput());
// ...or when the next character is a newline.
563 readCharacter(term->inputPosition - m_checked, character);
564 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
565 op.m_jumps.append(jump());
567 matchDest.link(this);
569 if (term->inputPosition == m_checked)
570 op.m_jumps.append(notAtEndOfInput());
571 // Erk, really should poison out these alternatives early. :-/
573 op.m_jumps.append(jump());
// Default backtracking: just forward the recorded jumps.
576 void backtrackAssertionEOL(size_t opIndex)
578 backtrackTermDefault(opIndex);
581 // Also falls through on nextIsNotWordChar.
// Classify the character at the term's position: append to nextIsWordChar on
// a word-character match; end-of-input counts as not-a-wordchar.
582 void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
584 YarrOp& op = m_ops[opIndex];
585 PatternTerm* term = op.m_term;
587 const RegisterID character = regT0;
589 if (term->inputPosition == m_checked)
590 nextIsNotWordChar.append(atEndOfInput());
592 readCharacter((term->inputPosition - m_checked), character);
593 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
// Generate a '\b' / '\B' word-boundary assertion: classify the preceding
// character, then the current one, and succeed when the two classes differ
// (or agree, for an inverted assertion).
596 void generateAssertionWordBoundary(size_t opIndex)
598 YarrOp& op = m_ops[opIndex];
599 PatternTerm* term = op.m_term;
601 const RegisterID character = regT0;
// First, classify the preceding character (input start is non-word).
605 if (!term->inputPosition)
606 atBegin = branch32(Equal, index, Imm32(m_checked));
607 readCharacter((term->inputPosition - m_checked) - 1, character);
608 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
609 if (!term->inputPosition)
612 // We fall through to here if the last character was not a wordchar.
613 JumpList nonWordCharThenWordChar;
614 JumpList nonWordCharThenNonWordChar;
615 if (term->invert()) {
616 matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
617 nonWordCharThenWordChar.append(jump());
619 matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
620 nonWordCharThenNonWordChar.append(jump());
// non-word -> non-word is a failed boundary (for the non-inverted case).
622 op.m_jumps.append(nonWordCharThenNonWordChar);
624 // We jump here if the last character was a wordchar.
625 matchDest.link(this);
626 JumpList wordCharThenWordChar;
627 JumpList wordCharThenNonWordChar;
628 if (term->invert()) {
629 matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
630 wordCharThenWordChar.append(jump());
632 matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
633 // This can fall-through!
636 op.m_jumps.append(wordCharThenWordChar);
638 nonWordCharThenWordChar.link(this);
639 wordCharThenNonWordChar.link(this);
// Default backtracking via the op's recorded jumps.
641 void backtrackAssertionWordBoundary(size_t opIndex)
643 backtrackTermDefault(opIndex);
// Match a single fixed pattern character. When the next op is also a single
// fixed character at the adjacent input position, the two are fused into one
// 16-bit (Char8) or 32-bit (Char16) compare and the next op is marked dead.
646 void generatePatternCharacterOnce(size_t opIndex)
648 YarrOp& op = m_ops[opIndex];
650 // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
651 // node, so there must always be at least one more node.
652 ASSERT(opIndex + 1 < m_ops.size());
653 YarrOp& nextOp = m_ops[opIndex + 1];
658 PatternTerm* term = op.m_term;
659 UChar ch = term->patternCharacter;
661 if ((ch > 0xff) && (m_charSize == Char8)) {
662 // Have a 16 bit pattern character and an 8 bit string - short circuit
663 op.m_jumps.append(jump());
667 const RegisterID character = regT0;
669 if (nextOp.m_op == OpTerm) {
670 PatternTerm* nextTerm = nextOp.m_term;
671 if (nextTerm->type == PatternTerm::TypePatternCharacter
672 && nextTerm->quantityType == QuantifierFixedCount
673 && nextTerm->quantityCount == 1
674 && nextTerm->inputPosition == (term->inputPosition + 1)) {
676 UChar ch2 = nextTerm->patternCharacter;
678 int shiftAmount = m_charSize == Char8 ? 8 : 16;
// Pack both characters into one word (first char in the low bits).
680 int chPair = ch | (ch2 << shiftAmount);
// For ignore-case ASCII letters, OR bit 0x20 into the loaded pair to
// case-fold before comparing.
// NOTE(review): the 'mask' declaration line is missing from this extract.
682 if (m_pattern.m_ignoreCase) {
683 if (isASCIIAlpha(ch))
685 if (isASCIIAlpha(ch2))
686 mask |= 32 << shiftAmount;
689 if (m_charSize == Char8) {
690 BaseIndex address(input, index, TimesOne, (term->inputPosition - m_checked) * sizeof(char));
692 load16(address, character);
693 or32(Imm32(mask), character);
694 op.m_jumps.append(branch16(NotEqual, character, Imm32(chPair | mask)));
696 op.m_jumps.append(branch16(NotEqual, address, Imm32(chPair)));
698 BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
700 load32WithUnalignedHalfWords(address, character);
701 or32(Imm32(mask), character);
702 op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
704 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));
// The fused-in second character is skipped when generation reaches it.
706 nextOp.m_isDeadCode = true;
// Single-character path (no fusion possible).
711 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
712 readCharacter(term->inputPosition - m_checked, character);
713 or32(TrustedImm32(32), character);
714 op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
716 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
717 op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
// Default backtracking via the op's recorded jumps.
720 void backtrackPatternCharacterOnce(size_t opIndex)
722 backtrackTermDefault(opIndex);
// Match a fixed-count run (quantityCount) of the same pattern character,
// looping countRegister from (index - quantityCount) up to index.
725 void generatePatternCharacterFixed(size_t opIndex)
727 YarrOp& op = m_ops[opIndex];
728 PatternTerm* term = op.m_term;
729 UChar ch = term->patternCharacter;
731 const RegisterID character = regT0;
732 const RegisterID countRegister = regT1;
734 move(index, countRegister);
735 sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
// NOTE(review): the 'loop' Label declaration line is missing from this
// extract (it is referenced by the linkTo at the bottom).
738 BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());
740 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
741 if (m_charSize == Char8)
742 load8(address, character);
744 load16(address, character);
// OR 0x20 case-folds ASCII letters before the compare.
745 or32(TrustedImm32(32), character);
746 op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
748 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
749 if (m_charSize == Char8)
750 op.m_jumps.append(branch8(NotEqual, address, Imm32(ch)));
752 op.m_jumps.append(branch16(NotEqual, address, Imm32(ch)));
754 add32(TrustedImm32(1), countRegister);
755 branch32(NotEqual, countRegister, index).linkTo(loop, this);
// Default backtracking via the op's recorded jumps.
757 void backtrackPatternCharacterFixed(size_t opIndex)
759 backtrackTermDefault(opIndex);
// Greedily match as many copies of the character as possible (up to
// quantityCount, or unbounded for quantifyInfinite), recording the match
// count into the term's frame slot at the reentry point for backtracking.
762 void generatePatternCharacterGreedy(size_t opIndex)
764 YarrOp& op = m_ops[opIndex];
765 PatternTerm* term = op.m_term;
766 UChar ch = term->patternCharacter;
768 const RegisterID character = regT0;
769 const RegisterID countRegister = regT1;
771 move(TrustedImm32(0), countRegister);
773 if ((ch > 0xff) && (m_charSize == Char8)) {
774 // Have a 16 bit pattern character and an 8 bit string - short circuit
775 op.m_jumps.append(jump());
// Loop body: stop on end of input or on the first non-matching character.
779 failures.append(atEndOfInput());
780 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
781 readCharacter(term->inputPosition - m_checked, character);
782 or32(TrustedImm32(32), character);
783 failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
785 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
786 failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
789 add32(TrustedImm32(1), countRegister);
790 add32(TrustedImm32(1), index);
// Unbounded quantifier loops unconditionally; bounded one loops up to count.
791 if (term->quantityCount == quantifyInfinite)
794 branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
// Backtracking re-enters here; spill the match count to the frame.
798 op.m_reentry = label();
800 storeToFrame(countRegister, term->frameLocation);
// Backtrack a greedy character match: give back one matched character per
// attempt; fail onward (append to state) once the stored count hits zero.
803 void backtrackPatternCharacterGreedy(size_t opIndex)
805 YarrOp& op = m_ops[opIndex];
806 PatternTerm* term = op.m_term;
808 const RegisterID countRegister = regT1;
810 m_backtrackingState.link(this);
812 loadFromFrame(term->frameLocation, countRegister);
// Nothing left to give back -- propagate the backtrack further.
813 m_backtrackingState.append(branchTest32(Zero, countRegister));
814 sub32(TrustedImm32(1), countRegister);
815 sub32(TrustedImm32(1), index);
// Non-greedy forward path matches nothing up front: store a zero count and
// let the backtrack handler (below) consume characters on demand.
819 void generatePatternCharacterNonGreedy(size_t opIndex)
821 YarrOp& op = m_ops[opIndex];
822 PatternTerm* term = op.m_term;
824 const RegisterID countRegister = regT1;
826 move(TrustedImm32(0), countRegister);
827 op.m_reentry = label();
828 storeToFrame(countRegister, term->frameLocation);
// Backtrack a non-greedy character match: try to consume one more copy of
// the character; if that is impossible, rewind index by the consumed count
// and fall through to earlier backtracking.
830 void backtrackPatternCharacterNonGreedy(size_t opIndex)
832 YarrOp& op = m_ops[opIndex];
833 PatternTerm* term = op.m_term;
834 UChar ch = term->patternCharacter;
836 const RegisterID character = regT0;
837 const RegisterID countRegister = regT1;
839 JumpList nonGreedyFailures;
841 m_backtrackingState.link(this);
843 loadFromFrame(term->frameLocation, countRegister);
845 if ((ch > 0xff) && (m_charSize == Char8)) {
846 // Have a 16 bit pattern character and an 8 bit string - short circuit
847 nonGreedyFailures.append(jump());
849 nonGreedyFailures.append(atEndOfInput());
// A bounded quantifier cannot consume past its maximum count.
850 if (term->quantityCount != quantifyInfinite)
851 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
852 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
853 readCharacter(term->inputPosition - m_checked, character);
854 or32(TrustedImm32(32), character);
855 nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
857 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
858 nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
861 add32(TrustedImm32(1), countRegister);
862 add32(TrustedImm32(1), index);
866 nonGreedyFailures.link(this);
// Rewind to the position before this term consumed anything.
868 sub32(countRegister, index);
869 m_backtrackingState.fallthrough();
// Match one character-class instance at the term's position.
// NOTE(review): the term->invert() conditional lines are missing from this
// extract; lines 884/886-887 appear to be its two arms (for an inverted
// class, a successful class match is the failure case).
872 void generateCharacterClassOnce(size_t opIndex)
874 YarrOp& op = m_ops[opIndex];
875 PatternTerm* term = op.m_term;
877 const RegisterID character = regT0;
880 readCharacter(term->inputPosition - m_checked, character);
881 matchCharacterClass(character, matchDest, term->characterClass);
884 op.m_jumps.append(matchDest);
886 op.m_jumps.append(jump());
887 matchDest.link(this);
// Default backtracking via the op's recorded jumps.
890 void backtrackCharacterClassOnce(size_t opIndex)
892 backtrackTermDefault(opIndex);
// Match a fixed-count run of character-class instances, looping over the
// preceding quantityCount input positions (cf. generatePatternCharacterFixed).
895 void generateCharacterClassFixed(size_t opIndex)
897 YarrOp& op = m_ops[opIndex];
898 PatternTerm* term = op.m_term;
900 const RegisterID character = regT0;
901 const RegisterID countRegister = regT1;
903 move(index, countRegister);
904 sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
908 if (m_charSize == Char8)
909 load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
911 load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
912 matchCharacterClass(character, matchDest, term->characterClass);
// NOTE(review): the invert() conditional lines are missing from this
// extract; for an inverted class a class match is the failure case.
915 op.m_jumps.append(matchDest);
917 op.m_jumps.append(jump());
918 matchDest.link(this);
921 add32(TrustedImm32(1), countRegister);
922 branch32(NotEqual, countRegister, index).linkTo(loop, this);
// Default backtracking via the op's recorded jumps.
924 void backtrackCharacterClassFixed(size_t opIndex)
926 backtrackTermDefault(opIndex);
// Greedily match character-class instances, counting them in countRegister;
// spills the count to the term's frame slot at the reentry point so the
// backtrack handler can give instances back.
929 void generateCharacterClassGreedy(size_t opIndex)
931 YarrOp& op = m_ops[opIndex];
932 PatternTerm* term = op.m_term;
934 const RegisterID character = regT0;
935 const RegisterID countRegister = regT1;
937 move(TrustedImm32(0), countRegister);
941 failures.append(atEndOfInput());
// Inverted class: membership in the class terminates the run.
943 if (term->invert()) {
944 readCharacter(term->inputPosition - m_checked, character);
945 matchCharacterClass(character, failures, term->characterClass);
948 readCharacter(term->inputPosition - m_checked, character);
949 matchCharacterClass(character, matchDest, term->characterClass);
950 failures.append(jump());
951 matchDest.link(this);
954 add32(TrustedImm32(1), countRegister);
955 add32(TrustedImm32(1), index);
// A bounded quantifier exits the loop once the maximum count is reached.
956 if (term->quantityCount != quantifyInfinite) {
957 branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
958 failures.append(jump());
963 op.m_reentry = label();
965 storeToFrame(countRegister, term->frameLocation);
// Backtrack a greedy class match: give back one instance per attempt; fail
// onward once the stored count reaches zero.
967 void backtrackCharacterClassGreedy(size_t opIndex)
969 YarrOp& op = m_ops[opIndex];
970 PatternTerm* term = op.m_term;
972 const RegisterID countRegister = regT1;
974 m_backtrackingState.link(this);
976 loadFromFrame(term->frameLocation, countRegister);
// Nothing left to give back -- propagate the backtrack further.
977 m_backtrackingState.append(branchTest32(Zero, countRegister));
978 sub32(TrustedImm32(1), countRegister);
979 sub32(TrustedImm32(1), index);
// Non-greedy class forward path: match nothing, store a zero count; the
// backtrack handler (below) consumes instances on demand.
983 void generateCharacterClassNonGreedy(size_t opIndex)
985 YarrOp& op = m_ops[opIndex];
986 PatternTerm* term = op.m_term;
988 const RegisterID countRegister = regT1;
990 move(TrustedImm32(0), countRegister);
991 op.m_reentry = label();
992 storeToFrame(countRegister, term->frameLocation);
// Backtrack a non-greedy character-class match: attempt to consume one more
// class instance; when impossible, rewind index by the consumed count and
// fall through to earlier backtracking.
994 void backtrackCharacterClassNonGreedy(size_t opIndex)
996 YarrOp& op = m_ops[opIndex];
997 PatternTerm* term = op.m_term;
999 const RegisterID character = regT0;
1000 const RegisterID countRegister = regT1;
1002 JumpList nonGreedyFailures;
1004 m_backtrackingState.link(this);
1006 Label backtrackBegin(this);
1007 loadFromFrame(term->frameLocation, countRegister);
1009 nonGreedyFailures.append(atEndOfInput());
// Cannot consume past the quantifier's maximum count.
1010 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
1013 readCharacter(term->inputPosition - m_checked, character);
1014 matchCharacterClass(character, matchDest, term->characterClass);
// NOTE(review): the invert() conditional lines are missing from this
// extract; for an inverted class a class match is the failure case here.
1017 nonGreedyFailures.append(matchDest);
1019 nonGreedyFailures.append(jump());
1020 matchDest.link(this);
1023 add32(TrustedImm32(1), countRegister);
1024 add32(TrustedImm32(1), index);
1028 nonGreedyFailures.link(this);
// Rewind to the position before this term consumed anything.
1029 sub32(countRegister, index);
1030 m_backtrackingState.fallthrough();
// Matching code for a 'dot star enclosure' - a body of the form
// /.*...inner-content....*/ . Given a successful match of the inner
// content, extend the match outwards to the enclosing line boundaries
// (or the start/end of input), honoring any ^/$ anchors.
1033 void generateDotStarEnclosure(size_t opIndex)
1035 YarrOp& op = m_ops[opIndex];
1036 PatternTerm* term = op.m_term;
1038 const RegisterID character = regT0;
1039 const RegisterID matchPos = regT1;
1041 JumpList foundBeginningNewLine;
1042 JumpList saveStartIndex;
1043 JumpList foundEndingNewLine;
// Load the start position of the inner match into matchPos. For a
// fixed-size pattern it can be computed from the current index;
// otherwise it was previously stored to the first output slot.
1045 if (m_pattern.m_body->m_hasFixedSize) {
1046 move(index, matchPos);
1047 sub32(Imm32(m_checked), matchPos);
1049 load32(Address(output), matchPos);
// Scan backwards from the match start looking for a newline, stopping
// at the beginning of the input if none is found.
1051 saveStartIndex.append(branchTest32(Zero, matchPos));
1052 Label findBOLLoop(this);
1053 sub32(TrustedImm32(1), matchPos);
// Select an 8-bit or 16-bit character load based on the subject
// string's character size.
1054 if (m_charSize == Char8)
1055 load8(BaseIndex(input, matchPos, TimesOne, 0), character);
1057 load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
1058 matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
1059 branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
1060 saveStartIndex.append(jump());
1062 foundBeginningNewLine.link(this);
1063 add32(TrustedImm32(1), matchPos); // Advance past newline
1064 saveStartIndex.link(this);
// In non-multiline mode a ^ anchor requires the match to start at the
// very beginning of the input (matchPos must be zero).
1066 if (!m_pattern.m_multiline && term->anchors.bolAnchor)
1067 op.m_jumps.append(branchTest32(NonZero, matchPos));
// Record the real match start in the first output slot.
1069 store32(matchPos, Address(output));
// Scan forwards from the current position looking for a newline,
// stopping at the end of the input if none is found.
1071 move(index, matchPos);
1073 Label findEOLLoop(this);
1074 foundEndingNewLine.append(branch32(Equal, matchPos, length));
1075 if (m_charSize == Char8)
1076 load8(BaseIndex(input, matchPos, TimesOne, 0), character);
1078 load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
1079 matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
1080 add32(TrustedImm32(1), matchPos);
1083 foundEndingNewLine.link(this);
// In non-multiline mode a $ anchor requires the match to run all the
// way to the end of the input.
1085 if (!m_pattern.m_multiline && term->anchors.eolAnchor)
1086 op.m_jumps.append(branch32(NotEqual, matchPos, length));
// Continue matching from the end of the enclosure.
1088 move(matchPos, index);
// A dot star enclosure has no incremental backtracking of its own;
// delegate to the default term backtracking behavior.
1091 void backtrackDotStarEnclosure(size_t opIndex)
1093 backtrackTermDefault(opIndex);
1096 // Code generation/backtracking for simple terms
1097 // (pattern characters, character classes, and assertions).
1098 // These methods farm out work to the set of functions above.
// Dispatch generation of matching code for a single simple term to the
// appropriate generate* helper, selected by term type and quantifier.
// Parenthesized subpatterns and parenthetical assertions are handled
// structurally by the YarrOp stream, so reaching them here is a bug;
// back references cause fallback to the interpreter.
1099 void generateTerm(size_t opIndex)
1101 YarrOp& op = m_ops[opIndex];
1102 PatternTerm* term = op.m_term;
1104 switch (term->type) {
1105 case PatternTerm::TypePatternCharacter:
1106 switch (term->quantityType) {
1107 case QuantifierFixedCount:
// A fixed count of exactly one character has a dedicated fast path.
1108 if (term->quantityCount == 1)
1109 generatePatternCharacterOnce(opIndex);
1111 generatePatternCharacterFixed(opIndex);
1113 case QuantifierGreedy:
1114 generatePatternCharacterGreedy(opIndex);
1116 case QuantifierNonGreedy:
1117 generatePatternCharacterNonGreedy(opIndex);
1122 case PatternTerm::TypeCharacterClass:
1123 switch (term->quantityType) {
1124 case QuantifierFixedCount:
// As above, a single fixed repetition has a dedicated fast path.
1125 if (term->quantityCount == 1)
1126 generateCharacterClassOnce(opIndex);
1128 generateCharacterClassFixed(opIndex);
1130 case QuantifierGreedy:
1131 generateCharacterClassGreedy(opIndex);
1133 case QuantifierNonGreedy:
1134 generateCharacterClassNonGreedy(opIndex);
1139 case PatternTerm::TypeAssertionBOL:
1140 generateAssertionBOL(opIndex);
1143 case PatternTerm::TypeAssertionEOL:
1144 generateAssertionEOL(opIndex);
1147 case PatternTerm::TypeAssertionWordBoundary:
1148 generateAssertionWordBoundary(opIndex);
// Forward references match the empty string; no code to emit.
1151 case PatternTerm::TypeForwardReference:
// Parentheses are lowered to dedicated ops before this dispatch runs.
1154 case PatternTerm::TypeParenthesesSubpattern:
1155 case PatternTerm::TypeParentheticalAssertion:
1156 ASSERT_NOT_REACHED();
// Back references are not JIT-compiled; fall back to the interpreter.
1157 case PatternTerm::TypeBackReference:
1158 m_shouldFallBack = true;
1160 case PatternTerm::TypeDotStarEnclosure:
1161 generateDotStarEnclosure(opIndex);
// Dispatch generation of backtracking code for a single simple term to
// the appropriate backtrack* helper - the exact mirror of the dispatch
// in generateTerm(), selected by term type and quantifier.
1165 void backtrackTerm(size_t opIndex)
1167 YarrOp& op = m_ops[opIndex];
1168 PatternTerm* term = op.m_term;
1170 switch (term->type) {
1171 case PatternTerm::TypePatternCharacter:
1172 switch (term->quantityType) {
1173 case QuantifierFixedCount:
// A fixed count of exactly one character has a dedicated fast path.
1174 if (term->quantityCount == 1)
1175 backtrackPatternCharacterOnce(opIndex);
1177 backtrackPatternCharacterFixed(opIndex);
1179 case QuantifierGreedy:
1180 backtrackPatternCharacterGreedy(opIndex);
1182 case QuantifierNonGreedy:
1183 backtrackPatternCharacterNonGreedy(opIndex);
1188 case PatternTerm::TypeCharacterClass:
1189 switch (term->quantityType) {
1190 case QuantifierFixedCount:
// As above, a single fixed repetition has a dedicated fast path.
1191 if (term->quantityCount == 1)
1192 backtrackCharacterClassOnce(opIndex);
1194 backtrackCharacterClassFixed(opIndex);
1196 case QuantifierGreedy:
1197 backtrackCharacterClassGreedy(opIndex);
1199 case QuantifierNonGreedy:
1200 backtrackCharacterClassNonGreedy(opIndex);
1205 case PatternTerm::TypeAssertionBOL:
1206 backtrackAssertionBOL(opIndex);
1209 case PatternTerm::TypeAssertionEOL:
1210 backtrackAssertionEOL(opIndex);
1213 case PatternTerm::TypeAssertionWordBoundary:
1214 backtrackAssertionWordBoundary(opIndex);
// Forward references match the empty string; nothing to backtrack.
1217 case PatternTerm::TypeForwardReference:
// Parentheses are lowered to dedicated ops before this dispatch runs.
1220 case PatternTerm::TypeParenthesesSubpattern:
1221 case PatternTerm::TypeParentheticalAssertion:
1222 ASSERT_NOT_REACHED();
1224 case PatternTerm::TypeDotStarEnclosure:
1225 backtrackDotStarEnclosure(opIndex);
// Back references are not JIT-compiled; fall back to the interpreter.
1228 case PatternTerm::TypeBackReference:
1229 m_shouldFallBack = true;
1236 // Forwards generate the matching code.
1237 ASSERT(m_ops.size());
1241 YarrOp& op = m_ops[opIndex];
1245 generateTerm(opIndex);
1248 // OpBodyAlternativeBegin/Next/End
1250 // These nodes wrap the set of alternatives in the body of the regular expression.
1251 // There may be either one or two chains of OpBodyAlternative nodes, one representing
1252 // the 'once through' sequence of alternatives (if any exist), and one representing
1253 // the repeating alternatives (again, if any exist).
1255 // Upon normal entry to the Begin alternative, we will check that input is available.
1256 // Reentry to the Begin alternative will take place after the check has taken place,
1257 // and will assume that the input position has already been progressed as appropriate.
1259 // Entry to subsequent Next/End alternatives occurs when the prior alternative has
1260 // successfully completed a match - return a success state from JIT code.
1262 // Next alternatives allow for reentry optimized to suit backtracking from its
1263 // preceding alternative. It expects the input position to still be set to a position
1264 // appropriate to its predecessor, and it will only perform an input check if the
1265 // predecessor had a minimum size less than its own.
1267 // In the case 'once through' expressions, the End node will also have a reentry
1268 // point to jump to when the last alternative fails. Again, this expects the input
1269 // position to still reflect that expected by the prior alternative.
1270 case OpBodyAlternativeBegin: {
1271 PatternAlternative* alternative = op.m_alternative;
1273 // Upon entry at the head of the set of alternatives, check if input is available
1274 // to run the first alternative. (This progresses the input position).
1275 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
1276 // We will reenter after the check, and assume the input position to have been
1277 // set as appropriate to this alternative.
1278 op.m_reentry = label();
1280 m_checked += alternative->m_minimumSize;
1283 case OpBodyAlternativeNext:
1284 case OpBodyAlternativeEnd: {
1285 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1286 PatternAlternative* alternative = op.m_alternative;
1288 // If we get here, the prior alternative matched - return success.
1290 // Adjust the stack pointer to remove the pattern's frame.
1291 if (m_pattern.m_body->m_callFrameSize)
1292 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1294 // Load appropriate values into the return register and the first output
1295 // slot, and return. In the case of pattern with a fixed size, we will
1296 // not have yet set the value in the first
1297 ASSERT(index != returnRegister);
1298 if (m_pattern.m_body->m_hasFixedSize) {
1299 move(index, returnRegister);
1300 if (priorAlternative->m_minimumSize)
1301 sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
1302 store32(returnRegister, output);
1304 load32(Address(output), returnRegister);
1305 store32(index, Address(output, 4));
1308 // This is the divide between the tail of the prior alternative, above, and
1309 // the head of the subsequent alternative, below.
1311 if (op.m_op == OpBodyAlternativeNext) {
1312 // This is the reentry point for the Next alternative. We expect any code
1313 // that jumps here to do so with the input position matching that of the
1314 // PRIOR alternative, and we will only check input availability if we
1315 // need to progress it forwards.
1316 op.m_reentry = label();
1317 if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
1318 add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
1319 op.m_jumps.append(jumpIfNoAvailableInput());
1320 } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
1321 sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
1322 } else if (op.m_nextOp == notFound) {
1323 // This is the reentry point for the End of 'once through' alternatives,
1324 // jumped to when the last alternative fails to match.
1325 op.m_reentry = label();
1326 sub32(Imm32(priorAlternative->m_minimumSize), index);
1329 if (op.m_op == OpBodyAlternativeNext)
1330 m_checked += alternative->m_minimumSize;
1331 m_checked -= priorAlternative->m_minimumSize;
1335 // OpSimpleNestedAlternativeBegin/Next/End
1336 // OpNestedAlternativeBegin/Next/End
1338 // These nodes are used to handle sets of alternatives that are nested within
1339 // subpatterns and parenthetical assertions. The 'simple' forms are used where
1340 // we do not need to be able to backtrack back into any alternative other than
1341 // the last, the normal forms allow backtracking into any alternative.
1343 // Each Begin/Next node is responsible for planting an input check to ensure
1344 // sufficient input is available on entry. Next nodes additionally need to
1345 // jump to the end - Next nodes use the End node's m_jumps list to hold this
1348 // In the non-simple forms, successful alternative matches must store a
1349 // 'return address' using a DataLabelPtr, used to store the address to jump
1350 // to when backtracking, to get to the code for the appropriate alternative.
1351 case OpSimpleNestedAlternativeBegin:
1352 case OpNestedAlternativeBegin: {
1353 PatternTerm* term = op.m_term;
1354 PatternAlternative* alternative = op.m_alternative;
1355 PatternDisjunction* disjunction = term->parentheses.disjunction;
1357 // Calculate how much input we need to check for, and if non-zero check.
1358 op.m_checkAdjust = alternative->m_minimumSize;
1359 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1360 op.m_checkAdjust -= disjunction->m_minimumSize;
1361 if (op.m_checkAdjust)
1362 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1364 m_checked += op.m_checkAdjust;
1367 case OpSimpleNestedAlternativeNext:
1368 case OpNestedAlternativeNext: {
1369 PatternTerm* term = op.m_term;
1370 PatternAlternative* alternative = op.m_alternative;
1371 PatternDisjunction* disjunction = term->parentheses.disjunction;
1373 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1374 if (op.m_op == OpNestedAlternativeNext) {
1375 unsigned parenthesesFrameLocation = term->frameLocation;
1376 unsigned alternativeFrameLocation = parenthesesFrameLocation;
1377 if (term->quantityType != QuantifierFixedCount)
1378 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1379 op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1382 // If we reach here then the last alternative has matched - jump to the
1383 // End node, to skip over any further alternatives.
1385 // FIXME: this is logically O(N^2) (though N can be expected to be very
1386 // small). We could avoid this either by adding an extra jump to the JIT
1387 // data structures, or by making backtracking code that jumps to Next
1388 // alternatives are responsible for checking that input is available (if
1389 // we didn't need to plant the input checks, then m_jumps would be free).
1390 YarrOp* endOp = &m_ops[op.m_nextOp];
1391 while (endOp->m_nextOp != notFound) {
1392 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1393 endOp = &m_ops[endOp->m_nextOp];
1395 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1396 endOp->m_jumps.append(jump());
1398 // This is the entry point for the next alternative.
1399 op.m_reentry = label();
1401 // Calculate how much input we need to check for, and if non-zero check.
1402 op.m_checkAdjust = alternative->m_minimumSize;
1403 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1404 op.m_checkAdjust -= disjunction->m_minimumSize;
1405 if (op.m_checkAdjust)
1406 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1408 YarrOp& lastOp = m_ops[op.m_previousOp];
1409 m_checked -= lastOp.m_checkAdjust;
1410 m_checked += op.m_checkAdjust;
1413 case OpSimpleNestedAlternativeEnd:
1414 case OpNestedAlternativeEnd: {
1415 PatternTerm* term = op.m_term;
1417 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1418 if (op.m_op == OpNestedAlternativeEnd) {
1419 unsigned parenthesesFrameLocation = term->frameLocation;
1420 unsigned alternativeFrameLocation = parenthesesFrameLocation;
1421 if (term->quantityType != QuantifierFixedCount)
1422 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1423 op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1426 // If this set of alternatives contains more than one alternative,
1427 // then the Next nodes will have planted jumps to the End, and added
1428 // them to this node's m_jumps list.
1429 op.m_jumps.link(this);
1432 YarrOp& lastOp = m_ops[op.m_previousOp];
1433 m_checked -= lastOp.m_checkAdjust;
1437 // OpParenthesesSubpatternOnceBegin/End
1439 // These nodes support (optionally) capturing subpatterns, that have a
1440 // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
1441 case OpParenthesesSubpatternOnceBegin: {
1442 PatternTerm* term = op.m_term;
1443 unsigned parenthesesFrameLocation = term->frameLocation;
1444 const RegisterID indexTemporary = regT0;
1445 ASSERT(term->quantityCount == 1);
1447 // Upon entry to a Greedy quantified set of parentheses store the index.
1448 // We'll use this for two purposes:
1449 // - To indicate which iteration we are on of matching the remainder of
1450 // the expression after the parentheses - the first, including the
1451 // match within the parentheses, or the second having skipped over them.
1452 // - To check for empty matches, which must be rejected.
1454 // At the head of a NonGreedy set of parentheses we'll immediately set the
1455 // value on the stack to -1 (indicating a match skipping the subpattern),
1456 // and plant a jump to the end. We'll also plant a label to backtrack to
1457 // to reenter the subpattern later, with a store to set up index on the
1458 // second iteration.
1460 // FIXME: for capturing parens, could use the index in the capture array?
1461 if (term->quantityType == QuantifierGreedy)
1462 storeToFrame(index, parenthesesFrameLocation);
1463 else if (term->quantityType == QuantifierNonGreedy) {
1464 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1465 op.m_jumps.append(jump());
1466 op.m_reentry = label();
1467 storeToFrame(index, parenthesesFrameLocation);
1470 // If the parentheses are capturing, store the starting index value to the
1471 // captures array, offsetting as necessary.
1473 // FIXME: could avoid offsetting this value in JIT code, apply
1474 // offsets only afterwards, at the point the results array is
1476 if (term->capture()) {
1477 int offsetId = term->parentheses.subpatternId << 1;
1478 int inputOffset = term->inputPosition - m_checked;
1479 if (term->quantityType == QuantifierFixedCount)
1480 inputOffset -= term->parentheses.disjunction->m_minimumSize;
1482 move(index, indexTemporary);
1483 add32(Imm32(inputOffset), indexTemporary);
1484 store32(indexTemporary, Address(output, offsetId * sizeof(int)));
1486 store32(index, Address(output, offsetId * sizeof(int)));
1490 case OpParenthesesSubpatternOnceEnd: {
1491 PatternTerm* term = op.m_term;
1492 unsigned parenthesesFrameLocation = term->frameLocation;
1493 const RegisterID indexTemporary = regT0;
1494 ASSERT(term->quantityCount == 1);
1496 // For Greedy/NonGreedy quantified parentheses, we must reject zero length
1497 // matches. If the minimum size is known to be non-zero we need not check.
1498 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
1499 op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));
1501 // If the parentheses are capturing, store the ending index value to the
1502 // captures array, offsetting as necessary.
1504 // FIXME: could avoid offsetting this value in JIT code, apply
1505 // offsets only afterwards, at the point the results array is
1507 if (term->capture()) {
1508 int offsetId = (term->parentheses.subpatternId << 1) + 1;
1509 int inputOffset = term->inputPosition - m_checked;
1511 move(index, indexTemporary);
1512 add32(Imm32(inputOffset), indexTemporary);
1513 store32(indexTemporary, Address(output, offsetId * sizeof(int)));
1515 store32(index, Address(output, offsetId * sizeof(int)));
1518 // If the parentheses are quantified Greedy then add a label to jump back
1519 // to if get a failed match from after the parentheses. For NonGreedy
1520 // parentheses, link the jump from before the subpattern to here.
1521 if (term->quantityType == QuantifierGreedy)
1522 op.m_reentry = label();
1523 else if (term->quantityType == QuantifierNonGreedy) {
1524 YarrOp& beginOp = m_ops[op.m_previousOp];
1525 beginOp.m_jumps.link(this);
1530 // OpParenthesesSubpatternTerminalBegin/End
1531 case OpParenthesesSubpatternTerminalBegin: {
1532 PatternTerm* term = op.m_term;
1533 ASSERT(term->quantityType == QuantifierGreedy);
1534 ASSERT(term->quantityCount == quantifyInfinite);
1535 ASSERT(!term->capture());
1537 // Upon entry set a label to loop back to.
1538 op.m_reentry = label();
1540 // Store the start index of the current match; we need to reject zero
1542 storeToFrame(index, term->frameLocation);
1545 case OpParenthesesSubpatternTerminalEnd: {
1546 PatternTerm* term = op.m_term;
1548 // Check for zero length matches - if the match is non-zero, then we
1549 // can accept it & loop back up to the head of the subpattern.
1550 YarrOp& beginOp = m_ops[op.m_previousOp];
1551 branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry);
1553 // Reject the match - backtrack back into the subpattern.
1554 op.m_jumps.append(jump());
1556 // This is the entry point to jump to when we stop matching - we will
1557 // do so once the subpattern cannot match any more.
1558 op.m_reentry = label();
1562 // OpParentheticalAssertionBegin/End
1563 case OpParentheticalAssertionBegin: {
1564 PatternTerm* term = op.m_term;
1566 // Store the current index - assertions should not update index, so
1567 // we will need to restore it upon a successful match.
1568 unsigned parenthesesFrameLocation = term->frameLocation;
1569 storeToFrame(index, parenthesesFrameLocation);
1572 op.m_checkAdjust = m_checked - term->inputPosition;
1573 if (op.m_checkAdjust)
1574 sub32(Imm32(op.m_checkAdjust), index);
1576 m_checked -= op.m_checkAdjust;
1579 case OpParentheticalAssertionEnd: {
1580 PatternTerm* term = op.m_term;
1582 // Restore the input index value.
1583 unsigned parenthesesFrameLocation = term->frameLocation;
1584 loadFromFrame(parenthesesFrameLocation, index);
1586 // If inverted, a successful match of the assertion must be treated
1587 // as a failure, so jump to backtracking.
1588 if (term->invert()) {
1589 op.m_jumps.append(jump());
1590 op.m_reentry = label();
1593 YarrOp& lastOp = m_ops[op.m_previousOp];
1594 m_checked += lastOp.m_checkAdjust;
1599 if (m_pattern.m_body->m_callFrameSize)
1600 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1601 move(TrustedImm32(-1), returnRegister);
1607 } while (opIndex < m_ops.size());
1612 // Backwards generate the backtracking code.
1613 size_t opIndex = m_ops.size();
1618 YarrOp& op = m_ops[opIndex];
1622 backtrackTerm(opIndex);
1625 // OpBodyAlternativeBegin/Next/End
1627 // For each Begin/Next node representing an alternative, we need to decide what to do
1628 // in two circumstances:
1629 // - If we backtrack back into this node, from within the alternative.
1630 // - If the input check at the head of the alternative fails (if this exists).
1632 // We treat these two cases differently since in the former case we have slightly
1633 // more information - since we are backtracking out of a prior alternative we know
1634 // that at least enough input was available to run it. For example, given the regular
1635 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1636 // character match of 'a'), then we need not perform an additional input availability
1637 // check before running the second alternative.
1639 // Backtracking required differs for the last alternative, which in the case of the
1640 // repeating set of alternatives must loop. The code generated for the last alternative
1641 // will also be used to handle all input check failures from any prior alternatives -
1642 // these require similar functionality, in seeking the next available alternative for
1643 // which there is sufficient input.
1645 // Since backtracking of all other alternatives simply requires us to link backtracks
1646 // to the reentry point for the subsequent alternative, we will only be generating any
1647 // code when backtracking the last alternative.
1648 case OpBodyAlternativeBegin:
1649 case OpBodyAlternativeNext: {
1650 PatternAlternative* alternative = op.m_alternative;
1652 if (op.m_op == OpBodyAlternativeNext) {
1653 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1654 m_checked += priorAlternative->m_minimumSize;
1656 m_checked -= alternative->m_minimumSize;
1658 // Is this the last alternative? If not, then if we backtrack to this point we just
1659 // need to jump to try to match the next alternative.
1660 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
1661 m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
1664 YarrOp& endOp = m_ops[op.m_nextOp];
1666 YarrOp* beginOp = &op;
1667 while (beginOp->m_op != OpBodyAlternativeBegin) {
1668 ASSERT(beginOp->m_op == OpBodyAlternativeNext);
1669 beginOp = &m_ops[beginOp->m_previousOp];
1672 bool onceThrough = endOp.m_nextOp == notFound;
1674 // First, generate code to handle cases where we backtrack out of an attempted match
1675 // of the last alternative. If this is a 'once through' set of alternatives then we
1676 // have nothing to do - link this straight through to the End.
1678 m_backtrackingState.linkTo(endOp.m_reentry, this);
1680 // If we don't need to move the input position, and the pattern has a fixed size
1681 // (in which case we omit the store of the start index until the pattern has matched)
1682 // then we can just link the backtrack out of the last alternative straight to the
1683 // head of the first alternative.
1684 if (m_pattern.m_body->m_hasFixedSize
1685 && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
1686 && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
1687 m_backtrackingState.linkTo(beginOp->m_reentry, this);
1689 // We need to generate a trampoline of code to execute before looping back
1690 // around to the first alternative.
1691 m_backtrackingState.link(this);
1693 // If the pattern size is not fixed, then store the start index, for use if we match.
1694 if (!m_pattern.m_body->m_hasFixedSize) {
1695 if (alternative->m_minimumSize == 1)
1696 store32(index, Address(output));
1699 if (alternative->m_minimumSize)
1700 sub32(Imm32(alternative->m_minimumSize - 1), regT0);
1702 add32(Imm32(1), regT0);
1703 store32(regT0, Address(output));
1707 // Generate code to loop. Check whether the last alternative is longer than the
1708 // first (e.g. /a|xy/ or /a|xyz/).
1709 if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
1710 // We want to loop, and increment input position. If the delta is 1, it is
1711 // already correctly incremented, if more than one then decrement as appropriate.
1712 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
1715 sub32(Imm32(delta - 1), index);
1716 jump(beginOp->m_reentry);
1718 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1719 // be sufficient input available to handle this, so just fall through.
1720 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
1721 if (delta != 0xFFFFFFFFu) {
1722 // We need to check input because we are incrementing the input.
1723 add32(Imm32(delta + 1), index);
1724 checkInput().linkTo(beginOp->m_reentry, this);
1730 // We can reach this point in the code in two ways:
1731 // - Fallthrough from the code above (a repeating alternative backtracked out of its
1732 // last alternative, and did not have sufficient input to run the first).
1733 // - We will loop back up to the following label when a repeating alternative loops,
1734 // following a failed input check.
1736 // Either way, we have just failed the input check for the first alternative.
1737 Label firstInputCheckFailed(this);
1739 // Generate code to handle input check failures from alternatives except the last.
1740 // prevOp is the alternative we're handling a bail out from (initially Begin), and
1741 // nextOp is the alternative we will be attempting to reenter into.
1743 // We will link input check failures from the forwards matching path back to the code
1744 // that can handle them.
1745 YarrOp* prevOp = beginOp;
1746 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
1747 while (nextOp->m_op != OpBodyAlternativeEnd) {
1748 prevOp->m_jumps.link(this);
1750 // We only get here if an input check fails, it is only worth checking again
1751 // if the next alternative has a minimum size less than the last.
1752 if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
1753 // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1754 // subtract delta back out, and reduce this code. Should performance test
1755 // the benefit of this.
1756 unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
1757 sub32(Imm32(delta), index);
1758 Jump fail = jumpIfNoAvailableInput();
1759 add32(Imm32(delta), index);
1760 jump(nextOp->m_reentry);
1762 } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
1763 add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
1765 nextOp = &m_ops[nextOp->m_nextOp];
1768 // We fall through to here if there is insufficient input to run the last alternative.
1770 // If there is insufficient input to run the last alternative, then for 'once through'
1771 // alternatives we are done - just jump back up into the forwards matching path at the End.
1773 op.m_jumps.linkTo(endOp.m_reentry, this);
1774 jump(endOp.m_reentry);
1778 // For repeating alternatives, link any input check failure from the last alternative to
1780 op.m_jumps.link(this);
1782 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
1784 // Check for cases where input position is already incremented by 1 for the last
1785 // alternative (this is particularly useful where the minimum size of the body
1786 // disjunction is 0, e.g. /a*|b/).
1787 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
1788 // index is already incremented by 1, so just store it now!
1789 store32(index, Address(output));
1790 needsToUpdateMatchStart = false;
1793 // Check whether there is sufficient input to loop. Increment the input position by
1794 // one, and check. Also add in the minimum disjunction size before checking - there
1795 // is no point in looping if we're just going to fail all the input checks around
1796 // the next iteration.
1797 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
1798 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
1799 // If the last alternative had the same minimum size as the disjunction,
1800 // just simply increment input pos by 1, no adjustment based on minimum size.
1801 add32(Imm32(1), index);
1803 // If the minimum for the last alternative was one greater than that
1804 // for the disjunction, we're already progressed by 1, nothing to do!
1805 unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
1807 sub32(Imm32(delta), index);
1809 Jump matchFailed = jumpIfNoAvailableInput();
1811 if (needsToUpdateMatchStart) {
1812 if (!m_pattern.m_body->m_minimumSize)
1813 store32(index, Address(output));
1816 sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
1817 store32(regT0, Address(output));
1821 // Calculate how much more input the first alternative requires than the minimum
1822 // for the body as a whole. If no more is needed then we dont need an additional
1823 // input check here - jump straight back up to the start of the first alternative.
1824 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
1825 jump(beginOp->m_reentry);
1827 if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
1828 add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
1830 sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
1831 checkInput().linkTo(beginOp->m_reentry, this);
1832 jump(firstInputCheckFailed);
1835 // We jump to here if we iterate to the point that there is insufficient input to
1836 // run any matches, and need to return a failure state from JIT code.
1837 matchFailed.link(this);
1839 if (m_pattern.m_body->m_callFrameSize)
1840 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1841 move(TrustedImm32(-1), returnRegister);
1845 case OpBodyAlternativeEnd: {
1846 // We should never backtrack back into a body disjunction.
1847 ASSERT(m_backtrackingState.isEmpty());
1849 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1850 m_checked += priorAlternative->m_minimumSize;
1854 // OpSimpleNestedAlternativeBegin/Next/End
1855 // OpNestedAlternativeBegin/Next/End
1857 // Generate code for when we backtrack back out of an alternative into
1858 // a Begin or Next node, or when the entry input count check fails. If
1859 // there are more alternatives we need to jump to the next alternative,
1860 // if not we backtrack back out of the current set of parentheses.
1862 // In the case of non-simple nested assertions we need to also link the
1863 // 'return address' appropriately to backtrack back out into the correct
1865 case OpSimpleNestedAlternativeBegin:
1866 case OpSimpleNestedAlternativeNext:
1867 case OpNestedAlternativeBegin:
1868 case OpNestedAlternativeNext: {
1869 YarrOp& nextOp = m_ops[op.m_nextOp];
1870 bool isBegin = op.m_previousOp == notFound;
1871 bool isLastAlternative = nextOp.m_nextOp == notFound;
1872 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
1873 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
1875 // Treat an input check failure the same as a failed match.
1876 m_backtrackingState.append(op.m_jumps);
1878 // Set the backtracks to jump to the appropriate place. We may need
1879 // to link the backtracks in one of three different way depending on
1880 // the type of alternative we are dealing with:
1881 // - A single alternative, with no siblings.
1882 // - The last alternative of a set of two or more.
1883 // - An alternative other than the last of a set of two or more.
1885 // In the case of a single alternative on its own, we don't need to
1886 // jump anywhere - if the alternative fails to match we can just
1887 // continue to backtrack out of the parentheses without jumping.
1889 // In the case of the last alternative in a set of more than one, we
1890 // need to jump to return back out to the beginning. We'll do so by
1891 // adding a jump to the End node's m_jumps list, and linking this
1892 // when we come to generate the Begin node. For alternatives other
1893 // than the last, we need to jump to the next alternative.
1895 // If the alternative had adjusted the input position we must link
1896 // backtracking to here, correct, and then jump on. If not we can
1897 // link the backtracks directly to their destination.
1898 if (op.m_checkAdjust) {
1899 // Handle the cases where we need to link the backtracks here.
1900 m_backtrackingState.link(this);
1901 sub32(Imm32(op.m_checkAdjust), index);
1902 if (!isLastAlternative) {
1903 // An alternative that is not the last should jump to its successor.
1904 jump(nextOp.m_reentry);
1905 } else if (!isBegin) {
1906 // The last of more than one alternatives must jump back to the beginning.
1907 nextOp.m_jumps.append(jump());
1909 // A single alternative on its own can fall through.
1910 m_backtrackingState.fallthrough();
1913 // Handle the cases where we can link the backtracks directly to their destinations.
1914 if (!isLastAlternative) {
1915 // An alternative that is not the last should jump to its successor.
1916 m_backtrackingState.linkTo(nextOp.m_reentry, this);
1917 } else if (!isBegin) {
1918 // The last of more than one alternatives must jump back to the beginning.
1919 m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
1921 // In the case of a single alternative on its own do nothing - it can fall through.
1924 // At this point we've handled the backtracking back into this node.
1925 // Now link any backtracks that need to jump to here.
1927 // For non-simple alternatives, link the alternative's 'return address'
1928 // so that we backtrack back out into the previous alternative.
1929 if (op.m_op == OpNestedAlternativeNext)
1930 m_backtrackingState.append(op.m_returnAddress);
1932 // If there is more than one alternative, then the last alternative will
1933 // have planted a jump to be linked to the end. This jump was added to the
1934 // End node's m_jumps list. If we are back at the beginning, link it here.
1936 YarrOp* endOp = &m_ops[op.m_nextOp];
1937 while (endOp->m_nextOp != notFound) {
1938 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1939 endOp = &m_ops[endOp->m_nextOp];
1941 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1942 m_backtrackingState.append(endOp->m_jumps);
1946 YarrOp& lastOp = m_ops[op.m_previousOp];
1947 m_checked += lastOp.m_checkAdjust;
1949 m_checked -= op.m_checkAdjust;
1952 case OpSimpleNestedAlternativeEnd:
1953 case OpNestedAlternativeEnd: {
1954 PatternTerm* term = op.m_term;
1956 // If we backtrack into the end of a simple subpattern do nothing;
1957 // just continue through into the last alternative. If we backtrack
1958 // into the end of a non-simple set of alternatives we need to jump
1959 // to the backtracking return address set up during generation.
1960 if (op.m_op == OpNestedAlternativeEnd) {
1961 m_backtrackingState.link(this);
1963 // Plant a jump to the return address.
1964 unsigned parenthesesFrameLocation = term->frameLocation;
1965 unsigned alternativeFrameLocation = parenthesesFrameLocation;
1966 if (term->quantityType != QuantifierFixedCount)
1967 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1968 loadFromFrameAndJump(alternativeFrameLocation);
1970 // Link the DataLabelPtr associated with the end of the last
1971 // alternative to this point.
1972 m_backtrackingState.append(op.m_returnAddress);
1975 YarrOp& lastOp = m_ops[op.m_previousOp];
1976 m_checked += lastOp.m_checkAdjust;
1980 // OpParenthesesSubpatternOnceBegin/End
1982 // When we are backtracking back out of a capturing subpattern we need
1983 // to clear the start index in the matches output array, to record that
1984 // this subpattern has not been captured.
1986 // When backtracking back out of a Greedy quantified subpattern we need
1987 // to catch this, and try running the remainder of the alternative after
1988 // the subpattern again, skipping the parentheses.
1990 // Upon backtracking back into a quantified set of parentheses we need to
1991 // check whether we were currently skipping the subpattern. If not, we
1992 // can backtrack into them, if we were we need to either backtrack back
1993 // out of the start of the parentheses, or jump back to the forwards
1994 // matching start, depending of whether the match is Greedy or NonGreedy.
1995 case OpParenthesesSubpatternOnceBegin: {
1996 PatternTerm* term = op.m_term;
1997 ASSERT(term->quantityCount == 1);
1999 // We only need to backtrack to this point if capturing or greedy.
2000 if (term->capture() || term->quantityType == QuantifierGreedy) {
2001 m_backtrackingState.link(this);
2003 // If capturing, clear the capture (we only need to reset start).
2004 if (term->capture())
2005 store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int)));
2007 // If Greedy, jump to the end.
2008 if (term->quantityType == QuantifierGreedy) {
2009 // Clear the flag in the stackframe indicating we ran through the subpattern.
2010 unsigned parenthesesFrameLocation = term->frameLocation;
2011 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
2012 // Jump to after the parentheses, skipping the subpattern.
2013 jump(m_ops[op.m_nextOp].m_reentry);
2014 // A backtrack from after the parentheses, when skipping the subpattern,
2015 // will jump back to here.
2016 op.m_jumps.link(this);
2019 m_backtrackingState.fallthrough();
2023 case OpParenthesesSubpatternOnceEnd: {
2024 PatternTerm* term = op.m_term;
2026 if (term->quantityType != QuantifierFixedCount) {
2027 m_backtrackingState.link(this);
2029 // Check whether we should backtrack back into the parentheses, or if we
2030 // are currently in a state where we had skipped over the subpattern
2031 // (in which case the flag value on the stack will be -1).
2032 unsigned parenthesesFrameLocation = term->frameLocation;
2033 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
2035 if (term->quantityType == QuantifierGreedy) {
2036 // For Greedy parentheses, we skip after having already tried going
2037 // through the subpattern, so if we get here we're done.
2038 YarrOp& beginOp = m_ops[op.m_previousOp];
2039 beginOp.m_jumps.append(hadSkipped);
2041 // For NonGreedy parentheses, we try skipping the subpattern first,
2042 // so if we get here we need to try running through the subpattern
2043 // next. Jump back to the start of the parentheses in the forwards
2045 ASSERT(term->quantityType == QuantifierNonGreedy);
2046 YarrOp& beginOp = m_ops[op.m_previousOp];
2047 hadSkipped.linkTo(beginOp.m_reentry, this);
2050 m_backtrackingState.fallthrough();
2053 m_backtrackingState.append(op.m_jumps);
2057 // OpParenthesesSubpatternTerminalBegin/End
2059 // Terminal subpatterns will always match - there is nothing after them to
2060 // force a backtrack, and they have a minimum count of 0, and as such will
2061 // always produce an acceptable result.
2062 case OpParenthesesSubpatternTerminalBegin: {
2063 // We will backtrack to this point once the subpattern cannot match any
2064 // more. Since no match is accepted as a successful match (we are Greedy
2065 // quantified with a minimum of zero) jump back to the forwards matching
2067 YarrOp& endOp = m_ops[op.m_nextOp];
2068 m_backtrackingState.linkTo(endOp.m_reentry, this);
2071 case OpParenthesesSubpatternTerminalEnd:
2072 // We should never be backtracking to here (hence the 'terminal' in the name).
2073 ASSERT(m_backtrackingState.isEmpty());
2074 m_backtrackingState.append(op.m_jumps);
2077 // OpParentheticalAssertionBegin/End
2078 case OpParentheticalAssertionBegin: {
2079 PatternTerm* term = op.m_term;
2080 YarrOp& endOp = m_ops[op.m_nextOp];
2082 // We need to handle the backtracks upon backtracking back out
2083 // of a parenthetical assertion if either we need to correct
2084 // the input index, or the assertion was inverted.
2085 if (op.m_checkAdjust || term->invert()) {
2086 m_backtrackingState.link(this);
2088 if (op.m_checkAdjust)
2089 add32(Imm32(op.m_checkAdjust), index);
2091 // In an inverted assertion failure to match the subpattern
2092 // is treated as a successful match - jump to the end of the
2093 // subpattern. We already have adjusted the input position
2094 // back to that before the assertion, which is correct.
2096 jump(endOp.m_reentry);
2098 m_backtrackingState.fallthrough();
2101 // The End node's jump list will contain any backtracks into
2102 // the end of the assertion. Also, if inverted, we will have
2103 // added the failure caused by a successful match to this.
2104 m_backtrackingState.append(endOp.m_jumps);
2106 m_checked += op.m_checkAdjust;
2109 case OpParentheticalAssertionEnd: {
2110 // FIXME: We should really be clearing any nested subpattern
2111 // matches on bailing out from after the pattern. Firefox has
2112 // this bug too (presumably because they use YARR!)
2114 // Never backtrack into an assertion; later failures bail to before the begin.
2115 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
2117 YarrOp& lastOp = m_ops[op.m_previousOp];
2118 m_checked -= lastOp.m_checkAdjust;
2129 // Compilation methods:
2130 // ====================
2132 // opCompileParenthesesSubpattern
2133 // Emits ops for a subpattern (set of parentheses). These consist
2134 // of a set of alternatives wrapped in an outer set of nodes for
2136 // Supported types of parentheses are 'Once' (quantityCount == 1)
2137 // and 'Terminal' (non-capturing parentheses quantified as greedy
2139 // Alternatives will use the 'Simple' set of ops if either the
2140 // subpattern is terminal (in which case we will never need to
2141 // backtrack), or if the subpattern only contains one alternative.
2142 void opCompileParenthesesSubpattern(PatternTerm* term)
2144 YarrOpCode parenthesesBeginOpCode;
2145 YarrOpCode parenthesesEndOpCode;
2146 YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
2147 YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
2148 YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
2150 // We can currently only compile quantity 1 subpatterns that are
2151 // not copies. We generate a copy in the case of a range quantifier,
2152 // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2153 // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
2154 // comes where the subpattern is capturing, in which case we would
2155 // need to restore the capture from the first subpattern upon a
2156 // failure in the second.
2157 if (term->quantityCount == 1 && !term->parentheses.isCopy) {
2158 // Select the 'Once' nodes.
2159 parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
2160 parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
2162 // If there is more than one alternative we cannot use the 'simple' nodes.
2163 if (term->parentheses.disjunction->m_alternatives.size() != 1) {
2164 alternativeBeginOpCode = OpNestedAlternativeBegin;
2165 alternativeNextOpCode = OpNestedAlternativeNext;
2166 alternativeEndOpCode = OpNestedAlternativeEnd;
2168 } else if (term->parentheses.isTerminal) {
2169 // Select the 'Terminal' nodes.
2170 parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
2171 parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
2173 // This subpattern is not supported by the JIT.
2174 m_shouldFallBack = true;
2178 size_t parenBegin = m_ops.size();
2179 m_ops.append(parenthesesBeginOpCode);
2181 m_ops.append(alternativeBeginOpCode);
2182 m_ops.last().m_previousOp = notFound;
2183 m_ops.last().m_term = term;
2184 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
2185 for (unsigned i = 0; i < alternatives.size(); ++i) {
2186 size_t lastOpIndex = m_ops.size() - 1;
2188 PatternAlternative* nestedAlternative = alternatives[i];
2189 opCompileAlternative(nestedAlternative);
2191 size_t thisOpIndex = m_ops.size();
2192 m_ops.append(YarrOp(alternativeNextOpCode));
2194 YarrOp& lastOp = m_ops[lastOpIndex];
2195 YarrOp& thisOp = m_ops[thisOpIndex];
2197 lastOp.m_alternative = nestedAlternative;
2198 lastOp.m_nextOp = thisOpIndex;
2199 thisOp.m_previousOp = lastOpIndex;
2200 thisOp.m_term = term;
2202 YarrOp& lastOp = m_ops.last();
2203 ASSERT(lastOp.m_op == alternativeNextOpCode);
2204 lastOp.m_op = alternativeEndOpCode;
2205 lastOp.m_alternative = 0;
2206 lastOp.m_nextOp = notFound;
2208 size_t parenEnd = m_ops.size();
2209 m_ops.append(parenthesesEndOpCode);
2211 m_ops[parenBegin].m_term = term;
2212 m_ops[parenBegin].m_previousOp = notFound;
2213 m_ops[parenBegin].m_nextOp = parenEnd;
2214 m_ops[parenEnd].m_term = term;
2215 m_ops[parenEnd].m_previousOp = parenBegin;
2216 m_ops[parenEnd].m_nextOp = notFound;
2219 // opCompileParentheticalAssertion
2220 // Emits ops for a parenthetical assertion. These consist of an
2221 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2222 // the alternatives, with these wrapped by an outer pair of
2223 // OpParentheticalAssertionBegin/End nodes.
2224 // We can always use the OpSimpleNestedAlternative nodes in the
2225 // case of parenthetical assertions since these only ever match
2226 // once, and will never backtrack back into the assertion.
2227 void opCompileParentheticalAssertion(PatternTerm* term)
2229 size_t parenBegin = m_ops.size();
2230 m_ops.append(OpParentheticalAssertionBegin);
2232 m_ops.append(OpSimpleNestedAlternativeBegin);
2233 m_ops.last().m_previousOp = notFound;
2234 m_ops.last().m_term = term;
2235 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
2236 for (unsigned i = 0; i < alternatives.size(); ++i) {
2237 size_t lastOpIndex = m_ops.size() - 1;
2239 PatternAlternative* nestedAlternative = alternatives[i];
2240 opCompileAlternative(nestedAlternative);
2242 size_t thisOpIndex = m_ops.size();
2243 m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
2245 YarrOp& lastOp = m_ops[lastOpIndex];
2246 YarrOp& thisOp = m_ops[thisOpIndex];
2248 lastOp.m_alternative = nestedAlternative;
2249 lastOp.m_nextOp = thisOpIndex;
2250 thisOp.m_previousOp = lastOpIndex;
2251 thisOp.m_term = term;
2253 YarrOp& lastOp = m_ops.last();
2254 ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
2255 lastOp.m_op = OpSimpleNestedAlternativeEnd;
2256 lastOp.m_alternative = 0;
2257 lastOp.m_nextOp = notFound;
2259 size_t parenEnd = m_ops.size();
2260 m_ops.append(OpParentheticalAssertionEnd);
2262 m_ops[parenBegin].m_term = term;
2263 m_ops[parenBegin].m_previousOp = notFound;
2264 m_ops[parenBegin].m_nextOp = parenEnd;
2265 m_ops[parenEnd].m_term = term;
2266 m_ops[parenEnd].m_previousOp = parenBegin;
2267 m_ops[parenEnd].m_nextOp = notFound;
2270 // opCompileAlternative
2271 // Called to emit nodes for all terms in an alternative.
2272 void opCompileAlternative(PatternAlternative* alternative)
2274 optimizeAlternative(alternative);
2276 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
2277 PatternTerm* term = &alternative->m_terms[i];
2279 switch (term->type) {
2280 case PatternTerm::TypeParenthesesSubpattern:
2281 opCompileParenthesesSubpattern(term);
2284 case PatternTerm::TypeParentheticalAssertion:
2285 opCompileParentheticalAssertion(term);
2295 // This method compiles the body disjunction of the regular expression.
2296 // The body consists of two sets of alternatives - zero or more 'once
2297 // through' (BOL anchored) alternatives, followed by zero or more
2298 // repeated alternatives.
2299 // For each of these two sets of alteratives, if not empty they will be
2300 // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
2301 // 'begin' node referencing the first alternative, and 'next' nodes
2302 // referencing any further alternatives. The begin/next/end nodes are
2303 // linked together in a doubly linked list. In the case of repeating
2304 // alternatives, the end node is also linked back to the beginning.
2305 // If no repeating alternatives exist, then a OpMatchFailed node exists
2306 // to return the failing result.
2307 void opCompileBody(PatternDisjunction* disjunction)
2309 Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives;
2310 size_t currentAlternativeIndex = 0;
2312 // Emit the 'once through' alternatives.
2313 if (alternatives.size() && alternatives[0]->onceThrough()) {
2314 m_ops.append(YarrOp(OpBodyAlternativeBegin));
2315 m_ops.last().m_previousOp = notFound;
2318 size_t lastOpIndex = m_ops.size() - 1;
2319 PatternAlternative* alternative = alternatives[currentAlternativeIndex];
2320 opCompileAlternative(alternative);
2322 size_t thisOpIndex = m_ops.size();
2323 m_ops.append(YarrOp(OpBodyAlternativeNext));
2325 YarrOp& lastOp = m_ops[lastOpIndex];
2326 YarrOp& thisOp = m_ops[thisOpIndex];
2328 lastOp.m_alternative = alternative;
2329 lastOp.m_nextOp = thisOpIndex;
2330 thisOp.m_previousOp = lastOpIndex;
2332 ++currentAlternativeIndex;
2333 } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
2335 YarrOp& lastOp = m_ops.last();
2337 ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2338 lastOp.m_op = OpBodyAlternativeEnd;
2339 lastOp.m_alternative = 0;
2340 lastOp.m_nextOp = notFound;
2343 if (currentAlternativeIndex == alternatives.size()) {
2344 m_ops.append(YarrOp(OpMatchFailed));
2348 // Emit the repeated alternatives.
2349 size_t repeatLoop = m_ops.size();
2350 m_ops.append(YarrOp(OpBodyAlternativeBegin));
2351 m_ops.last().m_previousOp = notFound;
2353 size_t lastOpIndex = m_ops.size() - 1;
2354 PatternAlternative* alternative = alternatives[currentAlternativeIndex];
2355 ASSERT(!alternative->onceThrough());
2356 opCompileAlternative(alternative);
2358 size_t thisOpIndex = m_ops.size();
2359 m_ops.append(YarrOp(OpBodyAlternativeNext));
2361 YarrOp& lastOp = m_ops[lastOpIndex];
2362 YarrOp& thisOp = m_ops[thisOpIndex];
2364 lastOp.m_alternative = alternative;
2365 lastOp.m_nextOp = thisOpIndex;
2366 thisOp.m_previousOp = lastOpIndex;
2368 ++currentAlternativeIndex;
2369 } while (currentAlternativeIndex < alternatives.size());
2370 YarrOp& lastOp = m_ops.last();
2371 ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2372 lastOp.m_op = OpBodyAlternativeEnd;
2373 lastOp.m_alternative = 0;
2374 lastOp.m_nextOp = repeatLoop;
2377 void generateEnter()
2380 push(X86Registers::ebp);
2381 move(stackPointerRegister, X86Registers::ebp);
2382 push(X86Registers::ebx);
2384 push(X86Registers::ebp);
2385 move(stackPointerRegister, X86Registers::ebp);
2386 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
2387 push(X86Registers::ebx);
2388 push(X86Registers::edi);
2389 push(X86Registers::esi);
2390 // load output into edi (2 = saved ebp + return address).
2392 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
2393 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
2394 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
2395 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
2397 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
2400 push(ARMRegisters::r4);
2401 push(ARMRegisters::r5);
2402 push(ARMRegisters::r6);
2403 #if CPU(ARM_TRADITIONAL)
2404 push(ARMRegisters::r8); // scratch register
2406 move(ARMRegisters::r3, output);
2408 push(SH4Registers::r11);
2409 push(SH4Registers::r13);
2415 void generateReturn()
2418 pop(X86Registers::ebx);
2419 pop(X86Registers::ebp);
2421 pop(X86Registers::esi);
2422 pop(X86Registers::edi);
2423 pop(X86Registers::ebx);
2424 pop(X86Registers::ebp);
2426 #if CPU(ARM_TRADITIONAL)
2427 pop(ARMRegisters::r8); // scratch register
2429 pop(ARMRegisters::r6);
2430 pop(ARMRegisters::r5);
2431 pop(ARMRegisters::r4);
2433 pop(SH4Registers::r13);
2434 pop(SH4Registers::r11);
2442 YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
2443 : m_pattern(pattern)
2444 , m_charSize(charSize)
2445 , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
2446 , m_shouldFallBack(false)
2451 void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
2455 if (!m_pattern.m_body->m_hasFixedSize)
2456 store32(index, Address(output));
2458 if (m_pattern.m_body->m_callFrameSize)
2459 subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
2461 // Compile the pattern to the internal 'YarrOp' representation.
2462 opCompileBody(m_pattern.m_body);
2464 // If we encountered anything we can't handle in the JIT code
2465 // (e.g. backreferences) then return early.
2466 if (m_shouldFallBack) {
2467 jitObject.setFallBack(true);
2474 // Link & finalize the code.
2475 LinkBuffer linkBuffer(*globalData, this);
2476 m_backtrackingState.linkDataLabels(linkBuffer);
2477 if (m_charSize == Char8)
2478 jitObject.set8BitCode(linkBuffer.finalizeCode());
2480 jitObject.set16BitCode(linkBuffer.finalizeCode());
2481 jitObject.setFallBack(m_shouldFallBack);
// The parsed pattern being compiled (owned by the caller).
YarrPattern& m_pattern;

// Whether we are compiling an 8-bit or 16-bit character matcher.
YarrCharSize m_charSize;

// Used to detect regular expression constructs that are not currently
// supported in the JIT; fall back to the interpreter when this is detected.
bool m_shouldFallBack;

// The regular expression expressed as a linear sequence of operations.
Vector<YarrOp, 128> m_ops;

// This records the current input offset being applied due to the current
// set of alternatives we are nested within. E.g. when matching the
// character 'b' within the regular expression /abc/, we will know that
// the minimum size for the alternative is 3, checked upon entry to the
// alternative, and that 'b' is at offset 1 from the start, and as such
// when matching 'b' we need to apply an offset of -2 to the load.
//
// FIXME: This should go away. Rather than tracking this value throughout
// code generation, we should gather this information up front & store it
// on the YarrOp structure.
// NOTE(review): the declaration this comment describes (the 'm_checked'
// counter adjusted throughout backtracking generation) is not visible in
// this chunk - confirm it is declared here in the full file.

// This class records state whilst generating the backtracking path of code.
BacktrackingState m_backtrackingState;
2514 void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject)
2516 YarrGenerator(pattern, charSize).compile(globalData, jitObject);
2519 int execute(YarrCodeBlock& jitObject, const char* input, unsigned start, unsigned length, int* output)
2521 return jitObject.execute(input, start, length, output);
2524 int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
2526 return jitObject.execute(input, start, length, output);