Source/JavaScriptCore/yarr/YarrJIT.cpp
/*
 * Copyright (C) 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "YarrJIT.h"

#include "ASCIICType.h"
#include "LinkBuffer.h"
#include "Yarr.h"

#if ENABLE(YARR_JIT)

using namespace WTF;

namespace JSC { namespace Yarr {

class YarrGenerator : private MacroAssembler {
    friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);

#if CPU(ARM)
    static const RegisterID input = ARMRegisters::r0;
    static const RegisterID index = ARMRegisters::r1;
    static const RegisterID length = ARMRegisters::r2;
    static const RegisterID output = ARMRegisters::r4;

    static const RegisterID regT0 = ARMRegisters::r5;
    static const RegisterID regT1 = ARMRegisters::r6;

    static const RegisterID returnRegister = ARMRegisters::r0;
#elif CPU(MIPS)
    static const RegisterID input = MIPSRegisters::a0;
    static const RegisterID index = MIPSRegisters::a1;
    static const RegisterID length = MIPSRegisters::a2;
    static const RegisterID output = MIPSRegisters::a3;

    static const RegisterID regT0 = MIPSRegisters::t4;
    static const RegisterID regT1 = MIPSRegisters::t5;

    static const RegisterID returnRegister = MIPSRegisters::v0;
#elif CPU(SH4)
    static const RegisterID input = SH4Registers::r4;
    static const RegisterID index = SH4Registers::r5;
    static const RegisterID length = SH4Registers::r6;
    static const RegisterID output = SH4Registers::r7;

    static const RegisterID regT0 = SH4Registers::r0;
    static const RegisterID regT1 = SH4Registers::r1;

    static const RegisterID returnRegister = SH4Registers::r0;
#elif CPU(X86)
    static const RegisterID input = X86Registers::eax;
    static const RegisterID index = X86Registers::edx;
    static const RegisterID length = X86Registers::ecx;
    static const RegisterID output = X86Registers::edi;

    static const RegisterID regT0 = X86Registers::ebx;
    static const RegisterID regT1 = X86Registers::esi;

    static const RegisterID returnRegister = X86Registers::eax;
#elif CPU(X86_64)
    static const RegisterID input = X86Registers::edi;
    static const RegisterID index = X86Registers::esi;
    static const RegisterID length = X86Registers::edx;
    static const RegisterID output = X86Registers::ecx;

    static const RegisterID regT0 = X86Registers::eax;
    static const RegisterID regT1 = X86Registers::ebx;

    static const RegisterID returnRegister = X86Registers::eax;
#endif

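    // A note on intent (inferred from the swap below): where a fixed-count
    // character class is immediately followed by a fixed-count pattern
    // character, the two terms are swapped so that the cheaper single-character
    // comparison is generated (and can fail) first.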
    void optimizeAlternative(PatternAlternative* alternative)
    {
        if (!alternative->m_terms.size())
            return;

        for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
            PatternTerm& term = alternative->m_terms[i];
            PatternTerm& nextTerm = alternative->m_terms[i + 1];

            if ((term.type == PatternTerm::TypeCharacterClass)
                && (term.quantityType == QuantifierFixedCount)
                && (nextTerm.type == PatternTerm::TypePatternCharacter)
                && (nextTerm.quantityType == QuantifierFixedCount)) {
                PatternTerm termCopy = term;
                alternative->m_terms[i] = nextTerm;
                alternative->m_terms[i + 1] = termCopy;
            }
        }
    }

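    // Emits a binary-search style match over the sorted character ranges of a
    // class, interleaving equality checks for any single-character matches that
    // fall below each range; matchIndex tracks progress through the matches list.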
    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
    {
        do {
            // pick which range we're going to generate
            int which = count >> 1;
            char lo = ranges[which].begin;
            char hi = ranges[which].end;

            // check if there are any ranges or matches below lo.  If not, just jl to failure -
            // if there is anything else to check, check that first, if it falls through jmp to failure.
            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                // generate code for all ranges before this one
                if (which)
                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);

                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
                    ++*matchIndex;
                }
                failures.append(jump());

                loOrAbove.link(this);
            } else if (which) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
                failures.append(jump());

                loOrAbove.link(this);
            } else
                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));

            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
                ++*matchIndex;

            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
            // fall through to here, the value is above hi.

            // shuffle along & loop around if there are any more matches to handle.
            unsigned next = which + 1;
            ranges += next;
            count -= next;
        } while (count);
    }

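    // Dispatches on the class representation: use the class's lookup table if
    // one is present; otherwise emit explicit compares for the Unicode matches
    // and ranges (guarded by an ASCII check on the character), followed by the
    // ASCII ranges and matches.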
    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    {
        if (charClass->m_table) {
            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
            matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
            return;
        }
        Jump unicodeFail;
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
            Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));

            if (charClass->m_matchesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
                    UChar ch = charClass->m_matchesUnicode[i];
                    matchDest.append(branch32(Equal, character, Imm32(ch)));
                }
            }

            if (charClass->m_rangesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
                    UChar lo = charClass->m_rangesUnicode[i].begin;
                    UChar hi = charClass->m_rangesUnicode[i].end;

                    Jump below = branch32(LessThan, character, Imm32(lo));
                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
                    below.link(this);
                }
            }

            unicodeFail = jump();
            isAscii.link(this);
        }

        if (charClass->m_ranges.size()) {
            unsigned matchIndex = 0;
            JumpList failures;
            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
            while (matchIndex < charClass->m_matches.size())
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));

            failures.link(this);
        } else if (charClass->m_matches.size()) {
            // optimization: gather 'a','A' etc back together, can mask & test once.
            Vector<char> matchesAZaz;

            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
                char ch = charClass->m_matches[i];
                if (m_pattern.m_ignoreCase) {
                    if (isASCIILower(ch)) {
                        matchesAZaz.append(ch);
                        continue;
                    }
                    if (isASCIIUpper(ch))
                        continue;
                }
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
            }

            if (unsigned countAZaz = matchesAZaz.size()) {
                or32(TrustedImm32(32), character);
                for (unsigned i = 0; i < countAZaz; ++i)
                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
            }
        }

        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
            unicodeFail.link(this);
    }

    // Jumps if input not available; will have (incorrectly) incremented already!
    Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
    {
        if (countToCheck)
            add32(Imm32(countToCheck), index);
        return branch32(Above, index, length);
    }

    Jump jumpIfAvailableInput(unsigned countToCheck)
    {
        add32(Imm32(countToCheck), index);
        return branch32(BelowOrEqual, index, length);
    }

    Jump checkInput()
    {
        return branch32(BelowOrEqual, index, length);
    }

    Jump atEndOfInput()
    {
        return branch32(Equal, index, length);
    }

    Jump notAtEndOfInput()
    {
        return branch32(NotEqual, index, length);
    }

    Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
    {
        if (m_charSize == Char8)
            return branch8(NotEqual, BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), Imm32(ch));
        return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
    }

    void readCharacter(int inputPosition, RegisterID reg)
    {
        if (m_charSize == Char8)
            load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
        else
            load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
    }

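    // The 'frame' below is the pattern's backtracking frame, allocated on the
    // machine stack; frameLocation is an index in machine words relative to the
    // stack pointer.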
    void storeToFrame(RegisterID reg, unsigned frameLocation)
    {
        poke(reg, frameLocation);
    }

    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }

    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
    {
        return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }

    void loadFromFrame(unsigned frameLocation, RegisterID reg)
    {
        peek(reg, frameLocation);
    }

    void loadFromFrameAndJump(unsigned frameLocation)
    {
        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }

    enum YarrOpCode {
        // These nodes wrap body alternatives - those in the main disjunction,
        // rather than subpatterns or assertions. These are chained together in
        // a doubly linked list, with a 'begin' node for the first alternative,
        // a 'next' node for each subsequent alternative, and an 'end' node at
        // the end. In the case of repeating alternatives, the 'end' node also
        // has a reference back to 'begin'.
        OpBodyAlternativeBegin,
        OpBodyAlternativeNext,
        OpBodyAlternativeEnd,
        // Similar to the body alternatives, but used for subpatterns with two
        // or more alternatives.
        OpNestedAlternativeBegin,
        OpNestedAlternativeNext,
        OpNestedAlternativeEnd,
        // Used for alternatives in subpatterns where there is only a single
        // alternative (backtracking is easier in these cases), or for alternatives
        // which never need to be backtracked (those in parenthetical assertions,
        // terminal subpatterns).
        OpSimpleNestedAlternativeBegin,
        OpSimpleNestedAlternativeNext,
        OpSimpleNestedAlternativeEnd,
        // Used to wrap 'Once' subpattern matches (quantityCount == 1).
        OpParenthesesSubpatternOnceBegin,
        OpParenthesesSubpatternOnceEnd,
        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
        OpParenthesesSubpatternTerminalBegin,
        OpParenthesesSubpatternTerminalEnd,
        // Used to wrap parenthetical assertions.
        OpParentheticalAssertionBegin,
        OpParentheticalAssertionEnd,
        // Wraps all simple terms (pattern characters, character classes).
        OpTerm,
        // Where an expression contains only 'once through' body alternatives
        // and no repeating ones, this op is used to return match failure.
        OpMatchFailed
    };
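    // As an illustrative sketch (not actual generated output): a pattern such
    // as /ab|cd/ compiles to an op sequence along the lines of
    //   OpBodyAlternativeBegin, OpTerm('a'), OpTerm('b'),
    //   OpBodyAlternativeNext, OpTerm('c'), OpTerm('d'),
    //   OpBodyAlternativeEnd
    // with matching code generated forwards over this list and backtracking
    // code generated in reverse.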

    // This structure is used to hold the compiled opcode information,
    // including reference back to the original PatternTerm/PatternAlternatives,
    // and JIT compilation data structures.
    struct YarrOp {
        explicit YarrOp(PatternTerm* term)
            : m_op(OpTerm)
            , m_term(term)
            , m_isDeadCode(false)
        {
        }

        explicit YarrOp(YarrOpCode op)
            : m_op(op)
            , m_isDeadCode(false)
        {
        }

        // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
        YarrOpCode m_op;
        PatternTerm* m_term;

        // For alternatives, this holds the PatternAlternative and doubly linked
        // references to this alternative's siblings. In the case of the
        // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
        // m_nextOp will reference the OpBodyAlternativeBegin node of the first
        // repeating alternative.
        PatternAlternative* m_alternative;
        size_t m_previousOp;
        size_t m_nextOp;

        // Used to record a set of Jumps out of the generated code, typically
        // used for jumps out to backtracking code, and a single reentry back
        // into the code for a node (likely where a backtrack will trigger
        // rematching).
        Label m_reentry;
        JumpList m_jumps;

        // This flag is used to null out the second pattern character, when
        // two are fused to match a pair together.
        bool m_isDeadCode;

        // Currently used in the case of some of the more complex management of
        // 'm_checked', to cache the offset used in this alternative, to avoid
        // recalculating it.
        int m_checkAdjust;

        // Used by OpNestedAlternativeNext/End to hold the pointer to the
        // value that will be pushed into the pattern's frame to return to,
        // upon backtracking back into the disjunction.
        DataLabelPtr m_returnAddress;
    };

    // BacktrackingState
    // This class encapsulates information about the state of code generation
    // whilst generating the code for backtracking, when a term fails to match.
    // Upon entry to code generation of the backtracking code for a given node,
    // the Backtracking state will hold references to all control flow sources
    // that are outputs in need of further backtracking from the prior node
    // generated (which is the subsequent operation in the regular expression,
    // and in the m_ops Vector, since we generated backtracking backwards).
    // These references to control flow take the form of:
    //  - A jump list of jumps, to be linked to code that will backtrack them
    //    further.
    //  - A set of DataLabelPtr values, to be populated with values to be
    //    treated effectively as return addresses backtracking into complex
    //    subpatterns.
    //  - A flag indicating that the current sequence of generated code up to
    //    this point requires backtracking.
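    //
    // A typical backtracking method first calls link() to bind all pending
    // backtracks from the previously generated (later) node to the current
    // location, emits its own backtracking code, and then appends any jumps
    // or return addresses that still require handling by an earlier node.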
    class BacktrackingState {
    public:
        BacktrackingState()
            : m_pendingFallthrough(false)
        {
        }

        // Add a jump or jumps, a return address, or set the flag indicating
        // that the current 'fallthrough' control flow requires backtracking.
        void append(const Jump& jump)
        {
            m_laterFailures.append(jump);
        }
        void append(JumpList& jumpList)
        {
            m_laterFailures.append(jumpList);
        }
        void append(const DataLabelPtr& returnAddress)
        {
            m_pendingReturns.append(returnAddress);
        }
        void fallthrough()
        {
            ASSERT(!m_pendingFallthrough);
            m_pendingFallthrough = true;
        }

        // These methods clear the backtracking state, either linking to the
        // current location, a provided label, or copying the backtracking out
        // to a JumpList. All actions may require code generation to take place,
        // and as such are passed a pointer to the assembler.
        void link(MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
            }
            m_laterFailures.link(assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void linkTo(Label label, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
                m_pendingReturns.clear();
            }
            if (m_pendingFallthrough)
                assembler->jump(label);
            m_laterFailures.linkTo(label, assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
                m_pendingFallthrough = true;
            }
            if (m_pendingFallthrough)
                jumpList.append(assembler->jump());
            jumpList.append(m_laterFailures);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }

        bool isEmpty()
        {
            return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
        }

        // Called at the end of code generation to link all return addresses.
        void linkDataLabels(LinkBuffer& linkBuffer)
        {
            ASSERT(isEmpty());
            for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
                linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
        }

    private:
        struct ReturnAddressRecord {
            ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
                : m_dataLabel(dataLabel)
                , m_backtrackLocation(backtrackLocation)
            {
            }

            DataLabelPtr m_dataLabel;
            Label m_backtrackLocation;
        };

        JumpList m_laterFailures;
        bool m_pendingFallthrough;
        Vector<DataLabelPtr, 4> m_pendingReturns;
        Vector<ReturnAddressRecord, 4> m_backtrackRecords;
    };

    // Generation methods:
    // ===================

    // This method provides a default implementation of backtracking common
    // to many terms; terms commonly jump out of the forwards matching path
    // on any failed conditions, and add these jumps to the m_jumps list. If
    // no special handling is required we can often just backtrack to m_jumps.
    void backtrackTermDefault(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        m_backtrackingState.append(op.m_jumps);
    }

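    // Assertion '^': in multiline mode this matches at the start of the input
    // or immediately after a newline character; otherwise only at the start of
    // the input.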
    void generateAssertionBOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (!term->inputPosition)
                matchDest.append(branch32(Equal, index, Imm32(m_checked)));

            readCharacter((term->inputPosition - m_checked) - 1, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            // Erk, really should poison out these alternatives early. :-/
            if (term->inputPosition)
                op.m_jumps.append(jump());
            else
                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
        }
    }
    void backtrackAssertionBOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

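    // Assertion '$': in multiline mode this matches at the end of the input or
    // immediately before a newline character; otherwise only at the end of the
    // input.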
    void generateAssertionEOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.m_multiline) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (term->inputPosition == m_checked)
                matchDest.append(atEndOfInput());

            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            if (term->inputPosition == m_checked)
                op.m_jumps.append(notAtEndOfInput());
            // Erk, really should poison out these alternatives early. :-/
            else
                op.m_jumps.append(jump());
        }
    }
    void backtrackAssertionEOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    // Also falls through on nextIsNotWordChar.
    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        if (term->inputPosition == m_checked)
            nextIsNotWordChar.append(atEndOfInput());

        readCharacter((term->inputPosition - m_checked), character);
        matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
    }

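    // Assertion '\b' (or '\B' when inverted): compares whether the previous and
    // next input characters are word characters; the boundary matches when the
    // two results differ.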
    void generateAssertionWordBoundary(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        Jump atBegin;
        JumpList matchDest;
        if (!term->inputPosition)
            atBegin = branch32(Equal, index, Imm32(m_checked));
        readCharacter((term->inputPosition - m_checked) - 1, character);
        matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
        if (!term->inputPosition)
            atBegin.link(this);

        // We fall through to here if the last character was not a wordchar.
        JumpList nonWordCharThenWordChar;
        JumpList nonWordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
            nonWordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
            nonWordCharThenNonWordChar.append(jump());
        }
        op.m_jumps.append(nonWordCharThenNonWordChar);

        // We jump here if the last character was a wordchar.
        matchDest.link(this);
        JumpList wordCharThenWordChar;
        JumpList wordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
            wordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
            // This can fall-through!
        }

        op.m_jumps.append(wordCharThenWordChar);

        nonWordCharThenWordChar.link(this);
        wordCharThenNonWordChar.link(this);
    }
    void backtrackAssertionWordBoundary(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

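    // Matches a single pattern character with a fixed count of one. Where the
    // next term is also a single fixed-count character, the two characters are
    // fused into one 16-bit (or 32-bit) load-and-compare and the second term is
    // marked as dead code.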
    void generatePatternCharacterOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];

        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
        // node, so there must always be at least one more node.
        ASSERT(opIndex + 1 < m_ops.size());
        YarrOp& nextOp = m_ops[opIndex + 1];

        if (op.m_isDeadCode)
            return;

        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        if ((ch > 0xff) && (m_charSize == Char8)) {
            // Have a 16 bit pattern character and an 8 bit string - short circuit
            op.m_jumps.append(jump());
            return;
        }

        const RegisterID character = regT0;

        if (nextOp.m_op == OpTerm) {
            PatternTerm* nextTerm = nextOp.m_term;
            if (nextTerm->type == PatternTerm::TypePatternCharacter
                && nextTerm->quantityType == QuantifierFixedCount
                && nextTerm->quantityCount == 1
                && nextTerm->inputPosition == (term->inputPosition + 1)) {

                UChar ch2 = nextTerm->patternCharacter;

                int shiftAmount = m_charSize == Char8 ? 8 : 16;
                int mask = 0;
                int chPair = ch | (ch2 << shiftAmount);

                if (m_pattern.m_ignoreCase) {
                    if (isASCIIAlpha(ch))
                        mask |= 32;
                    if (isASCIIAlpha(ch2))
                        mask |= 32 << shiftAmount;
                }

                if (m_charSize == Char8) {
                    BaseIndex address(input, index, TimesOne, (term->inputPosition - m_checked) * sizeof(char));
                    if (mask) {
                        load16(address, character);
                        or32(Imm32(mask), character);
                        op.m_jumps.append(branch16(NotEqual, character, Imm32(chPair | mask)));
                    } else
                        op.m_jumps.append(branch16(NotEqual, address, Imm32(chPair)));
                } else {
                    BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
                    if (mask) {
                        load32WithUnalignedHalfWords(address, character);
                        or32(Imm32(mask), character);
                        op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
                    } else
                        op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));
                }
                nextOp.m_isDeadCode = true;
                return;
            }
        }

        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            readCharacter(term->inputPosition - m_checked, character);
            or32(TrustedImm32(32), character);
            op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
        } else {
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
            op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
        }
    }
    void backtrackPatternCharacterOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

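    // Matches a fixed-count run of the same pattern character (e.g. /a{3}/),
    // looping a counter register from index - quantityCount up to index.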
    void generatePatternCharacterFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(index, countRegister);
        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);

        Label loop(this);
        BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());

        if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
            if (m_charSize == Char8)
                load8(address, character);
            else
                load16(address, character);
            or32(TrustedImm32(32), character);
            op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
        } else {
            ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
            if (m_charSize == Char8)
                op.m_jumps.append(branch8(NotEqual, address, Imm32(ch)));
            else
                op.m_jumps.append(branch16(NotEqual, address, Imm32(ch)));
        }
        add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    void backtrackPatternCharacterFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

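    // Greedily matches as many copies of the character as possible, recording
    // the count in the term's frame location so that backtracking can give
    // characters back one at a time.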
    void generatePatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        if ((ch > 0xff) && (m_charSize == Char8)) {
            // Have a 16 bit pattern character and an 8 bit string - short circuit
            op.m_jumps.append(jump());
        } else {
            JumpList failures;
            Label loop(this);
            failures.append(atEndOfInput());
            if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
                readCharacter(term->inputPosition - m_checked, character);
                or32(TrustedImm32(32), character);
                failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
            } else {
                ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
                failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
            }

            add32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);
            if (term->quantityCount == quantifyInfinite)
                jump(loop);
            else
                branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);

            failures.link(this);
        }
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation);
    }
    void backtrackPatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        sub32(TrustedImm32(1), index);
        jump(op.m_reentry);
    }

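    // Non-greedy matching starts by matching zero characters; each backtrack
    // attempts to match one more, up to the quantity limit.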
    void generatePatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation);
    }
    void backtrackPatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        JumpList nonGreedyFailures;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);

        if ((ch > 0xff) && (m_charSize == Char8)) {
            // Have a 16 bit pattern character and an 8 bit string - short circuit
            nonGreedyFailures.append(jump());
        } else {
            nonGreedyFailures.append(atEndOfInput());
            if (term->quantityCount != quantifyInfinite)
                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
            if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
                readCharacter(term->inputPosition - m_checked, character);
                or32(TrustedImm32(32), character);
                nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
            } else {
                ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
                nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
            }

            add32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);

            jump(op.m_reentry);
        }
        nonGreedyFailures.link(this);

        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }

    void generateCharacterClassOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        JumpList matchDest;
        readCharacter(term->inputPosition - m_checked, character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            op.m_jumps.append(matchDest);
        else {
            op.m_jumps.append(jump());
            matchDest.link(this);
        }
    }
    void backtrackCharacterClassOnce(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    void generateCharacterClassFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(index, countRegister);
        sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);

        Label loop(this);
        JumpList matchDest;
        if (m_charSize == Char8)
            load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
        else
            load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            op.m_jumps.append(matchDest);
        else {
            op.m_jumps.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    void backtrackCharacterClassFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    void generateCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);

        JumpList failures;
        Label loop(this);
        failures.append(atEndOfInput());

        if (term->invert()) {
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, failures, term->characterClass);
        } else {
            JumpList matchDest;
            readCharacter(term->inputPosition - m_checked, character);
            matchCharacterClass(character, matchDest, term->characterClass);
            failures.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        add32(TrustedImm32(1), index);
        if (term->quantityCount != quantifyInfinite) {
            branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
            failures.append(jump());
        } else
            jump(loop);

        failures.link(this);
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation);
    }
    void backtrackCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation, countRegister);
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        sub32(TrustedImm32(1), index);
        jump(op.m_reentry);
    }

    void generateCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();
        storeToFrame(countRegister, term->frameLocation);
    }
    void backtrackCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        JumpList nonGreedyFailures;

        m_backtrackingState.link(this);

        Label backtrackBegin(this);
        loadFromFrame(term->frameLocation, countRegister);

        nonGreedyFailures.append(atEndOfInput());
        nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));

        JumpList matchDest;
        readCharacter(term->inputPosition - m_checked, character);
        matchCharacterClass(character, matchDest, term->characterClass);

        if (term->invert())
            nonGreedyFailures.append(matchDest);
        else {
            nonGreedyFailures.append(jump());
            matchDest.link(this);
        }

        add32(TrustedImm32(1), countRegister);
        add32(TrustedImm32(1), index);

        jump(op.m_reentry);

        nonGreedyFailures.link(this);
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }

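    // Handles the /.*foo.*/ "dot star enclosure" form: after matching the
    // enclosed expression, scan backwards to the preceding newline (or start of
    // input) for the match start and forwards to the next newline (or end of
    // input) for the match end.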
    void generateDotStarEnclosure(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID matchPos = regT1;

        JumpList foundBeginningNewLine;
        JumpList saveStartIndex;
        JumpList foundEndingNewLine;

        if (m_pattern.m_body->m_hasFixedSize) {
            move(index, matchPos);
            sub32(Imm32(m_checked), matchPos);
        } else
            load32(Address(output), matchPos);

        saveStartIndex.append(branchTest32(Zero, matchPos));
        Label findBOLLoop(this);
        sub32(TrustedImm32(1), matchPos);
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
        branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
        saveStartIndex.append(jump());

        foundBeginningNewLine.link(this);
        add32(TrustedImm32(1), matchPos); // Advance past newline
        saveStartIndex.link(this);

        if (!m_pattern.m_multiline && term->anchors.bolAnchor)
            op.m_jumps.append(branchTest32(NonZero, matchPos));

        store32(matchPos, Address(output));

        move(index, matchPos);

        Label findEOLLoop(this);
        foundEndingNewLine.append(branch32(Equal, matchPos, length));
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
        add32(TrustedImm32(1), matchPos);
        jump(findEOLLoop);

        foundEndingNewLine.link(this);

        if (!m_pattern.m_multiline && term->anchors.eolAnchor)
            op.m_jumps.append(branch32(NotEqual, matchPos, length));

        move(matchPos, index);
    }

    void backtrackDotStarEnclosure(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    // Code generation/backtracking for simple terms
    // (pattern characters, character classes, and assertions).
    // These methods farm out work to the set of functions above.
    void generateTerm(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        switch (term->type) {
        case PatternTerm::TypePatternCharacter:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    generatePatternCharacterOnce(opIndex);
                else
                    generatePatternCharacterFixed(opIndex);
                break;
            case QuantifierGreedy:
                generatePatternCharacterGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                generatePatternCharacterNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeCharacterClass:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    generateCharacterClassOnce(opIndex);
                else
                    generateCharacterClassFixed(opIndex);
                break;
            case QuantifierGreedy:
                generateCharacterClassGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                generateCharacterClassNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeAssertionBOL:
            generateAssertionBOL(opIndex);
            break;

        case PatternTerm::TypeAssertionEOL:
            generateAssertionEOL(opIndex);
            break;

        case PatternTerm::TypeAssertionWordBoundary:
            generateAssertionWordBoundary(opIndex);
            break;

        case PatternTerm::TypeForwardReference:
            break;

        case PatternTerm::TypeParenthesesSubpattern:
        case PatternTerm::TypeParentheticalAssertion:
            ASSERT_NOT_REACHED();
        case PatternTerm::TypeBackReference:
            m_shouldFallBack = true;
            break;
        case PatternTerm::TypeDotStarEnclosure:
            generateDotStarEnclosure(opIndex);
            break;
        }
    }
    void backtrackTerm(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        switch (term->type) {
        case PatternTerm::TypePatternCharacter:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    backtrackPatternCharacterOnce(opIndex);
                else
                    backtrackPatternCharacterFixed(opIndex);
                break;
            case QuantifierGreedy:
                backtrackPatternCharacterGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                backtrackPatternCharacterNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeCharacterClass:
            switch (term->quantityType) {
            case QuantifierFixedCount:
                if (term->quantityCount == 1)
                    backtrackCharacterClassOnce(opIndex);
                else
                    backtrackCharacterClassFixed(opIndex);
                break;
            case QuantifierGreedy:
                backtrackCharacterClassGreedy(opIndex);
                break;
            case QuantifierNonGreedy:
                backtrackCharacterClassNonGreedy(opIndex);
                break;
            }
            break;

        case PatternTerm::TypeAssertionBOL:
            backtrackAssertionBOL(opIndex);
            break;

        case PatternTerm::TypeAssertionEOL:
            backtrackAssertionEOL(opIndex);
            break;

        case PatternTerm::TypeAssertionWordBoundary:
            backtrackAssertionWordBoundary(opIndex);
            break;

        case PatternTerm::TypeForwardReference:
            break;

        case PatternTerm::TypeParenthesesSubpattern:
        case PatternTerm::TypeParentheticalAssertion:
            ASSERT_NOT_REACHED();

        case PatternTerm::TypeDotStarEnclosure:
            backtrackDotStarEnclosure(opIndex);
            break;

        case PatternTerm::TypeBackReference:
            m_shouldFallBack = true;
            break;
        }
    }

    void generate()
    {
        // Forwards generate the matching code.
        ASSERT(m_ops.size());
        size_t opIndex = 0;

        do {
            YarrOp& op = m_ops[opIndex];
            switch (op.m_op) {

            case OpTerm:
                generateTerm(opIndex);
                break;

            // OpBodyAlternativeBegin/Next/End
            //
            // These nodes wrap the set of alternatives in the body of the regular expression.
            // There may be either one or two chains of OpBodyAlternative nodes, one representing
            // the 'once through' sequence of alternatives (if any exist), and one representing
            // the repeating alternatives (again, if any exist).
            //
            // Upon normal entry to the Begin alternative, we will check that input is available.
            // Reentry to the Begin alternative will take place after the check has taken place,
            // and will assume that the input position has already been progressed as appropriate.
            //
            // Entry to subsequent Next/End alternatives occurs when the prior alternative has
            // successfully completed a match - return a success state from JIT code.
            //
            // Next alternatives allow for reentry optimized to suit backtracking from its
            // preceding alternative. It expects the input position to still be set to a position
            // appropriate to its predecessor, and it will only perform an input check if the
            // predecessor had a minimum size less than its own.
            //
            // In the case of 'once through' expressions, the End node will also have a reentry
            // point to jump to when the last alternative fails. Again, this expects the input
            // position to still reflect that expected by the prior alternative.
            case OpBodyAlternativeBegin: {
                PatternAlternative* alternative = op.m_alternative;

                // Upon entry at the head of the set of alternatives, check if input is available
                // to run the first alternative. (This progresses the input position).
                op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
                // We will reenter after the check, and assume the input position to have been
                // set as appropriate to this alternative.
                op.m_reentry = label();

                m_checked += alternative->m_minimumSize;
                break;
            }
            case OpBodyAlternativeNext:
            case OpBodyAlternativeEnd: {
                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
                PatternAlternative* alternative = op.m_alternative;

                // If we get here, the prior alternative matched - return success.

                // Adjust the stack pointer to remove the pattern's frame.
                if (m_pattern.m_body->m_callFrameSize)
                    addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);

                // Load appropriate values into the return register and the first output
                // slot, and return. In the case of a pattern with a fixed size, we will
                // not yet have set the value in the first output slot.
                ASSERT(index != returnRegister);
                if (m_pattern.m_body->m_hasFixedSize) {
                    move(index, returnRegister);
                    if (priorAlternative->m_minimumSize)
                        sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
                    store32(returnRegister, output);
                } else
                    load32(Address(output), returnRegister);
                store32(index, Address(output, 4));
                generateReturn();

                // This is the divide between the tail of the prior alternative, above, and
                // the head of the subsequent alternative, below.

                if (op.m_op == OpBodyAlternativeNext) {
                    // This is the reentry point for the Next alternative. We expect any code
                    // that jumps here to do so with the input position matching that of the
                    // PRIOR alternative, and we will only check input availability if we
                    // need to progress it forwards.
                    op.m_reentry = label();
                    if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
                        add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
                        op.m_jumps.append(jumpIfNoAvailableInput());
                    } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
                        sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
                } else if (op.m_nextOp == notFound) {
                    // This is the reentry point for the End of 'once through' alternatives,
                    // jumped to when the last alternative fails to match.
1325                     op.m_reentry = label();
1326                     sub32(Imm32(priorAlternative->m_minimumSize), index);
1327                 }
1328
1329                 if (op.m_op == OpBodyAlternativeNext)
1330                     m_checked += alternative->m_minimumSize;
1331                 m_checked -= priorAlternative->m_minimumSize;
1332                 break;
1333             }
1334
1335             // OpSimpleNestedAlternativeBegin/Next/End
1336             // OpNestedAlternativeBegin/Next/End
1337             //
1338             // These nodes are used to handle sets of alternatives that are nested within
1339             // subpatterns and parenthetical assertions. The 'simple' forms are used where
1340             // we do not need to be able to backtrack back into any alternative other than
1341             // the last, the normal forms allow backtracking into any alternative.
1342             //
1343             // Each Begin/Next node is responsible for planting an input check to ensure
1344             // sufficient input is available on entry. Next nodes additionally need to
1345             // jump to the end - Next nodes use the End node's m_jumps list to hold this
1346             // set of jumps.
1347             //
1348             // In the non-simple forms, successful alternative matches must store a
1349             // 'return address' using a DataLabelPtr, used to store the address to jump
1350             // to when backtracking, to get to the code for the appropriate alternative.
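            //
            // Worked example (illustrative): for a fixed-count subpattern such as
            // /(?:ab|abc)/ the disjunction's minimum size (2) is already covered by
            // the enclosing input check, so the Begin node for 'ab' needs a
            // checkAdjust of 0 and the Next node for 'abc' a checkAdjust of 1 (3 - 2).
            // For quantified parentheses and parenthetical assertions the full
            // alternative minimum size is checked.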
1351             case OpSimpleNestedAlternativeBegin:
1352             case OpNestedAlternativeBegin: {
1353                 PatternTerm* term = op.m_term;
1354                 PatternAlternative* alternative = op.m_alternative;
1355                 PatternDisjunction* disjunction = term->parentheses.disjunction;
1356
1357                 // Calculate how much input we need to check for, and if non-zero check.
1358                 op.m_checkAdjust = alternative->m_minimumSize;
1359                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1360                     op.m_checkAdjust -= disjunction->m_minimumSize;
1361                 if (op.m_checkAdjust)
1362                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1363  
1364                 m_checked += op.m_checkAdjust;
1365                 break;
1366             }
1367             case OpSimpleNestedAlternativeNext:
1368             case OpNestedAlternativeNext: {
1369                 PatternTerm* term = op.m_term;
1370                 PatternAlternative* alternative = op.m_alternative;
1371                 PatternDisjunction* disjunction = term->parentheses.disjunction;
1372
1373                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1374                 if (op.m_op == OpNestedAlternativeNext) {
1375                     unsigned parenthesesFrameLocation = term->frameLocation;
1376                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
1377                     if (term->quantityType != QuantifierFixedCount)
1378                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1379                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1380                 }
1381
1382                 // If we reach here then the prior alternative has matched - jump to the
1383                 // End node, to skip over any further alternatives.
1384                 //
1385                 // FIXME: this is logically O(N^2) (though N can be expected to be very
1386                 // small). We could avoid this either by adding an extra jump to the JIT
1387                 // data structures, or by making the backtracking code that jumps to Next
1388                 // alternatives responsible for checking that input is available (if
1389                 // we didn't need to plant the input checks, then m_jumps would be free).
1390                 YarrOp* endOp = &m_ops[op.m_nextOp];
1391                 while (endOp->m_nextOp != notFound) {
1392                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1393                     endOp = &m_ops[endOp->m_nextOp];
1394                 }
1395                 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1396                 endOp->m_jumps.append(jump());
1397
1398                 // This is the entry point for the next alternative.
1399                 op.m_reentry = label();
1400
1401                 // Calculate how much input we need to check for, and if non-zero check.
1402                 op.m_checkAdjust = alternative->m_minimumSize;
1403                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
1404                     op.m_checkAdjust -= disjunction->m_minimumSize;
1405                 if (op.m_checkAdjust)
1406                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
1407
1408                 YarrOp& lastOp = m_ops[op.m_previousOp];
1409                 m_checked -= lastOp.m_checkAdjust;
1410                 m_checked += op.m_checkAdjust;
1411                 break;
1412             }
1413             case OpSimpleNestedAlternativeEnd:
1414             case OpNestedAlternativeEnd: {
1415                 PatternTerm* term = op.m_term;
1416
1417                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
1418                 if (op.m_op == OpNestedAlternativeEnd) {
1419                     unsigned parenthesesFrameLocation = term->frameLocation;
1420                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
1421                     if (term->quantityType != QuantifierFixedCount)
1422                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1423                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
1424                 }
1425
1426                 // If this set of alternatives contains more than one alternative,
1427                 // then the Next nodes will have planted jumps to the End, and added
1428                 // them to this node's m_jumps list.
1429                 op.m_jumps.link(this);
1430                 op.m_jumps.clear();
1431
1432                 YarrOp& lastOp = m_ops[op.m_previousOp];
1433                 m_checked -= lastOp.m_checkAdjust;
1434                 break;
1435             }
1436
1437             // OpParenthesesSubpatternOnceBegin/End
1438             //
1439             // These nodes support (optionally) capturing subpatterns, that have a
1440             // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). 
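            //
            // For example (illustrative), /(x)/, /(x)?/ and /(x)??/ all use a single
            // OnceBegin/OnceEnd pair around the nested alternative nodes; capturing
            // parentheses with any other quantity are not handled here and set
            // m_shouldFallBack (see opCompileParenthesesSubpattern below).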
1441             case OpParenthesesSubpatternOnceBegin: {
1442                 PatternTerm* term = op.m_term;
1443                 unsigned parenthesesFrameLocation = term->frameLocation;
1444                 const RegisterID indexTemporary = regT0;
1445                 ASSERT(term->quantityCount == 1);
1446
1447                 // Upon entry to a Greedy quantified set of parentheses, store the index.
1448                 // We'll use this for two purposes:
1449                 //  - To indicate which iteration of matching the remainder of the
1450                 //    expression after the parentheses we are on - the first, including the
1451                 //    match within the parentheses, or the second, having skipped over them.
1452                 //  - To check for empty matches, which must be rejected.
1453                 //
1454                 // At the head of a NonGreedy set of parentheses we'll immediately set the
1455                 // value on the stack to -1 (indicating a match skipping the subpattern),
1456                 // and plant a jump to the end. We'll also plant a label to backtrack to,
1457                 // to reenter the subpattern later, with a store to set up the index on the
1458                 // second iteration.
1459                 //
1460                 // FIXME: for capturing parens, could use the index in the capture array?
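                //
                // The frame slot at parenthesesFrameLocation therefore acts as a small
                // state flag (illustrative summary):
                //   slot == input index -> we entered (or re-entered) the subpattern
                //   slot == -1          -> we are skipping over the subpattern
                // The End node compares index against this slot to reject empty matches,
                // and its backtracking code tests for -1 to tell the two states apart.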
1461                 if (term->quantityType == QuantifierGreedy)
1462                     storeToFrame(index, parenthesesFrameLocation);
1463                 else if (term->quantityType == QuantifierNonGreedy) {
1464                     storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
1465                     op.m_jumps.append(jump());
1466                     op.m_reentry = label();
1467                     storeToFrame(index, parenthesesFrameLocation);
1468                 }
1469
1470                 // If the parentheses are capturing, store the starting index value to the
1471                 // captures array, offsetting as necessary.
1472                 //
1473                 // FIXME: could avoid offsetting this value in JIT code, apply
1474                 // offsets only afterwards, at the point the results array is
1475                 // being accessed.
1476                 if (term->capture()) {
1477                     int offsetId = term->parentheses.subpatternId << 1;
1478                     int inputOffset = term->inputPosition - m_checked;
1479                     if (term->quantityType == QuantifierFixedCount)
1480                         inputOffset -= term->parentheses.disjunction->m_minimumSize;
1481                     if (inputOffset) {
1482                         move(index, indexTemporary);
1483                         add32(Imm32(inputOffset), indexTemporary);
1484                         store32(indexTemporary, Address(output, offsetId * sizeof(int)));
1485                     } else
1486                         store32(index, Address(output, offsetId * sizeof(int)));
1487                 }
1488                 break;
1489             }
1490             case OpParenthesesSubpatternOnceEnd: {
1491                 PatternTerm* term = op.m_term;
1492                 unsigned parenthesesFrameLocation = term->frameLocation;
1493                 const RegisterID indexTemporary = regT0;
1494                 ASSERT(term->quantityCount == 1);
1495
1496                 // For Greedy/NonGreedy quantified parentheses, we must reject zero length
1497                 // matches. If the minimum size is known to be non-zero we need not check.
1498                 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
1499                     op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));
1500
1501                 // If the parentheses are capturing, store the ending index value to the
1502                 // captures array, offsetting as necessary.
1503                 //
1504                 // FIXME: could avoid offsetting this value in JIT code, apply
1505                 // offsets only afterwards, at the point the results array is
1506                 // being accessed.
1507                 if (term->capture()) {
1508                     int offsetId = (term->parentheses.subpatternId << 1) + 1;
1509                     int inputOffset = term->inputPosition - m_checked;
1510                     if (inputOffset) {
1511                         move(index, indexTemporary);
1512                         add32(Imm32(inputOffset), indexTemporary);
1513                         store32(indexTemporary, Address(output, offsetId * sizeof(int)));
1514                     } else
1515                         store32(index, Address(output, offsetId * sizeof(int)));
1516                 }
1517
1518                 // If the parentheses are quantified Greedy then add a label to jump back
1519                 // to if we get a failed match from after the parentheses. For NonGreedy
1520                 // parentheses, link the jump from before the subpattern to here.
1521                 if (term->quantityType == QuantifierGreedy)
1522                     op.m_reentry = label();
1523                 else if (term->quantityType == QuantifierNonGreedy) {
1524                     YarrOp& beginOp = m_ops[op.m_previousOp];
1525                     beginOp.m_jumps.link(this);
1526                 }
1527                 break;
1528             }
1529
1530             // OpParenthesesSubpatternTerminalBegin/End
1531             case OpParenthesesSubpatternTerminalBegin: {
1532                 PatternTerm* term = op.m_term;
1533                 ASSERT(term->quantityType == QuantifierGreedy);
1534                 ASSERT(term->quantityCount == quantifyInfinite);
1535                 ASSERT(!term->capture());
1536
1537                 // Upon entry set a label to loop back to.
1538                 op.m_reentry = label();
1539
1540                 // Store the start index of the current match; we need to reject zero
1541                 // length matches.
1542                 storeToFrame(index, term->frameLocation);
1543                 break;
1544             }
1545             case OpParenthesesSubpatternTerminalEnd: {
1546                 PatternTerm* term = op.m_term;
1547
1548                 // Check for zero length matches - if the match is non-zero, then we
1549                 // can accept it & loop back up to the head of the subpattern.
1550                 YarrOp& beginOp = m_ops[op.m_previousOp];
1551                 branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry);
1552
1553                 // Reject the match - backtrack back into the subpattern.
1554                 op.m_jumps.append(jump());
1555
1556                 // This is the entry point to jump to when we stop matching - we will
1557                 // do so once the subpattern cannot match any more.
1558                 op.m_reentry = label();
1559                 break;
1560             }
1561
1562             // OpParentheticalAssertionBegin/End
1563             case OpParentheticalAssertionBegin: {
1564                 PatternTerm* term = op.m_term;
1565
1566                 // Store the current index - assertions should not update index, so
1567                 // we will need to restore it upon a successful match.
1568                 unsigned parenthesesFrameLocation = term->frameLocation;
1569                 storeToFrame(index, parenthesesFrameLocation);
1570
1571                 // Adjust the input index back to the position the assertion matches from.
1572                 op.m_checkAdjust = m_checked - term->inputPosition;
1573                 if (op.m_checkAdjust)
1574                     sub32(Imm32(op.m_checkAdjust), index);
1575
1576                 m_checked -= op.m_checkAdjust;
1577                 break;
1578             }
1579             case OpParentheticalAssertionEnd: {
1580                 PatternTerm* term = op.m_term;
1581
1582                 // Restore the input index value.
1583                 unsigned parenthesesFrameLocation = term->frameLocation;
1584                 loadFromFrame(parenthesesFrameLocation, index);
1585
1586                 // If inverted, a successful match of the assertion must be treated
1587                 // as a failure, so jump to backtracking.
1588                 if (term->invert()) {
1589                     op.m_jumps.append(jump());
1590                     op.m_reentry = label();
1591                 }
1592
1593                 YarrOp& lastOp = m_ops[op.m_previousOp];
1594                 m_checked += lastOp.m_checkAdjust;
1595                 break;
1596             }
1597
1598             case OpMatchFailed:
1599                 if (m_pattern.m_body->m_callFrameSize)
1600                     addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1601                 move(TrustedImm32(-1), returnRegister);
1602                 generateReturn();
1603                 break;
1604             }
1605
1606             ++opIndex;
1607         } while (opIndex < m_ops.size());
1608     }
1609
1610     void backtrack()
1611     {
1612         // Generate the backtracking code, walking backwards through the op list.
1613         size_t opIndex = m_ops.size();
1614         ASSERT(opIndex);
1615
1616         do {
1617             --opIndex;
1618             YarrOp& op = m_ops[opIndex];
1619             switch (op.m_op) {
1620
1621             case OpTerm:
1622                 backtrackTerm(opIndex);
1623                 break;
1624
1625             // OpBodyAlternativeBegin/Next/End
1626             //
1627             // For each Begin/Next node representing an alternative, we need to decide what to do
1628             // in two circumstances:
1629             //  - If we backtrack back into this node, from within the alternative.
1630             //  - If the input check at the head of the alternative fails (if this exists).
1631             //
1632             // We treat these two cases differently since in the former case we have slightly
1633             // more information - since we are backtracking out of a prior alternative we know
1634             // that at least enough input was available to run it. For example, given the regular
1635             // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
1636             // character match of 'a'), then we need not perform an additional input availability
1637             // check before running the second alternative.
1638             //
1639             // The backtracking required differs for the last alternative, which in the case of a
1640             // repeating set of alternatives must loop. The code generated for the last alternative
1641             // will also be used to handle all input check failures from any prior alternatives -
1642             // these require similar functionality, in seeking the next available alternative for
1643             // which there is sufficient input.
1644             //
1645             // Since backtracking of all other alternatives simply requires us to link backtracks
1646             // to the reentry point for the subsequent alternative, we will only be generating any
1647             // code when backtracking the last alternative.
1648             case OpBodyAlternativeBegin:
1649             case OpBodyAlternativeNext: {
1650                 PatternAlternative* alternative = op.m_alternative;
1651
1652                 if (op.m_op == OpBodyAlternativeNext) {
1653                     PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1654                     m_checked += priorAlternative->m_minimumSize;
1655                 }
1656                 m_checked -= alternative->m_minimumSize;
1657
1658                 // Is this the last alternative? If not, then if we backtrack to this point we just
1659                 // need to jump to try to match the next alternative.
1660                 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
1661                     m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
1662                     break;
1663                 }
1664                 YarrOp& endOp = m_ops[op.m_nextOp];
1665
1666                 YarrOp* beginOp = &op;
1667                 while (beginOp->m_op != OpBodyAlternativeBegin) {
1668                     ASSERT(beginOp->m_op == OpBodyAlternativeNext);
1669                     beginOp = &m_ops[beginOp->m_previousOp];
1670                 }
1671
1672                 bool onceThrough = endOp.m_nextOp == notFound;
1673
1674                 // First, generate code to handle cases where we backtrack out of an attempted match
1675                 // of the last alternative. If this is a 'once through' set of alternatives then we
1676                 // have nothing to do - link this straight through to the End.
1677                 if (onceThrough)
1678                     m_backtrackingState.linkTo(endOp.m_reentry, this);
1679                 else {
1680                     // If we don't need to move the input position, and the pattern has a fixed size
1681                     // (in which case we omit the store of the start index until the pattern has matched)
1682                     // then we can just link the backtrack out of the last alternative straight to the
1683                     // head of the first alternative.
1684                     if (m_pattern.m_body->m_hasFixedSize
1685                         && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
1686                         && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
1687                         m_backtrackingState.linkTo(beginOp->m_reentry, this);
1688                     else {
1689                         // We need to generate a trampoline of code to execute before looping back
1690                         // around to the first alternative.
1691                         m_backtrackingState.link(this);
1692
1693                         // If the pattern size is not fixed, then store the start index, for use if we match.
1694                         if (!m_pattern.m_body->m_hasFixedSize) {
1695                             if (alternative->m_minimumSize == 1)
1696                                 store32(index, Address(output));
1697                             else {
1698                                 move(index, regT0);
1699                                 if (alternative->m_minimumSize)
1700                                     sub32(Imm32(alternative->m_minimumSize - 1), regT0);
1701                                 else
1702                                     add32(Imm32(1), regT0);
1703                                 store32(regT0, Address(output));
1704                             }
1705                         }
1706
1707                         // Generate code to loop. Check whether the last alternative is longer than the
1708                         // first (e.g. /a|xy/ or /a|xyz/).
1709                         if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
1710                             // We want to loop, and increment input position. If the delta is 1, it is
1711                             // already correctly incremented; if more than one, decrement as appropriate.
1712                             unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
1713                             ASSERT(delta);
1714                             if (delta != 1)
1715                                 sub32(Imm32(delta - 1), index);
1716                             jump(beginOp->m_reentry);
1717                         } else {
1718                             // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
1719                             // be sufficient input available to handle this, so just fall through.
1720                             unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
1721                             if (delta != 0xFFFFFFFFu) {
1722                                 // We need to check input because we are incrementing the input.
1723                                 add32(Imm32(delta + 1), index);
1724                                 checkInput().linkTo(beginOp->m_reentry, this);
1725                             }
1726                         }
1727                     }
1728                 }
1729
1730                 // We can reach this point in the code in two ways:
1731                 //  - Fallthrough from the code above (a repeating alternative backtracked out of its
1732                 //    last alternative, and did not have sufficient input to run the first).
1733                 //  - We will loop back up to the following label when a repeating alternative loops,
1734                 //    following a failed input check.
1735                 //
1736                 // Either way, we have just failed the input check for the first alternative.
1737                 Label firstInputCheckFailed(this);
1738
1739                 // Generate code to handle input check failures from alternatives except the last.
1740                 // prevOp is the alternative we're handling a bail out from (initially Begin), and
1741                 // nextOp is the alternative we will be attempting to reenter into.
1742                 // 
1743                 // We will link input check failures from the forwards matching path back to the code
1744                 // that can handle them.
1745                 YarrOp* prevOp = beginOp;
1746                 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
1747                 while (nextOp->m_op != OpBodyAlternativeEnd) {
1748                     prevOp->m_jumps.link(this);
1749
1750                     // We only get here if an input check fails; it is only worth checking again
1751                     // if the next alternative has a minimum size less than the last.
1752                     if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
1753                         // FIXME: if we added an extra label to YarrOp, we could avoid needing to
1754                         // subtract delta back out, and reduce this code. Should performance test
1755                         // the benefit of this.
1756                         unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
1757                         sub32(Imm32(delta), index);
1758                         Jump fail = jumpIfNoAvailableInput();
1759                         add32(Imm32(delta), index);
1760                         jump(nextOp->m_reentry);
1761                         fail.link(this);
1762                     } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
1763                         add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
1764                     prevOp = nextOp;
1765                     nextOp = &m_ops[nextOp->m_nextOp];
1766                 }
1767
1768                 // We fall through to here if there is insufficient input to run the last alternative.
1769
1770                 // If there is insufficient input to run the last alternative, then for 'once through'
1771                 // alternatives we are done - just jump back up into the forwards matching path at the End.
1772                 if (onceThrough) {
1773                     op.m_jumps.linkTo(endOp.m_reentry, this);
1774                     jump(endOp.m_reentry);
1775                     break;
1776                 }
1777
1778                 // For repeating alternatives, link any input check failure from the last alternative to
1779                 // this point.
1780                 op.m_jumps.link(this);
1781
1782                 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
1783
1784                 // Check for cases where input position is already incremented by 1 for the last
1785                 // alternative (this is particularly useful where the minimum size of the body
1786                 // disjunction is 0, e.g. /a*|b/).
1787                 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
1788                     // index is already incremented by 1, so just store it now!
1789                     store32(index, Address(output));
1790                     needsToUpdateMatchStart = false;
1791                 }
1792
1793                 // Check whether there is sufficient input to loop. Increment the input position by
1794                 // one, and check. Also add in the minimum disjunction size before checking - there
1795                 // is no point in looping if we're just going to fail all the input checks around
1796                 // the next iteration.
1797                 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
1798                 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
1799                     // If the last alternative had the same minimum size as the disjunction,
1800                     // just simply increment input pos by 1, no adjustment based on minimum size.
1801                     add32(Imm32(1), index);
1802                 } else {
1803                     // If the minimum for the last alternative was one greater than that
1804                     // for the disjunction, we've already progressed by 1 - nothing to do!
1805                     unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
1806                     if (delta)
1807                         sub32(Imm32(delta), index);
1808                 }
1809                 Jump matchFailed = jumpIfNoAvailableInput();
1810
1811                 if (needsToUpdateMatchStart) {
1812                     if (!m_pattern.m_body->m_minimumSize)
1813                         store32(index, Address(output));
1814                     else {
1815                         move(index, regT0);
1816                         sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
1817                         store32(regT0, Address(output));
1818                     }
1819                 }
1820
1821                 // Calculate how much more input the first alternative requires than the minimum
1822                 // for the body as a whole. If no more is needed then we don't need an additional
1823                 // input check here - jump straight back up to the start of the first alternative.
1824                 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
1825                     jump(beginOp->m_reentry);
1826                 else {
1827                     if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
1828                         add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
1829                     else
1830                         sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
1831                     checkInput().linkTo(beginOp->m_reentry, this);
1832                     jump(firstInputCheckFailed);
1833                 }
1834
1835                 // We jump to here if we iterate to the point that there is insufficient input to
1836                 // run any matches, and need to return a failure state from JIT code.
1837                 matchFailed.link(this);
1838
1839                 if (m_pattern.m_body->m_callFrameSize)
1840                     addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1841                 move(TrustedImm32(-1), returnRegister);
1842                 generateReturn();
1843                 break;
1844             }
1845             case OpBodyAlternativeEnd: {
1846                 // We should never backtrack back into a body disjunction.
1847                 ASSERT(m_backtrackingState.isEmpty());
1848
1849                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
1850                 m_checked += priorAlternative->m_minimumSize;
1851                 break;
1852             }
1853
1854             // OpSimpleNestedAlternativeBegin/Next/End
1855             // OpNestedAlternativeBegin/Next/End
1856             //
1857             // Generate code for when we backtrack back out of an alternative into
1858             // a Begin or Next node, or when the entry input count check fails. If
1859             // there are more alternatives we need to jump to the next alternative,
1860             // if not we backtrack back out of the current set of parentheses.
1861             //
1862             // In the case of non-simple nested alternatives we need to also link the
1863             // 'return address' appropriately to backtrack back out into the correct
1864             // alternative.
1865             case OpSimpleNestedAlternativeBegin:
1866             case OpSimpleNestedAlternativeNext:
1867             case OpNestedAlternativeBegin:
1868             case OpNestedAlternativeNext: {
1869                 YarrOp& nextOp = m_ops[op.m_nextOp];
1870                 bool isBegin = op.m_previousOp == notFound;
1871                 bool isLastAlternative = nextOp.m_nextOp == notFound;
1872                 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
1873                 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
1874
1875                 // Treat an input check failure the same as a failed match.
1876                 m_backtrackingState.append(op.m_jumps);
1877
1878                 // Set the backtracks to jump to the appropriate place. We may need
1879                 // to link the backtracks in one of three different ways depending on
1880                 // the type of alternative we are dealing with:
1881                 //  - A single alternative, with no siblings.
1882                 //  - The last alternative of a set of two or more.
1883                 //  - An alternative other than the last of a set of two or more.
1884                 //
1885                 // In the case of a single alternative on its own, we don't need to
1886                 // jump anywhere - if the alternative fails to match we can just
1887                 // continue to backtrack out of the parentheses without jumping.
1888                 //
1889                 // In the case of the last alternative in a set of more than one, we
1890                 // need to jump back out to the beginning. We'll do so by
1891                 // adding a jump to the End node's m_jumps list, and linking this
1892                 // when we come to generate the Begin node. For alternatives other
1893                 // than the last, we need to jump to the next alternative.
1894                 //
1895                 // If the alternative had adjusted the input position we must link
1896                 // backtracking to here, correct, and then jump on. If not we can
1897                 // link the backtracks directly to their destination.
1898                 if (op.m_checkAdjust) {
1899                     // Handle the cases where we need to link the backtracks here.
1900                     m_backtrackingState.link(this);
1901                     sub32(Imm32(op.m_checkAdjust), index);
1902                     if (!isLastAlternative) {
1903                         // An alternative that is not the last should jump to its successor.
1904                         jump(nextOp.m_reentry);
1905                     } else if (!isBegin) {
1906                         // The last of more than one alternative must jump back to the beginning.
1907                         nextOp.m_jumps.append(jump());
1908                     } else {
1909                         // A single alternative on its own can fall through.
1910                         m_backtrackingState.fallthrough();
1911                     }
1912                 } else {
1913                     // Handle the cases where we can link the backtracks directly to their destinations.
1914                     if (!isLastAlternative) {
1915                         // An alternative that is not the last should jump to its successor.
1916                         m_backtrackingState.linkTo(nextOp.m_reentry, this);
1917                     } else if (!isBegin) {
1918                         // The last of more than one alternative must jump back to the beginning.
1919                         m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
1920                     }
1921                     // In the case of a single alternative on its own do nothing - it can fall through.
1922                 }
1923
1924                 // At this point we've handled the backtracking back into this node.
1925                 // Now link any backtracks that need to jump to here.
1926
1927                 // For non-simple alternatives, link the alternative's 'return address'
1928                 // so that we backtrack back out into the previous alternative.
1929                 if (op.m_op == OpNestedAlternativeNext)
1930                     m_backtrackingState.append(op.m_returnAddress);
1931
1932                 // If there is more than one alternative, then the last alternative will
1933                 // have planted a jump to be linked to the end. This jump was added to the
1934                 // End node's m_jumps list. If we are back at the beginning, link it here.
1935                 if (isBegin) {
1936                     YarrOp* endOp = &m_ops[op.m_nextOp];
1937                     while (endOp->m_nextOp != notFound) {
1938                         ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
1939                         endOp = &m_ops[endOp->m_nextOp];
1940                     }
1941                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
1942                     m_backtrackingState.append(endOp->m_jumps);
1943                 }
1944
1945                 if (!isBegin) {
1946                     YarrOp& lastOp = m_ops[op.m_previousOp];
1947                     m_checked += lastOp.m_checkAdjust;
1948                 }
1949                 m_checked -= op.m_checkAdjust;
1950                 break;
1951             }
1952             case OpSimpleNestedAlternativeEnd:
1953             case OpNestedAlternativeEnd: {
1954                 PatternTerm* term = op.m_term;
1955
1956                 // If we backtrack into the end of a simple subpattern do nothing;
1957                 // just continue through into the last alternative. If we backtrack
1958                 // into the end of a non-simple set of alternatives we need to jump
1959                 // to the backtracking return address set up during generation.
1960                 if (op.m_op == OpNestedAlternativeEnd) {
1961                     m_backtrackingState.link(this);
1962
1963                     // Plant a jump to the return address.
1964                     unsigned parenthesesFrameLocation = term->frameLocation;
1965                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
1966                     if (term->quantityType != QuantifierFixedCount)
1967                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1968                     loadFromFrameAndJump(alternativeFrameLocation);
1969
1970                     // Link the DataLabelPtr associated with the end of the last
1971                     // alternative to this point.
1972                     m_backtrackingState.append(op.m_returnAddress);
1973                 }
1974
1975                 YarrOp& lastOp = m_ops[op.m_previousOp];
1976                 m_checked += lastOp.m_checkAdjust;
1977                 break;
1978             }
1979
1980             // OpParenthesesSubpatternOnceBegin/End
1981             //
1982             // When we are backtracking back out of a capturing subpattern we need
1983             // to clear the start index in the matches output array, to record that
1984             // this subpattern has not been captured.
1985             //
1986             // When backtracking back out of a Greedy quantified subpattern we need
1987             // to catch this, and try running the remainder of the alternative after
1988             // the subpattern again, skipping the parentheses.
1989             //
1990             // Upon backtracking back into a quantified set of parentheses we need to
1991             // check whether we were currently skipping the subpattern. If not, we
1992             // can backtrack into them; if we were, we need to either backtrack back
1993             // out of the start of the parentheses, or jump back to the forwards
1994             // matching start, depending on whether the match is Greedy or NonGreedy.
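            //
            // For example (illustrative), backtracking into the Begin node of the
            // greedy group in /(a)?b/ clears the capture's start index, stores -1 to
            // the frame slot to record that the group is now being skipped, and jumps
            // to the code just after the parentheses to retry the remainder of the
            // alternative.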
1995             case OpParenthesesSubpatternOnceBegin: {
1996                 PatternTerm* term = op.m_term;
1997                 ASSERT(term->quantityCount == 1);
1998
1999                 // We only need to backtrack to this point if capturing or greedy.
2000                 if (term->capture() || term->quantityType == QuantifierGreedy) {
2001                     m_backtrackingState.link(this);
2002
2003                     // If capturing, clear the capture (we only need to reset start).
2004                     if (term->capture())
2005                         store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int)));
2006
2007                     // If Greedy, jump to the end.
2008                     if (term->quantityType == QuantifierGreedy) {
2009                         // Clear the flag in the stackframe indicating we ran through the subpattern.
2010                         unsigned parenthesesFrameLocation = term->frameLocation;
2011                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
2012                         // Jump to after the parentheses, skipping the subpattern.
2013                         jump(m_ops[op.m_nextOp].m_reentry);
2014                         // A backtrack from after the parentheses, when skipping the subpattern,
2015                         // will jump back to here.
2016                         op.m_jumps.link(this);
2017                     }
2018
2019                     m_backtrackingState.fallthrough();
2020                 }
2021                 break;
2022             }
2023             case OpParenthesesSubpatternOnceEnd: {
2024                 PatternTerm* term = op.m_term;
2025
2026                 if (term->quantityType != QuantifierFixedCount) {
2027                     m_backtrackingState.link(this);
2028
2029                     // Check whether we should backtrack back into the parentheses, or if we
2030                     // are currently in a state where we had skipped over the subpattern
2031                     // (in which case the flag value on the stack will be -1).
2032                     unsigned parenthesesFrameLocation = term->frameLocation;
2033                     Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
2034
2035                     if (term->quantityType == QuantifierGreedy) {
2036                         // For Greedy parentheses, we skip after having already tried going
2037                         // through the subpattern, so if we get here we're done.
2038                         YarrOp& beginOp = m_ops[op.m_previousOp];
2039                         beginOp.m_jumps.append(hadSkipped);
2040                     } else {
2041                         // For NonGreedy parentheses, we try skipping the subpattern first,
2042                         // so if we get here we need to try running through the subpattern
2043                         // next. Jump back to the start of the parentheses in the forwards
2044                         // matching path.
2045                         ASSERT(term->quantityType == QuantifierNonGreedy);
2046                         YarrOp& beginOp = m_ops[op.m_previousOp];
2047                         hadSkipped.linkTo(beginOp.m_reentry, this);
2048                     }
2049
2050                     m_backtrackingState.fallthrough();
2051                 }
2052
2053                 m_backtrackingState.append(op.m_jumps);
2054                 break;
2055             }
2056
2057             // OpParenthesesSubpatternTerminalBegin/End
2058             //
2059             // Terminal subpatterns will always match - there is nothing after them to
2060             // force a backtrack, and they have a minimum count of 0, and as such will
2061             // always produce an acceptable result.
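            //
            // A typical candidate (illustrative) is a trailing non-capturing group
            // such as the (?:b)* in /a(?:b)*/, assuming the parser has marked the
            // parentheses as terminal.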
2062             case OpParenthesesSubpatternTerminalBegin: {
2063                 // We will backtrack to this point once the subpattern cannot match any
2064                 // more. Since matching nothing is still accepted as a successful match
2065                 // (we are Greedy quantified with a minimum of zero), jump back to the
2066                 // forwards matching path at the end.
2067                 YarrOp& endOp = m_ops[op.m_nextOp];
2068                 m_backtrackingState.linkTo(endOp.m_reentry, this);
2069                 break;
2070             }
2071             case OpParenthesesSubpatternTerminalEnd:
2072                 // We should never be backtracking to here (hence the 'terminal' in the name).
2073                 ASSERT(m_backtrackingState.isEmpty());
2074                 m_backtrackingState.append(op.m_jumps);
2075                 break;
2076
2077             // OpParentheticalAssertionBegin/End
2078             case OpParentheticalAssertionBegin: {
2079                 PatternTerm* term = op.m_term;
2080                 YarrOp& endOp = m_ops[op.m_nextOp];
2081
2082                 // We need to handle the backtracks upon backtracking back out
2083                 // of a parenthetical assertion if either we need to correct
2084                 // the input index, or the assertion was inverted.
2085                 if (op.m_checkAdjust || term->invert()) {
2086                     m_backtrackingState.link(this);
2087
2088                     if (op.m_checkAdjust)
2089                         add32(Imm32(op.m_checkAdjust), index);
2090
2091                     // In an inverted assertion failure to match the subpattern
2092                     // is treated as a successful match - jump to the end of the
2093                     // subpattern. We already have adjusted the input position
2094                     // back to that before the assertion, which is correct.
2095                     if (term->invert())
2096                         jump(endOp.m_reentry);
2097
2098                     m_backtrackingState.fallthrough();
2099                 }
2100
2101                 // The End node's jump list will contain any backtracks into
2102                 // the end of the assertion. Also, if inverted, we will have
2103                 // added the failure caused by a successful match to this.
2104                 m_backtrackingState.append(endOp.m_jumps);
2105
2106                 m_checked += op.m_checkAdjust;
2107                 break;
2108             }
2109             case OpParentheticalAssertionEnd: {
2110                 // FIXME: We should really be clearing any nested subpattern
2111                 // matches on bailing out from after the pattern. Firefox has
2112                 // this bug too (presumably because they use YARR!)
2113
2114                 // Never backtrack into an assertion; later failures bail to before the begin.
2115                 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
2116
2117                 YarrOp& lastOp = m_ops[op.m_previousOp];
2118                 m_checked -= lastOp.m_checkAdjust;
2119                 break;
2120             }
2121
2122             case OpMatchFailed:
2123                 break;
2124             }
2125
2126         } while (opIndex);
2127     }
2128
2129     // Compilation methods:
2130     // ====================
2131
2132     // opCompileParenthesesSubpattern
2133     // Emits ops for a subpattern (set of parentheses). These consist
2134     // of a set of alternatives wrapped in an outer set of nodes for
2135     // the parentheses.
2136     // Supported types of parentheses are 'Once' (quantityCount == 1)
2137     // and 'Terminal' (non-capturing parentheses quantified as greedy
2138     // and infinite).
2139     // Alternatives will use the 'Simple' set of ops if either the
2140     // subpattern is terminal (in which case we will never need to
2141     // backtrack), or if the subpattern only contains one alternative.
2142     void opCompileParenthesesSubpattern(PatternTerm* term)
2143     {
2144         YarrOpCode parenthesesBeginOpCode;
2145         YarrOpCode parenthesesEndOpCode;
2146         YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
2147         YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
2148         YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
2149
2150         // We can currently only compile quantity 1 subpatterns that are
2151         // not copies. We generate a copy in the case of a range quantifier,
2152         // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
2153         // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). The problem
2154         // comes where the subpattern is capturing, in which case we would
2155         // need to restore the capture from the first subpattern upon a
2156         // failure in the second.
2157         if (term->quantityCount == 1 && !term->parentheses.isCopy) {
2158             // Select the 'Once' nodes.
2159             parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
2160             parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
2161
2162             // If there is more than one alternative we cannot use the 'simple' nodes.
2163             if (term->parentheses.disjunction->m_alternatives.size() != 1) {
2164                 alternativeBeginOpCode = OpNestedAlternativeBegin;
2165                 alternativeNextOpCode = OpNestedAlternativeNext;
2166                 alternativeEndOpCode = OpNestedAlternativeEnd;
2167             }
2168         } else if (term->parentheses.isTerminal) {
2169             // Select the 'Terminal' nodes.
2170             parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
2171             parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
2172         } else {
2173             // This subpattern is not supported by the JIT.
2174             m_shouldFallBack = true;
2175             return;
2176         }
2177
2178         size_t parenBegin = m_ops.size();
2179         m_ops.append(parenthesesBeginOpCode);
2180
2181         m_ops.append(alternativeBeginOpCode);
2182         m_ops.last().m_previousOp = notFound;
2183         m_ops.last().m_term = term;
2184         Vector<PatternAlternative*>& alternatives =  term->parentheses.disjunction->m_alternatives;
2185         for (unsigned i = 0; i < alternatives.size(); ++i) {
2186             size_t lastOpIndex = m_ops.size() - 1;
2187
2188             PatternAlternative* nestedAlternative = alternatives[i];
2189             opCompileAlternative(nestedAlternative);
2190
2191             size_t thisOpIndex = m_ops.size();
2192             m_ops.append(YarrOp(alternativeNextOpCode));
2193
2194             YarrOp& lastOp = m_ops[lastOpIndex];
2195             YarrOp& thisOp = m_ops[thisOpIndex];
2196
2197             lastOp.m_alternative = nestedAlternative;
2198             lastOp.m_nextOp = thisOpIndex;
2199             thisOp.m_previousOp = lastOpIndex;
2200             thisOp.m_term = term;
2201         }
2202         YarrOp& lastOp = m_ops.last();
2203         ASSERT(lastOp.m_op == alternativeNextOpCode);
2204         lastOp.m_op = alternativeEndOpCode;
2205         lastOp.m_alternative = 0;
2206         lastOp.m_nextOp = notFound;
2207
2208         size_t parenEnd = m_ops.size();
2209         m_ops.append(parenthesesEndOpCode);
2210
2211         m_ops[parenBegin].m_term = term;
2212         m_ops[parenBegin].m_previousOp = notFound;
2213         m_ops[parenBegin].m_nextOp = parenEnd;
2214         m_ops[parenEnd].m_term = term;
2215         m_ops[parenEnd].m_previousOp = parenBegin;
2216         m_ops[parenEnd].m_nextOp = notFound;
2217     }
2218
2219     // opCompileParentheticalAssertion
2220     // Emits ops for a parenthetical assertion. These consist of an
2221     // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
2222     // the alternatives, with these wrapped by an outer pair of
2223     // OpParentheticalAssertionBegin/End nodes.
2224     // We can always use the OpSimpleNestedAlternative nodes in the
2225     // case of parenthetical assertions since these only ever match
2226     // once, and will never backtrack back into the assertion.
2227     void opCompileParentheticalAssertion(PatternTerm* term)
2228     {
2229         size_t parenBegin = m_ops.size();
2230         m_ops.append(OpParentheticalAssertionBegin);
2231
2232         m_ops.append(OpSimpleNestedAlternativeBegin);
2233         m_ops.last().m_previousOp = notFound;
2234         m_ops.last().m_term = term;
2235         Vector<PatternAlternative*>& alternatives =  term->parentheses.disjunction->m_alternatives;
2236         for (unsigned i = 0; i < alternatives.size(); ++i) {
2237             size_t lastOpIndex = m_ops.size() - 1;
2238
2239             PatternAlternative* nestedAlternative = alternatives[i];
2240             opCompileAlternative(nestedAlternative);
2241
2242             size_t thisOpIndex = m_ops.size();
2243             m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
2244
2245             YarrOp& lastOp = m_ops[lastOpIndex];
2246             YarrOp& thisOp = m_ops[thisOpIndex];
2247
2248             lastOp.m_alternative = nestedAlternative;
2249             lastOp.m_nextOp = thisOpIndex;
2250             thisOp.m_previousOp = lastOpIndex;
2251             thisOp.m_term = term;
2252         }
2253         YarrOp& lastOp = m_ops.last();
2254         ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
2255         lastOp.m_op = OpSimpleNestedAlternativeEnd;
2256         lastOp.m_alternative = 0;
2257         lastOp.m_nextOp = notFound;
2258
2259         size_t parenEnd = m_ops.size();
2260         m_ops.append(OpParentheticalAssertionEnd);
2261
2262         m_ops[parenBegin].m_term = term;
2263         m_ops[parenBegin].m_previousOp = notFound;
2264         m_ops[parenBegin].m_nextOp = parenEnd;
2265         m_ops[parenEnd].m_term = term;
2266         m_ops[parenEnd].m_previousOp = parenBegin;
2267         m_ops[parenEnd].m_nextOp = notFound;
2268     }
2269
2270     // opCompileAlternative
2271     // Called to emit nodes for all terms in an alternative.
2272     void opCompileAlternative(PatternAlternative* alternative)
2273     {
2274         optimizeAlternative(alternative);
2275
2276         for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
2277             PatternTerm* term = &alternative->m_terms[i];
2278
2279             switch (term->type) {
2280             case PatternTerm::TypeParenthesesSubpattern:
2281                 opCompileParenthesesSubpattern(term);
2282                 break;
2283
2284             case PatternTerm::TypeParentheticalAssertion:
2285                 opCompileParentheticalAssertion(term);
2286                 break;
2287
2288             default:
2289                 m_ops.append(term);
2290             }
2291         }
2292     }
2293
2294     // opCompileBody
2295     // This method compiles the body disjunction of the regular expression.
2296     // The body consists of two sets of alternatives - zero or more 'once
2297     // through' (BOL anchored) alternatives, followed by zero or more
2298     // repeated alternatives.
2299     // For each of these two sets of alternatives, if not empty they will be
2300     // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
2301     // 'begin' node referencing the first alternative, and 'next' nodes
2302     // referencing any further alternatives). The begin/next/end nodes are
2303     // linked together in a doubly linked list. In the case of repeating
2304     // alternatives, the end node is also linked back to the beginning.
2305     // If no repeating alternatives exist, then an OpMatchFailed node exists
2306     // to return the failing result.
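    //
    // For example (illustrative), /^a|b/ (non-multiline) produces:
    //   OpBodyAlternativeBegin, OpTerms for '^' and 'a', OpBodyAlternativeEnd   (once through)
    //   OpBodyAlternativeBegin, OpTerm for 'b', OpBodyAlternativeEnd            (repeating;
    //   its End node's m_nextOp links back to the repeating Begin node)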
2307     void opCompileBody(PatternDisjunction* disjunction)
2308     {
2309         Vector<PatternAlternative*>& alternatives =  disjunction->m_alternatives;
2310         size_t currentAlternativeIndex = 0;
2311
2312         // Emit the 'once through' alternatives.
2313         if (alternatives.size() && alternatives[0]->onceThrough()) {
2314             m_ops.append(YarrOp(OpBodyAlternativeBegin));
2315             m_ops.last().m_previousOp = notFound;
2316
2317             do {
2318                 size_t lastOpIndex = m_ops.size() - 1;
2319                 PatternAlternative* alternative = alternatives[currentAlternativeIndex];
2320                 opCompileAlternative(alternative);
2321
2322                 size_t thisOpIndex = m_ops.size();
2323                 m_ops.append(YarrOp(OpBodyAlternativeNext));
2324
2325                 YarrOp& lastOp = m_ops[lastOpIndex];
2326                 YarrOp& thisOp = m_ops[thisOpIndex];
2327
2328                 lastOp.m_alternative = alternative;
2329                 lastOp.m_nextOp = thisOpIndex;
2330                 thisOp.m_previousOp = lastOpIndex;
2331                 
2332                 ++currentAlternativeIndex;
2333             } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
2334
2335             YarrOp& lastOp = m_ops.last();
2336
2337             ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2338             lastOp.m_op = OpBodyAlternativeEnd;
2339             lastOp.m_alternative = 0;
2340             lastOp.m_nextOp = notFound;
2341         }
2342
2343         if (currentAlternativeIndex == alternatives.size()) {
2344             m_ops.append(YarrOp(OpMatchFailed));
2345             return;
2346         }
2347
2348         // Emit the repeated alternatives.
2349         size_t repeatLoop = m_ops.size();
2350         m_ops.append(YarrOp(OpBodyAlternativeBegin));
2351         m_ops.last().m_previousOp = notFound;
2352         do {
2353             size_t lastOpIndex = m_ops.size() - 1;
2354             PatternAlternative* alternative = alternatives[currentAlternativeIndex];
2355             ASSERT(!alternative->onceThrough());
2356             opCompileAlternative(alternative);
2357
2358             size_t thisOpIndex = m_ops.size();
2359             m_ops.append(YarrOp(OpBodyAlternativeNext));
2360
2361             YarrOp& lastOp = m_ops[lastOpIndex];
2362             YarrOp& thisOp = m_ops[thisOpIndex];
2363
2364             lastOp.m_alternative = alternative;
2365             lastOp.m_nextOp = thisOpIndex;
2366             thisOp.m_previousOp = lastOpIndex;
2367             
2368             ++currentAlternativeIndex;
2369         } while (currentAlternativeIndex < alternatives.size());
2370         YarrOp& lastOp = m_ops.last();
2371         ASSERT(lastOp.m_op == OpBodyAlternativeNext);
2372         lastOp.m_op = OpBodyAlternativeEnd;
2373         lastOp.m_alternative = 0;
2374         lastOp.m_nextOp = repeatLoop;
2375     }
2376
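    // generateEnter / generateReturn
    // Emit the function prologue and epilogue: set up a frame pointer where
    // one is used, save and restore the callee-saved registers that the
    // generated code clobbers, and on X86 load any arguments that arrive on
    // the stack.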
2377     void generateEnter()
2378     {
2379 #if CPU(X86_64)
2380         push(X86Registers::ebp);
2381         move(stackPointerRegister, X86Registers::ebp);
2382         push(X86Registers::ebx);
2383 #elif CPU(X86)
2384         push(X86Registers::ebp);
2385         move(stackPointerRegister, X86Registers::ebp);
2386         // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
2387         push(X86Registers::ebx);
2388         push(X86Registers::edi);
2389         push(X86Registers::esi);
        // Load arguments that arrive on the stack (2 = saved ebp + return
        // address, so they start at ebp + 2 * sizeof(void*)). With MSVC all
        // four arguments are on the stack; otherwise only 'output' is, the
        // first three having already arrived in registers.
2391     #if COMPILER(MSVC)
2392         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
2393         loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
2394         loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
2395         loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
2396     #else
2397         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
2398     #endif
2399 #elif CPU(ARM)
2400         push(ARMRegisters::r4);
2401         push(ARMRegisters::r5);
2402         push(ARMRegisters::r6);
2403 #if CPU(ARM_TRADITIONAL)
2404         push(ARMRegisters::r8); // scratch register
2405 #endif
2406         move(ARMRegisters::r3, output);
2407 #elif CPU(SH4)
2408         push(SH4Registers::r11);
2409         push(SH4Registers::r13);
2410 #elif CPU(MIPS)
2411         // Do nothing.
2412 #endif
2413     }
2414
2415     void generateReturn()
2416     {
2417 #if CPU(X86_64)
2418         pop(X86Registers::ebx);
2419         pop(X86Registers::ebp);
2420 #elif CPU(X86)
2421         pop(X86Registers::esi);
2422         pop(X86Registers::edi);
2423         pop(X86Registers::ebx);
2424         pop(X86Registers::ebp);
2425 #elif CPU(ARM)
2426 #if CPU(ARM_TRADITIONAL)
2427         pop(ARMRegisters::r8); // scratch register
2428 #endif
2429         pop(ARMRegisters::r6);
2430         pop(ARMRegisters::r5);
2431         pop(ARMRegisters::r4);
2432 #elif CPU(SH4)
2433         pop(SH4Registers::r13);
2434         pop(SH4Registers::r11);
2435 #elif CPU(MIPS)
2436         // Do nothing
2437 #endif
2438         ret();
2439     }
2440
2441 public:
2442     YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
2443         : m_pattern(pattern)
2444         , m_charSize(charSize)
2445         , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
2446         , m_shouldFallBack(false)
2447         , m_checked(0)
2448     {
2449     }
2450
2451     void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
2452     {
2453         generateEnter();
2454
2455         if (!m_pattern.m_body->m_hasFixedSize)
2456             store32(index, Address(output));
2457
2458         if (m_pattern.m_body->m_callFrameSize)
2459             subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
2460
2461         // Compile the pattern to the internal 'YarrOp' representation.
2462         opCompileBody(m_pattern.m_body);
2463
2464         // If we encountered anything we can't handle in the JIT code
2465         // (e.g. backreferences) then return early.
2466         if (m_shouldFallBack) {
2467             jitObject.setFallBack(true);
2468             return;
2469         }
2470
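        // Generate the code: first the forwards matching path, then the
        // backtracking path.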
2471         generate();
2472         backtrack();
2473
2474         // Link & finalize the code.
2475         LinkBuffer linkBuffer(*globalData, this);
2476         m_backtrackingState.linkDataLabels(linkBuffer);
2477         if (m_charSize == Char8)
2478             jitObject.set8BitCode(linkBuffer.finalizeCode());
2479         else
2480             jitObject.set16BitCode(linkBuffer.finalizeCode());
2481         jitObject.setFallBack(m_shouldFallBack);
2482     }
2483
2484 private:
2485     YarrPattern& m_pattern;
2486
2487     YarrCharSize m_charSize;
2488
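    // Scale applied when indexing into the subject string: one byte per
    // character for 8-bit subjects, two bytes per character for 16-bit
    // subjects.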
2489     Scale m_charScale;
2490
2491     // Used to detect regular expression constructs that are not currently
2492     // supported in the JIT; fall back to the interpreter when this is detected.
2493     bool m_shouldFallBack;
2494
2495     // The regular expression expressed as a linear sequence of operations.
2496     Vector<YarrOp, 128> m_ops;
2497
2498     // This records the current input offset being applied due to the current
2499     // set of alternatives we are nested within. E.g. when matching the
2500     // character 'b' within the regular expression /abc/, we will know that
2501     // the minimum size for the alternative is 3, checked upon entry to the
2502     // alternative, and that 'b' is at offset 1 from the start, and as such
2503     // when matching 'b' we need to apply an offset of -2 to the load.
2504     //
2505     // FIXME: This should go away. Rather than tracking this value throughout
2506     // code generation, we should gather this information up front & store it
2507     // on the YarrOp structure.
2508     int m_checked;
2509
2510     // This class records state whilst generating the backtracking path of code.
2511     BacktrackingState m_backtrackingState;
2512 };
2513
2514 void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject)
2515 {
2516     YarrGenerator(pattern, charSize).compile(globalData, jitObject);
2517 }
2518
2519 int execute(YarrCodeBlock& jitObject, const char* input, unsigned start, unsigned length, int* output)
2520 {
2521     return jitObject.execute(input, start, length, output);
2522 }
2523
2524 int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
2525 {
2526     return jitObject.execute(input, start, length, output);
2527 }
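
// Illustrative usage sketch (hypothetical caller, not part of this file; the
// isFallBack() accessor is assumed to mirror setFallBack() above, and the
// 'output' buffer is assumed to provide (numSubpatterns + 1) * 2 ints for the
// match start/end pairs):
//
//     YarrCodeBlock codeBlock;
//     jitCompile(pattern, Char8, globalData, codeBlock);
//     if (!codeBlock.isFallBack()) {
//         int result = execute(codeBlock, subject, 0, subjectLength, output);
//         // result is expected to be the index of the match start, or a
//         // negative value on failure; captured ranges land in 'output'.
//     }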
2528
2529 }}
2530
2531 #endif