source: code/trunk/vendor/github.com/dlclark/regexp2/runner.go@ 67

Last change on this file since 67 was 67, checked in by Izuru Yakumo, 23 months ago

Use vendored modules

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 34.7 KB
Line 
1package regexp2
2
3import (
4 "bytes"
5 "errors"
6 "fmt"
7 "math"
8 "strconv"
9 "strings"
10 "time"
11 "unicode"
12
13 "github.com/dlclark/regexp2/syntax"
14)
15
16type runner struct {
17 re *Regexp
18 code *syntax.Code
19
20 runtextstart int // starting point for search
21
22 runtext []rune // text to search
23 runtextpos int // current position in text
24 runtextend int
25
26 // The backtracking stack. Opcodes use this to store data regarding
27 // what they have matched and where to backtrack to. Each "frame" on
28 // the stack takes the form of [CodePosition Data1 Data2...], where
29 // CodePosition is the position of the current opcode and
30 // the data values are all optional. The CodePosition can be negative, and
31 // these values (also called "back2") are used by the BranchMark family of opcodes
32 // to indicate whether they are backtracking after a successful or failed
33 // match.
34 // When we backtrack, we pop the CodePosition off the stack, set the current
35 // instruction pointer to that code position, and mark the opcode
36 // with a backtracking flag ("Back"). Each opcode then knows how to
37 // handle its own data.
38 runtrack []int
39 runtrackpos int
40
41 // This stack is used to track text positions across different opcodes.
42 // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
43 // pair. SetMark records the text position before we match a*b. Then
44 // CaptureMark uses that position to figure out where the capture starts.
45 // Opcodes which push onto this stack are always paired with other opcodes
46 // which will pop the value from it later. A successful match should mean
47 // that this stack is empty.
48 runstack []int
49 runstackpos int
50
51 // The crawl stack is used to keep track of captures. Every time a group
52 // has a capture, we push its group number onto the runcrawl stack. In
53 // the case of a balanced match, we push BOTH groups onto the stack.
54 runcrawl []int
55 runcrawlpos int
56
57 runtrackcount int // count of states that may do backtracking
58
59 runmatch *Match // result object
60
61 ignoreTimeout bool
62 timeout time.Duration // timeout in milliseconds (needed for actual)
63 timeoutChecksToSkip int
64 timeoutAt time.Time
65
66 operator syntax.InstOp
67 codepos int
68 rightToLeft bool
69 caseInsensitive bool
70}
71
72// run searches for matches and can continue from the previous match
73//
74// quick is usually false, but can be true to not return matches, just put it in caches
75// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
76// input is the string to search for our regex pattern
77func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
78
79 // get a cached runner
80 runner := re.getRunner()
81 defer re.putRunner(runner)
82
83 if textstart < 0 {
84 if re.RightToLeft() {
85 textstart = len(input)
86 } else {
87 textstart = 0
88 }
89 }
90
91 return runner.scan(input, textstart, quick, re.MatchTimeout)
92}
93
94// Scans the string to find the first match. Uses the Match object
95// both to feed text in and as a place to store matches that come out.
96//
97// All the action is in the Go() method. Our
98// responsibility is to load up the class members before
99// calling Go.
100//
101// The optimizer can compute a set of candidate starting characters,
102// and we could use a separate method Skip() that will quickly scan past
103// any characters that we know can't match.
104func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
105 r.timeout = timeout
106 r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
107 r.runtextstart = textstart
108 r.runtext = rt
109 r.runtextend = len(rt)
110
111 stoppos := r.runtextend
112 bump := 1
113
114 if r.re.RightToLeft() {
115 bump = -1
116 stoppos = 0
117 }
118
119 r.runtextpos = textstart
120 initted := false
121
122 r.startTimeoutWatch()
123 for {
124 if r.re.Debug() {
125 //fmt.Printf("\nSearch content: %v\n", string(r.runtext))
126 fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
127 fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
128 }
129
130 if r.findFirstChar() {
131 if err := r.checkTimeout(); err != nil {
132 return nil, err
133 }
134
135 if !initted {
136 r.initMatch()
137 initted = true
138 }
139
140 if r.re.Debug() {
141 fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
142 }
143
144 if err := r.execute(); err != nil {
145 return nil, err
146 }
147
148 if r.runmatch.matchcount[0] > 0 {
149 // We'll return a match even if it touches a previous empty match
150 return r.tidyMatch(quick), nil
151 }
152
153 // reset state for another go
154 r.runtrackpos = len(r.runtrack)
155 r.runstackpos = len(r.runstack)
156 r.runcrawlpos = len(r.runcrawl)
157 }
158
159 // failure!
160
161 if r.runtextpos == stoppos {
162 r.tidyMatch(true)
163 return nil, nil
164 }
165
166 // Recognize leading []* and various anchors, and bump on failure accordingly
167
168 // r.bump by one and start again
169
170 r.runtextpos += bump
171 }
172 // We never get here
173}
174
175func (r *runner) execute() error {
176
177 r.goTo(0)
178
179 for {
180
181 if r.re.Debug() {
182 r.dumpState()
183 }
184
185 if err := r.checkTimeout(); err != nil {
186 return err
187 }
188
189 switch r.operator {
190 case syntax.Stop:
191 return nil
192
193 case syntax.Nothing:
194 break
195
196 case syntax.Goto:
197 r.goTo(r.operand(0))
198 continue
199
200 case syntax.Testref:
201 if !r.runmatch.isMatched(r.operand(0)) {
202 break
203 }
204 r.advance(1)
205 continue
206
207 case syntax.Lazybranch:
208 r.trackPush1(r.textPos())
209 r.advance(1)
210 continue
211
212 case syntax.Lazybranch | syntax.Back:
213 r.trackPop()
214 r.textto(r.trackPeek())
215 r.goTo(r.operand(0))
216 continue
217
218 case syntax.Setmark:
219 r.stackPush(r.textPos())
220 r.trackPush()
221 r.advance(0)
222 continue
223
224 case syntax.Nullmark:
225 r.stackPush(-1)
226 r.trackPush()
227 r.advance(0)
228 continue
229
230 case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
231 r.stackPop()
232 break
233
234 case syntax.Getmark:
235 r.stackPop()
236 r.trackPush1(r.stackPeek())
237 r.textto(r.stackPeek())
238 r.advance(0)
239 continue
240
241 case syntax.Getmark | syntax.Back:
242 r.trackPop()
243 r.stackPush(r.trackPeek())
244 break
245
246 case syntax.Capturemark:
247 if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
248 break
249 }
250 r.stackPop()
251 if r.operand(1) != -1 {
252 r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
253 } else {
254 r.capture(r.operand(0), r.stackPeek(), r.textPos())
255 }
256 r.trackPush1(r.stackPeek())
257
258 r.advance(2)
259
260 continue
261
262 case syntax.Capturemark | syntax.Back:
263 r.trackPop()
264 r.stackPush(r.trackPeek())
265 r.uncapture()
266 if r.operand(0) != -1 && r.operand(1) != -1 {
267 r.uncapture()
268 }
269
270 break
271
272 case syntax.Branchmark:
273 r.stackPop()
274
275 matched := r.textPos() - r.stackPeek()
276
277 if matched != 0 { // Nonempty match -> loop now
278 r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
279 r.stackPush(r.textPos()) // Make new mark
280 r.goTo(r.operand(0)) // Loop
281 } else { // Empty match -> straight now
282 r.trackPushNeg1(r.stackPeek()) // Save old mark
283 r.advance(1) // Straight
284 }
285 continue
286
287 case syntax.Branchmark | syntax.Back:
288 r.trackPopN(2)
289 r.stackPop()
290 r.textto(r.trackPeekN(1)) // Recall position
291 r.trackPushNeg1(r.trackPeek()) // Save old mark
292 r.advance(1) // Straight
293 continue
294
295 case syntax.Branchmark | syntax.Back2:
296 r.trackPop()
297 r.stackPush(r.trackPeek()) // Recall old mark
298 break // Backtrack
299
300 case syntax.Lazybranchmark:
301 {
302 // We hit this the first time through a lazy loop and after each
303 // successful match of the inner expression. It simply continues
304 // on and doesn't loop.
305 r.stackPop()
306
307 oldMarkPos := r.stackPeek()
308
309 if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
310 if oldMarkPos != -1 {
311 r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
312 } else {
313 r.trackPush2(r.textPos(), r.textPos())
314 }
315 } else {
316 // The inner expression found an empty match, so we'll go directly to 'back2' if we
317 // backtrack. In this case, we need to push something on the stack, since back2 pops.
318 // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
319 // position associated with that empty match.
320 r.stackPush(oldMarkPos)
321
322 r.trackPushNeg1(r.stackPeek()) // Save old mark
323 }
324 r.advance(1)
325 continue
326 }
327
328 case syntax.Lazybranchmark | syntax.Back:
329
330 // After the first time, Lazybranchmark | syntax.Back occurs
331 // with each iteration of the loop, and therefore with every attempted
332 // match of the inner expression. We'll try to match the inner expression,
333 // then go back to Lazybranchmark if successful. If the inner expression
334 // fails, we go to Lazybranchmark | syntax.Back2
335
336 r.trackPopN(2)
337 pos := r.trackPeekN(1)
338 r.trackPushNeg1(r.trackPeek()) // Save old mark
339 r.stackPush(pos) // Make new mark
340 r.textto(pos) // Recall position
341 r.goTo(r.operand(0)) // Loop
342 continue
343
344 case syntax.Lazybranchmark | syntax.Back2:
345 // The lazy loop has failed. We'll do a true backtrack and
346 // start over before the lazy loop.
347 r.stackPop()
348 r.trackPop()
349 r.stackPush(r.trackPeek()) // Recall old mark
350 break
351
352 case syntax.Setcount:
353 r.stackPush2(r.textPos(), r.operand(0))
354 r.trackPush()
355 r.advance(1)
356 continue
357
358 case syntax.Nullcount:
359 r.stackPush2(-1, r.operand(0))
360 r.trackPush()
361 r.advance(1)
362 continue
363
364 case syntax.Setcount | syntax.Back:
365 r.stackPopN(2)
366 break
367
368 case syntax.Nullcount | syntax.Back:
369 r.stackPopN(2)
370 break
371
372 case syntax.Branchcount:
373 // r.stackPush:
374 // 0: Mark
375 // 1: Count
376
377 r.stackPopN(2)
378 mark := r.stackPeek()
379 count := r.stackPeekN(1)
380 matched := r.textPos() - mark
381
382 if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
383 r.trackPushNeg2(mark, count) // Save old mark, count
384 r.advance(2) // Straight
385 } else { // Nonempty match -> count+loop now
386 r.trackPush1(mark) // remember mark
387 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
388 r.goTo(r.operand(0)) // Loop
389 }
390 continue
391
392 case syntax.Branchcount | syntax.Back:
393 // r.trackPush:
394 // 0: Previous mark
395 // r.stackPush:
396 // 0: Mark (= current pos, discarded)
397 // 1: Count
398 r.trackPop()
399 r.stackPopN(2)
400 if r.stackPeekN(1) > 0 { // Positive -> can go straight
401 r.textto(r.stackPeek()) // Zap to mark
402 r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
403 r.advance(2) // Straight
404 continue
405 }
406 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
407 break
408
409 case syntax.Branchcount | syntax.Back2:
410 // r.trackPush:
411 // 0: Previous mark
412 // 1: Previous count
413 r.trackPopN(2)
414 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
415 break // Backtrack
416
417 case syntax.Lazybranchcount:
418 // r.stackPush:
419 // 0: Mark
420 // 1: Count
421
422 r.stackPopN(2)
423 mark := r.stackPeek()
424 count := r.stackPeekN(1)
425
426 if count < 0 { // Negative count -> loop now
427 r.trackPushNeg1(mark) // Save old mark
428 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
429 r.goTo(r.operand(0)) // Loop
430 } else { // Nonneg count -> straight now
431 r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
432 r.advance(2) // Straight
433 }
434 continue
435
436 case syntax.Lazybranchcount | syntax.Back:
437 // r.trackPush:
438 // 0: Mark
439 // 1: Count
440 // 2: r.textPos
441
442 r.trackPopN(3)
443 mark := r.trackPeek()
444 textpos := r.trackPeekN(2)
445
446 if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
447 r.textto(textpos) // Recall position
448 r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
449 r.trackPushNeg1(mark) // Save old mark
450 r.goTo(r.operand(0)) // Loop
451 continue
452 } else { // Max loops or empty match -> backtrack
453 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
454 break // backtrack
455 }
456
457 case syntax.Lazybranchcount | syntax.Back2:
458 // r.trackPush:
459 // 0: Previous mark
460 // r.stackPush:
461 // 0: Mark (== current pos, discarded)
462 // 1: Count
463 r.trackPop()
464 r.stackPopN(2)
465 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
466 break // Backtrack
467
468 case syntax.Setjump:
469 r.stackPush2(r.trackpos(), r.crawlpos())
470 r.trackPush()
471 r.advance(0)
472 continue
473
474 case syntax.Setjump | syntax.Back:
475 r.stackPopN(2)
476 break
477
478 case syntax.Backjump:
479 // r.stackPush:
480 // 0: Saved trackpos
481 // 1: r.crawlpos
482 r.stackPopN(2)
483 r.trackto(r.stackPeek())
484
485 for r.crawlpos() != r.stackPeekN(1) {
486 r.uncapture()
487 }
488
489 break
490
491 case syntax.Forejump:
492 // r.stackPush:
493 // 0: Saved trackpos
494 // 1: r.crawlpos
495 r.stackPopN(2)
496 r.trackto(r.stackPeek())
497 r.trackPush1(r.stackPeekN(1))
498 r.advance(0)
499 continue
500
501 case syntax.Forejump | syntax.Back:
502 // r.trackPush:
503 // 0: r.crawlpos
504 r.trackPop()
505
506 for r.crawlpos() != r.trackPeek() {
507 r.uncapture()
508 }
509
510 break
511
512 case syntax.Bol:
513 if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
514 break
515 }
516 r.advance(0)
517 continue
518
519 case syntax.Eol:
520 if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
521 break
522 }
523 r.advance(0)
524 continue
525
526 case syntax.Boundary:
527 if !r.isBoundary(r.textPos(), 0, r.runtextend) {
528 break
529 }
530 r.advance(0)
531 continue
532
533 case syntax.Nonboundary:
534 if r.isBoundary(r.textPos(), 0, r.runtextend) {
535 break
536 }
537 r.advance(0)
538 continue
539
540 case syntax.ECMABoundary:
541 if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
542 break
543 }
544 r.advance(0)
545 continue
546
547 case syntax.NonECMABoundary:
548 if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
549 break
550 }
551 r.advance(0)
552 continue
553
554 case syntax.Beginning:
555 if r.leftchars() > 0 {
556 break
557 }
558 r.advance(0)
559 continue
560
561 case syntax.Start:
562 if r.textPos() != r.textstart() {
563 break
564 }
565 r.advance(0)
566 continue
567
568 case syntax.EndZ:
569 rchars := r.rightchars()
570 if rchars > 1 {
571 break
572 }
573 // RE2 and EcmaScript define $ as "asserts position at the end of the string"
574 // PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
575 if (r.re.options & (RE2 | ECMAScript)) != 0 {
576 // RE2/Ecmascript mode
577 if rchars > 0 {
578 break
579 }
580 } else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
581 // "regular" mode
582 break
583 }
584
585 r.advance(0)
586 continue
587
588 case syntax.End:
589 if r.rightchars() > 0 {
590 break
591 }
592 r.advance(0)
593 continue
594
595 case syntax.One:
596 if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
597 break
598 }
599
600 r.advance(1)
601 continue
602
603 case syntax.Notone:
604 if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
605 break
606 }
607
608 r.advance(1)
609 continue
610
611 case syntax.Set:
612
613 if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
614 break
615 }
616
617 r.advance(1)
618 continue
619
620 case syntax.Multi:
621 if !r.runematch(r.code.Strings[r.operand(0)]) {
622 break
623 }
624
625 r.advance(1)
626 continue
627
628 case syntax.Ref:
629
630 capnum := r.operand(0)
631
632 if r.runmatch.isMatched(capnum) {
633 if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
634 break
635 }
636 } else {
637 if (r.re.options & ECMAScript) == 0 {
638 break
639 }
640 }
641
642 r.advance(1)
643 continue
644
645 case syntax.Onerep:
646
647 c := r.operand(1)
648
649 if r.forwardchars() < c {
650 break
651 }
652
653 ch := rune(r.operand(0))
654
655 for c > 0 {
656 if r.forwardcharnext() != ch {
657 goto BreakBackward
658 }
659 c--
660 }
661
662 r.advance(2)
663 continue
664
665 case syntax.Notonerep:
666
667 c := r.operand(1)
668
669 if r.forwardchars() < c {
670 break
671 }
672 ch := rune(r.operand(0))
673
674 for c > 0 {
675 if r.forwardcharnext() == ch {
676 goto BreakBackward
677 }
678 c--
679 }
680
681 r.advance(2)
682 continue
683
684 case syntax.Setrep:
685
686 c := r.operand(1)
687
688 if r.forwardchars() < c {
689 break
690 }
691
692 set := r.code.Sets[r.operand(0)]
693
694 for c > 0 {
695 if !set.CharIn(r.forwardcharnext()) {
696 goto BreakBackward
697 }
698 c--
699 }
700
701 r.advance(2)
702 continue
703
704 case syntax.Oneloop:
705
706 c := r.operand(1)
707
708 if c > r.forwardchars() {
709 c = r.forwardchars()
710 }
711
712 ch := rune(r.operand(0))
713 i := c
714
715 for ; i > 0; i-- {
716 if r.forwardcharnext() != ch {
717 r.backwardnext()
718 break
719 }
720 }
721
722 if c > i {
723 r.trackPush2(c-i-1, r.textPos()-r.bump())
724 }
725
726 r.advance(2)
727 continue
728
729 case syntax.Notoneloop:
730
731 c := r.operand(1)
732
733 if c > r.forwardchars() {
734 c = r.forwardchars()
735 }
736
737 ch := rune(r.operand(0))
738 i := c
739
740 for ; i > 0; i-- {
741 if r.forwardcharnext() == ch {
742 r.backwardnext()
743 break
744 }
745 }
746
747 if c > i {
748 r.trackPush2(c-i-1, r.textPos()-r.bump())
749 }
750
751 r.advance(2)
752 continue
753
754 case syntax.Setloop:
755
756 c := r.operand(1)
757
758 if c > r.forwardchars() {
759 c = r.forwardchars()
760 }
761
762 set := r.code.Sets[r.operand(0)]
763 i := c
764
765 for ; i > 0; i-- {
766 if !set.CharIn(r.forwardcharnext()) {
767 r.backwardnext()
768 break
769 }
770 }
771
772 if c > i {
773 r.trackPush2(c-i-1, r.textPos()-r.bump())
774 }
775
776 r.advance(2)
777 continue
778
779 case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
780
781 r.trackPopN(2)
782 i := r.trackPeek()
783 pos := r.trackPeekN(1)
784
785 r.textto(pos)
786
787 if i > 0 {
788 r.trackPush2(i-1, pos-r.bump())
789 }
790
791 r.advance(2)
792 continue
793
794 case syntax.Setloop | syntax.Back:
795
796 r.trackPopN(2)
797 i := r.trackPeek()
798 pos := r.trackPeekN(1)
799
800 r.textto(pos)
801
802 if i > 0 {
803 r.trackPush2(i-1, pos-r.bump())
804 }
805
806 r.advance(2)
807 continue
808
809 case syntax.Onelazy, syntax.Notonelazy:
810
811 c := r.operand(1)
812
813 if c > r.forwardchars() {
814 c = r.forwardchars()
815 }
816
817 if c > 0 {
818 r.trackPush2(c-1, r.textPos())
819 }
820
821 r.advance(2)
822 continue
823
824 case syntax.Setlazy:
825
826 c := r.operand(1)
827
828 if c > r.forwardchars() {
829 c = r.forwardchars()
830 }
831
832 if c > 0 {
833 r.trackPush2(c-1, r.textPos())
834 }
835
836 r.advance(2)
837 continue
838
839 case syntax.Onelazy | syntax.Back:
840
841 r.trackPopN(2)
842 pos := r.trackPeekN(1)
843 r.textto(pos)
844
845 if r.forwardcharnext() != rune(r.operand(0)) {
846 break
847 }
848
849 i := r.trackPeek()
850
851 if i > 0 {
852 r.trackPush2(i-1, pos+r.bump())
853 }
854
855 r.advance(2)
856 continue
857
858 case syntax.Notonelazy | syntax.Back:
859
860 r.trackPopN(2)
861 pos := r.trackPeekN(1)
862 r.textto(pos)
863
864 if r.forwardcharnext() == rune(r.operand(0)) {
865 break
866 }
867
868 i := r.trackPeek()
869
870 if i > 0 {
871 r.trackPush2(i-1, pos+r.bump())
872 }
873
874 r.advance(2)
875 continue
876
877 case syntax.Setlazy | syntax.Back:
878
879 r.trackPopN(2)
880 pos := r.trackPeekN(1)
881 r.textto(pos)
882
883 if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
884 break
885 }
886
887 i := r.trackPeek()
888
889 if i > 0 {
890 r.trackPush2(i-1, pos+r.bump())
891 }
892
893 r.advance(2)
894 continue
895
896 default:
897 return errors.New("unknown state in regex runner")
898 }
899
900 BreakBackward:
901 ;
902
903 // "break Backward" comes here:
904 r.backtrack()
905 }
906}
907
908// increase the size of stack and track storage
909func (r *runner) ensureStorage() {
910 if r.runstackpos < r.runtrackcount*4 {
911 doubleIntSlice(&r.runstack, &r.runstackpos)
912 }
913 if r.runtrackpos < r.runtrackcount*4 {
914 doubleIntSlice(&r.runtrack, &r.runtrackpos)
915 }
916}
917
918func doubleIntSlice(s *[]int, pos *int) {
919 oldLen := len(*s)
920 newS := make([]int, oldLen*2)
921
922 copy(newS[oldLen:], *s)
923 *pos += oldLen
924 *s = newS
925}
926
927// Save a number on the longjump unrolling stack
928func (r *runner) crawl(i int) {
929 if r.runcrawlpos == 0 {
930 doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
931 }
932 r.runcrawlpos--
933 r.runcrawl[r.runcrawlpos] = i
934}
935
936// Remove a number from the longjump unrolling stack
937func (r *runner) popcrawl() int {
938 val := r.runcrawl[r.runcrawlpos]
939 r.runcrawlpos++
940 return val
941}
942
943// Get the height of the stack
944func (r *runner) crawlpos() int {
945 return len(r.runcrawl) - r.runcrawlpos
946}
947
948func (r *runner) advance(i int) {
949 r.codepos += (i + 1)
950 r.setOperator(r.code.Codes[r.codepos])
951}
952
953func (r *runner) goTo(newpos int) {
954 // when branching backward or in place, ensure storage
955 if newpos <= r.codepos {
956 r.ensureStorage()
957 }
958
959 r.setOperator(r.code.Codes[newpos])
960 r.codepos = newpos
961}
962
963func (r *runner) textto(newpos int) {
964 r.runtextpos = newpos
965}
966
967func (r *runner) trackto(newpos int) {
968 r.runtrackpos = len(r.runtrack) - newpos
969}
970
971func (r *runner) textstart() int {
972 return r.runtextstart
973}
974
975func (r *runner) textPos() int {
976 return r.runtextpos
977}
978
979// push onto the backtracking stack
980func (r *runner) trackpos() int {
981 return len(r.runtrack) - r.runtrackpos
982}
983
984func (r *runner) trackPush() {
985 r.runtrackpos--
986 r.runtrack[r.runtrackpos] = r.codepos
987}
988
989func (r *runner) trackPush1(I1 int) {
990 r.runtrackpos--
991 r.runtrack[r.runtrackpos] = I1
992 r.runtrackpos--
993 r.runtrack[r.runtrackpos] = r.codepos
994}
995
996func (r *runner) trackPush2(I1, I2 int) {
997 r.runtrackpos--
998 r.runtrack[r.runtrackpos] = I1
999 r.runtrackpos--
1000 r.runtrack[r.runtrackpos] = I2
1001 r.runtrackpos--
1002 r.runtrack[r.runtrackpos] = r.codepos
1003}
1004
1005func (r *runner) trackPush3(I1, I2, I3 int) {
1006 r.runtrackpos--
1007 r.runtrack[r.runtrackpos] = I1
1008 r.runtrackpos--
1009 r.runtrack[r.runtrackpos] = I2
1010 r.runtrackpos--
1011 r.runtrack[r.runtrackpos] = I3
1012 r.runtrackpos--
1013 r.runtrack[r.runtrackpos] = r.codepos
1014}
1015
1016func (r *runner) trackPushNeg1(I1 int) {
1017 r.runtrackpos--
1018 r.runtrack[r.runtrackpos] = I1
1019 r.runtrackpos--
1020 r.runtrack[r.runtrackpos] = -r.codepos
1021}
1022
1023func (r *runner) trackPushNeg2(I1, I2 int) {
1024 r.runtrackpos--
1025 r.runtrack[r.runtrackpos] = I1
1026 r.runtrackpos--
1027 r.runtrack[r.runtrackpos] = I2
1028 r.runtrackpos--
1029 r.runtrack[r.runtrackpos] = -r.codepos
1030}
1031
1032func (r *runner) backtrack() {
1033 newpos := r.runtrack[r.runtrackpos]
1034 r.runtrackpos++
1035
1036 if r.re.Debug() {
1037 if newpos < 0 {
1038 fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
1039 } else {
1040 fmt.Printf(" Backtracking to code position %v\n", newpos)
1041 }
1042 }
1043
1044 if newpos < 0 {
1045 newpos = -newpos
1046 r.setOperator(r.code.Codes[newpos] | syntax.Back2)
1047 } else {
1048 r.setOperator(r.code.Codes[newpos] | syntax.Back)
1049 }
1050
1051 // When branching backward, ensure storage
1052 if newpos < r.codepos {
1053 r.ensureStorage()
1054 }
1055
1056 r.codepos = newpos
1057}
1058
1059func (r *runner) setOperator(op int) {
1060 r.caseInsensitive = (0 != (op & syntax.Ci))
1061 r.rightToLeft = (0 != (op & syntax.Rtl))
1062 r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
1063}
1064
1065func (r *runner) trackPop() {
1066 r.runtrackpos++
1067}
1068
1069// pop framesize items from the backtracking stack
1070func (r *runner) trackPopN(framesize int) {
1071 r.runtrackpos += framesize
1072}
1073
1074// Technically we are actually peeking at items already popped. So if you want to
1075// get and pop the top item from the stack, you do
1076// r.trackPop();
1077// r.trackPeek();
1078func (r *runner) trackPeek() int {
1079 return r.runtrack[r.runtrackpos-1]
1080}
1081
1082// get the ith element down on the backtracking stack
1083func (r *runner) trackPeekN(i int) int {
1084 return r.runtrack[r.runtrackpos-i-1]
1085}
1086
1087// Push onto the grouping stack
1088func (r *runner) stackPush(I1 int) {
1089 r.runstackpos--
1090 r.runstack[r.runstackpos] = I1
1091}
1092
1093func (r *runner) stackPush2(I1, I2 int) {
1094 r.runstackpos--
1095 r.runstack[r.runstackpos] = I1
1096 r.runstackpos--
1097 r.runstack[r.runstackpos] = I2
1098}
1099
1100func (r *runner) stackPop() {
1101 r.runstackpos++
1102}
1103
1104// pop framesize items from the grouping stack
1105func (r *runner) stackPopN(framesize int) {
1106 r.runstackpos += framesize
1107}
1108
1109// Technically we are actually peeking at items already popped. So if you want to
1110// get and pop the top item from the stack, you do
1111// r.stackPop();
1112// r.stackPeek();
1113func (r *runner) stackPeek() int {
1114 return r.runstack[r.runstackpos-1]
1115}
1116
1117// get the ith element down on the grouping stack
1118func (r *runner) stackPeekN(i int) int {
1119 return r.runstack[r.runstackpos-i-1]
1120}
1121
1122func (r *runner) operand(i int) int {
1123 return r.code.Codes[r.codepos+i+1]
1124}
1125
1126func (r *runner) leftchars() int {
1127 return r.runtextpos
1128}
1129
1130func (r *runner) rightchars() int {
1131 return r.runtextend - r.runtextpos
1132}
1133
1134func (r *runner) bump() int {
1135 if r.rightToLeft {
1136 return -1
1137 }
1138 return 1
1139}
1140
1141func (r *runner) forwardchars() int {
1142 if r.rightToLeft {
1143 return r.runtextpos
1144 }
1145 return r.runtextend - r.runtextpos
1146}
1147
1148func (r *runner) forwardcharnext() rune {
1149 var ch rune
1150 if r.rightToLeft {
1151 r.runtextpos--
1152 ch = r.runtext[r.runtextpos]
1153 } else {
1154 ch = r.runtext[r.runtextpos]
1155 r.runtextpos++
1156 }
1157
1158 if r.caseInsensitive {
1159 return unicode.ToLower(ch)
1160 }
1161 return ch
1162}
1163
1164func (r *runner) runematch(str []rune) bool {
1165 var pos int
1166
1167 c := len(str)
1168 if !r.rightToLeft {
1169 if r.runtextend-r.runtextpos < c {
1170 return false
1171 }
1172
1173 pos = r.runtextpos + c
1174 } else {
1175 if r.runtextpos-0 < c {
1176 return false
1177 }
1178
1179 pos = r.runtextpos
1180 }
1181
1182 if !r.caseInsensitive {
1183 for c != 0 {
1184 c--
1185 pos--
1186 if str[c] != r.runtext[pos] {
1187 return false
1188 }
1189 }
1190 } else {
1191 for c != 0 {
1192 c--
1193 pos--
1194 if str[c] != unicode.ToLower(r.runtext[pos]) {
1195 return false
1196 }
1197 }
1198 }
1199
1200 if !r.rightToLeft {
1201 pos += len(str)
1202 }
1203
1204 r.runtextpos = pos
1205
1206 return true
1207}
1208
1209func (r *runner) refmatch(index, len int) bool {
1210 var c, pos, cmpos int
1211
1212 if !r.rightToLeft {
1213 if r.runtextend-r.runtextpos < len {
1214 return false
1215 }
1216
1217 pos = r.runtextpos + len
1218 } else {
1219 if r.runtextpos-0 < len {
1220 return false
1221 }
1222
1223 pos = r.runtextpos
1224 }
1225 cmpos = index + len
1226
1227 c = len
1228
1229 if !r.caseInsensitive {
1230 for c != 0 {
1231 c--
1232 cmpos--
1233 pos--
1234 if r.runtext[cmpos] != r.runtext[pos] {
1235 return false
1236 }
1237
1238 }
1239 } else {
1240 for c != 0 {
1241 c--
1242 cmpos--
1243 pos--
1244
1245 if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
1246 return false
1247 }
1248 }
1249 }
1250
1251 if !r.rightToLeft {
1252 pos += len
1253 }
1254
1255 r.runtextpos = pos
1256
1257 return true
1258}
1259
1260func (r *runner) backwardnext() {
1261 if r.rightToLeft {
1262 r.runtextpos++
1263 } else {
1264 r.runtextpos--
1265 }
1266}
1267
1268func (r *runner) charAt(j int) rune {
1269 return r.runtext[j]
1270}
1271
1272func (r *runner) findFirstChar() bool {
1273
1274 if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
1275 if !r.code.RightToLeft {
1276 if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
1277 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
1278 r.runtextpos = r.runtextend
1279 return false
1280 }
1281 if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
1282 r.runtextpos = r.runtextend - 1
1283 } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
1284 r.runtextpos = r.runtextend
1285 }
1286 } else {
1287 if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
1288 (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
1289 (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
1290 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
1291 r.runtextpos = 0
1292 return false
1293 }
1294 if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
1295 r.runtextpos = 0
1296 }
1297 }
1298
1299 if r.code.BmPrefix != nil {
1300 return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
1301 }
1302
1303 return true // found a valid start or end anchor
1304 } else if r.code.BmPrefix != nil {
1305 r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
1306
1307 if r.runtextpos == -1 {
1308 if r.code.RightToLeft {
1309 r.runtextpos = 0
1310 } else {
1311 r.runtextpos = r.runtextend
1312 }
1313 return false
1314 }
1315
1316 return true
1317 } else if r.code.FcPrefix == nil {
1318 return true
1319 }
1320
1321 r.rightToLeft = r.code.RightToLeft
1322 r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
1323
1324 set := r.code.FcPrefix.PrefixSet
1325 if set.IsSingleton() {
1326 ch := set.SingletonChar()
1327 for i := r.forwardchars(); i > 0; i-- {
1328 if ch == r.forwardcharnext() {
1329 r.backwardnext()
1330 return true
1331 }
1332 }
1333 } else {
1334 for i := r.forwardchars(); i > 0; i-- {
1335 n := r.forwardcharnext()
1336 //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
1337 if set.CharIn(n) {
1338 r.backwardnext()
1339 return true
1340 }
1341 }
1342 }
1343
1344 return false
1345}
1346
1347func (r *runner) initMatch() {
1348 // Use a hashtable'ed Match object if the capture numbers are sparse
1349
1350 if r.runmatch == nil {
1351 if r.re.caps != nil {
1352 r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
1353 } else {
1354 r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
1355 }
1356 } else {
1357 r.runmatch.reset(r.runtext, r.runtextstart)
1358 }
1359
1360 // note we test runcrawl, because it is the last one to be allocated
1361 // If there is an alloc failure in the middle of the three allocations,
1362 // we may still return to reuse this instance, and we want to behave
1363 // as if the allocations didn't occur. (we used to test _trackcount != 0)
1364
1365 if r.runcrawl != nil {
1366 r.runtrackpos = len(r.runtrack)
1367 r.runstackpos = len(r.runstack)
1368 r.runcrawlpos = len(r.runcrawl)
1369 return
1370 }
1371
1372 r.initTrackCount()
1373
1374 tracksize := r.runtrackcount * 8
1375 stacksize := r.runtrackcount * 8
1376
1377 if tracksize < 32 {
1378 tracksize = 32
1379 }
1380 if stacksize < 16 {
1381 stacksize = 16
1382 }
1383
1384 r.runtrack = make([]int, tracksize)
1385 r.runtrackpos = tracksize
1386
1387 r.runstack = make([]int, stacksize)
1388 r.runstackpos = stacksize
1389
1390 r.runcrawl = make([]int, 32)
1391 r.runcrawlpos = 32
1392}
1393
1394func (r *runner) tidyMatch(quick bool) *Match {
1395 if !quick {
1396 match := r.runmatch
1397
1398 r.runmatch = nil
1399
1400 match.tidy(r.runtextpos)
1401 return match
1402 } else {
1403 // send back our match -- it's not leaving the package, so it's safe to not clean it up
1404 // this reduces allocs for frequent calls to the "IsMatch" bool-only functions
1405 return r.runmatch
1406 }
1407}
1408
1409// capture captures a subexpression. Note that the
1410// capnum used here has already been mapped to a non-sparse
1411// index (by the code generator RegexWriter).
1412func (r *runner) capture(capnum, start, end int) {
1413 if end < start {
1414 T := end
1415 end = start
1416 start = T
1417 }
1418
1419 r.crawl(capnum)
1420 r.runmatch.addMatch(capnum, start, end-start)
1421}
1422
1423// transferCapture captures a subexpression. Note that the
1424// capnum used here has already been mapped to a non-sparse
1425// index (by the code generator RegexWriter).
1426func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
1427 var start2, end2 int
1428
1429 // these are the two intervals that are cancelling each other
1430
1431 if end < start {
1432 T := end
1433 end = start
1434 start = T
1435 }
1436
1437 start2 = r.runmatch.matchIndex(uncapnum)
1438 end2 = start2 + r.runmatch.matchLength(uncapnum)
1439
1440 // The new capture gets the innermost defined interval
1441
1442 if start >= end2 {
1443 end = start
1444 start = end2
1445 } else if end <= start2 {
1446 start = start2
1447 } else {
1448 if end > end2 {
1449 end = end2
1450 }
1451 if start2 > start {
1452 start = start2
1453 }
1454 }
1455
1456 r.crawl(uncapnum)
1457 r.runmatch.balanceMatch(uncapnum)
1458
1459 if capnum != -1 {
1460 r.crawl(capnum)
1461 r.runmatch.addMatch(capnum, start, end-start)
1462 }
1463}
1464
1465// revert the last capture
1466func (r *runner) uncapture() {
1467 capnum := r.popcrawl()
1468 r.runmatch.removeMatch(capnum)
1469}
1470
1471//debug
1472
1473func (r *runner) dumpState() {
1474 back := ""
1475 if r.operator&syntax.Back != 0 {
1476 back = " Back"
1477 }
1478 if r.operator&syntax.Back2 != 0 {
1479 back += " Back2"
1480 }
1481 fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
1482 r.textposDescription(),
1483 r.stackDescription(r.runtrack, r.runtrackpos),
1484 r.stackDescription(r.runstack, r.runstackpos),
1485 r.code.OpcodeDescription(r.codepos),
1486 back)
1487}
1488
1489func (r *runner) stackDescription(a []int, index int) string {
1490 buf := &bytes.Buffer{}
1491
1492 fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
1493 if buf.Len() < 8 {
1494 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1495 }
1496
1497 buf.WriteRune('(')
1498 for i := index; i < len(a); i++ {
1499 if i > index {
1500 buf.WriteRune(' ')
1501 }
1502
1503 buf.WriteString(strconv.Itoa(a[i]))
1504 }
1505
1506 buf.WriteRune(')')
1507
1508 return buf.String()
1509}
1510
1511func (r *runner) textposDescription() string {
1512 buf := &bytes.Buffer{}
1513
1514 buf.WriteString(strconv.Itoa(r.runtextpos))
1515
1516 if buf.Len() < 8 {
1517 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1518 }
1519
1520 if r.runtextpos > 0 {
1521 buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
1522 } else {
1523 buf.WriteRune('^')
1524 }
1525
1526 buf.WriteRune('>')
1527
1528 for i := r.runtextpos; i < r.runtextend; i++ {
1529 buf.WriteString(syntax.CharDescription(r.runtext[i]))
1530 }
1531 if buf.Len() >= 64 {
1532 buf.Truncate(61)
1533 buf.WriteString("...")
1534 } else {
1535 buf.WriteRune('$')
1536 }
1537
1538 return buf.String()
1539}
1540
1541// decide whether the pos
1542// at the specified index is a boundary or not. It's just not worth
1543// emitting inline code for this logic.
1544func (r *runner) isBoundary(index, startpos, endpos int) bool {
1545 return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
1546 (index < endpos && syntax.IsWordChar(r.runtext[index]))
1547}
1548
1549func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
1550 return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
1551 (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
1552}
1553
1554// this seems like a comment to justify randomly picking 1000 :-P
1555// We have determined this value in a series of experiments where x86 retail
1556// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
1557// of TimeoutCheckFrequency did not tend to increase performance; smaller values
1558// of TimeoutCheckFrequency tended to slow down the execution.
1559const timeoutCheckFrequency int = 1000
1560
1561func (r *runner) startTimeoutWatch() {
1562 if r.ignoreTimeout {
1563 return
1564 }
1565
1566 r.timeoutChecksToSkip = timeoutCheckFrequency
1567 r.timeoutAt = time.Now().Add(r.timeout)
1568}
1569
1570func (r *runner) checkTimeout() error {
1571 if r.ignoreTimeout {
1572 return nil
1573 }
1574 r.timeoutChecksToSkip--
1575 if r.timeoutChecksToSkip != 0 {
1576 return nil
1577 }
1578
1579 r.timeoutChecksToSkip = timeoutCheckFrequency
1580 return r.doCheckTimeout()
1581}
1582
1583func (r *runner) doCheckTimeout() error {
1584 current := time.Now()
1585
1586 if current.Before(r.timeoutAt) {
1587 return nil
1588 }
1589
1590 if r.re.Debug() {
1591 //Debug.WriteLine("")
1592 //Debug.WriteLine("RegEx match timeout occurred!")
1593 //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
1594 //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
1595 //Debug.WriteLine("Search pattern: " + _runregex._pattern)
1596 //Debug.WriteLine("Input: " + r.runtext)
1597 //Debug.WriteLine("About to throw RegexMatchTimeoutException.")
1598 }
1599
1600 return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
1601}
1602
1603func (r *runner) initTrackCount() {
1604 r.runtrackcount = r.code.TrackCount
1605}
1606
1607// getRunner returns a run to use for matching re.
1608// It uses the re's runner cache if possible, to avoid
1609// unnecessary allocation.
1610func (re *Regexp) getRunner() *runner {
1611 re.muRun.Lock()
1612 if n := len(re.runner); n > 0 {
1613 z := re.runner[n-1]
1614 re.runner = re.runner[:n-1]
1615 re.muRun.Unlock()
1616 return z
1617 }
1618 re.muRun.Unlock()
1619 z := &runner{
1620 re: re,
1621 code: re.code,
1622 }
1623 return z
1624}
1625
1626// putRunner returns a runner to the re's cache.
1627// There is no attempt to limit the size of the cache, so it will
1628// grow to the maximum number of simultaneous matches
1629// run using re. (The cache empties when re gets garbage collected.)
1630func (re *Regexp) putRunner(r *runner) {
1631 re.muRun.Lock()
1632 re.runner = append(re.runner, r)
1633 re.muRun.Unlock()
1634}
Note: See TracBrowser for help on using the repository browser.