1 | package regexp2
|
---|
2 |
|
---|
3 | import (
|
---|
4 | "bytes"
|
---|
5 | "errors"
|
---|
6 | "fmt"
|
---|
7 | "math"
|
---|
8 | "strconv"
|
---|
9 | "strings"
|
---|
10 | "time"
|
---|
11 | "unicode"
|
---|
12 |
|
---|
13 | "github.com/dlclark/regexp2/syntax"
|
---|
14 | )
|
---|
15 |
|
---|
16 | type runner struct {
|
---|
17 | re *Regexp
|
---|
18 | code *syntax.Code
|
---|
19 |
|
---|
20 | runtextstart int // starting point for search
|
---|
21 |
|
---|
22 | runtext []rune // text to search
|
---|
23 | runtextpos int // current position in text
|
---|
24 | runtextend int
|
---|
25 |
|
---|
26 | // The backtracking stack. Opcodes use this to store data regarding
|
---|
27 | // what they have matched and where to backtrack to. Each "frame" on
|
---|
28 | // the stack takes the form of [CodePosition Data1 Data2...], where
|
---|
29 | // CodePosition is the position of the current opcode and
|
---|
30 | // the data values are all optional. The CodePosition can be negative, and
|
---|
31 | // these values (also called "back2") are used by the BranchMark family of opcodes
|
---|
32 | // to indicate whether they are backtracking after a successful or failed
|
---|
33 | // match.
|
---|
34 | // When we backtrack, we pop the CodePosition off the stack, set the current
|
---|
35 | // instruction pointer to that code position, and mark the opcode
|
---|
36 | // with a backtracking flag ("Back"). Each opcode then knows how to
|
---|
37 | // handle its own data.
|
---|
38 | runtrack []int
|
---|
39 | runtrackpos int
|
---|
40 |
|
---|
41 | // This stack is used to track text positions across different opcodes.
|
---|
42 | // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
|
---|
43 | // pair. SetMark records the text position before we match a*b. Then
|
---|
44 | // CaptureMark uses that position to figure out where the capture starts.
|
---|
45 | // Opcodes which push onto this stack are always paired with other opcodes
|
---|
46 | // which will pop the value from it later. A successful match should mean
|
---|
47 | // that this stack is empty.
|
---|
48 | runstack []int
|
---|
49 | runstackpos int
|
---|
50 |
|
---|
51 | // The crawl stack is used to keep track of captures. Every time a group
|
---|
52 | // has a capture, we push its group number onto the runcrawl stack. In
|
---|
53 | // the case of a balanced match, we push BOTH groups onto the stack.
|
---|
54 | runcrawl []int
|
---|
55 | runcrawlpos int
|
---|
56 |
|
---|
57 | runtrackcount int // count of states that may do backtracking
|
---|
58 |
|
---|
59 | runmatch *Match // result object
|
---|
60 |
|
---|
61 | ignoreTimeout bool
|
---|
62 | timeout time.Duration // timeout in milliseconds (needed for actual)
|
---|
63 | timeoutChecksToSkip int
|
---|
64 | timeoutAt time.Time
|
---|
65 |
|
---|
66 | operator syntax.InstOp
|
---|
67 | codepos int
|
---|
68 | rightToLeft bool
|
---|
69 | caseInsensitive bool
|
---|
70 | }
|
---|
71 |
|
---|
72 | // run searches for matches and can continue from the previous match
|
---|
73 | //
|
---|
74 | // quick is usually false, but can be true to not return matches, just put it in caches
|
---|
75 | // textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
|
---|
76 | // input is the string to search for our regex pattern
|
---|
77 | func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
|
---|
78 |
|
---|
79 | // get a cached runner
|
---|
80 | runner := re.getRunner()
|
---|
81 | defer re.putRunner(runner)
|
---|
82 |
|
---|
83 | if textstart < 0 {
|
---|
84 | if re.RightToLeft() {
|
---|
85 | textstart = len(input)
|
---|
86 | } else {
|
---|
87 | textstart = 0
|
---|
88 | }
|
---|
89 | }
|
---|
90 |
|
---|
91 | return runner.scan(input, textstart, quick, re.MatchTimeout)
|
---|
92 | }
|
---|
93 |
|
---|
94 | // Scans the string to find the first match. Uses the Match object
|
---|
95 | // both to feed text in and as a place to store matches that come out.
|
---|
96 | //
|
---|
97 | // All the action is in the Go() method. Our
|
---|
98 | // responsibility is to load up the class members before
|
---|
99 | // calling Go.
|
---|
100 | //
|
---|
101 | // The optimizer can compute a set of candidate starting characters,
|
---|
102 | // and we could use a separate method Skip() that will quickly scan past
|
---|
103 | // any characters that we know can't match.
|
---|
104 | func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
|
---|
105 | r.timeout = timeout
|
---|
106 | r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
|
---|
107 | r.runtextstart = textstart
|
---|
108 | r.runtext = rt
|
---|
109 | r.runtextend = len(rt)
|
---|
110 |
|
---|
111 | stoppos := r.runtextend
|
---|
112 | bump := 1
|
---|
113 |
|
---|
114 | if r.re.RightToLeft() {
|
---|
115 | bump = -1
|
---|
116 | stoppos = 0
|
---|
117 | }
|
---|
118 |
|
---|
119 | r.runtextpos = textstart
|
---|
120 | initted := false
|
---|
121 |
|
---|
122 | r.startTimeoutWatch()
|
---|
123 | for {
|
---|
124 | if r.re.Debug() {
|
---|
125 | //fmt.Printf("\nSearch content: %v\n", string(r.runtext))
|
---|
126 | fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
|
---|
127 | fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
|
---|
128 | }
|
---|
129 |
|
---|
130 | if r.findFirstChar() {
|
---|
131 | if err := r.checkTimeout(); err != nil {
|
---|
132 | return nil, err
|
---|
133 | }
|
---|
134 |
|
---|
135 | if !initted {
|
---|
136 | r.initMatch()
|
---|
137 | initted = true
|
---|
138 | }
|
---|
139 |
|
---|
140 | if r.re.Debug() {
|
---|
141 | fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
|
---|
142 | }
|
---|
143 |
|
---|
144 | if err := r.execute(); err != nil {
|
---|
145 | return nil, err
|
---|
146 | }
|
---|
147 |
|
---|
148 | if r.runmatch.matchcount[0] > 0 {
|
---|
149 | // We'll return a match even if it touches a previous empty match
|
---|
150 | return r.tidyMatch(quick), nil
|
---|
151 | }
|
---|
152 |
|
---|
153 | // reset state for another go
|
---|
154 | r.runtrackpos = len(r.runtrack)
|
---|
155 | r.runstackpos = len(r.runstack)
|
---|
156 | r.runcrawlpos = len(r.runcrawl)
|
---|
157 | }
|
---|
158 |
|
---|
159 | // failure!
|
---|
160 |
|
---|
161 | if r.runtextpos == stoppos {
|
---|
162 | r.tidyMatch(true)
|
---|
163 | return nil, nil
|
---|
164 | }
|
---|
165 |
|
---|
166 | // Recognize leading []* and various anchors, and bump on failure accordingly
|
---|
167 |
|
---|
168 | // r.bump by one and start again
|
---|
169 |
|
---|
170 | r.runtextpos += bump
|
---|
171 | }
|
---|
172 | // We never get here
|
---|
173 | }
|
---|
174 |
|
---|
175 | func (r *runner) execute() error {
|
---|
176 |
|
---|
177 | r.goTo(0)
|
---|
178 |
|
---|
179 | for {
|
---|
180 |
|
---|
181 | if r.re.Debug() {
|
---|
182 | r.dumpState()
|
---|
183 | }
|
---|
184 |
|
---|
185 | if err := r.checkTimeout(); err != nil {
|
---|
186 | return err
|
---|
187 | }
|
---|
188 |
|
---|
189 | switch r.operator {
|
---|
190 | case syntax.Stop:
|
---|
191 | return nil
|
---|
192 |
|
---|
193 | case syntax.Nothing:
|
---|
194 | break
|
---|
195 |
|
---|
196 | case syntax.Goto:
|
---|
197 | r.goTo(r.operand(0))
|
---|
198 | continue
|
---|
199 |
|
---|
200 | case syntax.Testref:
|
---|
201 | if !r.runmatch.isMatched(r.operand(0)) {
|
---|
202 | break
|
---|
203 | }
|
---|
204 | r.advance(1)
|
---|
205 | continue
|
---|
206 |
|
---|
207 | case syntax.Lazybranch:
|
---|
208 | r.trackPush1(r.textPos())
|
---|
209 | r.advance(1)
|
---|
210 | continue
|
---|
211 |
|
---|
212 | case syntax.Lazybranch | syntax.Back:
|
---|
213 | r.trackPop()
|
---|
214 | r.textto(r.trackPeek())
|
---|
215 | r.goTo(r.operand(0))
|
---|
216 | continue
|
---|
217 |
|
---|
218 | case syntax.Setmark:
|
---|
219 | r.stackPush(r.textPos())
|
---|
220 | r.trackPush()
|
---|
221 | r.advance(0)
|
---|
222 | continue
|
---|
223 |
|
---|
224 | case syntax.Nullmark:
|
---|
225 | r.stackPush(-1)
|
---|
226 | r.trackPush()
|
---|
227 | r.advance(0)
|
---|
228 | continue
|
---|
229 |
|
---|
230 | case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
|
---|
231 | r.stackPop()
|
---|
232 | break
|
---|
233 |
|
---|
234 | case syntax.Getmark:
|
---|
235 | r.stackPop()
|
---|
236 | r.trackPush1(r.stackPeek())
|
---|
237 | r.textto(r.stackPeek())
|
---|
238 | r.advance(0)
|
---|
239 | continue
|
---|
240 |
|
---|
241 | case syntax.Getmark | syntax.Back:
|
---|
242 | r.trackPop()
|
---|
243 | r.stackPush(r.trackPeek())
|
---|
244 | break
|
---|
245 |
|
---|
246 | case syntax.Capturemark:
|
---|
247 | if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
|
---|
248 | break
|
---|
249 | }
|
---|
250 | r.stackPop()
|
---|
251 | if r.operand(1) != -1 {
|
---|
252 | r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
|
---|
253 | } else {
|
---|
254 | r.capture(r.operand(0), r.stackPeek(), r.textPos())
|
---|
255 | }
|
---|
256 | r.trackPush1(r.stackPeek())
|
---|
257 |
|
---|
258 | r.advance(2)
|
---|
259 |
|
---|
260 | continue
|
---|
261 |
|
---|
262 | case syntax.Capturemark | syntax.Back:
|
---|
263 | r.trackPop()
|
---|
264 | r.stackPush(r.trackPeek())
|
---|
265 | r.uncapture()
|
---|
266 | if r.operand(0) != -1 && r.operand(1) != -1 {
|
---|
267 | r.uncapture()
|
---|
268 | }
|
---|
269 |
|
---|
270 | break
|
---|
271 |
|
---|
272 | case syntax.Branchmark:
|
---|
273 | r.stackPop()
|
---|
274 |
|
---|
275 | matched := r.textPos() - r.stackPeek()
|
---|
276 |
|
---|
277 | if matched != 0 { // Nonempty match -> loop now
|
---|
278 | r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
|
---|
279 | r.stackPush(r.textPos()) // Make new mark
|
---|
280 | r.goTo(r.operand(0)) // Loop
|
---|
281 | } else { // Empty match -> straight now
|
---|
282 | r.trackPushNeg1(r.stackPeek()) // Save old mark
|
---|
283 | r.advance(1) // Straight
|
---|
284 | }
|
---|
285 | continue
|
---|
286 |
|
---|
287 | case syntax.Branchmark | syntax.Back:
|
---|
288 | r.trackPopN(2)
|
---|
289 | r.stackPop()
|
---|
290 | r.textto(r.trackPeekN(1)) // Recall position
|
---|
291 | r.trackPushNeg1(r.trackPeek()) // Save old mark
|
---|
292 | r.advance(1) // Straight
|
---|
293 | continue
|
---|
294 |
|
---|
295 | case syntax.Branchmark | syntax.Back2:
|
---|
296 | r.trackPop()
|
---|
297 | r.stackPush(r.trackPeek()) // Recall old mark
|
---|
298 | break // Backtrack
|
---|
299 |
|
---|
300 | case syntax.Lazybranchmark:
|
---|
301 | {
|
---|
302 | // We hit this the first time through a lazy loop and after each
|
---|
303 | // successful match of the inner expression. It simply continues
|
---|
304 | // on and doesn't loop.
|
---|
305 | r.stackPop()
|
---|
306 |
|
---|
307 | oldMarkPos := r.stackPeek()
|
---|
308 |
|
---|
309 | if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
|
---|
310 | if oldMarkPos != -1 {
|
---|
311 | r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
|
---|
312 | } else {
|
---|
313 | r.trackPush2(r.textPos(), r.textPos())
|
---|
314 | }
|
---|
315 | } else {
|
---|
316 | // The inner expression found an empty match, so we'll go directly to 'back2' if we
|
---|
317 | // backtrack. In this case, we need to push something on the stack, since back2 pops.
|
---|
318 | // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
|
---|
319 | // position associated with that empty match.
|
---|
320 | r.stackPush(oldMarkPos)
|
---|
321 |
|
---|
322 | r.trackPushNeg1(r.stackPeek()) // Save old mark
|
---|
323 | }
|
---|
324 | r.advance(1)
|
---|
325 | continue
|
---|
326 | }
|
---|
327 |
|
---|
328 | case syntax.Lazybranchmark | syntax.Back:
|
---|
329 |
|
---|
330 | // After the first time, Lazybranchmark | syntax.Back occurs
|
---|
331 | // with each iteration of the loop, and therefore with every attempted
|
---|
332 | // match of the inner expression. We'll try to match the inner expression,
|
---|
333 | // then go back to Lazybranchmark if successful. If the inner expression
|
---|
334 | // fails, we go to Lazybranchmark | syntax.Back2
|
---|
335 |
|
---|
336 | r.trackPopN(2)
|
---|
337 | pos := r.trackPeekN(1)
|
---|
338 | r.trackPushNeg1(r.trackPeek()) // Save old mark
|
---|
339 | r.stackPush(pos) // Make new mark
|
---|
340 | r.textto(pos) // Recall position
|
---|
341 | r.goTo(r.operand(0)) // Loop
|
---|
342 | continue
|
---|
343 |
|
---|
344 | case syntax.Lazybranchmark | syntax.Back2:
|
---|
345 | // The lazy loop has failed. We'll do a true backtrack and
|
---|
346 | // start over before the lazy loop.
|
---|
347 | r.stackPop()
|
---|
348 | r.trackPop()
|
---|
349 | r.stackPush(r.trackPeek()) // Recall old mark
|
---|
350 | break
|
---|
351 |
|
---|
352 | case syntax.Setcount:
|
---|
353 | r.stackPush2(r.textPos(), r.operand(0))
|
---|
354 | r.trackPush()
|
---|
355 | r.advance(1)
|
---|
356 | continue
|
---|
357 |
|
---|
358 | case syntax.Nullcount:
|
---|
359 | r.stackPush2(-1, r.operand(0))
|
---|
360 | r.trackPush()
|
---|
361 | r.advance(1)
|
---|
362 | continue
|
---|
363 |
|
---|
364 | case syntax.Setcount | syntax.Back:
|
---|
365 | r.stackPopN(2)
|
---|
366 | break
|
---|
367 |
|
---|
368 | case syntax.Nullcount | syntax.Back:
|
---|
369 | r.stackPopN(2)
|
---|
370 | break
|
---|
371 |
|
---|
372 | case syntax.Branchcount:
|
---|
373 | // r.stackPush:
|
---|
374 | // 0: Mark
|
---|
375 | // 1: Count
|
---|
376 |
|
---|
377 | r.stackPopN(2)
|
---|
378 | mark := r.stackPeek()
|
---|
379 | count := r.stackPeekN(1)
|
---|
380 | matched := r.textPos() - mark
|
---|
381 |
|
---|
382 | if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
|
---|
383 | r.trackPushNeg2(mark, count) // Save old mark, count
|
---|
384 | r.advance(2) // Straight
|
---|
385 | } else { // Nonempty match -> count+loop now
|
---|
386 | r.trackPush1(mark) // remember mark
|
---|
387 | r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
|
---|
388 | r.goTo(r.operand(0)) // Loop
|
---|
389 | }
|
---|
390 | continue
|
---|
391 |
|
---|
392 | case syntax.Branchcount | syntax.Back:
|
---|
393 | // r.trackPush:
|
---|
394 | // 0: Previous mark
|
---|
395 | // r.stackPush:
|
---|
396 | // 0: Mark (= current pos, discarded)
|
---|
397 | // 1: Count
|
---|
398 | r.trackPop()
|
---|
399 | r.stackPopN(2)
|
---|
400 | if r.stackPeekN(1) > 0 { // Positive -> can go straight
|
---|
401 | r.textto(r.stackPeek()) // Zap to mark
|
---|
402 | r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
|
---|
403 | r.advance(2) // Straight
|
---|
404 | continue
|
---|
405 | }
|
---|
406 | r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
|
---|
407 | break
|
---|
408 |
|
---|
409 | case syntax.Branchcount | syntax.Back2:
|
---|
410 | // r.trackPush:
|
---|
411 | // 0: Previous mark
|
---|
412 | // 1: Previous count
|
---|
413 | r.trackPopN(2)
|
---|
414 | r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
|
---|
415 | break // Backtrack
|
---|
416 |
|
---|
417 | case syntax.Lazybranchcount:
|
---|
418 | // r.stackPush:
|
---|
419 | // 0: Mark
|
---|
420 | // 1: Count
|
---|
421 |
|
---|
422 | r.stackPopN(2)
|
---|
423 | mark := r.stackPeek()
|
---|
424 | count := r.stackPeekN(1)
|
---|
425 |
|
---|
426 | if count < 0 { // Negative count -> loop now
|
---|
427 | r.trackPushNeg1(mark) // Save old mark
|
---|
428 | r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
|
---|
429 | r.goTo(r.operand(0)) // Loop
|
---|
430 | } else { // Nonneg count -> straight now
|
---|
431 | r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
|
---|
432 | r.advance(2) // Straight
|
---|
433 | }
|
---|
434 | continue
|
---|
435 |
|
---|
436 | case syntax.Lazybranchcount | syntax.Back:
|
---|
437 | // r.trackPush:
|
---|
438 | // 0: Mark
|
---|
439 | // 1: Count
|
---|
440 | // 2: r.textPos
|
---|
441 |
|
---|
442 | r.trackPopN(3)
|
---|
443 | mark := r.trackPeek()
|
---|
444 | textpos := r.trackPeekN(2)
|
---|
445 |
|
---|
446 | if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
|
---|
447 | r.textto(textpos) // Recall position
|
---|
448 | r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
|
---|
449 | r.trackPushNeg1(mark) // Save old mark
|
---|
450 | r.goTo(r.operand(0)) // Loop
|
---|
451 | continue
|
---|
452 | } else { // Max loops or empty match -> backtrack
|
---|
453 | r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
|
---|
454 | break // backtrack
|
---|
455 | }
|
---|
456 |
|
---|
457 | case syntax.Lazybranchcount | syntax.Back2:
|
---|
458 | // r.trackPush:
|
---|
459 | // 0: Previous mark
|
---|
460 | // r.stackPush:
|
---|
461 | // 0: Mark (== current pos, discarded)
|
---|
462 | // 1: Count
|
---|
463 | r.trackPop()
|
---|
464 | r.stackPopN(2)
|
---|
465 | r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
|
---|
466 | break // Backtrack
|
---|
467 |
|
---|
468 | case syntax.Setjump:
|
---|
469 | r.stackPush2(r.trackpos(), r.crawlpos())
|
---|
470 | r.trackPush()
|
---|
471 | r.advance(0)
|
---|
472 | continue
|
---|
473 |
|
---|
474 | case syntax.Setjump | syntax.Back:
|
---|
475 | r.stackPopN(2)
|
---|
476 | break
|
---|
477 |
|
---|
478 | case syntax.Backjump:
|
---|
479 | // r.stackPush:
|
---|
480 | // 0: Saved trackpos
|
---|
481 | // 1: r.crawlpos
|
---|
482 | r.stackPopN(2)
|
---|
483 | r.trackto(r.stackPeek())
|
---|
484 |
|
---|
485 | for r.crawlpos() != r.stackPeekN(1) {
|
---|
486 | r.uncapture()
|
---|
487 | }
|
---|
488 |
|
---|
489 | break
|
---|
490 |
|
---|
491 | case syntax.Forejump:
|
---|
492 | // r.stackPush:
|
---|
493 | // 0: Saved trackpos
|
---|
494 | // 1: r.crawlpos
|
---|
495 | r.stackPopN(2)
|
---|
496 | r.trackto(r.stackPeek())
|
---|
497 | r.trackPush1(r.stackPeekN(1))
|
---|
498 | r.advance(0)
|
---|
499 | continue
|
---|
500 |
|
---|
501 | case syntax.Forejump | syntax.Back:
|
---|
502 | // r.trackPush:
|
---|
503 | // 0: r.crawlpos
|
---|
504 | r.trackPop()
|
---|
505 |
|
---|
506 | for r.crawlpos() != r.trackPeek() {
|
---|
507 | r.uncapture()
|
---|
508 | }
|
---|
509 |
|
---|
510 | break
|
---|
511 |
|
---|
512 | case syntax.Bol:
|
---|
513 | if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
|
---|
514 | break
|
---|
515 | }
|
---|
516 | r.advance(0)
|
---|
517 | continue
|
---|
518 |
|
---|
519 | case syntax.Eol:
|
---|
520 | if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
|
---|
521 | break
|
---|
522 | }
|
---|
523 | r.advance(0)
|
---|
524 | continue
|
---|
525 |
|
---|
526 | case syntax.Boundary:
|
---|
527 | if !r.isBoundary(r.textPos(), 0, r.runtextend) {
|
---|
528 | break
|
---|
529 | }
|
---|
530 | r.advance(0)
|
---|
531 | continue
|
---|
532 |
|
---|
533 | case syntax.Nonboundary:
|
---|
534 | if r.isBoundary(r.textPos(), 0, r.runtextend) {
|
---|
535 | break
|
---|
536 | }
|
---|
537 | r.advance(0)
|
---|
538 | continue
|
---|
539 |
|
---|
540 | case syntax.ECMABoundary:
|
---|
541 | if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
|
---|
542 | break
|
---|
543 | }
|
---|
544 | r.advance(0)
|
---|
545 | continue
|
---|
546 |
|
---|
547 | case syntax.NonECMABoundary:
|
---|
548 | if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
|
---|
549 | break
|
---|
550 | }
|
---|
551 | r.advance(0)
|
---|
552 | continue
|
---|
553 |
|
---|
554 | case syntax.Beginning:
|
---|
555 | if r.leftchars() > 0 {
|
---|
556 | break
|
---|
557 | }
|
---|
558 | r.advance(0)
|
---|
559 | continue
|
---|
560 |
|
---|
561 | case syntax.Start:
|
---|
562 | if r.textPos() != r.textstart() {
|
---|
563 | break
|
---|
564 | }
|
---|
565 | r.advance(0)
|
---|
566 | continue
|
---|
567 |
|
---|
568 | case syntax.EndZ:
|
---|
569 | rchars := r.rightchars()
|
---|
570 | if rchars > 1 {
|
---|
571 | break
|
---|
572 | }
|
---|
573 | // RE2 and EcmaScript define $ as "asserts position at the end of the string"
|
---|
574 | // PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
|
---|
575 | if (r.re.options & (RE2 | ECMAScript)) != 0 {
|
---|
576 | // RE2/Ecmascript mode
|
---|
577 | if rchars > 0 {
|
---|
578 | break
|
---|
579 | }
|
---|
580 | } else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
|
---|
581 | // "regular" mode
|
---|
582 | break
|
---|
583 | }
|
---|
584 |
|
---|
585 | r.advance(0)
|
---|
586 | continue
|
---|
587 |
|
---|
588 | case syntax.End:
|
---|
589 | if r.rightchars() > 0 {
|
---|
590 | break
|
---|
591 | }
|
---|
592 | r.advance(0)
|
---|
593 | continue
|
---|
594 |
|
---|
595 | case syntax.One:
|
---|
596 | if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
|
---|
597 | break
|
---|
598 | }
|
---|
599 |
|
---|
600 | r.advance(1)
|
---|
601 | continue
|
---|
602 |
|
---|
603 | case syntax.Notone:
|
---|
604 | if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
|
---|
605 | break
|
---|
606 | }
|
---|
607 |
|
---|
608 | r.advance(1)
|
---|
609 | continue
|
---|
610 |
|
---|
611 | case syntax.Set:
|
---|
612 |
|
---|
613 | if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
|
---|
614 | break
|
---|
615 | }
|
---|
616 |
|
---|
617 | r.advance(1)
|
---|
618 | continue
|
---|
619 |
|
---|
620 | case syntax.Multi:
|
---|
621 | if !r.runematch(r.code.Strings[r.operand(0)]) {
|
---|
622 | break
|
---|
623 | }
|
---|
624 |
|
---|
625 | r.advance(1)
|
---|
626 | continue
|
---|
627 |
|
---|
628 | case syntax.Ref:
|
---|
629 |
|
---|
630 | capnum := r.operand(0)
|
---|
631 |
|
---|
632 | if r.runmatch.isMatched(capnum) {
|
---|
633 | if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
|
---|
634 | break
|
---|
635 | }
|
---|
636 | } else {
|
---|
637 | if (r.re.options & ECMAScript) == 0 {
|
---|
638 | break
|
---|
639 | }
|
---|
640 | }
|
---|
641 |
|
---|
642 | r.advance(1)
|
---|
643 | continue
|
---|
644 |
|
---|
645 | case syntax.Onerep:
|
---|
646 |
|
---|
647 | c := r.operand(1)
|
---|
648 |
|
---|
649 | if r.forwardchars() < c {
|
---|
650 | break
|
---|
651 | }
|
---|
652 |
|
---|
653 | ch := rune(r.operand(0))
|
---|
654 |
|
---|
655 | for c > 0 {
|
---|
656 | if r.forwardcharnext() != ch {
|
---|
657 | goto BreakBackward
|
---|
658 | }
|
---|
659 | c--
|
---|
660 | }
|
---|
661 |
|
---|
662 | r.advance(2)
|
---|
663 | continue
|
---|
664 |
|
---|
665 | case syntax.Notonerep:
|
---|
666 |
|
---|
667 | c := r.operand(1)
|
---|
668 |
|
---|
669 | if r.forwardchars() < c {
|
---|
670 | break
|
---|
671 | }
|
---|
672 | ch := rune(r.operand(0))
|
---|
673 |
|
---|
674 | for c > 0 {
|
---|
675 | if r.forwardcharnext() == ch {
|
---|
676 | goto BreakBackward
|
---|
677 | }
|
---|
678 | c--
|
---|
679 | }
|
---|
680 |
|
---|
681 | r.advance(2)
|
---|
682 | continue
|
---|
683 |
|
---|
684 | case syntax.Setrep:
|
---|
685 |
|
---|
686 | c := r.operand(1)
|
---|
687 |
|
---|
688 | if r.forwardchars() < c {
|
---|
689 | break
|
---|
690 | }
|
---|
691 |
|
---|
692 | set := r.code.Sets[r.operand(0)]
|
---|
693 |
|
---|
694 | for c > 0 {
|
---|
695 | if !set.CharIn(r.forwardcharnext()) {
|
---|
696 | goto BreakBackward
|
---|
697 | }
|
---|
698 | c--
|
---|
699 | }
|
---|
700 |
|
---|
701 | r.advance(2)
|
---|
702 | continue
|
---|
703 |
|
---|
704 | case syntax.Oneloop:
|
---|
705 |
|
---|
706 | c := r.operand(1)
|
---|
707 |
|
---|
708 | if c > r.forwardchars() {
|
---|
709 | c = r.forwardchars()
|
---|
710 | }
|
---|
711 |
|
---|
712 | ch := rune(r.operand(0))
|
---|
713 | i := c
|
---|
714 |
|
---|
715 | for ; i > 0; i-- {
|
---|
716 | if r.forwardcharnext() != ch {
|
---|
717 | r.backwardnext()
|
---|
718 | break
|
---|
719 | }
|
---|
720 | }
|
---|
721 |
|
---|
722 | if c > i {
|
---|
723 | r.trackPush2(c-i-1, r.textPos()-r.bump())
|
---|
724 | }
|
---|
725 |
|
---|
726 | r.advance(2)
|
---|
727 | continue
|
---|
728 |
|
---|
729 | case syntax.Notoneloop:
|
---|
730 |
|
---|
731 | c := r.operand(1)
|
---|
732 |
|
---|
733 | if c > r.forwardchars() {
|
---|
734 | c = r.forwardchars()
|
---|
735 | }
|
---|
736 |
|
---|
737 | ch := rune(r.operand(0))
|
---|
738 | i := c
|
---|
739 |
|
---|
740 | for ; i > 0; i-- {
|
---|
741 | if r.forwardcharnext() == ch {
|
---|
742 | r.backwardnext()
|
---|
743 | break
|
---|
744 | }
|
---|
745 | }
|
---|
746 |
|
---|
747 | if c > i {
|
---|
748 | r.trackPush2(c-i-1, r.textPos()-r.bump())
|
---|
749 | }
|
---|
750 |
|
---|
751 | r.advance(2)
|
---|
752 | continue
|
---|
753 |
|
---|
754 | case syntax.Setloop:
|
---|
755 |
|
---|
756 | c := r.operand(1)
|
---|
757 |
|
---|
758 | if c > r.forwardchars() {
|
---|
759 | c = r.forwardchars()
|
---|
760 | }
|
---|
761 |
|
---|
762 | set := r.code.Sets[r.operand(0)]
|
---|
763 | i := c
|
---|
764 |
|
---|
765 | for ; i > 0; i-- {
|
---|
766 | if !set.CharIn(r.forwardcharnext()) {
|
---|
767 | r.backwardnext()
|
---|
768 | break
|
---|
769 | }
|
---|
770 | }
|
---|
771 |
|
---|
772 | if c > i {
|
---|
773 | r.trackPush2(c-i-1, r.textPos()-r.bump())
|
---|
774 | }
|
---|
775 |
|
---|
776 | r.advance(2)
|
---|
777 | continue
|
---|
778 |
|
---|
779 | case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
|
---|
780 |
|
---|
781 | r.trackPopN(2)
|
---|
782 | i := r.trackPeek()
|
---|
783 | pos := r.trackPeekN(1)
|
---|
784 |
|
---|
785 | r.textto(pos)
|
---|
786 |
|
---|
787 | if i > 0 {
|
---|
788 | r.trackPush2(i-1, pos-r.bump())
|
---|
789 | }
|
---|
790 |
|
---|
791 | r.advance(2)
|
---|
792 | continue
|
---|
793 |
|
---|
794 | case syntax.Setloop | syntax.Back:
|
---|
795 |
|
---|
796 | r.trackPopN(2)
|
---|
797 | i := r.trackPeek()
|
---|
798 | pos := r.trackPeekN(1)
|
---|
799 |
|
---|
800 | r.textto(pos)
|
---|
801 |
|
---|
802 | if i > 0 {
|
---|
803 | r.trackPush2(i-1, pos-r.bump())
|
---|
804 | }
|
---|
805 |
|
---|
806 | r.advance(2)
|
---|
807 | continue
|
---|
808 |
|
---|
809 | case syntax.Onelazy, syntax.Notonelazy:
|
---|
810 |
|
---|
811 | c := r.operand(1)
|
---|
812 |
|
---|
813 | if c > r.forwardchars() {
|
---|
814 | c = r.forwardchars()
|
---|
815 | }
|
---|
816 |
|
---|
817 | if c > 0 {
|
---|
818 | r.trackPush2(c-1, r.textPos())
|
---|
819 | }
|
---|
820 |
|
---|
821 | r.advance(2)
|
---|
822 | continue
|
---|
823 |
|
---|
824 | case syntax.Setlazy:
|
---|
825 |
|
---|
826 | c := r.operand(1)
|
---|
827 |
|
---|
828 | if c > r.forwardchars() {
|
---|
829 | c = r.forwardchars()
|
---|
830 | }
|
---|
831 |
|
---|
832 | if c > 0 {
|
---|
833 | r.trackPush2(c-1, r.textPos())
|
---|
834 | }
|
---|
835 |
|
---|
836 | r.advance(2)
|
---|
837 | continue
|
---|
838 |
|
---|
839 | case syntax.Onelazy | syntax.Back:
|
---|
840 |
|
---|
841 | r.trackPopN(2)
|
---|
842 | pos := r.trackPeekN(1)
|
---|
843 | r.textto(pos)
|
---|
844 |
|
---|
845 | if r.forwardcharnext() != rune(r.operand(0)) {
|
---|
846 | break
|
---|
847 | }
|
---|
848 |
|
---|
849 | i := r.trackPeek()
|
---|
850 |
|
---|
851 | if i > 0 {
|
---|
852 | r.trackPush2(i-1, pos+r.bump())
|
---|
853 | }
|
---|
854 |
|
---|
855 | r.advance(2)
|
---|
856 | continue
|
---|
857 |
|
---|
858 | case syntax.Notonelazy | syntax.Back:
|
---|
859 |
|
---|
860 | r.trackPopN(2)
|
---|
861 | pos := r.trackPeekN(1)
|
---|
862 | r.textto(pos)
|
---|
863 |
|
---|
864 | if r.forwardcharnext() == rune(r.operand(0)) {
|
---|
865 | break
|
---|
866 | }
|
---|
867 |
|
---|
868 | i := r.trackPeek()
|
---|
869 |
|
---|
870 | if i > 0 {
|
---|
871 | r.trackPush2(i-1, pos+r.bump())
|
---|
872 | }
|
---|
873 |
|
---|
874 | r.advance(2)
|
---|
875 | continue
|
---|
876 |
|
---|
877 | case syntax.Setlazy | syntax.Back:
|
---|
878 |
|
---|
879 | r.trackPopN(2)
|
---|
880 | pos := r.trackPeekN(1)
|
---|
881 | r.textto(pos)
|
---|
882 |
|
---|
883 | if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
|
---|
884 | break
|
---|
885 | }
|
---|
886 |
|
---|
887 | i := r.trackPeek()
|
---|
888 |
|
---|
889 | if i > 0 {
|
---|
890 | r.trackPush2(i-1, pos+r.bump())
|
---|
891 | }
|
---|
892 |
|
---|
893 | r.advance(2)
|
---|
894 | continue
|
---|
895 |
|
---|
896 | default:
|
---|
897 | return errors.New("unknown state in regex runner")
|
---|
898 | }
|
---|
899 |
|
---|
900 | BreakBackward:
|
---|
901 | ;
|
---|
902 |
|
---|
903 | // "break Backward" comes here:
|
---|
904 | r.backtrack()
|
---|
905 | }
|
---|
906 | }
|
---|
907 |
|
---|
908 | // increase the size of stack and track storage
|
---|
909 | func (r *runner) ensureStorage() {
|
---|
910 | if r.runstackpos < r.runtrackcount*4 {
|
---|
911 | doubleIntSlice(&r.runstack, &r.runstackpos)
|
---|
912 | }
|
---|
913 | if r.runtrackpos < r.runtrackcount*4 {
|
---|
914 | doubleIntSlice(&r.runtrack, &r.runtrackpos)
|
---|
915 | }
|
---|
916 | }
|
---|
917 |
|
---|
918 | func doubleIntSlice(s *[]int, pos *int) {
|
---|
919 | oldLen := len(*s)
|
---|
920 | newS := make([]int, oldLen*2)
|
---|
921 |
|
---|
922 | copy(newS[oldLen:], *s)
|
---|
923 | *pos += oldLen
|
---|
924 | *s = newS
|
---|
925 | }
|
---|
926 |
|
---|
927 | // Save a number on the longjump unrolling stack
|
---|
928 | func (r *runner) crawl(i int) {
|
---|
929 | if r.runcrawlpos == 0 {
|
---|
930 | doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
|
---|
931 | }
|
---|
932 | r.runcrawlpos--
|
---|
933 | r.runcrawl[r.runcrawlpos] = i
|
---|
934 | }
|
---|
935 |
|
---|
936 | // Remove a number from the longjump unrolling stack
|
---|
937 | func (r *runner) popcrawl() int {
|
---|
938 | val := r.runcrawl[r.runcrawlpos]
|
---|
939 | r.runcrawlpos++
|
---|
940 | return val
|
---|
941 | }
|
---|
942 |
|
---|
943 | // Get the height of the stack
|
---|
944 | func (r *runner) crawlpos() int {
|
---|
945 | return len(r.runcrawl) - r.runcrawlpos
|
---|
946 | }
|
---|
947 |
|
---|
948 | func (r *runner) advance(i int) {
|
---|
949 | r.codepos += (i + 1)
|
---|
950 | r.setOperator(r.code.Codes[r.codepos])
|
---|
951 | }
|
---|
952 |
|
---|
953 | func (r *runner) goTo(newpos int) {
|
---|
954 | // when branching backward or in place, ensure storage
|
---|
955 | if newpos <= r.codepos {
|
---|
956 | r.ensureStorage()
|
---|
957 | }
|
---|
958 |
|
---|
959 | r.setOperator(r.code.Codes[newpos])
|
---|
960 | r.codepos = newpos
|
---|
961 | }
|
---|
962 |
|
---|
963 | func (r *runner) textto(newpos int) {
|
---|
964 | r.runtextpos = newpos
|
---|
965 | }
|
---|
966 |
|
---|
967 | func (r *runner) trackto(newpos int) {
|
---|
968 | r.runtrackpos = len(r.runtrack) - newpos
|
---|
969 | }
|
---|
970 |
|
---|
971 | func (r *runner) textstart() int {
|
---|
972 | return r.runtextstart
|
---|
973 | }
|
---|
974 |
|
---|
975 | func (r *runner) textPos() int {
|
---|
976 | return r.runtextpos
|
---|
977 | }
|
---|
978 |
|
---|
979 | // push onto the backtracking stack
|
---|
980 | func (r *runner) trackpos() int {
|
---|
981 | return len(r.runtrack) - r.runtrackpos
|
---|
982 | }
|
---|
983 |
|
---|
984 | func (r *runner) trackPush() {
|
---|
985 | r.runtrackpos--
|
---|
986 | r.runtrack[r.runtrackpos] = r.codepos
|
---|
987 | }
|
---|
988 |
|
---|
989 | func (r *runner) trackPush1(I1 int) {
|
---|
990 | r.runtrackpos--
|
---|
991 | r.runtrack[r.runtrackpos] = I1
|
---|
992 | r.runtrackpos--
|
---|
993 | r.runtrack[r.runtrackpos] = r.codepos
|
---|
994 | }
|
---|
995 |
|
---|
996 | func (r *runner) trackPush2(I1, I2 int) {
|
---|
997 | r.runtrackpos--
|
---|
998 | r.runtrack[r.runtrackpos] = I1
|
---|
999 | r.runtrackpos--
|
---|
1000 | r.runtrack[r.runtrackpos] = I2
|
---|
1001 | r.runtrackpos--
|
---|
1002 | r.runtrack[r.runtrackpos] = r.codepos
|
---|
1003 | }
|
---|
1004 |
|
---|
1005 | func (r *runner) trackPush3(I1, I2, I3 int) {
|
---|
1006 | r.runtrackpos--
|
---|
1007 | r.runtrack[r.runtrackpos] = I1
|
---|
1008 | r.runtrackpos--
|
---|
1009 | r.runtrack[r.runtrackpos] = I2
|
---|
1010 | r.runtrackpos--
|
---|
1011 | r.runtrack[r.runtrackpos] = I3
|
---|
1012 | r.runtrackpos--
|
---|
1013 | r.runtrack[r.runtrackpos] = r.codepos
|
---|
1014 | }
|
---|
1015 |
|
---|
1016 | func (r *runner) trackPushNeg1(I1 int) {
|
---|
1017 | r.runtrackpos--
|
---|
1018 | r.runtrack[r.runtrackpos] = I1
|
---|
1019 | r.runtrackpos--
|
---|
1020 | r.runtrack[r.runtrackpos] = -r.codepos
|
---|
1021 | }
|
---|
1022 |
|
---|
1023 | func (r *runner) trackPushNeg2(I1, I2 int) {
|
---|
1024 | r.runtrackpos--
|
---|
1025 | r.runtrack[r.runtrackpos] = I1
|
---|
1026 | r.runtrackpos--
|
---|
1027 | r.runtrack[r.runtrackpos] = I2
|
---|
1028 | r.runtrackpos--
|
---|
1029 | r.runtrack[r.runtrackpos] = -r.codepos
|
---|
1030 | }
|
---|
1031 |
|
---|
1032 | func (r *runner) backtrack() {
|
---|
1033 | newpos := r.runtrack[r.runtrackpos]
|
---|
1034 | r.runtrackpos++
|
---|
1035 |
|
---|
1036 | if r.re.Debug() {
|
---|
1037 | if newpos < 0 {
|
---|
1038 | fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
|
---|
1039 | } else {
|
---|
1040 | fmt.Printf(" Backtracking to code position %v\n", newpos)
|
---|
1041 | }
|
---|
1042 | }
|
---|
1043 |
|
---|
1044 | if newpos < 0 {
|
---|
1045 | newpos = -newpos
|
---|
1046 | r.setOperator(r.code.Codes[newpos] | syntax.Back2)
|
---|
1047 | } else {
|
---|
1048 | r.setOperator(r.code.Codes[newpos] | syntax.Back)
|
---|
1049 | }
|
---|
1050 |
|
---|
1051 | // When branching backward, ensure storage
|
---|
1052 | if newpos < r.codepos {
|
---|
1053 | r.ensureStorage()
|
---|
1054 | }
|
---|
1055 |
|
---|
1056 | r.codepos = newpos
|
---|
1057 | }
|
---|
1058 |
|
---|
1059 | func (r *runner) setOperator(op int) {
|
---|
1060 | r.caseInsensitive = (0 != (op & syntax.Ci))
|
---|
1061 | r.rightToLeft = (0 != (op & syntax.Rtl))
|
---|
1062 | r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
|
---|
1063 | }
|
---|
1064 |
|
---|
1065 | func (r *runner) trackPop() {
|
---|
1066 | r.runtrackpos++
|
---|
1067 | }
|
---|
1068 |
|
---|
1069 | // pop framesize items from the backtracking stack
|
---|
1070 | func (r *runner) trackPopN(framesize int) {
|
---|
1071 | r.runtrackpos += framesize
|
---|
1072 | }
|
---|
1073 |
|
---|
1074 | // Technically we are actually peeking at items already popped. So if you want to
|
---|
1075 | // get and pop the top item from the stack, you do
|
---|
1076 | // r.trackPop();
|
---|
1077 | // r.trackPeek();
|
---|
1078 | func (r *runner) trackPeek() int {
|
---|
1079 | return r.runtrack[r.runtrackpos-1]
|
---|
1080 | }
|
---|
1081 |
|
---|
1082 | // get the ith element down on the backtracking stack
|
---|
1083 | func (r *runner) trackPeekN(i int) int {
|
---|
1084 | return r.runtrack[r.runtrackpos-i-1]
|
---|
1085 | }
|
---|
1086 |
|
---|
1087 | // Push onto the grouping stack
|
---|
1088 | func (r *runner) stackPush(I1 int) {
|
---|
1089 | r.runstackpos--
|
---|
1090 | r.runstack[r.runstackpos] = I1
|
---|
1091 | }
|
---|
1092 |
|
---|
1093 | func (r *runner) stackPush2(I1, I2 int) {
|
---|
1094 | r.runstackpos--
|
---|
1095 | r.runstack[r.runstackpos] = I1
|
---|
1096 | r.runstackpos--
|
---|
1097 | r.runstack[r.runstackpos] = I2
|
---|
1098 | }
|
---|
1099 |
|
---|
1100 | func (r *runner) stackPop() {
|
---|
1101 | r.runstackpos++
|
---|
1102 | }
|
---|
1103 |
|
---|
1104 | // pop framesize items from the grouping stack
|
---|
1105 | func (r *runner) stackPopN(framesize int) {
|
---|
1106 | r.runstackpos += framesize
|
---|
1107 | }
|
---|
1108 |
|
---|
1109 | // Technically we are actually peeking at items already popped. So if you want to
|
---|
1110 | // get and pop the top item from the stack, you do
|
---|
1111 | // r.stackPop();
|
---|
1112 | // r.stackPeek();
|
---|
1113 | func (r *runner) stackPeek() int {
|
---|
1114 | return r.runstack[r.runstackpos-1]
|
---|
1115 | }
|
---|
1116 |
|
---|
1117 | // get the ith element down on the grouping stack
|
---|
1118 | func (r *runner) stackPeekN(i int) int {
|
---|
1119 | return r.runstack[r.runstackpos-i-1]
|
---|
1120 | }
|
---|
1121 |
|
---|
1122 | func (r *runner) operand(i int) int {
|
---|
1123 | return r.code.Codes[r.codepos+i+1]
|
---|
1124 | }
|
---|
1125 |
|
---|
1126 | func (r *runner) leftchars() int {
|
---|
1127 | return r.runtextpos
|
---|
1128 | }
|
---|
1129 |
|
---|
1130 | func (r *runner) rightchars() int {
|
---|
1131 | return r.runtextend - r.runtextpos
|
---|
1132 | }
|
---|
1133 |
|
---|
1134 | func (r *runner) bump() int {
|
---|
1135 | if r.rightToLeft {
|
---|
1136 | return -1
|
---|
1137 | }
|
---|
1138 | return 1
|
---|
1139 | }
|
---|
1140 |
|
---|
1141 | func (r *runner) forwardchars() int {
|
---|
1142 | if r.rightToLeft {
|
---|
1143 | return r.runtextpos
|
---|
1144 | }
|
---|
1145 | return r.runtextend - r.runtextpos
|
---|
1146 | }
|
---|
1147 |
|
---|
1148 | func (r *runner) forwardcharnext() rune {
|
---|
1149 | var ch rune
|
---|
1150 | if r.rightToLeft {
|
---|
1151 | r.runtextpos--
|
---|
1152 | ch = r.runtext[r.runtextpos]
|
---|
1153 | } else {
|
---|
1154 | ch = r.runtext[r.runtextpos]
|
---|
1155 | r.runtextpos++
|
---|
1156 | }
|
---|
1157 |
|
---|
1158 | if r.caseInsensitive {
|
---|
1159 | return unicode.ToLower(ch)
|
---|
1160 | }
|
---|
1161 | return ch
|
---|
1162 | }
|
---|
1163 |
|
---|
1164 | func (r *runner) runematch(str []rune) bool {
|
---|
1165 | var pos int
|
---|
1166 |
|
---|
1167 | c := len(str)
|
---|
1168 | if !r.rightToLeft {
|
---|
1169 | if r.runtextend-r.runtextpos < c {
|
---|
1170 | return false
|
---|
1171 | }
|
---|
1172 |
|
---|
1173 | pos = r.runtextpos + c
|
---|
1174 | } else {
|
---|
1175 | if r.runtextpos-0 < c {
|
---|
1176 | return false
|
---|
1177 | }
|
---|
1178 |
|
---|
1179 | pos = r.runtextpos
|
---|
1180 | }
|
---|
1181 |
|
---|
1182 | if !r.caseInsensitive {
|
---|
1183 | for c != 0 {
|
---|
1184 | c--
|
---|
1185 | pos--
|
---|
1186 | if str[c] != r.runtext[pos] {
|
---|
1187 | return false
|
---|
1188 | }
|
---|
1189 | }
|
---|
1190 | } else {
|
---|
1191 | for c != 0 {
|
---|
1192 | c--
|
---|
1193 | pos--
|
---|
1194 | if str[c] != unicode.ToLower(r.runtext[pos]) {
|
---|
1195 | return false
|
---|
1196 | }
|
---|
1197 | }
|
---|
1198 | }
|
---|
1199 |
|
---|
1200 | if !r.rightToLeft {
|
---|
1201 | pos += len(str)
|
---|
1202 | }
|
---|
1203 |
|
---|
1204 | r.runtextpos = pos
|
---|
1205 |
|
---|
1206 | return true
|
---|
1207 | }
|
---|
1208 |
|
---|
1209 | func (r *runner) refmatch(index, len int) bool {
|
---|
1210 | var c, pos, cmpos int
|
---|
1211 |
|
---|
1212 | if !r.rightToLeft {
|
---|
1213 | if r.runtextend-r.runtextpos < len {
|
---|
1214 | return false
|
---|
1215 | }
|
---|
1216 |
|
---|
1217 | pos = r.runtextpos + len
|
---|
1218 | } else {
|
---|
1219 | if r.runtextpos-0 < len {
|
---|
1220 | return false
|
---|
1221 | }
|
---|
1222 |
|
---|
1223 | pos = r.runtextpos
|
---|
1224 | }
|
---|
1225 | cmpos = index + len
|
---|
1226 |
|
---|
1227 | c = len
|
---|
1228 |
|
---|
1229 | if !r.caseInsensitive {
|
---|
1230 | for c != 0 {
|
---|
1231 | c--
|
---|
1232 | cmpos--
|
---|
1233 | pos--
|
---|
1234 | if r.runtext[cmpos] != r.runtext[pos] {
|
---|
1235 | return false
|
---|
1236 | }
|
---|
1237 |
|
---|
1238 | }
|
---|
1239 | } else {
|
---|
1240 | for c != 0 {
|
---|
1241 | c--
|
---|
1242 | cmpos--
|
---|
1243 | pos--
|
---|
1244 |
|
---|
1245 | if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
|
---|
1246 | return false
|
---|
1247 | }
|
---|
1248 | }
|
---|
1249 | }
|
---|
1250 |
|
---|
1251 | if !r.rightToLeft {
|
---|
1252 | pos += len
|
---|
1253 | }
|
---|
1254 |
|
---|
1255 | r.runtextpos = pos
|
---|
1256 |
|
---|
1257 | return true
|
---|
1258 | }
|
---|
1259 |
|
---|
1260 | func (r *runner) backwardnext() {
|
---|
1261 | if r.rightToLeft {
|
---|
1262 | r.runtextpos++
|
---|
1263 | } else {
|
---|
1264 | r.runtextpos--
|
---|
1265 | }
|
---|
1266 | }
|
---|
1267 |
|
---|
1268 | func (r *runner) charAt(j int) rune {
|
---|
1269 | return r.runtext[j]
|
---|
1270 | }
|
---|
1271 |
|
---|
1272 | func (r *runner) findFirstChar() bool {
|
---|
1273 |
|
---|
1274 | if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
|
---|
1275 | if !r.code.RightToLeft {
|
---|
1276 | if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
|
---|
1277 | (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
|
---|
1278 | r.runtextpos = r.runtextend
|
---|
1279 | return false
|
---|
1280 | }
|
---|
1281 | if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
|
---|
1282 | r.runtextpos = r.runtextend - 1
|
---|
1283 | } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
|
---|
1284 | r.runtextpos = r.runtextend
|
---|
1285 | }
|
---|
1286 | } else {
|
---|
1287 | if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
|
---|
1288 | (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
|
---|
1289 | (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
|
---|
1290 | (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
|
---|
1291 | r.runtextpos = 0
|
---|
1292 | return false
|
---|
1293 | }
|
---|
1294 | if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
|
---|
1295 | r.runtextpos = 0
|
---|
1296 | }
|
---|
1297 | }
|
---|
1298 |
|
---|
1299 | if r.code.BmPrefix != nil {
|
---|
1300 | return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
|
---|
1301 | }
|
---|
1302 |
|
---|
1303 | return true // found a valid start or end anchor
|
---|
1304 | } else if r.code.BmPrefix != nil {
|
---|
1305 | r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
|
---|
1306 |
|
---|
1307 | if r.runtextpos == -1 {
|
---|
1308 | if r.code.RightToLeft {
|
---|
1309 | r.runtextpos = 0
|
---|
1310 | } else {
|
---|
1311 | r.runtextpos = r.runtextend
|
---|
1312 | }
|
---|
1313 | return false
|
---|
1314 | }
|
---|
1315 |
|
---|
1316 | return true
|
---|
1317 | } else if r.code.FcPrefix == nil {
|
---|
1318 | return true
|
---|
1319 | }
|
---|
1320 |
|
---|
1321 | r.rightToLeft = r.code.RightToLeft
|
---|
1322 | r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
|
---|
1323 |
|
---|
1324 | set := r.code.FcPrefix.PrefixSet
|
---|
1325 | if set.IsSingleton() {
|
---|
1326 | ch := set.SingletonChar()
|
---|
1327 | for i := r.forwardchars(); i > 0; i-- {
|
---|
1328 | if ch == r.forwardcharnext() {
|
---|
1329 | r.backwardnext()
|
---|
1330 | return true
|
---|
1331 | }
|
---|
1332 | }
|
---|
1333 | } else {
|
---|
1334 | for i := r.forwardchars(); i > 0; i-- {
|
---|
1335 | n := r.forwardcharnext()
|
---|
1336 | //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
|
---|
1337 | if set.CharIn(n) {
|
---|
1338 | r.backwardnext()
|
---|
1339 | return true
|
---|
1340 | }
|
---|
1341 | }
|
---|
1342 | }
|
---|
1343 |
|
---|
1344 | return false
|
---|
1345 | }
|
---|
1346 |
|
---|
1347 | func (r *runner) initMatch() {
|
---|
1348 | // Use a hashtable'ed Match object if the capture numbers are sparse
|
---|
1349 |
|
---|
1350 | if r.runmatch == nil {
|
---|
1351 | if r.re.caps != nil {
|
---|
1352 | r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
|
---|
1353 | } else {
|
---|
1354 | r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
|
---|
1355 | }
|
---|
1356 | } else {
|
---|
1357 | r.runmatch.reset(r.runtext, r.runtextstart)
|
---|
1358 | }
|
---|
1359 |
|
---|
1360 | // note we test runcrawl, because it is the last one to be allocated
|
---|
1361 | // If there is an alloc failure in the middle of the three allocations,
|
---|
1362 | // we may still return to reuse this instance, and we want to behave
|
---|
1363 | // as if the allocations didn't occur. (we used to test _trackcount != 0)
|
---|
1364 |
|
---|
1365 | if r.runcrawl != nil {
|
---|
1366 | r.runtrackpos = len(r.runtrack)
|
---|
1367 | r.runstackpos = len(r.runstack)
|
---|
1368 | r.runcrawlpos = len(r.runcrawl)
|
---|
1369 | return
|
---|
1370 | }
|
---|
1371 |
|
---|
1372 | r.initTrackCount()
|
---|
1373 |
|
---|
1374 | tracksize := r.runtrackcount * 8
|
---|
1375 | stacksize := r.runtrackcount * 8
|
---|
1376 |
|
---|
1377 | if tracksize < 32 {
|
---|
1378 | tracksize = 32
|
---|
1379 | }
|
---|
1380 | if stacksize < 16 {
|
---|
1381 | stacksize = 16
|
---|
1382 | }
|
---|
1383 |
|
---|
1384 | r.runtrack = make([]int, tracksize)
|
---|
1385 | r.runtrackpos = tracksize
|
---|
1386 |
|
---|
1387 | r.runstack = make([]int, stacksize)
|
---|
1388 | r.runstackpos = stacksize
|
---|
1389 |
|
---|
1390 | r.runcrawl = make([]int, 32)
|
---|
1391 | r.runcrawlpos = 32
|
---|
1392 | }
|
---|
1393 |
|
---|
1394 | func (r *runner) tidyMatch(quick bool) *Match {
|
---|
1395 | if !quick {
|
---|
1396 | match := r.runmatch
|
---|
1397 |
|
---|
1398 | r.runmatch = nil
|
---|
1399 |
|
---|
1400 | match.tidy(r.runtextpos)
|
---|
1401 | return match
|
---|
1402 | } else {
|
---|
1403 | // send back our match -- it's not leaving the package, so it's safe to not clean it up
|
---|
1404 | // this reduces allocs for frequent calls to the "IsMatch" bool-only functions
|
---|
1405 | return r.runmatch
|
---|
1406 | }
|
---|
1407 | }
|
---|
1408 |
|
---|
1409 | // capture captures a subexpression. Note that the
|
---|
1410 | // capnum used here has already been mapped to a non-sparse
|
---|
1411 | // index (by the code generator RegexWriter).
|
---|
1412 | func (r *runner) capture(capnum, start, end int) {
|
---|
1413 | if end < start {
|
---|
1414 | T := end
|
---|
1415 | end = start
|
---|
1416 | start = T
|
---|
1417 | }
|
---|
1418 |
|
---|
1419 | r.crawl(capnum)
|
---|
1420 | r.runmatch.addMatch(capnum, start, end-start)
|
---|
1421 | }
|
---|
1422 |
|
---|
1423 | // transferCapture captures a subexpression. Note that the
|
---|
1424 | // capnum used here has already been mapped to a non-sparse
|
---|
1425 | // index (by the code generator RegexWriter).
|
---|
1426 | func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
|
---|
1427 | var start2, end2 int
|
---|
1428 |
|
---|
1429 | // these are the two intervals that are cancelling each other
|
---|
1430 |
|
---|
1431 | if end < start {
|
---|
1432 | T := end
|
---|
1433 | end = start
|
---|
1434 | start = T
|
---|
1435 | }
|
---|
1436 |
|
---|
1437 | start2 = r.runmatch.matchIndex(uncapnum)
|
---|
1438 | end2 = start2 + r.runmatch.matchLength(uncapnum)
|
---|
1439 |
|
---|
1440 | // The new capture gets the innermost defined interval
|
---|
1441 |
|
---|
1442 | if start >= end2 {
|
---|
1443 | end = start
|
---|
1444 | start = end2
|
---|
1445 | } else if end <= start2 {
|
---|
1446 | start = start2
|
---|
1447 | } else {
|
---|
1448 | if end > end2 {
|
---|
1449 | end = end2
|
---|
1450 | }
|
---|
1451 | if start2 > start {
|
---|
1452 | start = start2
|
---|
1453 | }
|
---|
1454 | }
|
---|
1455 |
|
---|
1456 | r.crawl(uncapnum)
|
---|
1457 | r.runmatch.balanceMatch(uncapnum)
|
---|
1458 |
|
---|
1459 | if capnum != -1 {
|
---|
1460 | r.crawl(capnum)
|
---|
1461 | r.runmatch.addMatch(capnum, start, end-start)
|
---|
1462 | }
|
---|
1463 | }
|
---|
1464 |
|
---|
1465 | // revert the last capture
|
---|
1466 | func (r *runner) uncapture() {
|
---|
1467 | capnum := r.popcrawl()
|
---|
1468 | r.runmatch.removeMatch(capnum)
|
---|
1469 | }
|
---|
1470 |
|
---|
1471 | //debug
|
---|
1472 |
|
---|
1473 | func (r *runner) dumpState() {
|
---|
1474 | back := ""
|
---|
1475 | if r.operator&syntax.Back != 0 {
|
---|
1476 | back = " Back"
|
---|
1477 | }
|
---|
1478 | if r.operator&syntax.Back2 != 0 {
|
---|
1479 | back += " Back2"
|
---|
1480 | }
|
---|
1481 | fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
|
---|
1482 | r.textposDescription(),
|
---|
1483 | r.stackDescription(r.runtrack, r.runtrackpos),
|
---|
1484 | r.stackDescription(r.runstack, r.runstackpos),
|
---|
1485 | r.code.OpcodeDescription(r.codepos),
|
---|
1486 | back)
|
---|
1487 | }
|
---|
1488 |
|
---|
1489 | func (r *runner) stackDescription(a []int, index int) string {
|
---|
1490 | buf := &bytes.Buffer{}
|
---|
1491 |
|
---|
1492 | fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
|
---|
1493 | if buf.Len() < 8 {
|
---|
1494 | buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
|
---|
1495 | }
|
---|
1496 |
|
---|
1497 | buf.WriteRune('(')
|
---|
1498 | for i := index; i < len(a); i++ {
|
---|
1499 | if i > index {
|
---|
1500 | buf.WriteRune(' ')
|
---|
1501 | }
|
---|
1502 |
|
---|
1503 | buf.WriteString(strconv.Itoa(a[i]))
|
---|
1504 | }
|
---|
1505 |
|
---|
1506 | buf.WriteRune(')')
|
---|
1507 |
|
---|
1508 | return buf.String()
|
---|
1509 | }
|
---|
1510 |
|
---|
1511 | func (r *runner) textposDescription() string {
|
---|
1512 | buf := &bytes.Buffer{}
|
---|
1513 |
|
---|
1514 | buf.WriteString(strconv.Itoa(r.runtextpos))
|
---|
1515 |
|
---|
1516 | if buf.Len() < 8 {
|
---|
1517 | buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
|
---|
1518 | }
|
---|
1519 |
|
---|
1520 | if r.runtextpos > 0 {
|
---|
1521 | buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
|
---|
1522 | } else {
|
---|
1523 | buf.WriteRune('^')
|
---|
1524 | }
|
---|
1525 |
|
---|
1526 | buf.WriteRune('>')
|
---|
1527 |
|
---|
1528 | for i := r.runtextpos; i < r.runtextend; i++ {
|
---|
1529 | buf.WriteString(syntax.CharDescription(r.runtext[i]))
|
---|
1530 | }
|
---|
1531 | if buf.Len() >= 64 {
|
---|
1532 | buf.Truncate(61)
|
---|
1533 | buf.WriteString("...")
|
---|
1534 | } else {
|
---|
1535 | buf.WriteRune('$')
|
---|
1536 | }
|
---|
1537 |
|
---|
1538 | return buf.String()
|
---|
1539 | }
|
---|
1540 |
|
---|
1541 | // decide whether the pos
|
---|
1542 | // at the specified index is a boundary or not. It's just not worth
|
---|
1543 | // emitting inline code for this logic.
|
---|
1544 | func (r *runner) isBoundary(index, startpos, endpos int) bool {
|
---|
1545 | return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
|
---|
1546 | (index < endpos && syntax.IsWordChar(r.runtext[index]))
|
---|
1547 | }
|
---|
1548 |
|
---|
1549 | func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
|
---|
1550 | return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
|
---|
1551 | (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
|
---|
1552 | }
|
---|
1553 |
|
---|
1554 | // this seems like a comment to justify randomly picking 1000 :-P
|
---|
1555 | // We have determined this value in a series of experiments where x86 retail
|
---|
1556 | // builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
|
---|
1557 | // of TimeoutCheckFrequency did not tend to increase performance; smaller values
|
---|
1558 | // of TimeoutCheckFrequency tended to slow down the execution.
|
---|
1559 | const timeoutCheckFrequency int = 1000
|
---|
1560 |
|
---|
1561 | func (r *runner) startTimeoutWatch() {
|
---|
1562 | if r.ignoreTimeout {
|
---|
1563 | return
|
---|
1564 | }
|
---|
1565 |
|
---|
1566 | r.timeoutChecksToSkip = timeoutCheckFrequency
|
---|
1567 | r.timeoutAt = time.Now().Add(r.timeout)
|
---|
1568 | }
|
---|
1569 |
|
---|
1570 | func (r *runner) checkTimeout() error {
|
---|
1571 | if r.ignoreTimeout {
|
---|
1572 | return nil
|
---|
1573 | }
|
---|
1574 | r.timeoutChecksToSkip--
|
---|
1575 | if r.timeoutChecksToSkip != 0 {
|
---|
1576 | return nil
|
---|
1577 | }
|
---|
1578 |
|
---|
1579 | r.timeoutChecksToSkip = timeoutCheckFrequency
|
---|
1580 | return r.doCheckTimeout()
|
---|
1581 | }
|
---|
1582 |
|
---|
1583 | func (r *runner) doCheckTimeout() error {
|
---|
1584 | current := time.Now()
|
---|
1585 |
|
---|
1586 | if current.Before(r.timeoutAt) {
|
---|
1587 | return nil
|
---|
1588 | }
|
---|
1589 |
|
---|
1590 | if r.re.Debug() {
|
---|
1591 | //Debug.WriteLine("")
|
---|
1592 | //Debug.WriteLine("RegEx match timeout occurred!")
|
---|
1593 | //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
|
---|
1594 | //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
|
---|
1595 | //Debug.WriteLine("Search pattern: " + _runregex._pattern)
|
---|
1596 | //Debug.WriteLine("Input: " + r.runtext)
|
---|
1597 | //Debug.WriteLine("About to throw RegexMatchTimeoutException.")
|
---|
1598 | }
|
---|
1599 |
|
---|
1600 | return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
|
---|
1601 | }
|
---|
1602 |
|
---|
1603 | func (r *runner) initTrackCount() {
|
---|
1604 | r.runtrackcount = r.code.TrackCount
|
---|
1605 | }
|
---|
1606 |
|
---|
1607 | // getRunner returns a run to use for matching re.
|
---|
1608 | // It uses the re's runner cache if possible, to avoid
|
---|
1609 | // unnecessary allocation.
|
---|
1610 | func (re *Regexp) getRunner() *runner {
|
---|
1611 | re.muRun.Lock()
|
---|
1612 | if n := len(re.runner); n > 0 {
|
---|
1613 | z := re.runner[n-1]
|
---|
1614 | re.runner = re.runner[:n-1]
|
---|
1615 | re.muRun.Unlock()
|
---|
1616 | return z
|
---|
1617 | }
|
---|
1618 | re.muRun.Unlock()
|
---|
1619 | z := &runner{
|
---|
1620 | re: re,
|
---|
1621 | code: re.code,
|
---|
1622 | }
|
---|
1623 | return z
|
---|
1624 | }
|
---|
1625 |
|
---|
1626 | // putRunner returns a runner to the re's cache.
|
---|
1627 | // There is no attempt to limit the size of the cache, so it will
|
---|
1628 | // grow to the maximum number of simultaneous matches
|
---|
1629 | // run using re. (The cache empties when re gets garbage collected.)
|
---|
1630 | func (re *Regexp) putRunner(r *runner) {
|
---|
1631 | re.muRun.Lock()
|
---|
1632 | re.runner = append(re.runner, r)
|
---|
1633 | re.muRun.Unlock()
|
---|
1634 | }
|
---|