1 | package syntax
|
---|
2 |
|
---|
3 | import (
|
---|
4 | "fmt"
|
---|
5 | "math"
|
---|
6 | "os"
|
---|
7 | "sort"
|
---|
8 | "strconv"
|
---|
9 | "unicode"
|
---|
10 | )
|
---|
11 |
|
---|
// RegexOptions is a set of bit flags that modify how a pattern is parsed.
// Flags are combined with bitwise OR.
type RegexOptions int32

// Option flags, one bit each. Only IgnoreCase carries an explicit type; the
// remaining names are untyped integer constants that take on the
// RegexOptions type wherever the context requires it.
const (
	IgnoreCase              RegexOptions = 1 << 0 // "i"
	Multiline                            = 1 << 1 // "m"
	ExplicitCapture                      = 1 << 2 // "n"
	Compiled                             = 1 << 3 // "c"
	Singleline                           = 1 << 4 // "s"
	IgnorePatternWhitespace              = 1 << 5 // "x"
	RightToLeft                          = 1 << 6 // "r"
	Debug                                = 1 << 7 // "d"
	ECMAScript                           = 1 << 8 // "e"
	RE2                                  = 1 << 9 // RE2 compat mode
)
|
---|
26 |
|
---|
27 | func optionFromCode(ch rune) RegexOptions {
|
---|
28 | // case-insensitive
|
---|
29 | switch ch {
|
---|
30 | case 'i', 'I':
|
---|
31 | return IgnoreCase
|
---|
32 | case 'r', 'R':
|
---|
33 | return RightToLeft
|
---|
34 | case 'm', 'M':
|
---|
35 | return Multiline
|
---|
36 | case 'n', 'N':
|
---|
37 | return ExplicitCapture
|
---|
38 | case 's', 'S':
|
---|
39 | return Singleline
|
---|
40 | case 'x', 'X':
|
---|
41 | return IgnorePatternWhitespace
|
---|
42 | case 'd', 'D':
|
---|
43 | return Debug
|
---|
44 | case 'e', 'E':
|
---|
45 | return ECMAScript
|
---|
46 | default:
|
---|
47 | return 0
|
---|
48 | }
|
---|
49 | }
|
---|
50 |
|
---|
51 | // An Error describes a failure to parse a regular expression
|
---|
52 | // and gives the offending expression.
|
---|
53 | type Error struct {
|
---|
54 | Code ErrorCode
|
---|
55 | Expr string
|
---|
56 | Args []interface{}
|
---|
57 | }
|
---|
58 |
|
---|
59 | func (e *Error) Error() string {
|
---|
60 | if len(e.Args) == 0 {
|
---|
61 | return "error parsing regexp: " + e.Code.String() + " in `" + e.Expr + "`"
|
---|
62 | }
|
---|
63 | return "error parsing regexp: " + fmt.Sprintf(e.Code.String(), e.Args...) + " in `" + e.Expr + "`"
|
---|
64 | }
|
---|
65 |
|
---|
// An ErrorCode describes a failure to parse a regular expression.
// Its string value is the human-readable message; codes containing a %v
// verb are used as format strings by Error.Error.
type ErrorCode string

// Only ErrInternalError is explicitly typed; the remaining names are untyped
// string constants that convert to ErrorCode where one is expected.
const (
	// internal issue
	ErrInternalError ErrorCode = "regexp/syntax: internal error"
	// Parser errors
	ErrUnterminatedComment        = "unterminated comment"
	ErrInvalidCharRange           = "invalid character class range"
	ErrInvalidRepeatSize          = "invalid repeat count"
	ErrInvalidUTF8                = "invalid UTF-8"
	ErrCaptureGroupOutOfRange     = "capture group number out of range"
	ErrUnexpectedParen            = "unexpected )"
	ErrMissingParen               = "missing closing )"
	ErrMissingBrace               = "missing closing }"
	ErrInvalidRepeatOp            = "invalid nested repetition operator"
	ErrMissingRepeatArgument      = "missing argument to repetition operator"
	ErrConditionalExpression      = "illegal conditional (?(...)) expression"
	ErrTooManyAlternates          = "too many | in (?()|)"
	ErrUnrecognizedGrouping       = "unrecognized grouping construct: (%v"
	ErrInvalidGroupName           = "invalid group name: group names must begin with a word character and have a matching terminator"
	ErrCapNumNotZero              = "capture number cannot be zero"
	ErrUndefinedBackRef           = "reference to undefined group number %v"
	ErrUndefinedNameRef           = "reference to undefined group name %v"
	ErrAlternationCantCapture     = "alternation conditions do not capture and cannot be named"
	ErrAlternationCantHaveComment = "alternation conditions cannot be comments"
	ErrMalformedReference         = "(?(%v) ) malformed"
	ErrUndefinedReference         = "(?(%v) ) reference to undefined group"
	ErrIllegalEndEscape           = "illegal \\ at end of pattern"
	ErrMalformedSlashP            = "malformed \\p{X} character escape"
	ErrIncompleteSlashP           = "incomplete \\p{X} character escape"
	ErrUnknownSlashP              = "unknown unicode category, script, or property '%v'"
	ErrUnrecognizedEscape         = "unrecognized escape sequence \\%v"
	ErrMissingControl             = "missing control character"
	ErrUnrecognizedControl        = "unrecognized control character"
	ErrTooFewHex                  = "insufficient hexadecimal digits"
	ErrInvalidHex                 = "hex values may not be larger than 0x10FFFF"
	ErrMalformedNameRef           = "malformed \\k<...> named back reference"
	ErrBadClassInCharRange        = "cannot include class \\%v in character range"
	ErrUnterminatedBracket        = "unterminated [] set"
	ErrSubtractionMustBeLast      = "a subtraction must be the last element in a character class"
	ErrReversedCharRange          = "[x-y] range in reverse order"
)

// String returns the message text of the code.
func (e ErrorCode) String() string {
	return string(e)
}
|
---|
113 |
|
---|
114 | type parser struct {
|
---|
115 | stack *regexNode
|
---|
116 | group *regexNode
|
---|
117 | alternation *regexNode
|
---|
118 | concatenation *regexNode
|
---|
119 | unit *regexNode
|
---|
120 |
|
---|
121 | patternRaw string
|
---|
122 | pattern []rune
|
---|
123 |
|
---|
124 | currentPos int
|
---|
125 | specialCase *unicode.SpecialCase
|
---|
126 |
|
---|
127 | autocap int
|
---|
128 | capcount int
|
---|
129 | captop int
|
---|
130 | capsize int
|
---|
131 |
|
---|
132 | caps map[int]int
|
---|
133 | capnames map[string]int
|
---|
134 |
|
---|
135 | capnumlist []int
|
---|
136 | capnamelist []string
|
---|
137 |
|
---|
138 | options RegexOptions
|
---|
139 | optionsStack []RegexOptions
|
---|
140 | ignoreNextParen bool
|
---|
141 | }
|
---|
142 |
|
---|
// Overflow guards for accumulating a decimal number one digit at a time:
// value*10+digit stays within math.MaxInt32 as long as value is below
// maxValueDiv10, or equal to it with digit no greater than maxValueMod10.
const (
	maxValueDiv10 int = math.MaxInt32 / 10
	maxValueMod10     = math.MaxInt32 % 10
)
|
---|
147 |
|
---|
148 | // Parse converts a regex string into a parse tree
|
---|
149 | func Parse(re string, op RegexOptions) (*RegexTree, error) {
|
---|
150 | p := parser{
|
---|
151 | options: op,
|
---|
152 | caps: make(map[int]int),
|
---|
153 | }
|
---|
154 | p.setPattern(re)
|
---|
155 |
|
---|
156 | if err := p.countCaptures(); err != nil {
|
---|
157 | return nil, err
|
---|
158 | }
|
---|
159 |
|
---|
160 | p.reset(op)
|
---|
161 | root, err := p.scanRegex()
|
---|
162 |
|
---|
163 | if err != nil {
|
---|
164 | return nil, err
|
---|
165 | }
|
---|
166 | tree := &RegexTree{
|
---|
167 | root: root,
|
---|
168 | caps: p.caps,
|
---|
169 | capnumlist: p.capnumlist,
|
---|
170 | captop: p.captop,
|
---|
171 | Capnames: p.capnames,
|
---|
172 | Caplist: p.capnamelist,
|
---|
173 | options: op,
|
---|
174 | }
|
---|
175 |
|
---|
176 | if tree.options&Debug > 0 {
|
---|
177 | os.Stdout.WriteString(tree.Dump())
|
---|
178 | }
|
---|
179 |
|
---|
180 | return tree, nil
|
---|
181 | }
|
---|
182 |
|
---|
183 | func (p *parser) setPattern(pattern string) {
|
---|
184 | p.patternRaw = pattern
|
---|
185 | p.pattern = make([]rune, 0, len(pattern))
|
---|
186 |
|
---|
187 | //populate our rune array to handle utf8 encoding
|
---|
188 | for _, r := range pattern {
|
---|
189 | p.pattern = append(p.pattern, r)
|
---|
190 | }
|
---|
191 | }
|
---|
192 | func (p *parser) getErr(code ErrorCode, args ...interface{}) error {
|
---|
193 | return &Error{Code: code, Expr: p.patternRaw, Args: args}
|
---|
194 | }
|
---|
195 |
|
---|
196 | func (p *parser) noteCaptureSlot(i, pos int) {
|
---|
197 | if _, ok := p.caps[i]; !ok {
|
---|
198 | // the rhs of the hashtable isn't used in the parser
|
---|
199 | p.caps[i] = pos
|
---|
200 | p.capcount++
|
---|
201 |
|
---|
202 | if p.captop <= i {
|
---|
203 | if i == math.MaxInt32 {
|
---|
204 | p.captop = i
|
---|
205 | } else {
|
---|
206 | p.captop = i + 1
|
---|
207 | }
|
---|
208 | }
|
---|
209 | }
|
---|
210 | }
|
---|
211 |
|
---|
212 | func (p *parser) noteCaptureName(name string, pos int) {
|
---|
213 | if p.capnames == nil {
|
---|
214 | p.capnames = make(map[string]int)
|
---|
215 | }
|
---|
216 |
|
---|
217 | if _, ok := p.capnames[name]; !ok {
|
---|
218 | p.capnames[name] = pos
|
---|
219 | p.capnamelist = append(p.capnamelist, name)
|
---|
220 | }
|
---|
221 | }
|
---|
222 |
|
---|
223 | func (p *parser) assignNameSlots() {
|
---|
224 | if p.capnames != nil {
|
---|
225 | for _, name := range p.capnamelist {
|
---|
226 | for p.isCaptureSlot(p.autocap) {
|
---|
227 | p.autocap++
|
---|
228 | }
|
---|
229 | pos := p.capnames[name]
|
---|
230 | p.capnames[name] = p.autocap
|
---|
231 | p.noteCaptureSlot(p.autocap, pos)
|
---|
232 |
|
---|
233 | p.autocap++
|
---|
234 | }
|
---|
235 | }
|
---|
236 |
|
---|
237 | // if the caps array has at least one gap, construct the list of used slots
|
---|
238 | if p.capcount < p.captop {
|
---|
239 | p.capnumlist = make([]int, p.capcount)
|
---|
240 | i := 0
|
---|
241 |
|
---|
242 | for k := range p.caps {
|
---|
243 | p.capnumlist[i] = k
|
---|
244 | i++
|
---|
245 | }
|
---|
246 |
|
---|
247 | sort.Ints(p.capnumlist)
|
---|
248 | }
|
---|
249 |
|
---|
250 | // merge capsnumlist into capnamelist
|
---|
251 | if p.capnames != nil || p.capnumlist != nil {
|
---|
252 | var oldcapnamelist []string
|
---|
253 | var next int
|
---|
254 | var k int
|
---|
255 |
|
---|
256 | if p.capnames == nil {
|
---|
257 | oldcapnamelist = nil
|
---|
258 | p.capnames = make(map[string]int)
|
---|
259 | p.capnamelist = []string{}
|
---|
260 | next = -1
|
---|
261 | } else {
|
---|
262 | oldcapnamelist = p.capnamelist
|
---|
263 | p.capnamelist = []string{}
|
---|
264 | next = p.capnames[oldcapnamelist[0]]
|
---|
265 | }
|
---|
266 |
|
---|
267 | for i := 0; i < p.capcount; i++ {
|
---|
268 | j := i
|
---|
269 | if p.capnumlist != nil {
|
---|
270 | j = p.capnumlist[i]
|
---|
271 | }
|
---|
272 |
|
---|
273 | if next == j {
|
---|
274 | p.capnamelist = append(p.capnamelist, oldcapnamelist[k])
|
---|
275 | k++
|
---|
276 |
|
---|
277 | if k == len(oldcapnamelist) {
|
---|
278 | next = -1
|
---|
279 | } else {
|
---|
280 | next = p.capnames[oldcapnamelist[k]]
|
---|
281 | }
|
---|
282 |
|
---|
283 | } else {
|
---|
284 | //feature: culture?
|
---|
285 | str := strconv.Itoa(j)
|
---|
286 | p.capnamelist = append(p.capnamelist, str)
|
---|
287 | p.capnames[str] = j
|
---|
288 | }
|
---|
289 | }
|
---|
290 | }
|
---|
291 | }
|
---|
292 |
|
---|
293 | func (p *parser) consumeAutocap() int {
|
---|
294 | r := p.autocap
|
---|
295 | p.autocap++
|
---|
296 | return r
|
---|
297 | }
|
---|
298 |
|
---|
299 | // CountCaptures is a prescanner for deducing the slots used for
|
---|
300 | // captures by doing a partial tokenization of the pattern.
|
---|
301 | func (p *parser) countCaptures() error {
|
---|
302 | var ch rune
|
---|
303 |
|
---|
304 | p.noteCaptureSlot(0, 0)
|
---|
305 |
|
---|
306 | p.autocap = 1
|
---|
307 |
|
---|
308 | for p.charsRight() > 0 {
|
---|
309 | pos := p.textpos()
|
---|
310 | ch = p.moveRightGetChar()
|
---|
311 | switch ch {
|
---|
312 | case '\\':
|
---|
313 | if p.charsRight() > 0 {
|
---|
314 | p.scanBackslash(true)
|
---|
315 | }
|
---|
316 |
|
---|
317 | case '#':
|
---|
318 | if p.useOptionX() {
|
---|
319 | p.moveLeft()
|
---|
320 | p.scanBlank()
|
---|
321 | }
|
---|
322 |
|
---|
323 | case '[':
|
---|
324 | p.scanCharSet(false, true)
|
---|
325 |
|
---|
326 | case ')':
|
---|
327 | if !p.emptyOptionsStack() {
|
---|
328 | p.popOptions()
|
---|
329 | }
|
---|
330 |
|
---|
331 | case '(':
|
---|
332 | if p.charsRight() >= 2 && p.rightChar(1) == '#' && p.rightChar(0) == '?' {
|
---|
333 | p.moveLeft()
|
---|
334 | p.scanBlank()
|
---|
335 | } else {
|
---|
336 | p.pushOptions()
|
---|
337 | if p.charsRight() > 0 && p.rightChar(0) == '?' {
|
---|
338 | // we have (?...
|
---|
339 | p.moveRight(1)
|
---|
340 |
|
---|
341 | if p.charsRight() > 1 && (p.rightChar(0) == '<' || p.rightChar(0) == '\'') {
|
---|
342 | // named group: (?<... or (?'...
|
---|
343 |
|
---|
344 | p.moveRight(1)
|
---|
345 | ch = p.rightChar(0)
|
---|
346 |
|
---|
347 | if ch != '0' && IsWordChar(ch) {
|
---|
348 | if ch >= '1' && ch <= '9' {
|
---|
349 | dec, err := p.scanDecimal()
|
---|
350 | if err != nil {
|
---|
351 | return err
|
---|
352 | }
|
---|
353 | p.noteCaptureSlot(dec, pos)
|
---|
354 | } else {
|
---|
355 | p.noteCaptureName(p.scanCapname(), pos)
|
---|
356 | }
|
---|
357 | }
|
---|
358 | } else if p.useRE2() && p.charsRight() > 2 && (p.rightChar(0) == 'P' && p.rightChar(1) == '<') {
|
---|
359 | // RE2-compat (?P<)
|
---|
360 | p.moveRight(2)
|
---|
361 | ch = p.rightChar(0)
|
---|
362 | if IsWordChar(ch) {
|
---|
363 | p.noteCaptureName(p.scanCapname(), pos)
|
---|
364 | }
|
---|
365 |
|
---|
366 | } else {
|
---|
367 | // (?...
|
---|
368 |
|
---|
369 | // get the options if it's an option construct (?cimsx-cimsx...)
|
---|
370 | p.scanOptions()
|
---|
371 |
|
---|
372 | if p.charsRight() > 0 {
|
---|
373 | if p.rightChar(0) == ')' {
|
---|
374 | // (?cimsx-cimsx)
|
---|
375 | p.moveRight(1)
|
---|
376 | p.popKeepOptions()
|
---|
377 | } else if p.rightChar(0) == '(' {
|
---|
378 | // alternation construct: (?(foo)yes|no)
|
---|
379 | // ignore the next paren so we don't capture the condition
|
---|
380 | p.ignoreNextParen = true
|
---|
381 |
|
---|
382 | // break from here so we don't reset ignoreNextParen
|
---|
383 | continue
|
---|
384 | }
|
---|
385 | }
|
---|
386 | }
|
---|
387 | } else {
|
---|
388 | if !p.useOptionN() && !p.ignoreNextParen {
|
---|
389 | p.noteCaptureSlot(p.consumeAutocap(), pos)
|
---|
390 | }
|
---|
391 | }
|
---|
392 | }
|
---|
393 |
|
---|
394 | p.ignoreNextParen = false
|
---|
395 |
|
---|
396 | }
|
---|
397 | }
|
---|
398 |
|
---|
399 | p.assignNameSlots()
|
---|
400 | return nil
|
---|
401 | }
|
---|
402 |
|
---|
403 | func (p *parser) reset(topopts RegexOptions) {
|
---|
404 | p.currentPos = 0
|
---|
405 | p.autocap = 1
|
---|
406 | p.ignoreNextParen = false
|
---|
407 |
|
---|
408 | if len(p.optionsStack) > 0 {
|
---|
409 | p.optionsStack = p.optionsStack[:0]
|
---|
410 | }
|
---|
411 |
|
---|
412 | p.options = topopts
|
---|
413 | p.stack = nil
|
---|
414 | }
|
---|
415 |
|
---|
416 | func (p *parser) scanRegex() (*regexNode, error) {
|
---|
417 | ch := '@' // nonspecial ch, means at beginning
|
---|
418 | isQuant := false
|
---|
419 |
|
---|
420 | p.startGroup(newRegexNodeMN(ntCapture, p.options, 0, -1))
|
---|
421 |
|
---|
422 | for p.charsRight() > 0 {
|
---|
423 | wasPrevQuantifier := isQuant
|
---|
424 | isQuant = false
|
---|
425 |
|
---|
426 | if err := p.scanBlank(); err != nil {
|
---|
427 | return nil, err
|
---|
428 | }
|
---|
429 |
|
---|
430 | startpos := p.textpos()
|
---|
431 |
|
---|
432 | // move past all of the normal characters. We'll stop when we hit some kind of control character,
|
---|
433 | // or if IgnorePatternWhiteSpace is on, we'll stop when we see some whitespace.
|
---|
434 | if p.useOptionX() {
|
---|
435 | for p.charsRight() > 0 {
|
---|
436 | ch = p.rightChar(0)
|
---|
437 | //UGLY: clean up, this is ugly
|
---|
438 | if !(!isStopperX(ch) || (ch == '{' && !p.isTrueQuantifier())) {
|
---|
439 | break
|
---|
440 | }
|
---|
441 | p.moveRight(1)
|
---|
442 | }
|
---|
443 | } else {
|
---|
444 | for p.charsRight() > 0 {
|
---|
445 | ch = p.rightChar(0)
|
---|
446 | if !(!isSpecial(ch) || ch == '{' && !p.isTrueQuantifier()) {
|
---|
447 | break
|
---|
448 | }
|
---|
449 | p.moveRight(1)
|
---|
450 | }
|
---|
451 | }
|
---|
452 |
|
---|
453 | endpos := p.textpos()
|
---|
454 |
|
---|
455 | p.scanBlank()
|
---|
456 |
|
---|
457 | if p.charsRight() == 0 {
|
---|
458 | ch = '!' // nonspecial, means at end
|
---|
459 | } else if ch = p.rightChar(0); isSpecial(ch) {
|
---|
460 | isQuant = isQuantifier(ch)
|
---|
461 | p.moveRight(1)
|
---|
462 | } else {
|
---|
463 | ch = ' ' // nonspecial, means at ordinary char
|
---|
464 | }
|
---|
465 |
|
---|
466 | if startpos < endpos {
|
---|
467 | cchUnquantified := endpos - startpos
|
---|
468 | if isQuant {
|
---|
469 | cchUnquantified--
|
---|
470 | }
|
---|
471 | wasPrevQuantifier = false
|
---|
472 |
|
---|
473 | if cchUnquantified > 0 {
|
---|
474 | p.addToConcatenate(startpos, cchUnquantified, false)
|
---|
475 | }
|
---|
476 |
|
---|
477 | if isQuant {
|
---|
478 | p.addUnitOne(p.charAt(endpos - 1))
|
---|
479 | }
|
---|
480 | }
|
---|
481 |
|
---|
482 | switch ch {
|
---|
483 | case '!':
|
---|
484 | goto BreakOuterScan
|
---|
485 |
|
---|
486 | case ' ':
|
---|
487 | goto ContinueOuterScan
|
---|
488 |
|
---|
489 | case '[':
|
---|
490 | cc, err := p.scanCharSet(p.useOptionI(), false)
|
---|
491 | if err != nil {
|
---|
492 | return nil, err
|
---|
493 | }
|
---|
494 | p.addUnitSet(cc)
|
---|
495 |
|
---|
496 | case '(':
|
---|
497 | p.pushOptions()
|
---|
498 |
|
---|
499 | if grouper, err := p.scanGroupOpen(); err != nil {
|
---|
500 | return nil, err
|
---|
501 | } else if grouper == nil {
|
---|
502 | p.popKeepOptions()
|
---|
503 | } else {
|
---|
504 | p.pushGroup()
|
---|
505 | p.startGroup(grouper)
|
---|
506 | }
|
---|
507 |
|
---|
508 | continue
|
---|
509 |
|
---|
510 | case '|':
|
---|
511 | p.addAlternate()
|
---|
512 | goto ContinueOuterScan
|
---|
513 |
|
---|
514 | case ')':
|
---|
515 | if p.emptyStack() {
|
---|
516 | return nil, p.getErr(ErrUnexpectedParen)
|
---|
517 | }
|
---|
518 |
|
---|
519 | if err := p.addGroup(); err != nil {
|
---|
520 | return nil, err
|
---|
521 | }
|
---|
522 | if err := p.popGroup(); err != nil {
|
---|
523 | return nil, err
|
---|
524 | }
|
---|
525 | p.popOptions()
|
---|
526 |
|
---|
527 | if p.unit == nil {
|
---|
528 | goto ContinueOuterScan
|
---|
529 | }
|
---|
530 |
|
---|
531 | case '\\':
|
---|
532 | n, err := p.scanBackslash(false)
|
---|
533 | if err != nil {
|
---|
534 | return nil, err
|
---|
535 | }
|
---|
536 | p.addUnitNode(n)
|
---|
537 |
|
---|
538 | case '^':
|
---|
539 | if p.useOptionM() {
|
---|
540 | p.addUnitType(ntBol)
|
---|
541 | } else {
|
---|
542 | p.addUnitType(ntBeginning)
|
---|
543 | }
|
---|
544 |
|
---|
545 | case '$':
|
---|
546 | if p.useOptionM() {
|
---|
547 | p.addUnitType(ntEol)
|
---|
548 | } else {
|
---|
549 | p.addUnitType(ntEndZ)
|
---|
550 | }
|
---|
551 |
|
---|
552 | case '.':
|
---|
553 | if p.useOptionE() {
|
---|
554 | p.addUnitSet(ECMAAnyClass())
|
---|
555 | } else if p.useOptionS() {
|
---|
556 | p.addUnitSet(AnyClass())
|
---|
557 | } else {
|
---|
558 | p.addUnitNotone('\n')
|
---|
559 | }
|
---|
560 |
|
---|
561 | case '{', '*', '+', '?':
|
---|
562 | if p.unit == nil {
|
---|
563 | if wasPrevQuantifier {
|
---|
564 | return nil, p.getErr(ErrInvalidRepeatOp)
|
---|
565 | } else {
|
---|
566 | return nil, p.getErr(ErrMissingRepeatArgument)
|
---|
567 | }
|
---|
568 | }
|
---|
569 | p.moveLeft()
|
---|
570 |
|
---|
571 | default:
|
---|
572 | return nil, p.getErr(ErrInternalError)
|
---|
573 | }
|
---|
574 |
|
---|
575 | if err := p.scanBlank(); err != nil {
|
---|
576 | return nil, err
|
---|
577 | }
|
---|
578 |
|
---|
579 | if p.charsRight() > 0 {
|
---|
580 | isQuant = p.isTrueQuantifier()
|
---|
581 | }
|
---|
582 | if p.charsRight() == 0 || !isQuant {
|
---|
583 | //maintain odd C# assignment order -- not sure if required, could clean up?
|
---|
584 | p.addConcatenate()
|
---|
585 | goto ContinueOuterScan
|
---|
586 | }
|
---|
587 |
|
---|
588 | ch = p.moveRightGetChar()
|
---|
589 |
|
---|
590 | // Handle quantifiers
|
---|
591 | for p.unit != nil {
|
---|
592 | var min, max int
|
---|
593 | var lazy bool
|
---|
594 |
|
---|
595 | switch ch {
|
---|
596 | case '*':
|
---|
597 | min = 0
|
---|
598 | max = math.MaxInt32
|
---|
599 |
|
---|
600 | case '?':
|
---|
601 | min = 0
|
---|
602 | max = 1
|
---|
603 |
|
---|
604 | case '+':
|
---|
605 | min = 1
|
---|
606 | max = math.MaxInt32
|
---|
607 |
|
---|
608 | case '{':
|
---|
609 | {
|
---|
610 | var err error
|
---|
611 | startpos = p.textpos()
|
---|
612 | if min, err = p.scanDecimal(); err != nil {
|
---|
613 | return nil, err
|
---|
614 | }
|
---|
615 | max = min
|
---|
616 | if startpos < p.textpos() {
|
---|
617 | if p.charsRight() > 0 && p.rightChar(0) == ',' {
|
---|
618 | p.moveRight(1)
|
---|
619 | if p.charsRight() == 0 || p.rightChar(0) == '}' {
|
---|
620 | max = math.MaxInt32
|
---|
621 | } else {
|
---|
622 | if max, err = p.scanDecimal(); err != nil {
|
---|
623 | return nil, err
|
---|
624 | }
|
---|
625 | }
|
---|
626 | }
|
---|
627 | }
|
---|
628 |
|
---|
629 | if startpos == p.textpos() || p.charsRight() == 0 || p.moveRightGetChar() != '}' {
|
---|
630 | p.addConcatenate()
|
---|
631 | p.textto(startpos - 1)
|
---|
632 | goto ContinueOuterScan
|
---|
633 | }
|
---|
634 | }
|
---|
635 |
|
---|
636 | default:
|
---|
637 | return nil, p.getErr(ErrInternalError)
|
---|
638 | }
|
---|
639 |
|
---|
640 | if err := p.scanBlank(); err != nil {
|
---|
641 | return nil, err
|
---|
642 | }
|
---|
643 |
|
---|
644 | if p.charsRight() == 0 || p.rightChar(0) != '?' {
|
---|
645 | lazy = false
|
---|
646 | } else {
|
---|
647 | p.moveRight(1)
|
---|
648 | lazy = true
|
---|
649 | }
|
---|
650 |
|
---|
651 | if min > max {
|
---|
652 | return nil, p.getErr(ErrInvalidRepeatSize)
|
---|
653 | }
|
---|
654 |
|
---|
655 | p.addConcatenate3(lazy, min, max)
|
---|
656 | }
|
---|
657 |
|
---|
658 | ContinueOuterScan:
|
---|
659 | }
|
---|
660 |
|
---|
661 | BreakOuterScan:
|
---|
662 | ;
|
---|
663 |
|
---|
664 | if !p.emptyStack() {
|
---|
665 | return nil, p.getErr(ErrMissingParen)
|
---|
666 | }
|
---|
667 |
|
---|
668 | if err := p.addGroup(); err != nil {
|
---|
669 | return nil, err
|
---|
670 | }
|
---|
671 |
|
---|
672 | return p.unit, nil
|
---|
673 |
|
---|
674 | }
|
---|
675 |
|
---|
676 | /*
|
---|
677 | * Simple parsing for replacement patterns
|
---|
678 | */
|
---|
679 | func (p *parser) scanReplacement() (*regexNode, error) {
|
---|
680 | var c, startpos int
|
---|
681 |
|
---|
682 | p.concatenation = newRegexNode(ntConcatenate, p.options)
|
---|
683 |
|
---|
684 | for {
|
---|
685 | c = p.charsRight()
|
---|
686 | if c == 0 {
|
---|
687 | break
|
---|
688 | }
|
---|
689 |
|
---|
690 | startpos = p.textpos()
|
---|
691 |
|
---|
692 | for c > 0 && p.rightChar(0) != '$' {
|
---|
693 | p.moveRight(1)
|
---|
694 | c--
|
---|
695 | }
|
---|
696 |
|
---|
697 | p.addToConcatenate(startpos, p.textpos()-startpos, true)
|
---|
698 |
|
---|
699 | if c > 0 {
|
---|
700 | if p.moveRightGetChar() == '$' {
|
---|
701 | n, err := p.scanDollar()
|
---|
702 | if err != nil {
|
---|
703 | return nil, err
|
---|
704 | }
|
---|
705 | p.addUnitNode(n)
|
---|
706 | }
|
---|
707 | p.addConcatenate()
|
---|
708 | }
|
---|
709 | }
|
---|
710 |
|
---|
711 | return p.concatenation, nil
|
---|
712 | }
|
---|
713 |
|
---|
714 | /*
|
---|
715 | * Scans $ patterns recognized within replacement patterns
|
---|
716 | */
|
---|
717 | func (p *parser) scanDollar() (*regexNode, error) {
|
---|
718 | if p.charsRight() == 0 {
|
---|
719 | return newRegexNodeCh(ntOne, p.options, '$'), nil
|
---|
720 | }
|
---|
721 |
|
---|
722 | ch := p.rightChar(0)
|
---|
723 | angled := false
|
---|
724 | backpos := p.textpos()
|
---|
725 | lastEndPos := backpos
|
---|
726 |
|
---|
727 | // Note angle
|
---|
728 |
|
---|
729 | if ch == '{' && p.charsRight() > 1 {
|
---|
730 | angled = true
|
---|
731 | p.moveRight(1)
|
---|
732 | ch = p.rightChar(0)
|
---|
733 | }
|
---|
734 |
|
---|
735 | // Try to parse backreference: \1 or \{1} or \{cap}
|
---|
736 |
|
---|
737 | if ch >= '0' && ch <= '9' {
|
---|
738 | if !angled && p.useOptionE() {
|
---|
739 | capnum := -1
|
---|
740 | newcapnum := int(ch - '0')
|
---|
741 | p.moveRight(1)
|
---|
742 | if p.isCaptureSlot(newcapnum) {
|
---|
743 | capnum = newcapnum
|
---|
744 | lastEndPos = p.textpos()
|
---|
745 | }
|
---|
746 |
|
---|
747 | for p.charsRight() > 0 {
|
---|
748 | ch = p.rightChar(0)
|
---|
749 | if ch < '0' || ch > '9' {
|
---|
750 | break
|
---|
751 | }
|
---|
752 | digit := int(ch - '0')
|
---|
753 | if newcapnum > maxValueDiv10 || (newcapnum == maxValueDiv10 && digit > maxValueMod10) {
|
---|
754 | return nil, p.getErr(ErrCaptureGroupOutOfRange)
|
---|
755 | }
|
---|
756 |
|
---|
757 | newcapnum = newcapnum*10 + digit
|
---|
758 |
|
---|
759 | p.moveRight(1)
|
---|
760 | if p.isCaptureSlot(newcapnum) {
|
---|
761 | capnum = newcapnum
|
---|
762 | lastEndPos = p.textpos()
|
---|
763 | }
|
---|
764 | }
|
---|
765 | p.textto(lastEndPos)
|
---|
766 | if capnum >= 0 {
|
---|
767 | return newRegexNodeM(ntRef, p.options, capnum), nil
|
---|
768 | }
|
---|
769 | } else {
|
---|
770 | capnum, err := p.scanDecimal()
|
---|
771 | if err != nil {
|
---|
772 | return nil, err
|
---|
773 | }
|
---|
774 | if !angled || p.charsRight() > 0 && p.moveRightGetChar() == '}' {
|
---|
775 | if p.isCaptureSlot(capnum) {
|
---|
776 | return newRegexNodeM(ntRef, p.options, capnum), nil
|
---|
777 | }
|
---|
778 | }
|
---|
779 | }
|
---|
780 | } else if angled && IsWordChar(ch) {
|
---|
781 | capname := p.scanCapname()
|
---|
782 |
|
---|
783 | if p.charsRight() > 0 && p.moveRightGetChar() == '}' {
|
---|
784 | if p.isCaptureName(capname) {
|
---|
785 | return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
|
---|
786 | }
|
---|
787 | }
|
---|
788 | } else if !angled {
|
---|
789 | capnum := 1
|
---|
790 |
|
---|
791 | switch ch {
|
---|
792 | case '$':
|
---|
793 | p.moveRight(1)
|
---|
794 | return newRegexNodeCh(ntOne, p.options, '$'), nil
|
---|
795 | case '&':
|
---|
796 | capnum = 0
|
---|
797 | case '`':
|
---|
798 | capnum = replaceLeftPortion
|
---|
799 | case '\'':
|
---|
800 | capnum = replaceRightPortion
|
---|
801 | case '+':
|
---|
802 | capnum = replaceLastGroup
|
---|
803 | case '_':
|
---|
804 | capnum = replaceWholeString
|
---|
805 | }
|
---|
806 |
|
---|
807 | if capnum != 1 {
|
---|
808 | p.moveRight(1)
|
---|
809 | return newRegexNodeM(ntRef, p.options, capnum), nil
|
---|
810 | }
|
---|
811 | }
|
---|
812 |
|
---|
813 | // unrecognized $: literalize
|
---|
814 |
|
---|
815 | p.textto(backpos)
|
---|
816 | return newRegexNodeCh(ntOne, p.options, '$'), nil
|
---|
817 | }
|
---|
818 |
|
---|
819 | // scanGroupOpen scans chars following a '(' (not counting the '('), and returns
|
---|
820 | // a RegexNode for the type of group scanned, or nil if the group
|
---|
821 | // simply changed options (?cimsx-cimsx) or was a comment (#...).
|
---|
822 | func (p *parser) scanGroupOpen() (*regexNode, error) {
|
---|
823 | var ch rune
|
---|
824 | var nt nodeType
|
---|
825 | var err error
|
---|
826 | close := '>'
|
---|
827 | start := p.textpos()
|
---|
828 |
|
---|
829 | // just return a RegexNode if we have:
|
---|
830 | // 1. "(" followed by nothing
|
---|
831 | // 2. "(x" where x != ?
|
---|
832 | // 3. "(?)"
|
---|
833 | if p.charsRight() == 0 || p.rightChar(0) != '?' || (p.rightChar(0) == '?' && (p.charsRight() > 1 && p.rightChar(1) == ')')) {
|
---|
834 | if p.useOptionN() || p.ignoreNextParen {
|
---|
835 | p.ignoreNextParen = false
|
---|
836 | return newRegexNode(ntGroup, p.options), nil
|
---|
837 | }
|
---|
838 | return newRegexNodeMN(ntCapture, p.options, p.consumeAutocap(), -1), nil
|
---|
839 | }
|
---|
840 |
|
---|
841 | p.moveRight(1)
|
---|
842 |
|
---|
843 | for {
|
---|
844 | if p.charsRight() == 0 {
|
---|
845 | break
|
---|
846 | }
|
---|
847 |
|
---|
848 | switch ch = p.moveRightGetChar(); ch {
|
---|
849 | case ':':
|
---|
850 | nt = ntGroup
|
---|
851 |
|
---|
852 | case '=':
|
---|
853 | p.options &= ^RightToLeft
|
---|
854 | nt = ntRequire
|
---|
855 |
|
---|
856 | case '!':
|
---|
857 | p.options &= ^RightToLeft
|
---|
858 | nt = ntPrevent
|
---|
859 |
|
---|
860 | case '>':
|
---|
861 | nt = ntGreedy
|
---|
862 |
|
---|
863 | case '\'':
|
---|
864 | close = '\''
|
---|
865 | fallthrough
|
---|
866 |
|
---|
867 | case '<':
|
---|
868 | if p.charsRight() == 0 {
|
---|
869 | goto BreakRecognize
|
---|
870 | }
|
---|
871 |
|
---|
872 | switch ch = p.moveRightGetChar(); ch {
|
---|
873 | case '=':
|
---|
874 | if close == '\'' {
|
---|
875 | goto BreakRecognize
|
---|
876 | }
|
---|
877 |
|
---|
878 | p.options |= RightToLeft
|
---|
879 | nt = ntRequire
|
---|
880 |
|
---|
881 | case '!':
|
---|
882 | if close == '\'' {
|
---|
883 | goto BreakRecognize
|
---|
884 | }
|
---|
885 |
|
---|
886 | p.options |= RightToLeft
|
---|
887 | nt = ntPrevent
|
---|
888 |
|
---|
889 | default:
|
---|
890 | p.moveLeft()
|
---|
891 | capnum := -1
|
---|
892 | uncapnum := -1
|
---|
893 | proceed := false
|
---|
894 |
|
---|
895 | // grab part before -
|
---|
896 |
|
---|
897 | if ch >= '0' && ch <= '9' {
|
---|
898 | if capnum, err = p.scanDecimal(); err != nil {
|
---|
899 | return nil, err
|
---|
900 | }
|
---|
901 |
|
---|
902 | if !p.isCaptureSlot(capnum) {
|
---|
903 | capnum = -1
|
---|
904 | }
|
---|
905 |
|
---|
906 | // check if we have bogus characters after the number
|
---|
907 | if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
|
---|
908 | return nil, p.getErr(ErrInvalidGroupName)
|
---|
909 | }
|
---|
910 | if capnum == 0 {
|
---|
911 | return nil, p.getErr(ErrCapNumNotZero)
|
---|
912 | }
|
---|
913 | } else if IsWordChar(ch) {
|
---|
914 | capname := p.scanCapname()
|
---|
915 |
|
---|
916 | if p.isCaptureName(capname) {
|
---|
917 | capnum = p.captureSlotFromName(capname)
|
---|
918 | }
|
---|
919 |
|
---|
920 | // check if we have bogus character after the name
|
---|
921 | if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
|
---|
922 | return nil, p.getErr(ErrInvalidGroupName)
|
---|
923 | }
|
---|
924 | } else if ch == '-' {
|
---|
925 | proceed = true
|
---|
926 | } else {
|
---|
927 | // bad group name - starts with something other than a word character and isn't a number
|
---|
928 | return nil, p.getErr(ErrInvalidGroupName)
|
---|
929 | }
|
---|
930 |
|
---|
931 | // grab part after - if any
|
---|
932 |
|
---|
933 | if (capnum != -1 || proceed == true) && p.charsRight() > 0 && p.rightChar(0) == '-' {
|
---|
934 | p.moveRight(1)
|
---|
935 |
|
---|
936 | //no more chars left, no closing char, etc
|
---|
937 | if p.charsRight() == 0 {
|
---|
938 | return nil, p.getErr(ErrInvalidGroupName)
|
---|
939 | }
|
---|
940 |
|
---|
941 | ch = p.rightChar(0)
|
---|
942 | if ch >= '0' && ch <= '9' {
|
---|
943 | if uncapnum, err = p.scanDecimal(); err != nil {
|
---|
944 | return nil, err
|
---|
945 | }
|
---|
946 |
|
---|
947 | if !p.isCaptureSlot(uncapnum) {
|
---|
948 | return nil, p.getErr(ErrUndefinedBackRef, uncapnum)
|
---|
949 | }
|
---|
950 |
|
---|
951 | // check if we have bogus characters after the number
|
---|
952 | if p.charsRight() > 0 && p.rightChar(0) != close {
|
---|
953 | return nil, p.getErr(ErrInvalidGroupName)
|
---|
954 | }
|
---|
955 | } else if IsWordChar(ch) {
|
---|
956 | uncapname := p.scanCapname()
|
---|
957 |
|
---|
958 | if !p.isCaptureName(uncapname) {
|
---|
959 | return nil, p.getErr(ErrUndefinedNameRef, uncapname)
|
---|
960 | }
|
---|
961 | uncapnum = p.captureSlotFromName(uncapname)
|
---|
962 |
|
---|
963 | // check if we have bogus character after the name
|
---|
964 | if p.charsRight() > 0 && p.rightChar(0) != close {
|
---|
965 | return nil, p.getErr(ErrInvalidGroupName)
|
---|
966 | }
|
---|
967 | } else {
|
---|
968 | // bad group name - starts with something other than a word character and isn't a number
|
---|
969 | return nil, p.getErr(ErrInvalidGroupName)
|
---|
970 | }
|
---|
971 | }
|
---|
972 |
|
---|
973 | // actually make the node
|
---|
974 |
|
---|
975 | if (capnum != -1 || uncapnum != -1) && p.charsRight() > 0 && p.moveRightGetChar() == close {
|
---|
976 | return newRegexNodeMN(ntCapture, p.options, capnum, uncapnum), nil
|
---|
977 | }
|
---|
978 | goto BreakRecognize
|
---|
979 | }
|
---|
980 |
|
---|
981 | case '(':
|
---|
982 | // alternation construct (?(...) | )
|
---|
983 |
|
---|
984 | parenPos := p.textpos()
|
---|
985 | if p.charsRight() > 0 {
|
---|
986 | ch = p.rightChar(0)
|
---|
987 |
|
---|
988 | // check if the alternation condition is a backref
|
---|
989 | if ch >= '0' && ch <= '9' {
|
---|
990 | var capnum int
|
---|
991 | if capnum, err = p.scanDecimal(); err != nil {
|
---|
992 | return nil, err
|
---|
993 | }
|
---|
994 | if p.charsRight() > 0 && p.moveRightGetChar() == ')' {
|
---|
995 | if p.isCaptureSlot(capnum) {
|
---|
996 | return newRegexNodeM(ntTestref, p.options, capnum), nil
|
---|
997 | }
|
---|
998 | return nil, p.getErr(ErrUndefinedReference, capnum)
|
---|
999 | }
|
---|
1000 |
|
---|
1001 | return nil, p.getErr(ErrMalformedReference, capnum)
|
---|
1002 |
|
---|
1003 | } else if IsWordChar(ch) {
|
---|
1004 | capname := p.scanCapname()
|
---|
1005 |
|
---|
1006 | if p.isCaptureName(capname) && p.charsRight() > 0 && p.moveRightGetChar() == ')' {
|
---|
1007 | return newRegexNodeM(ntTestref, p.options, p.captureSlotFromName(capname)), nil
|
---|
1008 | }
|
---|
1009 | }
|
---|
1010 | }
|
---|
1011 | // not a backref
|
---|
1012 | nt = ntTestgroup
|
---|
1013 | p.textto(parenPos - 1) // jump to the start of the parentheses
|
---|
1014 | p.ignoreNextParen = true // but make sure we don't try to capture the insides
|
---|
1015 |
|
---|
1016 | charsRight := p.charsRight()
|
---|
1017 | if charsRight >= 3 && p.rightChar(1) == '?' {
|
---|
1018 | rightchar2 := p.rightChar(2)
|
---|
1019 | // disallow comments in the condition
|
---|
1020 | if rightchar2 == '#' {
|
---|
1021 | return nil, p.getErr(ErrAlternationCantHaveComment)
|
---|
1022 | }
|
---|
1023 |
|
---|
1024 | // disallow named capture group (?<..>..) in the condition
|
---|
1025 | if rightchar2 == '\'' {
|
---|
1026 | return nil, p.getErr(ErrAlternationCantCapture)
|
---|
1027 | }
|
---|
1028 |
|
---|
1029 | if charsRight >= 4 && (rightchar2 == '<' && p.rightChar(3) != '!' && p.rightChar(3) != '=') {
|
---|
1030 | return nil, p.getErr(ErrAlternationCantCapture)
|
---|
1031 | }
|
---|
1032 | }
|
---|
1033 |
|
---|
1034 | case 'P':
|
---|
1035 | if p.useRE2() {
|
---|
1036 | // support for P<name> syntax
|
---|
1037 | if p.charsRight() < 3 {
|
---|
1038 | goto BreakRecognize
|
---|
1039 | }
|
---|
1040 |
|
---|
1041 | ch = p.moveRightGetChar()
|
---|
1042 | if ch != '<' {
|
---|
1043 | goto BreakRecognize
|
---|
1044 | }
|
---|
1045 |
|
---|
1046 | ch = p.moveRightGetChar()
|
---|
1047 | p.moveLeft()
|
---|
1048 |
|
---|
1049 | if IsWordChar(ch) {
|
---|
1050 | capnum := -1
|
---|
1051 | capname := p.scanCapname()
|
---|
1052 |
|
---|
1053 | if p.isCaptureName(capname) {
|
---|
1054 | capnum = p.captureSlotFromName(capname)
|
---|
1055 | }
|
---|
1056 |
|
---|
1057 | // check if we have bogus character after the name
|
---|
1058 | if p.charsRight() > 0 && p.rightChar(0) != '>' {
|
---|
1059 | return nil, p.getErr(ErrInvalidGroupName)
|
---|
1060 | }
|
---|
1061 |
|
---|
1062 | // actually make the node
|
---|
1063 |
|
---|
1064 | if capnum != -1 && p.charsRight() > 0 && p.moveRightGetChar() == '>' {
|
---|
1065 | return newRegexNodeMN(ntCapture, p.options, capnum, -1), nil
|
---|
1066 | }
|
---|
1067 | goto BreakRecognize
|
---|
1068 |
|
---|
1069 | } else {
|
---|
1070 | // bad group name - starts with something other than a word character and isn't a number
|
---|
1071 | return nil, p.getErr(ErrInvalidGroupName)
|
---|
1072 | }
|
---|
1073 | }
|
---|
1074 | // if we're not using RE2 compat mode then
|
---|
1075 | // we just behave like normal
|
---|
1076 | fallthrough
|
---|
1077 |
|
---|
1078 | default:
|
---|
1079 | p.moveLeft()
|
---|
1080 |
|
---|
1081 | nt = ntGroup
|
---|
1082 | // disallow options in the children of a testgroup node
|
---|
1083 | if p.group.t != ntTestgroup {
|
---|
1084 | p.scanOptions()
|
---|
1085 | }
|
---|
1086 | if p.charsRight() == 0 {
|
---|
1087 | goto BreakRecognize
|
---|
1088 | }
|
---|
1089 |
|
---|
1090 | if ch = p.moveRightGetChar(); ch == ')' {
|
---|
1091 | return nil, nil
|
---|
1092 | }
|
---|
1093 |
|
---|
1094 | if ch != ':' {
|
---|
1095 | goto BreakRecognize
|
---|
1096 | }
|
---|
1097 |
|
---|
1098 | }
|
---|
1099 |
|
---|
1100 | return newRegexNode(nt, p.options), nil
|
---|
1101 | }
|
---|
1102 |
|
---|
1103 | BreakRecognize:
|
---|
1104 |
|
---|
1105 | // break Recognize comes here
|
---|
1106 |
|
---|
1107 | return nil, p.getErr(ErrUnrecognizedGrouping, string(p.pattern[start:p.textpos()]))
|
---|
1108 | }
|
---|
1109 |
|
---|
1110 | // scans backslash specials and basics
|
---|
1111 | func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
|
---|
1112 |
|
---|
1113 | if p.charsRight() == 0 {
|
---|
1114 | return nil, p.getErr(ErrIllegalEndEscape)
|
---|
1115 | }
|
---|
1116 |
|
---|
1117 | switch ch := p.rightChar(0); ch {
|
---|
1118 | case 'b', 'B', 'A', 'G', 'Z', 'z':
|
---|
1119 | p.moveRight(1)
|
---|
1120 | return newRegexNode(p.typeFromCode(ch), p.options), nil
|
---|
1121 |
|
---|
1122 | case 'w':
|
---|
1123 | p.moveRight(1)
|
---|
1124 | if p.useOptionE() {
|
---|
1125 | return newRegexNodeSet(ntSet, p.options, ECMAWordClass()), nil
|
---|
1126 | }
|
---|
1127 | return newRegexNodeSet(ntSet, p.options, WordClass()), nil
|
---|
1128 |
|
---|
1129 | case 'W':
|
---|
1130 | p.moveRight(1)
|
---|
1131 | if p.useOptionE() {
|
---|
1132 | return newRegexNodeSet(ntSet, p.options, NotECMAWordClass()), nil
|
---|
1133 | }
|
---|
1134 | return newRegexNodeSet(ntSet, p.options, NotWordClass()), nil
|
---|
1135 |
|
---|
1136 | case 's':
|
---|
1137 | p.moveRight(1)
|
---|
1138 | if p.useOptionE() {
|
---|
1139 | return newRegexNodeSet(ntSet, p.options, ECMASpaceClass()), nil
|
---|
1140 | }
|
---|
1141 | return newRegexNodeSet(ntSet, p.options, SpaceClass()), nil
|
---|
1142 |
|
---|
1143 | case 'S':
|
---|
1144 | p.moveRight(1)
|
---|
1145 | if p.useOptionE() {
|
---|
1146 | return newRegexNodeSet(ntSet, p.options, NotECMASpaceClass()), nil
|
---|
1147 | }
|
---|
1148 | return newRegexNodeSet(ntSet, p.options, NotSpaceClass()), nil
|
---|
1149 |
|
---|
1150 | case 'd':
|
---|
1151 | p.moveRight(1)
|
---|
1152 | if p.useOptionE() {
|
---|
1153 | return newRegexNodeSet(ntSet, p.options, ECMADigitClass()), nil
|
---|
1154 | }
|
---|
1155 | return newRegexNodeSet(ntSet, p.options, DigitClass()), nil
|
---|
1156 |
|
---|
1157 | case 'D':
|
---|
1158 | p.moveRight(1)
|
---|
1159 | if p.useOptionE() {
|
---|
1160 | return newRegexNodeSet(ntSet, p.options, NotECMADigitClass()), nil
|
---|
1161 | }
|
---|
1162 | return newRegexNodeSet(ntSet, p.options, NotDigitClass()), nil
|
---|
1163 |
|
---|
1164 | case 'p', 'P':
|
---|
1165 | p.moveRight(1)
|
---|
1166 | prop, err := p.parseProperty()
|
---|
1167 | if err != nil {
|
---|
1168 | return nil, err
|
---|
1169 | }
|
---|
1170 | cc := &CharSet{}
|
---|
1171 | cc.addCategory(prop, (ch != 'p'), p.useOptionI(), p.patternRaw)
|
---|
1172 | if p.useOptionI() {
|
---|
1173 | cc.addLowercase()
|
---|
1174 | }
|
---|
1175 |
|
---|
1176 | return newRegexNodeSet(ntSet, p.options, cc), nil
|
---|
1177 |
|
---|
1178 | default:
|
---|
1179 | return p.scanBasicBackslash(scanOnly)
|
---|
1180 | }
|
---|
1181 | }
|
---|
1182 |
|
---|
1183 | // Scans \-style backreferences and character escapes
|
---|
1184 | func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
|
---|
1185 | if p.charsRight() == 0 {
|
---|
1186 | return nil, p.getErr(ErrIllegalEndEscape)
|
---|
1187 | }
|
---|
1188 | angled := false
|
---|
1189 | close := '\x00'
|
---|
1190 |
|
---|
1191 | backpos := p.textpos()
|
---|
1192 | ch := p.rightChar(0)
|
---|
1193 |
|
---|
1194 | // allow \k<foo> instead of \<foo>, which is now deprecated
|
---|
1195 |
|
---|
1196 | if ch == 'k' {
|
---|
1197 | if p.charsRight() >= 2 {
|
---|
1198 | p.moveRight(1)
|
---|
1199 | ch = p.moveRightGetChar()
|
---|
1200 |
|
---|
1201 | if ch == '<' || ch == '\'' {
|
---|
1202 | angled = true
|
---|
1203 | if ch == '\'' {
|
---|
1204 | close = '\''
|
---|
1205 | } else {
|
---|
1206 | close = '>'
|
---|
1207 | }
|
---|
1208 | }
|
---|
1209 | }
|
---|
1210 |
|
---|
1211 | if !angled || p.charsRight() <= 0 {
|
---|
1212 | return nil, p.getErr(ErrMalformedNameRef)
|
---|
1213 | }
|
---|
1214 |
|
---|
1215 | ch = p.rightChar(0)
|
---|
1216 |
|
---|
1217 | } else if (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
|
---|
1218 | angled = true
|
---|
1219 | if ch == '\'' {
|
---|
1220 | close = '\''
|
---|
1221 | } else {
|
---|
1222 | close = '>'
|
---|
1223 | }
|
---|
1224 |
|
---|
1225 | p.moveRight(1)
|
---|
1226 | ch = p.rightChar(0)
|
---|
1227 | }
|
---|
1228 |
|
---|
1229 | // Try to parse backreference: \<1> or \<cap>
|
---|
1230 |
|
---|
1231 | if angled && ch >= '0' && ch <= '9' {
|
---|
1232 | capnum, err := p.scanDecimal()
|
---|
1233 | if err != nil {
|
---|
1234 | return nil, err
|
---|
1235 | }
|
---|
1236 |
|
---|
1237 | if p.charsRight() > 0 && p.moveRightGetChar() == close {
|
---|
1238 | if p.isCaptureSlot(capnum) {
|
---|
1239 | return newRegexNodeM(ntRef, p.options, capnum), nil
|
---|
1240 | }
|
---|
1241 | return nil, p.getErr(ErrUndefinedBackRef, capnum)
|
---|
1242 | }
|
---|
1243 | } else if !angled && ch >= '1' && ch <= '9' { // Try to parse backreference or octal: \1
|
---|
1244 | capnum, err := p.scanDecimal()
|
---|
1245 | if err != nil {
|
---|
1246 | return nil, err
|
---|
1247 | }
|
---|
1248 |
|
---|
1249 | if scanOnly {
|
---|
1250 | return nil, nil
|
---|
1251 | }
|
---|
1252 |
|
---|
1253 | if p.isCaptureSlot(capnum) {
|
---|
1254 | return newRegexNodeM(ntRef, p.options, capnum), nil
|
---|
1255 | }
|
---|
1256 | if capnum <= 9 && !p.useOptionE() {
|
---|
1257 | return nil, p.getErr(ErrUndefinedBackRef, capnum)
|
---|
1258 | }
|
---|
1259 |
|
---|
1260 | } else if angled && IsWordChar(ch) {
|
---|
1261 | capname := p.scanCapname()
|
---|
1262 |
|
---|
1263 | if p.charsRight() > 0 && p.moveRightGetChar() == close {
|
---|
1264 | if p.isCaptureName(capname) {
|
---|
1265 | return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
|
---|
1266 | }
|
---|
1267 | return nil, p.getErr(ErrUndefinedNameRef, capname)
|
---|
1268 | }
|
---|
1269 | }
|
---|
1270 |
|
---|
1271 | // Not backreference: must be char code
|
---|
1272 |
|
---|
1273 | p.textto(backpos)
|
---|
1274 | ch, err := p.scanCharEscape()
|
---|
1275 | if err != nil {
|
---|
1276 | return nil, err
|
---|
1277 | }
|
---|
1278 |
|
---|
1279 | if p.useOptionI() {
|
---|
1280 | ch = unicode.ToLower(ch)
|
---|
1281 | }
|
---|
1282 |
|
---|
1283 | return newRegexNodeCh(ntOne, p.options, ch), nil
|
---|
1284 | }
|
---|
1285 |
|
---|
1286 | // Scans X for \p{X} or \P{X}
|
---|
1287 | func (p *parser) parseProperty() (string, error) {
|
---|
1288 | if p.charsRight() < 3 {
|
---|
1289 | return "", p.getErr(ErrIncompleteSlashP)
|
---|
1290 | }
|
---|
1291 | ch := p.moveRightGetChar()
|
---|
1292 | if ch != '{' {
|
---|
1293 | return "", p.getErr(ErrMalformedSlashP)
|
---|
1294 | }
|
---|
1295 |
|
---|
1296 | startpos := p.textpos()
|
---|
1297 | for p.charsRight() > 0 {
|
---|
1298 | ch = p.moveRightGetChar()
|
---|
1299 | if !(IsWordChar(ch) || ch == '-') {
|
---|
1300 | p.moveLeft()
|
---|
1301 | break
|
---|
1302 | }
|
---|
1303 | }
|
---|
1304 | capname := string(p.pattern[startpos:p.textpos()])
|
---|
1305 |
|
---|
1306 | if p.charsRight() == 0 || p.moveRightGetChar() != '}' {
|
---|
1307 | return "", p.getErr(ErrIncompleteSlashP)
|
---|
1308 | }
|
---|
1309 |
|
---|
1310 | if !isValidUnicodeCat(capname) {
|
---|
1311 | return "", p.getErr(ErrUnknownSlashP, capname)
|
---|
1312 | }
|
---|
1313 |
|
---|
1314 | return capname, nil
|
---|
1315 | }
|
---|
1316 |
|
---|
1317 | // Returns ReNode type for zero-length assertions with a \ code.
|
---|
1318 | func (p *parser) typeFromCode(ch rune) nodeType {
|
---|
1319 | switch ch {
|
---|
1320 | case 'b':
|
---|
1321 | if p.useOptionE() {
|
---|
1322 | return ntECMABoundary
|
---|
1323 | }
|
---|
1324 | return ntBoundary
|
---|
1325 | case 'B':
|
---|
1326 | if p.useOptionE() {
|
---|
1327 | return ntNonECMABoundary
|
---|
1328 | }
|
---|
1329 | return ntNonboundary
|
---|
1330 | case 'A':
|
---|
1331 | return ntBeginning
|
---|
1332 | case 'G':
|
---|
1333 | return ntStart
|
---|
1334 | case 'Z':
|
---|
1335 | return ntEndZ
|
---|
1336 | case 'z':
|
---|
1337 | return ntEnd
|
---|
1338 | default:
|
---|
1339 | return ntNothing
|
---|
1340 | }
|
---|
1341 | }
|
---|
1342 |
|
---|
1343 | // Scans whitespace or x-mode comments.
|
---|
1344 | func (p *parser) scanBlank() error {
|
---|
1345 | if p.useOptionX() {
|
---|
1346 | for {
|
---|
1347 | for p.charsRight() > 0 && isSpace(p.rightChar(0)) {
|
---|
1348 | p.moveRight(1)
|
---|
1349 | }
|
---|
1350 |
|
---|
1351 | if p.charsRight() == 0 {
|
---|
1352 | break
|
---|
1353 | }
|
---|
1354 |
|
---|
1355 | if p.rightChar(0) == '#' {
|
---|
1356 | for p.charsRight() > 0 && p.rightChar(0) != '\n' {
|
---|
1357 | p.moveRight(1)
|
---|
1358 | }
|
---|
1359 | } else if p.charsRight() >= 3 && p.rightChar(2) == '#' &&
|
---|
1360 | p.rightChar(1) == '?' && p.rightChar(0) == '(' {
|
---|
1361 | for p.charsRight() > 0 && p.rightChar(0) != ')' {
|
---|
1362 | p.moveRight(1)
|
---|
1363 | }
|
---|
1364 | if p.charsRight() == 0 {
|
---|
1365 | return p.getErr(ErrUnterminatedComment)
|
---|
1366 | }
|
---|
1367 | p.moveRight(1)
|
---|
1368 | } else {
|
---|
1369 | break
|
---|
1370 | }
|
---|
1371 | }
|
---|
1372 | } else {
|
---|
1373 | for {
|
---|
1374 | if p.charsRight() < 3 || p.rightChar(2) != '#' ||
|
---|
1375 | p.rightChar(1) != '?' || p.rightChar(0) != '(' {
|
---|
1376 | return nil
|
---|
1377 | }
|
---|
1378 |
|
---|
1379 | for p.charsRight() > 0 && p.rightChar(0) != ')' {
|
---|
1380 | p.moveRight(1)
|
---|
1381 | }
|
---|
1382 | if p.charsRight() == 0 {
|
---|
1383 | return p.getErr(ErrUnterminatedComment)
|
---|
1384 | }
|
---|
1385 | p.moveRight(1)
|
---|
1386 | }
|
---|
1387 | }
|
---|
1388 | return nil
|
---|
1389 | }
|
---|
1390 |
|
---|
1391 | func (p *parser) scanCapname() string {
|
---|
1392 | startpos := p.textpos()
|
---|
1393 |
|
---|
1394 | for p.charsRight() > 0 {
|
---|
1395 | if !IsWordChar(p.moveRightGetChar()) {
|
---|
1396 | p.moveLeft()
|
---|
1397 | break
|
---|
1398 | }
|
---|
1399 | }
|
---|
1400 |
|
---|
1401 | return string(p.pattern[startpos:p.textpos()])
|
---|
1402 | }
|
---|
1403 |
|
---|
1404 | //Scans contents of [] (not including []'s), and converts to a set.
|
---|
1405 | func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
|
---|
1406 | ch := '\x00'
|
---|
1407 | chPrev := '\x00'
|
---|
1408 | inRange := false
|
---|
1409 | firstChar := true
|
---|
1410 | closed := false
|
---|
1411 |
|
---|
1412 | var cc *CharSet
|
---|
1413 | if !scanOnly {
|
---|
1414 | cc = &CharSet{}
|
---|
1415 | }
|
---|
1416 |
|
---|
1417 | if p.charsRight() > 0 && p.rightChar(0) == '^' {
|
---|
1418 | p.moveRight(1)
|
---|
1419 | if !scanOnly {
|
---|
1420 | cc.negate = true
|
---|
1421 | }
|
---|
1422 | }
|
---|
1423 |
|
---|
1424 | for ; p.charsRight() > 0; firstChar = false {
|
---|
1425 | fTranslatedChar := false
|
---|
1426 | ch = p.moveRightGetChar()
|
---|
1427 | if ch == ']' {
|
---|
1428 | if !firstChar {
|
---|
1429 | closed = true
|
---|
1430 | break
|
---|
1431 | } else if p.useOptionE() {
|
---|
1432 | if !scanOnly {
|
---|
1433 | cc.addRanges(NoneClass().ranges)
|
---|
1434 | }
|
---|
1435 | closed = true
|
---|
1436 | break
|
---|
1437 | }
|
---|
1438 |
|
---|
1439 | } else if ch == '\\' && p.charsRight() > 0 {
|
---|
1440 | switch ch = p.moveRightGetChar(); ch {
|
---|
1441 | case 'D', 'd':
|
---|
1442 | if !scanOnly {
|
---|
1443 | if inRange {
|
---|
1444 | return nil, p.getErr(ErrBadClassInCharRange, ch)
|
---|
1445 | }
|
---|
1446 | cc.addDigit(p.useOptionE(), ch == 'D', p.patternRaw)
|
---|
1447 | }
|
---|
1448 | continue
|
---|
1449 |
|
---|
1450 | case 'S', 's':
|
---|
1451 | if !scanOnly {
|
---|
1452 | if inRange {
|
---|
1453 | return nil, p.getErr(ErrBadClassInCharRange, ch)
|
---|
1454 | }
|
---|
1455 | cc.addSpace(p.useOptionE(), ch == 'S')
|
---|
1456 | }
|
---|
1457 | continue
|
---|
1458 |
|
---|
1459 | case 'W', 'w':
|
---|
1460 | if !scanOnly {
|
---|
1461 | if inRange {
|
---|
1462 | return nil, p.getErr(ErrBadClassInCharRange, ch)
|
---|
1463 | }
|
---|
1464 |
|
---|
1465 | cc.addWord(p.useOptionE(), ch == 'W')
|
---|
1466 | }
|
---|
1467 | continue
|
---|
1468 |
|
---|
1469 | case 'p', 'P':
|
---|
1470 | if !scanOnly {
|
---|
1471 | if inRange {
|
---|
1472 | return nil, p.getErr(ErrBadClassInCharRange, ch)
|
---|
1473 | }
|
---|
1474 | prop, err := p.parseProperty()
|
---|
1475 | if err != nil {
|
---|
1476 | return nil, err
|
---|
1477 | }
|
---|
1478 | cc.addCategory(prop, (ch != 'p'), caseInsensitive, p.patternRaw)
|
---|
1479 | } else {
|
---|
1480 | p.parseProperty()
|
---|
1481 | }
|
---|
1482 |
|
---|
1483 | continue
|
---|
1484 |
|
---|
1485 | case '-':
|
---|
1486 | if !scanOnly {
|
---|
1487 | cc.addRange(ch, ch)
|
---|
1488 | }
|
---|
1489 | continue
|
---|
1490 |
|
---|
1491 | default:
|
---|
1492 | p.moveLeft()
|
---|
1493 | var err error
|
---|
1494 | ch, err = p.scanCharEscape() // non-literal character
|
---|
1495 | if err != nil {
|
---|
1496 | return nil, err
|
---|
1497 | }
|
---|
1498 | fTranslatedChar = true
|
---|
1499 | break // this break will only break out of the switch
|
---|
1500 | }
|
---|
1501 | } else if ch == '[' {
|
---|
1502 | // This is code for Posix style properties - [:Ll:] or [:IsTibetan:].
|
---|
1503 | // It currently doesn't do anything other than skip the whole thing!
|
---|
1504 | if p.charsRight() > 0 && p.rightChar(0) == ':' && !inRange {
|
---|
1505 | savePos := p.textpos()
|
---|
1506 |
|
---|
1507 | p.moveRight(1)
|
---|
1508 | negate := false
|
---|
1509 | if p.charsRight() > 1 && p.rightChar(0) == '^' {
|
---|
1510 | negate = true
|
---|
1511 | p.moveRight(1)
|
---|
1512 | }
|
---|
1513 |
|
---|
1514 | nm := p.scanCapname() // snag the name
|
---|
1515 | if !scanOnly && p.useRE2() {
|
---|
1516 | // look up the name since these are valid for RE2
|
---|
1517 | // add the group based on the name
|
---|
1518 | if ok := cc.addNamedASCII(nm, negate); !ok {
|
---|
1519 | return nil, p.getErr(ErrInvalidCharRange)
|
---|
1520 | }
|
---|
1521 | }
|
---|
1522 | if p.charsRight() < 2 || p.moveRightGetChar() != ':' || p.moveRightGetChar() != ']' {
|
---|
1523 | p.textto(savePos)
|
---|
1524 | } else if p.useRE2() {
|
---|
1525 | // move on
|
---|
1526 | continue
|
---|
1527 | }
|
---|
1528 | }
|
---|
1529 | }
|
---|
1530 |
|
---|
1531 | if inRange {
|
---|
1532 | inRange = false
|
---|
1533 | if !scanOnly {
|
---|
1534 | if ch == '[' && !fTranslatedChar && !firstChar {
|
---|
1535 | // We thought we were in a range, but we're actually starting a subtraction.
|
---|
1536 | // In that case, we'll add chPrev to our char class, skip the opening [, and
|
---|
1537 | // scan the new character class recursively.
|
---|
1538 | cc.addChar(chPrev)
|
---|
1539 | sub, err := p.scanCharSet(caseInsensitive, false)
|
---|
1540 | if err != nil {
|
---|
1541 | return nil, err
|
---|
1542 | }
|
---|
1543 | cc.addSubtraction(sub)
|
---|
1544 |
|
---|
1545 | if p.charsRight() > 0 && p.rightChar(0) != ']' {
|
---|
1546 | return nil, p.getErr(ErrSubtractionMustBeLast)
|
---|
1547 | }
|
---|
1548 | } else {
|
---|
1549 | // a regular range, like a-z
|
---|
1550 | if chPrev > ch {
|
---|
1551 | return nil, p.getErr(ErrReversedCharRange)
|
---|
1552 | }
|
---|
1553 | cc.addRange(chPrev, ch)
|
---|
1554 | }
|
---|
1555 | }
|
---|
1556 | } else if p.charsRight() >= 2 && p.rightChar(0) == '-' && p.rightChar(1) != ']' {
|
---|
1557 | // this could be the start of a range
|
---|
1558 | chPrev = ch
|
---|
1559 | inRange = true
|
---|
1560 | p.moveRight(1)
|
---|
1561 | } else if p.charsRight() >= 1 && ch == '-' && !fTranslatedChar && p.rightChar(0) == '[' && !firstChar {
|
---|
1562 | // we aren't in a range, and now there is a subtraction. Usually this happens
|
---|
1563 | // only when a subtraction follows a range, like [a-z-[b]]
|
---|
1564 | if !scanOnly {
|
---|
1565 | p.moveRight(1)
|
---|
1566 | sub, err := p.scanCharSet(caseInsensitive, false)
|
---|
1567 | if err != nil {
|
---|
1568 | return nil, err
|
---|
1569 | }
|
---|
1570 | cc.addSubtraction(sub)
|
---|
1571 |
|
---|
1572 | if p.charsRight() > 0 && p.rightChar(0) != ']' {
|
---|
1573 | return nil, p.getErr(ErrSubtractionMustBeLast)
|
---|
1574 | }
|
---|
1575 | } else {
|
---|
1576 | p.moveRight(1)
|
---|
1577 | p.scanCharSet(caseInsensitive, true)
|
---|
1578 | }
|
---|
1579 | } else {
|
---|
1580 | if !scanOnly {
|
---|
1581 | cc.addRange(ch, ch)
|
---|
1582 | }
|
---|
1583 | }
|
---|
1584 | }
|
---|
1585 |
|
---|
1586 | if !closed {
|
---|
1587 | return nil, p.getErr(ErrUnterminatedBracket)
|
---|
1588 | }
|
---|
1589 |
|
---|
1590 | if !scanOnly && caseInsensitive {
|
---|
1591 | cc.addLowercase()
|
---|
1592 | }
|
---|
1593 |
|
---|
1594 | return cc, nil
|
---|
1595 | }
|
---|
1596 |
|
---|
1597 | // Scans any number of decimal digits (pegs value at 2^31-1 if too large)
|
---|
1598 | func (p *parser) scanDecimal() (int, error) {
|
---|
1599 | i := 0
|
---|
1600 | var d int
|
---|
1601 |
|
---|
1602 | for p.charsRight() > 0 {
|
---|
1603 | d = int(p.rightChar(0) - '0')
|
---|
1604 | if d < 0 || d > 9 {
|
---|
1605 | break
|
---|
1606 | }
|
---|
1607 | p.moveRight(1)
|
---|
1608 |
|
---|
1609 | if i > maxValueDiv10 || (i == maxValueDiv10 && d > maxValueMod10) {
|
---|
1610 | return 0, p.getErr(ErrCaptureGroupOutOfRange)
|
---|
1611 | }
|
---|
1612 |
|
---|
1613 | i *= 10
|
---|
1614 | i += d
|
---|
1615 | }
|
---|
1616 |
|
---|
1617 | return int(i), nil
|
---|
1618 | }
|
---|
1619 |
|
---|
1620 | // Returns true for options allowed only at the top level
|
---|
1621 | func isOnlyTopOption(option RegexOptions) bool {
|
---|
1622 | return option == RightToLeft || option == ECMAScript || option == RE2
|
---|
1623 | }
|
---|
1624 |
|
---|
1625 | // Scans cimsx-cimsx option string, stops at the first unrecognized char.
|
---|
1626 | func (p *parser) scanOptions() {
|
---|
1627 |
|
---|
1628 | for off := false; p.charsRight() > 0; p.moveRight(1) {
|
---|
1629 | ch := p.rightChar(0)
|
---|
1630 |
|
---|
1631 | if ch == '-' {
|
---|
1632 | off = true
|
---|
1633 | } else if ch == '+' {
|
---|
1634 | off = false
|
---|
1635 | } else {
|
---|
1636 | option := optionFromCode(ch)
|
---|
1637 | if option == 0 || isOnlyTopOption(option) {
|
---|
1638 | return
|
---|
1639 | }
|
---|
1640 |
|
---|
1641 | if off {
|
---|
1642 | p.options &= ^option
|
---|
1643 | } else {
|
---|
1644 | p.options |= option
|
---|
1645 | }
|
---|
1646 | }
|
---|
1647 | }
|
---|
1648 | }
|
---|
1649 |
|
---|
1650 | // Scans \ code for escape codes that map to single unicode chars.
|
---|
1651 | func (p *parser) scanCharEscape() (r rune, err error) {
|
---|
1652 |
|
---|
1653 | ch := p.moveRightGetChar()
|
---|
1654 |
|
---|
1655 | if ch >= '0' && ch <= '7' {
|
---|
1656 | p.moveLeft()
|
---|
1657 | return p.scanOctal(), nil
|
---|
1658 | }
|
---|
1659 |
|
---|
1660 | pos := p.textpos()
|
---|
1661 |
|
---|
1662 | switch ch {
|
---|
1663 | case 'x':
|
---|
1664 | // support for \x{HEX} syntax from Perl and PCRE
|
---|
1665 | if p.charsRight() > 0 && p.rightChar(0) == '{' {
|
---|
1666 | if p.useOptionE() {
|
---|
1667 | return ch, nil
|
---|
1668 | }
|
---|
1669 | p.moveRight(1)
|
---|
1670 | return p.scanHexUntilBrace()
|
---|
1671 | } else {
|
---|
1672 | r, err = p.scanHex(2)
|
---|
1673 | }
|
---|
1674 | case 'u':
|
---|
1675 | r, err = p.scanHex(4)
|
---|
1676 | case 'a':
|
---|
1677 | return '\u0007', nil
|
---|
1678 | case 'b':
|
---|
1679 | return '\b', nil
|
---|
1680 | case 'e':
|
---|
1681 | return '\u001B', nil
|
---|
1682 | case 'f':
|
---|
1683 | return '\f', nil
|
---|
1684 | case 'n':
|
---|
1685 | return '\n', nil
|
---|
1686 | case 'r':
|
---|
1687 | return '\r', nil
|
---|
1688 | case 't':
|
---|
1689 | return '\t', nil
|
---|
1690 | case 'v':
|
---|
1691 | return '\u000B', nil
|
---|
1692 | case 'c':
|
---|
1693 | r, err = p.scanControl()
|
---|
1694 | default:
|
---|
1695 | if !p.useOptionE() && IsWordChar(ch) {
|
---|
1696 | return 0, p.getErr(ErrUnrecognizedEscape, string(ch))
|
---|
1697 | }
|
---|
1698 | return ch, nil
|
---|
1699 | }
|
---|
1700 | if err != nil && p.useOptionE() {
|
---|
1701 | p.textto(pos)
|
---|
1702 | return ch, nil
|
---|
1703 | }
|
---|
1704 | return
|
---|
1705 | }
|
---|
1706 |
|
---|
1707 | // Grabs and converts an ascii control character
|
---|
1708 | func (p *parser) scanControl() (rune, error) {
|
---|
1709 | if p.charsRight() <= 0 {
|
---|
1710 | return 0, p.getErr(ErrMissingControl)
|
---|
1711 | }
|
---|
1712 |
|
---|
1713 | ch := p.moveRightGetChar()
|
---|
1714 |
|
---|
1715 | // \ca interpreted as \cA
|
---|
1716 |
|
---|
1717 | if ch >= 'a' && ch <= 'z' {
|
---|
1718 | ch = (ch - ('a' - 'A'))
|
---|
1719 | }
|
---|
1720 | ch = (ch - '@')
|
---|
1721 | if ch >= 0 && ch < ' ' {
|
---|
1722 | return ch, nil
|
---|
1723 | }
|
---|
1724 |
|
---|
1725 | return 0, p.getErr(ErrUnrecognizedControl)
|
---|
1726 |
|
---|
1727 | }
|
---|
1728 |
|
---|
1729 | // Scan hex digits until we hit a closing brace.
|
---|
1730 | // Non-hex digits, hex value too large for UTF-8, or running out of chars are errors
|
---|
1731 | func (p *parser) scanHexUntilBrace() (rune, error) {
|
---|
1732 | // PCRE spec reads like unlimited hex digits are allowed, but unicode has a limit
|
---|
1733 | // so we can enforce that
|
---|
1734 | i := 0
|
---|
1735 | hasContent := false
|
---|
1736 |
|
---|
1737 | for p.charsRight() > 0 {
|
---|
1738 | ch := p.moveRightGetChar()
|
---|
1739 | if ch == '}' {
|
---|
1740 | // hit our close brace, we're done here
|
---|
1741 | // prevent \x{}
|
---|
1742 | if !hasContent {
|
---|
1743 | return 0, p.getErr(ErrTooFewHex)
|
---|
1744 | }
|
---|
1745 | return rune(i), nil
|
---|
1746 | }
|
---|
1747 | hasContent = true
|
---|
1748 | // no brace needs to be hex digit
|
---|
1749 | d := hexDigit(ch)
|
---|
1750 | if d < 0 {
|
---|
1751 | return 0, p.getErr(ErrMissingBrace)
|
---|
1752 | }
|
---|
1753 |
|
---|
1754 | i *= 0x10
|
---|
1755 | i += d
|
---|
1756 |
|
---|
1757 | if i > unicode.MaxRune {
|
---|
1758 | return 0, p.getErr(ErrInvalidHex)
|
---|
1759 | }
|
---|
1760 | }
|
---|
1761 |
|
---|
1762 | // we only make it here if we run out of digits without finding the brace
|
---|
1763 | return 0, p.getErr(ErrMissingBrace)
|
---|
1764 | }
|
---|
1765 |
|
---|
1766 | // Scans exactly c hex digits (c=2 for \xFF, c=4 for \uFFFF)
|
---|
1767 | func (p *parser) scanHex(c int) (rune, error) {
|
---|
1768 |
|
---|
1769 | i := 0
|
---|
1770 |
|
---|
1771 | if p.charsRight() >= c {
|
---|
1772 | for c > 0 {
|
---|
1773 | d := hexDigit(p.moveRightGetChar())
|
---|
1774 | if d < 0 {
|
---|
1775 | break
|
---|
1776 | }
|
---|
1777 | i *= 0x10
|
---|
1778 | i += d
|
---|
1779 | c--
|
---|
1780 | }
|
---|
1781 | }
|
---|
1782 |
|
---|
1783 | if c > 0 {
|
---|
1784 | return 0, p.getErr(ErrTooFewHex)
|
---|
1785 | }
|
---|
1786 |
|
---|
1787 | return rune(i), nil
|
---|
1788 | }
|
---|
1789 |
|
---|
// hexDigit returns the value (0..15) of the hexadecimal digit ch, accepting
// both upper- and lower-case letters, or -1 if ch is not a hex digit.
func hexDigit(ch rune) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= ch && ch <= 'f':
		return int(ch-'a') + 0xa
	case 'A' <= ch && ch <= 'F':
		return int(ch-'A') + 0xa
	default:
		return -1
	}
}
|
---|
1807 |
|
---|
1808 | // Scans up to three octal digits (stops before exceeding 0377).
|
---|
1809 | func (p *parser) scanOctal() rune {
|
---|
1810 | // Consume octal chars only up to 3 digits and value 0377
|
---|
1811 |
|
---|
1812 | c := 3
|
---|
1813 |
|
---|
1814 | if c > p.charsRight() {
|
---|
1815 | c = p.charsRight()
|
---|
1816 | }
|
---|
1817 |
|
---|
1818 | //we know the first char is good because the caller had to check
|
---|
1819 | i := 0
|
---|
1820 | d := int(p.rightChar(0) - '0')
|
---|
1821 | for c > 0 && d <= 7 && d >= 0 {
|
---|
1822 | if i >= 0x20 && p.useOptionE() {
|
---|
1823 | break
|
---|
1824 | }
|
---|
1825 | i *= 8
|
---|
1826 | i += d
|
---|
1827 | c--
|
---|
1828 |
|
---|
1829 | p.moveRight(1)
|
---|
1830 | if !p.rightMost() {
|
---|
1831 | d = int(p.rightChar(0) - '0')
|
---|
1832 | }
|
---|
1833 | }
|
---|
1834 |
|
---|
1835 | // Octal codes only go up to 255. Any larger and the behavior that Perl follows
|
---|
1836 | // is simply to truncate the high bits.
|
---|
1837 | i &= 0xFF
|
---|
1838 |
|
---|
1839 | return rune(i)
|
---|
1840 | }
|
---|
1841 |
|
---|
1842 | // Returns the current parsing position.
|
---|
1843 | func (p *parser) textpos() int {
|
---|
1844 | return p.currentPos
|
---|
1845 | }
|
---|
1846 |
|
---|
1847 | // Zaps to a specific parsing position.
|
---|
1848 | func (p *parser) textto(pos int) {
|
---|
1849 | p.currentPos = pos
|
---|
1850 | }
|
---|
1851 |
|
---|
1852 | // Returns the char at the right of the current parsing position and advances to the right.
|
---|
1853 | func (p *parser) moveRightGetChar() rune {
|
---|
1854 | ch := p.pattern[p.currentPos]
|
---|
1855 | p.currentPos++
|
---|
1856 | return ch
|
---|
1857 | }
|
---|
1858 |
|
---|
1859 | // Moves the current position to the right.
|
---|
1860 | func (p *parser) moveRight(i int) {
|
---|
1861 | // default would be 1
|
---|
1862 | p.currentPos += i
|
---|
1863 | }
|
---|
1864 |
|
---|
1865 | // Moves the current parsing position one to the left.
|
---|
1866 | func (p *parser) moveLeft() {
|
---|
1867 | p.currentPos--
|
---|
1868 | }
|
---|
1869 |
|
---|
1870 | // Returns the char left of the current parsing position.
|
---|
1871 | func (p *parser) charAt(i int) rune {
|
---|
1872 | return p.pattern[i]
|
---|
1873 | }
|
---|
1874 |
|
---|
1875 | // Returns the char i chars right of the current parsing position.
|
---|
1876 | func (p *parser) rightChar(i int) rune {
|
---|
1877 | // default would be 0
|
---|
1878 | return p.pattern[p.currentPos+i]
|
---|
1879 | }
|
---|
1880 |
|
---|
1881 | // Number of characters to the right of the current parsing position.
|
---|
1882 | func (p *parser) charsRight() int {
|
---|
1883 | return len(p.pattern) - p.currentPos
|
---|
1884 | }
|
---|
1885 |
|
---|
1886 | func (p *parser) rightMost() bool {
|
---|
1887 | return p.currentPos == len(p.pattern)
|
---|
1888 | }
|
---|
1889 |
|
---|
1890 | // Looks up the slot number for a given name
|
---|
1891 | func (p *parser) captureSlotFromName(capname string) int {
|
---|
1892 | return p.capnames[capname]
|
---|
1893 | }
|
---|
1894 |
|
---|
1895 | // True if the capture slot was noted
|
---|
1896 | func (p *parser) isCaptureSlot(i int) bool {
|
---|
1897 | if p.caps != nil {
|
---|
1898 | _, ok := p.caps[i]
|
---|
1899 | return ok
|
---|
1900 | }
|
---|
1901 |
|
---|
1902 | return (i >= 0 && i < p.capsize)
|
---|
1903 | }
|
---|
1904 |
|
---|
1905 | // Looks up the slot number for a given name
|
---|
1906 | func (p *parser) isCaptureName(capname string) bool {
|
---|
1907 | if p.capnames == nil {
|
---|
1908 | return false
|
---|
1909 | }
|
---|
1910 |
|
---|
1911 | _, ok := p.capnames[capname]
|
---|
1912 | return ok
|
---|
1913 | }
|
---|
1914 |
|
---|
1915 | // option shortcuts
|
---|
1916 |
|
---|
1917 | // True if N option disabling '(' autocapture is on.
|
---|
1918 | func (p *parser) useOptionN() bool {
|
---|
1919 | return (p.options & ExplicitCapture) != 0
|
---|
1920 | }
|
---|
1921 |
|
---|
1922 | // True if I option enabling case-insensitivity is on.
|
---|
1923 | func (p *parser) useOptionI() bool {
|
---|
1924 | return (p.options & IgnoreCase) != 0
|
---|
1925 | }
|
---|
1926 |
|
---|
1927 | // True if M option altering meaning of $ and ^ is on.
|
---|
1928 | func (p *parser) useOptionM() bool {
|
---|
1929 | return (p.options & Multiline) != 0
|
---|
1930 | }
|
---|
1931 |
|
---|
1932 | // True if S option altering meaning of . is on.
|
---|
1933 | func (p *parser) useOptionS() bool {
|
---|
1934 | return (p.options & Singleline) != 0
|
---|
1935 | }
|
---|
1936 |
|
---|
1937 | // True if X option enabling whitespace/comment mode is on.
|
---|
1938 | func (p *parser) useOptionX() bool {
|
---|
1939 | return (p.options & IgnorePatternWhitespace) != 0
|
---|
1940 | }
|
---|
1941 |
|
---|
1942 | // True if E option enabling ECMAScript behavior on.
|
---|
1943 | func (p *parser) useOptionE() bool {
|
---|
1944 | return (p.options & ECMAScript) != 0
|
---|
1945 | }
|
---|
1946 |
|
---|
1947 | // true to use RE2 compatibility parsing behavior.
|
---|
1948 | func (p *parser) useRE2() bool {
|
---|
1949 | return (p.options & RE2) != 0
|
---|
1950 | }
|
---|
1951 |
|
---|
1952 | // True if options stack is empty.
|
---|
1953 | func (p *parser) emptyOptionsStack() bool {
|
---|
1954 | return len(p.optionsStack) == 0
|
---|
1955 | }
|
---|
1956 |
|
---|
1957 | // Finish the current quantifiable (when a quantifier is not found or is not possible)
|
---|
1958 | func (p *parser) addConcatenate() {
|
---|
1959 | // The first (| inside a Testgroup group goes directly to the group
|
---|
1960 | p.concatenation.addChild(p.unit)
|
---|
1961 | p.unit = nil
|
---|
1962 | }
|
---|
1963 |
|
---|
1964 | // Finish the current quantifiable (when a quantifier is found)
|
---|
1965 | func (p *parser) addConcatenate3(lazy bool, min, max int) {
|
---|
1966 | p.concatenation.addChild(p.unit.makeQuantifier(lazy, min, max))
|
---|
1967 | p.unit = nil
|
---|
1968 | }
|
---|
1969 |
|
---|
1970 | // Sets the current unit to a single char node
|
---|
1971 | func (p *parser) addUnitOne(ch rune) {
|
---|
1972 | if p.useOptionI() {
|
---|
1973 | ch = unicode.ToLower(ch)
|
---|
1974 | }
|
---|
1975 |
|
---|
1976 | p.unit = newRegexNodeCh(ntOne, p.options, ch)
|
---|
1977 | }
|
---|
1978 |
|
---|
1979 | // Sets the current unit to a single inverse-char node
|
---|
1980 | func (p *parser) addUnitNotone(ch rune) {
|
---|
1981 | if p.useOptionI() {
|
---|
1982 | ch = unicode.ToLower(ch)
|
---|
1983 | }
|
---|
1984 |
|
---|
1985 | p.unit = newRegexNodeCh(ntNotone, p.options, ch)
|
---|
1986 | }
|
---|
1987 |
|
---|
1988 | // Sets the current unit to a single set node
|
---|
1989 | func (p *parser) addUnitSet(set *CharSet) {
|
---|
1990 | p.unit = newRegexNodeSet(ntSet, p.options, set)
|
---|
1991 | }
|
---|
1992 |
|
---|
1993 | // Sets the current unit to a subtree
|
---|
1994 | func (p *parser) addUnitNode(node *regexNode) {
|
---|
1995 | p.unit = node
|
---|
1996 | }
|
---|
1997 |
|
---|
1998 | // Sets the current unit to an assertion of the specified type
|
---|
1999 | func (p *parser) addUnitType(t nodeType) {
|
---|
2000 | p.unit = newRegexNode(t, p.options)
|
---|
2001 | }
|
---|
2002 |
|
---|
2003 | // Finish the current group (in response to a ')' or end)
|
---|
2004 | func (p *parser) addGroup() error {
|
---|
2005 | if p.group.t == ntTestgroup || p.group.t == ntTestref {
|
---|
2006 | p.group.addChild(p.concatenation.reverseLeft())
|
---|
2007 | if (p.group.t == ntTestref && len(p.group.children) > 2) || len(p.group.children) > 3 {
|
---|
2008 | return p.getErr(ErrTooManyAlternates)
|
---|
2009 | }
|
---|
2010 | } else {
|
---|
2011 | p.alternation.addChild(p.concatenation.reverseLeft())
|
---|
2012 | p.group.addChild(p.alternation)
|
---|
2013 | }
|
---|
2014 |
|
---|
2015 | p.unit = p.group
|
---|
2016 | return nil
|
---|
2017 | }
|
---|
2018 |
|
---|
2019 | // Pops the option stack, but keeps the current options unchanged.
|
---|
2020 | func (p *parser) popKeepOptions() {
|
---|
2021 | lastIdx := len(p.optionsStack) - 1
|
---|
2022 | p.optionsStack = p.optionsStack[:lastIdx]
|
---|
2023 | }
|
---|
2024 |
|
---|
2025 | // Recalls options from the stack.
|
---|
2026 | func (p *parser) popOptions() {
|
---|
2027 | lastIdx := len(p.optionsStack) - 1
|
---|
2028 | // get the last item on the stack and then remove it by reslicing
|
---|
2029 | p.options = p.optionsStack[lastIdx]
|
---|
2030 | p.optionsStack = p.optionsStack[:lastIdx]
|
---|
2031 | }
|
---|
2032 |
|
---|
2033 | // Saves options on a stack.
|
---|
2034 | func (p *parser) pushOptions() {
|
---|
2035 | p.optionsStack = append(p.optionsStack, p.options)
|
---|
2036 | }
|
---|
2037 |
|
---|
2038 | // Add a string to the last concatenate.
|
---|
2039 | func (p *parser) addToConcatenate(pos, cch int, isReplacement bool) {
|
---|
2040 | var node *regexNode
|
---|
2041 |
|
---|
2042 | if cch == 0 {
|
---|
2043 | return
|
---|
2044 | }
|
---|
2045 |
|
---|
2046 | if cch > 1 {
|
---|
2047 | str := p.pattern[pos : pos+cch]
|
---|
2048 |
|
---|
2049 | if p.useOptionI() && !isReplacement {
|
---|
2050 | // We do the ToLower character by character for consistency. With surrogate chars, doing
|
---|
2051 | // a ToLower on the entire string could actually change the surrogate pair. This is more correct
|
---|
2052 | // linguistically, but since Regex doesn't support surrogates, it's more important to be
|
---|
2053 | // consistent.
|
---|
2054 | for i := 0; i < len(str); i++ {
|
---|
2055 | str[i] = unicode.ToLower(str[i])
|
---|
2056 | }
|
---|
2057 | }
|
---|
2058 |
|
---|
2059 | node = newRegexNodeStr(ntMulti, p.options, str)
|
---|
2060 | } else {
|
---|
2061 | ch := p.charAt(pos)
|
---|
2062 |
|
---|
2063 | if p.useOptionI() && !isReplacement {
|
---|
2064 | ch = unicode.ToLower(ch)
|
---|
2065 | }
|
---|
2066 |
|
---|
2067 | node = newRegexNodeCh(ntOne, p.options, ch)
|
---|
2068 | }
|
---|
2069 |
|
---|
2070 | p.concatenation.addChild(node)
|
---|
2071 | }
|
---|
2072 |
|
---|
2073 | // Push the parser state (in response to an open paren)
|
---|
2074 | func (p *parser) pushGroup() {
|
---|
2075 | p.group.next = p.stack
|
---|
2076 | p.alternation.next = p.group
|
---|
2077 | p.concatenation.next = p.alternation
|
---|
2078 | p.stack = p.concatenation
|
---|
2079 | }
|
---|
2080 |
|
---|
2081 | // Remember the pushed state (in response to a ')')
|
---|
2082 | func (p *parser) popGroup() error {
|
---|
2083 | p.concatenation = p.stack
|
---|
2084 | p.alternation = p.concatenation.next
|
---|
2085 | p.group = p.alternation.next
|
---|
2086 | p.stack = p.group.next
|
---|
2087 |
|
---|
2088 | // The first () inside a Testgroup group goes directly to the group
|
---|
2089 | if p.group.t == ntTestgroup && len(p.group.children) == 0 {
|
---|
2090 | if p.unit == nil {
|
---|
2091 | return p.getErr(ErrConditionalExpression)
|
---|
2092 | }
|
---|
2093 |
|
---|
2094 | p.group.addChild(p.unit)
|
---|
2095 | p.unit = nil
|
---|
2096 | }
|
---|
2097 | return nil
|
---|
2098 | }
|
---|
2099 |
|
---|
2100 | // True if the group stack is empty.
|
---|
2101 | func (p *parser) emptyStack() bool {
|
---|
2102 | return p.stack == nil
|
---|
2103 | }
|
---|
2104 |
|
---|
2105 | // Start a new round for the parser state (in response to an open paren or string start)
|
---|
2106 | func (p *parser) startGroup(openGroup *regexNode) {
|
---|
2107 | p.group = openGroup
|
---|
2108 | p.alternation = newRegexNode(ntAlternate, p.options)
|
---|
2109 | p.concatenation = newRegexNode(ntConcatenate, p.options)
|
---|
2110 | }
|
---|
2111 |
|
---|
2112 | // Finish the current concatenation (in response to a |)
|
---|
2113 | func (p *parser) addAlternate() {
|
---|
2114 | // The | parts inside a Testgroup group go directly to the group
|
---|
2115 |
|
---|
2116 | if p.group.t == ntTestgroup || p.group.t == ntTestref {
|
---|
2117 | p.group.addChild(p.concatenation.reverseLeft())
|
---|
2118 | } else {
|
---|
2119 | p.alternation.addChild(p.concatenation.reverseLeft())
|
---|
2120 | }
|
---|
2121 |
|
---|
2122 | p.concatenation = newRegexNode(ntConcatenate, p.options)
|
---|
2123 | }
|
---|
2124 |
|
---|
// Categories used to classify ASCII characters while scanning a pattern.
// The numeric order matters: the lookup helpers compare with >=, so a higher
// category also satisfies every test for a lower one.
const (
	Q byte = 5 // quantifier
	S      = 4 // ordinary stopper
	Z      = 3 // ScanBlank stopper
	X      = 2 // whitespace
	E      = 1 // should be escaped
)

// _category maps each ASCII code point (0x00-0x7F) to its category. Code
// points that are not listed have category 0.
var _category = []byte{
	// whitespace
	'\t': X, '\n': X, '\v': X, '\f': X, '\r': X, ' ': X,

	// ScanBlank stopper
	'#': Z,

	// ordinary stoppers
	'$': S, '(': S, ')': S, '.': S, '[': S, '\\': S, '^': S, '|': S,

	// quantifiers
	'*': Q, '+': Q, '?': Q, '{': Q,

	// Highest ASCII code point; listed only to fix the table length at 128.
	0x7F: 0,
}
2145 |
|
---|
2146 | func isSpace(ch rune) bool {
|
---|
2147 | return (ch <= ' ' && _category[ch] == X)
|
---|
2148 | }
|
---|
2149 |
|
---|
2150 | // Returns true for those characters that terminate a string of ordinary chars.
|
---|
2151 | func isSpecial(ch rune) bool {
|
---|
2152 | return (ch <= '|' && _category[ch] >= S)
|
---|
2153 | }
|
---|
2154 |
|
---|
2155 | // Returns true for those characters that terminate a string of ordinary chars.
|
---|
2156 | func isStopperX(ch rune) bool {
|
---|
2157 | return (ch <= '|' && _category[ch] >= X)
|
---|
2158 | }
|
---|
2159 |
|
---|
2160 | // Returns true for those characters that begin a quantifier.
|
---|
2161 | func isQuantifier(ch rune) bool {
|
---|
2162 | return (ch <= '{' && _category[ch] >= Q)
|
---|
2163 | }
|
---|
2164 |
|
---|
2165 | func (p *parser) isTrueQuantifier() bool {
|
---|
2166 | nChars := p.charsRight()
|
---|
2167 | if nChars == 0 {
|
---|
2168 | return false
|
---|
2169 | }
|
---|
2170 |
|
---|
2171 | startpos := p.textpos()
|
---|
2172 | ch := p.charAt(startpos)
|
---|
2173 | if ch != '{' {
|
---|
2174 | return ch <= '{' && _category[ch] >= Q
|
---|
2175 | }
|
---|
2176 |
|
---|
2177 | //UGLY: this is ugly -- the original code was ugly too
|
---|
2178 | pos := startpos
|
---|
2179 | for {
|
---|
2180 | nChars--
|
---|
2181 | if nChars <= 0 {
|
---|
2182 | break
|
---|
2183 | }
|
---|
2184 | pos++
|
---|
2185 | ch = p.charAt(pos)
|
---|
2186 | if ch < '0' || ch > '9' {
|
---|
2187 | break
|
---|
2188 | }
|
---|
2189 | }
|
---|
2190 |
|
---|
2191 | if nChars == 0 || pos-startpos == 1 {
|
---|
2192 | return false
|
---|
2193 | }
|
---|
2194 | if ch == '}' {
|
---|
2195 | return true
|
---|
2196 | }
|
---|
2197 | if ch != ',' {
|
---|
2198 | return false
|
---|
2199 | }
|
---|
2200 | for {
|
---|
2201 | nChars--
|
---|
2202 | if nChars <= 0 {
|
---|
2203 | break
|
---|
2204 | }
|
---|
2205 | pos++
|
---|
2206 | ch = p.charAt(pos)
|
---|
2207 | if ch < '0' || ch > '9' {
|
---|
2208 | break
|
---|
2209 | }
|
---|
2210 | }
|
---|
2211 |
|
---|
2212 | return nChars > 0 && ch == '}'
|
---|
2213 | }
|
---|