1 | package syntax
|
---|
2 |
|
---|
3 | import (
|
---|
4 | "bytes"
|
---|
5 | "fmt"
|
---|
6 | "math"
|
---|
7 | "strconv"
|
---|
8 | )
|
---|
9 |
|
---|
10 | type RegexTree struct {
|
---|
11 | root *regexNode
|
---|
12 | caps map[int]int
|
---|
13 | capnumlist []int
|
---|
14 | captop int
|
---|
15 | Capnames map[string]int
|
---|
16 | Caplist []string
|
---|
17 | options RegexOptions
|
---|
18 | }
|
---|
19 |
|
---|
20 | // It is built into a parsed tree for a regular expression.
|
---|
21 |
|
---|
22 | // Implementation notes:
|
---|
23 | //
|
---|
24 | // Since the node tree is a temporary data structure only used
|
---|
25 | // during compilation of the regexp to integer codes, it's
|
---|
26 | // designed for clarity and convenience rather than
|
---|
27 | // space efficiency.
|
---|
28 | //
|
---|
29 | // RegexNodes are built into a tree, linked by the n.children list.
|
---|
30 | // Each node also has a n.parent and n.ichild member indicating
|
---|
31 | // its parent and which child # it is in its parent's list.
|
---|
32 | //
|
---|
33 | // RegexNodes come in as many types as there are constructs in
|
---|
34 | // a regular expression, for example, "concatenate", "alternate",
|
---|
35 | // "one", "rept", "group". There are also node types for basic
|
---|
36 | // peephole optimizations, e.g., "onerep", "notsetrep", etc.
|
---|
37 | //
|
---|
38 | // Because perl 5 allows "lookback" groups that scan backwards,
|
---|
39 | // each node also gets a "direction". Normally the value of
|
---|
40 | // boolean n.backward = false.
|
---|
41 | //
|
---|
42 | // During parsing, top-level nodes are also stacked onto a parse
|
---|
43 | // stack (a stack of trees). For this purpose we have a n.next
|
---|
44 | // pointer. [Note that to save a few bytes, we could overload the
|
---|
45 | // n.parent pointer instead.]
|
---|
46 | //
|
---|
47 | // On the parse stack, each tree has a "role" - basically, the
|
---|
48 | // nonterminal in the grammar that the parser has currently
|
---|
49 | // assigned to the tree. That code is stored in n.role.
|
---|
50 | //
|
---|
51 | // Finally, some of the different kinds of nodes have data.
|
---|
52 | // Two integers (for the looping constructs) are stored in
|
---|
53 | // n.operands, an an object (either a string or a set)
|
---|
54 | // is stored in n.data
|
---|
55 | type regexNode struct {
|
---|
56 | t nodeType
|
---|
57 | children []*regexNode
|
---|
58 | str []rune
|
---|
59 | set *CharSet
|
---|
60 | ch rune
|
---|
61 | m int
|
---|
62 | n int
|
---|
63 | options RegexOptions
|
---|
64 | next *regexNode
|
---|
65 | }
|
---|
66 |
|
---|
67 | type nodeType int32
|
---|
68 |
|
---|
69 | const (
|
---|
70 | // The following are leaves, and correspond to primitive operations
|
---|
71 |
|
---|
72 | ntOnerep nodeType = 0 // lef,back char,min,max a {n}
|
---|
73 | ntNotonerep = 1 // lef,back char,min,max .{n}
|
---|
74 | ntSetrep = 2 // lef,back set,min,max [\d]{n}
|
---|
75 | ntOneloop = 3 // lef,back char,min,max a {,n}
|
---|
76 | ntNotoneloop = 4 // lef,back char,min,max .{,n}
|
---|
77 | ntSetloop = 5 // lef,back set,min,max [\d]{,n}
|
---|
78 | ntOnelazy = 6 // lef,back char,min,max a {,n}?
|
---|
79 | ntNotonelazy = 7 // lef,back char,min,max .{,n}?
|
---|
80 | ntSetlazy = 8 // lef,back set,min,max [\d]{,n}?
|
---|
81 | ntOne = 9 // lef char a
|
---|
82 | ntNotone = 10 // lef char [^a]
|
---|
83 | ntSet = 11 // lef set [a-z\s] \w \s \d
|
---|
84 | ntMulti = 12 // lef string abcd
|
---|
85 | ntRef = 13 // lef group \#
|
---|
86 | ntBol = 14 // ^
|
---|
87 | ntEol = 15 // $
|
---|
88 | ntBoundary = 16 // \b
|
---|
89 | ntNonboundary = 17 // \B
|
---|
90 | ntBeginning = 18 // \A
|
---|
91 | ntStart = 19 // \G
|
---|
92 | ntEndZ = 20 // \Z
|
---|
93 | ntEnd = 21 // \Z
|
---|
94 |
|
---|
95 | // Interior nodes do not correspond to primitive operations, but
|
---|
96 | // control structures compositing other operations
|
---|
97 |
|
---|
98 | // Concat and alternate take n children, and can run forward or backwards
|
---|
99 |
|
---|
100 | ntNothing = 22 // []
|
---|
101 | ntEmpty = 23 // ()
|
---|
102 | ntAlternate = 24 // a|b
|
---|
103 | ntConcatenate = 25 // ab
|
---|
104 | ntLoop = 26 // m,x * + ? {,}
|
---|
105 | ntLazyloop = 27 // m,x *? +? ?? {,}?
|
---|
106 | ntCapture = 28 // n ()
|
---|
107 | ntGroup = 29 // (?:)
|
---|
108 | ntRequire = 30 // (?=) (?<=)
|
---|
109 | ntPrevent = 31 // (?!) (?<!)
|
---|
110 | ntGreedy = 32 // (?>) (?<)
|
---|
111 | ntTestref = 33 // (?(n) | )
|
---|
112 | ntTestgroup = 34 // (?(...) | )
|
---|
113 |
|
---|
114 | ntECMABoundary = 41 // \b
|
---|
115 | ntNonECMABoundary = 42 // \B
|
---|
116 | )
|
---|
117 |
|
---|
118 | func newRegexNode(t nodeType, opt RegexOptions) *regexNode {
|
---|
119 | return ®exNode{
|
---|
120 | t: t,
|
---|
121 | options: opt,
|
---|
122 | }
|
---|
123 | }
|
---|
124 |
|
---|
125 | func newRegexNodeCh(t nodeType, opt RegexOptions, ch rune) *regexNode {
|
---|
126 | return ®exNode{
|
---|
127 | t: t,
|
---|
128 | options: opt,
|
---|
129 | ch: ch,
|
---|
130 | }
|
---|
131 | }
|
---|
132 |
|
---|
133 | func newRegexNodeStr(t nodeType, opt RegexOptions, str []rune) *regexNode {
|
---|
134 | return ®exNode{
|
---|
135 | t: t,
|
---|
136 | options: opt,
|
---|
137 | str: str,
|
---|
138 | }
|
---|
139 | }
|
---|
140 |
|
---|
141 | func newRegexNodeSet(t nodeType, opt RegexOptions, set *CharSet) *regexNode {
|
---|
142 | return ®exNode{
|
---|
143 | t: t,
|
---|
144 | options: opt,
|
---|
145 | set: set,
|
---|
146 | }
|
---|
147 | }
|
---|
148 |
|
---|
149 | func newRegexNodeM(t nodeType, opt RegexOptions, m int) *regexNode {
|
---|
150 | return ®exNode{
|
---|
151 | t: t,
|
---|
152 | options: opt,
|
---|
153 | m: m,
|
---|
154 | }
|
---|
155 | }
|
---|
156 | func newRegexNodeMN(t nodeType, opt RegexOptions, m, n int) *regexNode {
|
---|
157 | return ®exNode{
|
---|
158 | t: t,
|
---|
159 | options: opt,
|
---|
160 | m: m,
|
---|
161 | n: n,
|
---|
162 | }
|
---|
163 | }
|
---|
164 |
|
---|
165 | func (n *regexNode) writeStrToBuf(buf *bytes.Buffer) {
|
---|
166 | for i := 0; i < len(n.str); i++ {
|
---|
167 | buf.WriteRune(n.str[i])
|
---|
168 | }
|
---|
169 | }
|
---|
170 |
|
---|
171 | func (n *regexNode) addChild(child *regexNode) {
|
---|
172 | reduced := child.reduce()
|
---|
173 | n.children = append(n.children, reduced)
|
---|
174 | reduced.next = n
|
---|
175 | }
|
---|
176 |
|
---|
177 | func (n *regexNode) insertChildren(afterIndex int, nodes []*regexNode) {
|
---|
178 | newChildren := make([]*regexNode, 0, len(n.children)+len(nodes))
|
---|
179 | n.children = append(append(append(newChildren, n.children[:afterIndex]...), nodes...), n.children[afterIndex:]...)
|
---|
180 | }
|
---|
181 |
|
---|
182 | // removes children including the start but not the end index
|
---|
183 | func (n *regexNode) removeChildren(startIndex, endIndex int) {
|
---|
184 | n.children = append(n.children[:startIndex], n.children[endIndex:]...)
|
---|
185 | }
|
---|
186 |
|
---|
187 | // Pass type as OneLazy or OneLoop
|
---|
188 | func (n *regexNode) makeRep(t nodeType, min, max int) {
|
---|
189 | n.t += (t - ntOne)
|
---|
190 | n.m = min
|
---|
191 | n.n = max
|
---|
192 | }
|
---|
193 |
|
---|
194 | func (n *regexNode) reduce() *regexNode {
|
---|
195 | switch n.t {
|
---|
196 | case ntAlternate:
|
---|
197 | return n.reduceAlternation()
|
---|
198 |
|
---|
199 | case ntConcatenate:
|
---|
200 | return n.reduceConcatenation()
|
---|
201 |
|
---|
202 | case ntLoop, ntLazyloop:
|
---|
203 | return n.reduceRep()
|
---|
204 |
|
---|
205 | case ntGroup:
|
---|
206 | return n.reduceGroup()
|
---|
207 |
|
---|
208 | case ntSet, ntSetloop:
|
---|
209 | return n.reduceSet()
|
---|
210 |
|
---|
211 | default:
|
---|
212 | return n
|
---|
213 | }
|
---|
214 | }
|
---|
215 |
|
---|
216 | // Basic optimization. Single-letter alternations can be replaced
|
---|
217 | // by faster set specifications, and nested alternations with no
|
---|
218 | // intervening operators can be flattened:
|
---|
219 | //
|
---|
220 | // a|b|c|def|g|h -> [a-c]|def|[gh]
|
---|
221 | // apple|(?:orange|pear)|grape -> apple|orange|pear|grape
|
---|
222 | func (n *regexNode) reduceAlternation() *regexNode {
|
---|
223 | if len(n.children) == 0 {
|
---|
224 | return newRegexNode(ntNothing, n.options)
|
---|
225 | }
|
---|
226 |
|
---|
227 | wasLastSet := false
|
---|
228 | lastNodeCannotMerge := false
|
---|
229 | var optionsLast RegexOptions
|
---|
230 | var i, j int
|
---|
231 |
|
---|
232 | for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
|
---|
233 | at := n.children[i]
|
---|
234 |
|
---|
235 | if j < i {
|
---|
236 | n.children[j] = at
|
---|
237 | }
|
---|
238 |
|
---|
239 | for {
|
---|
240 | if at.t == ntAlternate {
|
---|
241 | for k := 0; k < len(at.children); k++ {
|
---|
242 | at.children[k].next = n
|
---|
243 | }
|
---|
244 | n.insertChildren(i+1, at.children)
|
---|
245 |
|
---|
246 | j--
|
---|
247 | } else if at.t == ntSet || at.t == ntOne {
|
---|
248 | // Cannot merge sets if L or I options differ, or if either are negated.
|
---|
249 | optionsAt := at.options & (RightToLeft | IgnoreCase)
|
---|
250 |
|
---|
251 | if at.t == ntSet {
|
---|
252 | if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge || !at.set.IsMergeable() {
|
---|
253 | wasLastSet = true
|
---|
254 | lastNodeCannotMerge = !at.set.IsMergeable()
|
---|
255 | optionsLast = optionsAt
|
---|
256 | break
|
---|
257 | }
|
---|
258 | } else if !wasLastSet || optionsLast != optionsAt || lastNodeCannotMerge {
|
---|
259 | wasLastSet = true
|
---|
260 | lastNodeCannotMerge = false
|
---|
261 | optionsLast = optionsAt
|
---|
262 | break
|
---|
263 | }
|
---|
264 |
|
---|
265 | // The last node was a Set or a One, we're a Set or One and our options are the same.
|
---|
266 | // Merge the two nodes.
|
---|
267 | j--
|
---|
268 | prev := n.children[j]
|
---|
269 |
|
---|
270 | var prevCharClass *CharSet
|
---|
271 | if prev.t == ntOne {
|
---|
272 | prevCharClass = &CharSet{}
|
---|
273 | prevCharClass.addChar(prev.ch)
|
---|
274 | } else {
|
---|
275 | prevCharClass = prev.set
|
---|
276 | }
|
---|
277 |
|
---|
278 | if at.t == ntOne {
|
---|
279 | prevCharClass.addChar(at.ch)
|
---|
280 | } else {
|
---|
281 | prevCharClass.addSet(*at.set)
|
---|
282 | }
|
---|
283 |
|
---|
284 | prev.t = ntSet
|
---|
285 | prev.set = prevCharClass
|
---|
286 | } else if at.t == ntNothing {
|
---|
287 | j--
|
---|
288 | } else {
|
---|
289 | wasLastSet = false
|
---|
290 | lastNodeCannotMerge = false
|
---|
291 | }
|
---|
292 | break
|
---|
293 | }
|
---|
294 | }
|
---|
295 |
|
---|
296 | if j < i {
|
---|
297 | n.removeChildren(j, i)
|
---|
298 | }
|
---|
299 |
|
---|
300 | return n.stripEnation(ntNothing)
|
---|
301 | }
|
---|
302 |
|
---|
303 | // Basic optimization. Adjacent strings can be concatenated.
|
---|
304 | //
|
---|
305 | // (?:abc)(?:def) -> abcdef
|
---|
306 | func (n *regexNode) reduceConcatenation() *regexNode {
|
---|
307 | // Eliminate empties and concat adjacent strings/chars
|
---|
308 |
|
---|
309 | var optionsLast RegexOptions
|
---|
310 | var optionsAt RegexOptions
|
---|
311 | var i, j int
|
---|
312 |
|
---|
313 | if len(n.children) == 0 {
|
---|
314 | return newRegexNode(ntEmpty, n.options)
|
---|
315 | }
|
---|
316 |
|
---|
317 | wasLastString := false
|
---|
318 |
|
---|
319 | for i, j = 0, 0; i < len(n.children); i, j = i+1, j+1 {
|
---|
320 | var at, prev *regexNode
|
---|
321 |
|
---|
322 | at = n.children[i]
|
---|
323 |
|
---|
324 | if j < i {
|
---|
325 | n.children[j] = at
|
---|
326 | }
|
---|
327 |
|
---|
328 | if at.t == ntConcatenate &&
|
---|
329 | ((at.options & RightToLeft) == (n.options & RightToLeft)) {
|
---|
330 | for k := 0; k < len(at.children); k++ {
|
---|
331 | at.children[k].next = n
|
---|
332 | }
|
---|
333 |
|
---|
334 | //insert at.children at i+1 index in n.children
|
---|
335 | n.insertChildren(i+1, at.children)
|
---|
336 |
|
---|
337 | j--
|
---|
338 | } else if at.t == ntMulti || at.t == ntOne {
|
---|
339 | // Cannot merge strings if L or I options differ
|
---|
340 | optionsAt = at.options & (RightToLeft | IgnoreCase)
|
---|
341 |
|
---|
342 | if !wasLastString || optionsLast != optionsAt {
|
---|
343 | wasLastString = true
|
---|
344 | optionsLast = optionsAt
|
---|
345 | continue
|
---|
346 | }
|
---|
347 |
|
---|
348 | j--
|
---|
349 | prev = n.children[j]
|
---|
350 |
|
---|
351 | if prev.t == ntOne {
|
---|
352 | prev.t = ntMulti
|
---|
353 | prev.str = []rune{prev.ch}
|
---|
354 | }
|
---|
355 |
|
---|
356 | if (optionsAt & RightToLeft) == 0 {
|
---|
357 | if at.t == ntOne {
|
---|
358 | prev.str = append(prev.str, at.ch)
|
---|
359 | } else {
|
---|
360 | prev.str = append(prev.str, at.str...)
|
---|
361 | }
|
---|
362 | } else {
|
---|
363 | if at.t == ntOne {
|
---|
364 | // insert at the front by expanding our slice, copying the data over, and then setting the value
|
---|
365 | prev.str = append(prev.str, 0)
|
---|
366 | copy(prev.str[1:], prev.str)
|
---|
367 | prev.str[0] = at.ch
|
---|
368 | } else {
|
---|
369 | //insert at the front...this one we'll make a new slice and copy both into it
|
---|
370 | merge := make([]rune, len(prev.str)+len(at.str))
|
---|
371 | copy(merge, at.str)
|
---|
372 | copy(merge[len(at.str):], prev.str)
|
---|
373 | prev.str = merge
|
---|
374 | }
|
---|
375 | }
|
---|
376 | } else if at.t == ntEmpty {
|
---|
377 | j--
|
---|
378 | } else {
|
---|
379 | wasLastString = false
|
---|
380 | }
|
---|
381 | }
|
---|
382 |
|
---|
383 | if j < i {
|
---|
384 | // remove indices j through i from the children
|
---|
385 | n.removeChildren(j, i)
|
---|
386 | }
|
---|
387 |
|
---|
388 | return n.stripEnation(ntEmpty)
|
---|
389 | }
|
---|
390 |
|
---|
391 | // Nested repeaters just get multiplied with each other if they're not
|
---|
392 | // too lumpy
|
---|
393 | func (n *regexNode) reduceRep() *regexNode {
|
---|
394 |
|
---|
395 | u := n
|
---|
396 | t := n.t
|
---|
397 | min := n.m
|
---|
398 | max := n.n
|
---|
399 |
|
---|
400 | for {
|
---|
401 | if len(u.children) == 0 {
|
---|
402 | break
|
---|
403 | }
|
---|
404 |
|
---|
405 | child := u.children[0]
|
---|
406 |
|
---|
407 | // multiply reps of the same type only
|
---|
408 | if child.t != t {
|
---|
409 | childType := child.t
|
---|
410 |
|
---|
411 | if !(childType >= ntOneloop && childType <= ntSetloop && t == ntLoop ||
|
---|
412 | childType >= ntOnelazy && childType <= ntSetlazy && t == ntLazyloop) {
|
---|
413 | break
|
---|
414 | }
|
---|
415 | }
|
---|
416 |
|
---|
417 | // child can be too lumpy to blur, e.g., (a {100,105}) {3} or (a {2,})?
|
---|
418 | // [but things like (a {2,})+ are not too lumpy...]
|
---|
419 | if u.m == 0 && child.m > 1 || child.n < child.m*2 {
|
---|
420 | break
|
---|
421 | }
|
---|
422 |
|
---|
423 | u = child
|
---|
424 | if u.m > 0 {
|
---|
425 | if (math.MaxInt32-1)/u.m < min {
|
---|
426 | u.m = math.MaxInt32
|
---|
427 | } else {
|
---|
428 | u.m = u.m * min
|
---|
429 | }
|
---|
430 | }
|
---|
431 | if u.n > 0 {
|
---|
432 | if (math.MaxInt32-1)/u.n < max {
|
---|
433 | u.n = math.MaxInt32
|
---|
434 | } else {
|
---|
435 | u.n = u.n * max
|
---|
436 | }
|
---|
437 | }
|
---|
438 | }
|
---|
439 |
|
---|
440 | if math.MaxInt32 == min {
|
---|
441 | return newRegexNode(ntNothing, n.options)
|
---|
442 | }
|
---|
443 | return u
|
---|
444 |
|
---|
445 | }
|
---|
446 |
|
---|
447 | // Simple optimization. If a concatenation or alternation has only
|
---|
448 | // one child strip out the intermediate node. If it has zero children,
|
---|
449 | // turn it into an empty.
|
---|
450 | func (n *regexNode) stripEnation(emptyType nodeType) *regexNode {
|
---|
451 | switch len(n.children) {
|
---|
452 | case 0:
|
---|
453 | return newRegexNode(emptyType, n.options)
|
---|
454 | case 1:
|
---|
455 | return n.children[0]
|
---|
456 | default:
|
---|
457 | return n
|
---|
458 | }
|
---|
459 | }
|
---|
460 |
|
---|
461 | func (n *regexNode) reduceGroup() *regexNode {
|
---|
462 | u := n
|
---|
463 |
|
---|
464 | for u.t == ntGroup {
|
---|
465 | u = u.children[0]
|
---|
466 | }
|
---|
467 |
|
---|
468 | return u
|
---|
469 | }
|
---|
470 |
|
---|
471 | // Simple optimization. If a set is a singleton, an inverse singleton,
|
---|
472 | // or empty, it's transformed accordingly.
|
---|
473 | func (n *regexNode) reduceSet() *regexNode {
|
---|
474 | // Extract empty-set, one and not-one case as special
|
---|
475 |
|
---|
476 | if n.set == nil {
|
---|
477 | n.t = ntNothing
|
---|
478 | } else if n.set.IsSingleton() {
|
---|
479 | n.ch = n.set.SingletonChar()
|
---|
480 | n.set = nil
|
---|
481 | n.t += (ntOne - ntSet)
|
---|
482 | } else if n.set.IsSingletonInverse() {
|
---|
483 | n.ch = n.set.SingletonChar()
|
---|
484 | n.set = nil
|
---|
485 | n.t += (ntNotone - ntSet)
|
---|
486 | }
|
---|
487 |
|
---|
488 | return n
|
---|
489 | }
|
---|
490 |
|
---|
491 | func (n *regexNode) reverseLeft() *regexNode {
|
---|
492 | if n.options&RightToLeft != 0 && n.t == ntConcatenate && len(n.children) > 0 {
|
---|
493 | //reverse children order
|
---|
494 | for left, right := 0, len(n.children)-1; left < right; left, right = left+1, right-1 {
|
---|
495 | n.children[left], n.children[right] = n.children[right], n.children[left]
|
---|
496 | }
|
---|
497 | }
|
---|
498 |
|
---|
499 | return n
|
---|
500 | }
|
---|
501 |
|
---|
502 | func (n *regexNode) makeQuantifier(lazy bool, min, max int) *regexNode {
|
---|
503 | if min == 0 && max == 0 {
|
---|
504 | return newRegexNode(ntEmpty, n.options)
|
---|
505 | }
|
---|
506 |
|
---|
507 | if min == 1 && max == 1 {
|
---|
508 | return n
|
---|
509 | }
|
---|
510 |
|
---|
511 | switch n.t {
|
---|
512 | case ntOne, ntNotone, ntSet:
|
---|
513 | if lazy {
|
---|
514 | n.makeRep(Onelazy, min, max)
|
---|
515 | } else {
|
---|
516 | n.makeRep(Oneloop, min, max)
|
---|
517 | }
|
---|
518 | return n
|
---|
519 |
|
---|
520 | default:
|
---|
521 | var t nodeType
|
---|
522 | if lazy {
|
---|
523 | t = ntLazyloop
|
---|
524 | } else {
|
---|
525 | t = ntLoop
|
---|
526 | }
|
---|
527 | result := newRegexNodeMN(t, n.options, min, max)
|
---|
528 | result.addChild(n)
|
---|
529 | return result
|
---|
530 | }
|
---|
531 | }
|
---|
532 |
|
---|
533 | // debug functions
|
---|
534 |
|
---|
535 | var typeStr = []string{
|
---|
536 | "Onerep", "Notonerep", "Setrep",
|
---|
537 | "Oneloop", "Notoneloop", "Setloop",
|
---|
538 | "Onelazy", "Notonelazy", "Setlazy",
|
---|
539 | "One", "Notone", "Set",
|
---|
540 | "Multi", "Ref",
|
---|
541 | "Bol", "Eol", "Boundary", "Nonboundary",
|
---|
542 | "Beginning", "Start", "EndZ", "End",
|
---|
543 | "Nothing", "Empty",
|
---|
544 | "Alternate", "Concatenate",
|
---|
545 | "Loop", "Lazyloop",
|
---|
546 | "Capture", "Group", "Require", "Prevent", "Greedy",
|
---|
547 | "Testref", "Testgroup",
|
---|
548 | "Unknown", "Unknown", "Unknown",
|
---|
549 | "Unknown", "Unknown", "Unknown",
|
---|
550 | "ECMABoundary", "NonECMABoundary",
|
---|
551 | }
|
---|
552 |
|
---|
553 | func (n *regexNode) description() string {
|
---|
554 | buf := &bytes.Buffer{}
|
---|
555 |
|
---|
556 | buf.WriteString(typeStr[n.t])
|
---|
557 |
|
---|
558 | if (n.options & ExplicitCapture) != 0 {
|
---|
559 | buf.WriteString("-C")
|
---|
560 | }
|
---|
561 | if (n.options & IgnoreCase) != 0 {
|
---|
562 | buf.WriteString("-I")
|
---|
563 | }
|
---|
564 | if (n.options & RightToLeft) != 0 {
|
---|
565 | buf.WriteString("-L")
|
---|
566 | }
|
---|
567 | if (n.options & Multiline) != 0 {
|
---|
568 | buf.WriteString("-M")
|
---|
569 | }
|
---|
570 | if (n.options & Singleline) != 0 {
|
---|
571 | buf.WriteString("-S")
|
---|
572 | }
|
---|
573 | if (n.options & IgnorePatternWhitespace) != 0 {
|
---|
574 | buf.WriteString("-X")
|
---|
575 | }
|
---|
576 | if (n.options & ECMAScript) != 0 {
|
---|
577 | buf.WriteString("-E")
|
---|
578 | }
|
---|
579 |
|
---|
580 | switch n.t {
|
---|
581 | case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntOne, ntNotone:
|
---|
582 | buf.WriteString("(Ch = " + CharDescription(n.ch) + ")")
|
---|
583 | break
|
---|
584 | case ntCapture:
|
---|
585 | buf.WriteString("(index = " + strconv.Itoa(n.m) + ", unindex = " + strconv.Itoa(n.n) + ")")
|
---|
586 | break
|
---|
587 | case ntRef, ntTestref:
|
---|
588 | buf.WriteString("(index = " + strconv.Itoa(n.m) + ")")
|
---|
589 | break
|
---|
590 | case ntMulti:
|
---|
591 | fmt.Fprintf(buf, "(String = %s)", string(n.str))
|
---|
592 | break
|
---|
593 | case ntSet, ntSetloop, ntSetlazy:
|
---|
594 | buf.WriteString("(Set = " + n.set.String() + ")")
|
---|
595 | break
|
---|
596 | }
|
---|
597 |
|
---|
598 | switch n.t {
|
---|
599 | case ntOneloop, ntNotoneloop, ntOnelazy, ntNotonelazy, ntSetloop, ntSetlazy, ntLoop, ntLazyloop:
|
---|
600 | buf.WriteString("(Min = ")
|
---|
601 | buf.WriteString(strconv.Itoa(n.m))
|
---|
602 | buf.WriteString(", Max = ")
|
---|
603 | if n.n == math.MaxInt32 {
|
---|
604 | buf.WriteString("inf")
|
---|
605 | } else {
|
---|
606 | buf.WriteString(strconv.Itoa(n.n))
|
---|
607 | }
|
---|
608 | buf.WriteString(")")
|
---|
609 |
|
---|
610 | break
|
---|
611 | }
|
---|
612 |
|
---|
613 | return buf.String()
|
---|
614 | }
|
---|
615 |
|
---|
616 | var padSpace = []byte(" ")
|
---|
617 |
|
---|
618 | func (t *RegexTree) Dump() string {
|
---|
619 | return t.root.dump()
|
---|
620 | }
|
---|
621 |
|
---|
622 | func (n *regexNode) dump() string {
|
---|
623 | var stack []int
|
---|
624 | CurNode := n
|
---|
625 | CurChild := 0
|
---|
626 |
|
---|
627 | buf := bytes.NewBufferString(CurNode.description())
|
---|
628 | buf.WriteRune('\n')
|
---|
629 |
|
---|
630 | for {
|
---|
631 | if CurNode.children != nil && CurChild < len(CurNode.children) {
|
---|
632 | stack = append(stack, CurChild+1)
|
---|
633 | CurNode = CurNode.children[CurChild]
|
---|
634 | CurChild = 0
|
---|
635 |
|
---|
636 | Depth := len(stack)
|
---|
637 | if Depth > 32 {
|
---|
638 | Depth = 32
|
---|
639 | }
|
---|
640 | buf.Write(padSpace[:Depth])
|
---|
641 | buf.WriteString(CurNode.description())
|
---|
642 | buf.WriteRune('\n')
|
---|
643 | } else {
|
---|
644 | if len(stack) == 0 {
|
---|
645 | break
|
---|
646 | }
|
---|
647 |
|
---|
648 | CurChild = stack[len(stack)-1]
|
---|
649 | stack = stack[:len(stack)-1]
|
---|
650 | CurNode = CurNode.next
|
---|
651 | }
|
---|
652 | }
|
---|
653 | return buf.String()
|
---|
654 | }
|
---|