source: code/trunk/vendor/github.com/dlclark/regexp2/syntax/code.go@ 67

Last change on this file since 67 was 67, checked in by Izuru Yakumo, 23 months ago

Use vendored modules

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 8.2 KB
Line 
1package syntax
2
3import (
4 "bytes"
5 "fmt"
6 "math"
7)
8
9// similar to prog.go in the go regex package...also with comment 'may not belong in this package'
10
11// File provides operator constants for use by the Builder and the Machine.
12
13// Implementation notes:
14//
15// Regexps are built into RegexCodes, which contain an operation array,
16// a string table, and some constants.
17//
18// Each operation is one of the codes below, followed by the integer
19// operands specified for each op.
20//
21// Strings and sets are indices into a string table.
22
23type InstOp int
24
25const (
26 // lef/back operands description
27
28 Onerep InstOp = 0 // lef,back char,min,max a {n}
29 Notonerep = 1 // lef,back char,min,max .{n}
30 Setrep = 2 // lef,back set,min,max [\d]{n}
31
32 Oneloop = 3 // lef,back char,min,max a {,n}
33 Notoneloop = 4 // lef,back char,min,max .{,n}
34 Setloop = 5 // lef,back set,min,max [\d]{,n}
35
36 Onelazy = 6 // lef,back char,min,max a {,n}?
37 Notonelazy = 7 // lef,back char,min,max .{,n}?
38 Setlazy = 8 // lef,back set,min,max [\d]{,n}?
39
40 One = 9 // lef char a
41 Notone = 10 // lef char [^a]
42 Set = 11 // lef set [a-z\s] \w \s \d
43
44 Multi = 12 // lef string abcd
45 Ref = 13 // lef group \#
46
47 Bol = 14 // ^
48 Eol = 15 // $
49 Boundary = 16 // \b
50 Nonboundary = 17 // \B
51 Beginning = 18 // \A
52 Start = 19 // \G
53 EndZ = 20 // \Z
54 End = 21 // \Z
55
56 Nothing = 22 // Reject!
57
58 // Primitive control structures
59
60 Lazybranch = 23 // back jump straight first
61 Branchmark = 24 // back jump branch first for loop
62 Lazybranchmark = 25 // back jump straight first for loop
63 Nullcount = 26 // back val set counter, null mark
64 Setcount = 27 // back val set counter, make mark
65 Branchcount = 28 // back jump,limit branch++ if zero<=c<limit
66 Lazybranchcount = 29 // back jump,limit same, but straight first
67 Nullmark = 30 // back save position
68 Setmark = 31 // back save position
69 Capturemark = 32 // back group define group
70 Getmark = 33 // back recall position
71 Setjump = 34 // back save backtrack state
72 Backjump = 35 // zap back to saved state
73 Forejump = 36 // zap backtracking state
74 Testref = 37 // backtrack if ref undefined
75 Goto = 38 // jump just go
76
77 Prune = 39 // prune it baby
78 Stop = 40 // done!
79
80 ECMABoundary = 41 // \b
81 NonECMABoundary = 42 // \B
82
83 // Modifiers for alternate modes
84
85 Mask = 63 // Mask to get unmodified ordinary operator
86 Rtl = 64 // bit to indicate that we're reverse scanning.
87 Back = 128 // bit to indicate that we're backtracking.
88 Back2 = 256 // bit to indicate that we're backtracking on a second branch.
89 Ci = 512 // bit to indicate that we're case-insensitive.
90)
91
92type Code struct {
93 Codes []int // the code
94 Strings [][]rune // string table
95 Sets []*CharSet //character set table
96 TrackCount int // how many instructions use backtracking
97 Caps map[int]int // mapping of user group numbers -> impl group slots
98 Capsize int // number of impl group slots
99 FcPrefix *Prefix // the set of candidate first characters (may be null)
100 BmPrefix *BmPrefix // the fixed prefix string as a Boyer-Moore machine (may be null)
101 Anchors AnchorLoc // the set of zero-length start anchors (RegexFCD.Bol, etc)
102 RightToLeft bool // true if right to left
103}
104
105func opcodeBacktracks(op InstOp) bool {
106 op &= Mask
107
108 switch op {
109 case Oneloop, Notoneloop, Setloop, Onelazy, Notonelazy, Setlazy, Lazybranch, Branchmark, Lazybranchmark,
110 Nullcount, Setcount, Branchcount, Lazybranchcount, Setmark, Capturemark, Getmark, Setjump, Backjump,
111 Forejump, Goto:
112 return true
113
114 default:
115 return false
116 }
117}
118
119func opcodeSize(op InstOp) int {
120 op &= Mask
121
122 switch op {
123 case Nothing, Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary, Beginning, Start, EndZ,
124 End, Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump, Stop:
125 return 1
126
127 case One, Notone, Multi, Ref, Testref, Goto, Nullcount, Setcount, Lazybranch, Branchmark, Lazybranchmark,
128 Prune, Set:
129 return 2
130
131 case Capturemark, Branchcount, Lazybranchcount, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy,
132 Setlazy, Setrep, Setloop:
133 return 3
134
135 default:
136 panic(fmt.Errorf("Unexpected op code: %v", op))
137 }
138}
139
140var codeStr = []string{
141 "Onerep", "Notonerep", "Setrep",
142 "Oneloop", "Notoneloop", "Setloop",
143 "Onelazy", "Notonelazy", "Setlazy",
144 "One", "Notone", "Set",
145 "Multi", "Ref",
146 "Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End",
147 "Nothing",
148 "Lazybranch", "Branchmark", "Lazybranchmark",
149 "Nullcount", "Setcount", "Branchcount", "Lazybranchcount",
150 "Nullmark", "Setmark", "Capturemark", "Getmark",
151 "Setjump", "Backjump", "Forejump", "Testref", "Goto",
152 "Prune", "Stop",
153 "ECMABoundary", "NonECMABoundary",
154}
155
156func operatorDescription(op InstOp) string {
157 desc := codeStr[op&Mask]
158 if (op & Ci) != 0 {
159 desc += "-Ci"
160 }
161 if (op & Rtl) != 0 {
162 desc += "-Rtl"
163 }
164 if (op & Back) != 0 {
165 desc += "-Back"
166 }
167 if (op & Back2) != 0 {
168 desc += "-Back2"
169 }
170
171 return desc
172}
173
174// OpcodeDescription is a humman readable string of the specific offset
175func (c *Code) OpcodeDescription(offset int) string {
176 buf := &bytes.Buffer{}
177
178 op := InstOp(c.Codes[offset])
179 fmt.Fprintf(buf, "%06d ", offset)
180
181 if opcodeBacktracks(op & Mask) {
182 buf.WriteString("*")
183 } else {
184 buf.WriteString(" ")
185 }
186 buf.WriteString(operatorDescription(op))
187 buf.WriteString("(")
188 op &= Mask
189
190 switch op {
191 case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy:
192 buf.WriteString("Ch = ")
193 buf.WriteString(CharDescription(rune(c.Codes[offset+1])))
194
195 case Set, Setrep, Setloop, Setlazy:
196 buf.WriteString("Set = ")
197 buf.WriteString(c.Sets[c.Codes[offset+1]].String())
198
199 case Multi:
200 fmt.Fprintf(buf, "String = %s", string(c.Strings[c.Codes[offset+1]]))
201
202 case Ref, Testref:
203 fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
204
205 case Capturemark:
206 fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
207 if c.Codes[offset+2] != -1 {
208 fmt.Fprintf(buf, ", Unindex = %d", c.Codes[offset+2])
209 }
210
211 case Nullcount, Setcount:
212 fmt.Fprintf(buf, "Value = %d", c.Codes[offset+1])
213
214 case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount:
215 fmt.Fprintf(buf, "Addr = %d", c.Codes[offset+1])
216 }
217
218 switch op {
219 case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy:
220 buf.WriteString(", Rep = ")
221 if c.Codes[offset+2] == math.MaxInt32 {
222 buf.WriteString("inf")
223 } else {
224 fmt.Fprintf(buf, "%d", c.Codes[offset+2])
225 }
226
227 case Branchcount, Lazybranchcount:
228 buf.WriteString(", Limit = ")
229 if c.Codes[offset+2] == math.MaxInt32 {
230 buf.WriteString("inf")
231 } else {
232 fmt.Fprintf(buf, "%d", c.Codes[offset+2])
233 }
234
235 }
236
237 buf.WriteString(")")
238
239 return buf.String()
240}
241
242func (c *Code) Dump() string {
243 buf := &bytes.Buffer{}
244
245 if c.RightToLeft {
246 fmt.Fprintln(buf, "Direction: right-to-left")
247 } else {
248 fmt.Fprintln(buf, "Direction: left-to-right")
249 }
250 if c.FcPrefix == nil {
251 fmt.Fprintln(buf, "Firstchars: n/a")
252 } else {
253 fmt.Fprintf(buf, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String())
254 }
255
256 if c.BmPrefix == nil {
257 fmt.Fprintln(buf, "Prefix: n/a")
258 } else {
259 fmt.Fprintf(buf, "Prefix: %v\n", Escape(c.BmPrefix.String()))
260 }
261
262 fmt.Fprintf(buf, "Anchors: %v\n", c.Anchors)
263 fmt.Fprintln(buf)
264
265 if c.BmPrefix != nil {
266 fmt.Fprintln(buf, "BoyerMoore:")
267 fmt.Fprintln(buf, c.BmPrefix.Dump(" "))
268 }
269 for i := 0; i < len(c.Codes); i += opcodeSize(InstOp(c.Codes[i])) {
270 fmt.Fprintln(buf, c.OpcodeDescription(i))
271 }
272
273 return buf.String()
274}
Note: See TracBrowser for help on using the repository browser.