1 | package regexp2
|
---|
2 |
|
---|
3 | import (
|
---|
4 | "bytes"
|
---|
5 | "fmt"
|
---|
6 | )
|
---|
7 |
|
---|
8 | // Match is a single regex result match that contains groups and repeated captures
|
---|
9 | // -Groups
|
---|
10 | // -Capture
|
---|
11 | type Match struct {
|
---|
12 | Group //embeded group 0
|
---|
13 |
|
---|
14 | regex *Regexp
|
---|
15 | otherGroups []Group
|
---|
16 |
|
---|
17 | // input to the match
|
---|
18 | textpos int
|
---|
19 | textstart int
|
---|
20 |
|
---|
21 | capcount int
|
---|
22 | caps []int
|
---|
23 | sparseCaps map[int]int
|
---|
24 |
|
---|
25 | // output from the match
|
---|
26 | matches [][]int
|
---|
27 | matchcount []int
|
---|
28 |
|
---|
29 | // whether we've done any balancing with this match. If we
|
---|
30 | // have done balancing, we'll need to do extra work in Tidy().
|
---|
31 | balancing bool
|
---|
32 | }
|
---|
33 |
|
---|
34 | // Group is an explicit or implit (group 0) matched group within the pattern
|
---|
35 | type Group struct {
|
---|
36 | Capture // the last capture of this group is embeded for ease of use
|
---|
37 |
|
---|
38 | Name string // group name
|
---|
39 | Captures []Capture // captures of this group
|
---|
40 | }
|
---|
41 |
|
---|
42 | // Capture is a single capture of text within the larger original string
|
---|
43 | type Capture struct {
|
---|
44 | // the original string
|
---|
45 | text []rune
|
---|
46 | // the position in the original string where the first character of
|
---|
47 | // captured substring was found.
|
---|
48 | Index int
|
---|
49 | // the length of the captured substring.
|
---|
50 | Length int
|
---|
51 | }
|
---|
52 |
|
---|
53 | // String returns the captured text as a String
|
---|
54 | func (c *Capture) String() string {
|
---|
55 | return string(c.text[c.Index : c.Index+c.Length])
|
---|
56 | }
|
---|
57 |
|
---|
58 | // Runes returns the captured text as a rune slice
|
---|
59 | func (c *Capture) Runes() []rune {
|
---|
60 | return c.text[c.Index : c.Index+c.Length]
|
---|
61 | }
|
---|
62 |
|
---|
63 | func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
|
---|
64 | m := Match{
|
---|
65 | regex: regex,
|
---|
66 | matchcount: make([]int, capcount),
|
---|
67 | matches: make([][]int, capcount),
|
---|
68 | textstart: startpos,
|
---|
69 | balancing: false,
|
---|
70 | }
|
---|
71 | m.Name = "0"
|
---|
72 | m.text = text
|
---|
73 | m.matches[0] = make([]int, 2)
|
---|
74 | return &m
|
---|
75 | }
|
---|
76 |
|
---|
77 | func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
|
---|
78 | m := newMatch(regex, capcount, text, startpos)
|
---|
79 | m.sparseCaps = caps
|
---|
80 | return m
|
---|
81 | }
|
---|
82 |
|
---|
83 | func (m *Match) reset(text []rune, textstart int) {
|
---|
84 | m.text = text
|
---|
85 | m.textstart = textstart
|
---|
86 | for i := 0; i < len(m.matchcount); i++ {
|
---|
87 | m.matchcount[i] = 0
|
---|
88 | }
|
---|
89 | m.balancing = false
|
---|
90 | }
|
---|
91 |
|
---|
92 | func (m *Match) tidy(textpos int) {
|
---|
93 |
|
---|
94 | interval := m.matches[0]
|
---|
95 | m.Index = interval[0]
|
---|
96 | m.Length = interval[1]
|
---|
97 | m.textpos = textpos
|
---|
98 | m.capcount = m.matchcount[0]
|
---|
99 | //copy our root capture to the list
|
---|
100 | m.Group.Captures = []Capture{m.Group.Capture}
|
---|
101 |
|
---|
102 | if m.balancing {
|
---|
103 | // The idea here is that we want to compact all of our unbalanced captures. To do that we
|
---|
104 | // use j basically as a count of how many unbalanced captures we have at any given time
|
---|
105 | // (really j is an index, but j/2 is the count). First we skip past all of the real captures
|
---|
106 | // until we find a balance captures. Then we check each subsequent entry. If it's a balance
|
---|
107 | // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
|
---|
108 | // it down to the last free position.
|
---|
109 | for cap := 0; cap < len(m.matchcount); cap++ {
|
---|
110 | limit := m.matchcount[cap] * 2
|
---|
111 | matcharray := m.matches[cap]
|
---|
112 |
|
---|
113 | var i, j int
|
---|
114 |
|
---|
115 | for i = 0; i < limit; i++ {
|
---|
116 | if matcharray[i] < 0 {
|
---|
117 | break
|
---|
118 | }
|
---|
119 | }
|
---|
120 |
|
---|
121 | for j = i; i < limit; i++ {
|
---|
122 | if matcharray[i] < 0 {
|
---|
123 | // skip negative values
|
---|
124 | j--
|
---|
125 | } else {
|
---|
126 | // but if we find something positive (an actual capture), copy it back to the last
|
---|
127 | // unbalanced position.
|
---|
128 | if i != j {
|
---|
129 | matcharray[j] = matcharray[i]
|
---|
130 | }
|
---|
131 | j++
|
---|
132 | }
|
---|
133 | }
|
---|
134 |
|
---|
135 | m.matchcount[cap] = j / 2
|
---|
136 | }
|
---|
137 |
|
---|
138 | m.balancing = false
|
---|
139 | }
|
---|
140 | }
|
---|
141 |
|
---|
142 | // isMatched tells if a group was matched by capnum
|
---|
143 | func (m *Match) isMatched(cap int) bool {
|
---|
144 | return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1)
|
---|
145 | }
|
---|
146 |
|
---|
147 | // matchIndex returns the index of the last specified matched group by capnum
|
---|
148 | func (m *Match) matchIndex(cap int) int {
|
---|
149 | i := m.matches[cap][m.matchcount[cap]*2-2]
|
---|
150 | if i >= 0 {
|
---|
151 | return i
|
---|
152 | }
|
---|
153 |
|
---|
154 | return m.matches[cap][-3-i]
|
---|
155 | }
|
---|
156 |
|
---|
157 | // matchLength returns the length of the last specified matched group by capnum
|
---|
158 | func (m *Match) matchLength(cap int) int {
|
---|
159 | i := m.matches[cap][m.matchcount[cap]*2-1]
|
---|
160 | if i >= 0 {
|
---|
161 | return i
|
---|
162 | }
|
---|
163 |
|
---|
164 | return m.matches[cap][-3-i]
|
---|
165 | }
|
---|
166 |
|
---|
167 | // Nonpublic builder: add a capture to the group specified by "c"
|
---|
168 | func (m *Match) addMatch(c, start, l int) {
|
---|
169 |
|
---|
170 | if m.matches[c] == nil {
|
---|
171 | m.matches[c] = make([]int, 2)
|
---|
172 | }
|
---|
173 |
|
---|
174 | capcount := m.matchcount[c]
|
---|
175 |
|
---|
176 | if capcount*2+2 > len(m.matches[c]) {
|
---|
177 | oldmatches := m.matches[c]
|
---|
178 | newmatches := make([]int, capcount*8)
|
---|
179 | copy(newmatches, oldmatches[:capcount*2])
|
---|
180 | m.matches[c] = newmatches
|
---|
181 | }
|
---|
182 |
|
---|
183 | m.matches[c][capcount*2] = start
|
---|
184 | m.matches[c][capcount*2+1] = l
|
---|
185 | m.matchcount[c] = capcount + 1
|
---|
186 | //log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches)
|
---|
187 | }
|
---|
188 |
|
---|
189 | // Nonpublic builder: Add a capture to balance the specified group. This is used by the
|
---|
190 | // balanced match construct. (?<foo-foo2>...)
|
---|
191 | //
|
---|
192 | // If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
|
---|
193 | // However, since we have backtracking, we need to keep track of everything.
|
---|
194 | func (m *Match) balanceMatch(c int) {
|
---|
195 | m.balancing = true
|
---|
196 |
|
---|
197 | // we'll look at the last capture first
|
---|
198 | capcount := m.matchcount[c]
|
---|
199 | target := capcount*2 - 2
|
---|
200 |
|
---|
201 | // first see if it is negative, and therefore is a reference to the next available
|
---|
202 | // capture group for balancing. If it is, we'll reset target to point to that capture.
|
---|
203 | if m.matches[c][target] < 0 {
|
---|
204 | target = -3 - m.matches[c][target]
|
---|
205 | }
|
---|
206 |
|
---|
207 | // move back to the previous capture
|
---|
208 | target -= 2
|
---|
209 |
|
---|
210 | // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
|
---|
211 | if target >= 0 && m.matches[c][target] < 0 {
|
---|
212 | m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
|
---|
213 | } else {
|
---|
214 | m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
|
---|
215 | }
|
---|
216 | }
|
---|
217 |
|
---|
218 | // Nonpublic builder: removes a group match by capnum
|
---|
219 | func (m *Match) removeMatch(c int) {
|
---|
220 | m.matchcount[c]--
|
---|
221 | }
|
---|
222 |
|
---|
223 | // GroupCount returns the number of groups this match has matched
|
---|
224 | func (m *Match) GroupCount() int {
|
---|
225 | return len(m.matchcount)
|
---|
226 | }
|
---|
227 |
|
---|
228 | // GroupByName returns a group based on the name of the group, or nil if the group name does not exist
|
---|
229 | func (m *Match) GroupByName(name string) *Group {
|
---|
230 | num := m.regex.GroupNumberFromName(name)
|
---|
231 | if num < 0 {
|
---|
232 | return nil
|
---|
233 | }
|
---|
234 | return m.GroupByNumber(num)
|
---|
235 | }
|
---|
236 |
|
---|
237 | // GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist
|
---|
238 | func (m *Match) GroupByNumber(num int) *Group {
|
---|
239 | // check our sparse map
|
---|
240 | if m.sparseCaps != nil {
|
---|
241 | if newNum, ok := m.sparseCaps[num]; ok {
|
---|
242 | num = newNum
|
---|
243 | }
|
---|
244 | }
|
---|
245 | if num >= len(m.matchcount) || num < 0 {
|
---|
246 | return nil
|
---|
247 | }
|
---|
248 |
|
---|
249 | if num == 0 {
|
---|
250 | return &m.Group
|
---|
251 | }
|
---|
252 |
|
---|
253 | m.populateOtherGroups()
|
---|
254 |
|
---|
255 | return &m.otherGroups[num-1]
|
---|
256 | }
|
---|
257 |
|
---|
258 | // Groups returns all the capture groups, starting with group 0 (the full match)
|
---|
259 | func (m *Match) Groups() []Group {
|
---|
260 | m.populateOtherGroups()
|
---|
261 | g := make([]Group, len(m.otherGroups)+1)
|
---|
262 | g[0] = m.Group
|
---|
263 | copy(g[1:], m.otherGroups)
|
---|
264 | return g
|
---|
265 | }
|
---|
266 |
|
---|
267 | func (m *Match) populateOtherGroups() {
|
---|
268 | // Construct all the Group objects first time called
|
---|
269 | if m.otherGroups == nil {
|
---|
270 | m.otherGroups = make([]Group, len(m.matchcount)-1)
|
---|
271 | for i := 0; i < len(m.otherGroups); i++ {
|
---|
272 | m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1])
|
---|
273 | }
|
---|
274 | }
|
---|
275 | }
|
---|
276 |
|
---|
277 | func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
|
---|
278 | c := m.matchcount[groupnum]
|
---|
279 | if c == 0 {
|
---|
280 | return
|
---|
281 | }
|
---|
282 |
|
---|
283 | matches := m.matches[groupnum]
|
---|
284 |
|
---|
285 | index := matches[(c-1)*2]
|
---|
286 | last := index + matches[(c*2)-1]
|
---|
287 |
|
---|
288 | for ; index < last; index++ {
|
---|
289 | buf.WriteRune(m.text[index])
|
---|
290 | }
|
---|
291 | }
|
---|
292 |
|
---|
293 | func newGroup(name string, text []rune, caps []int, capcount int) Group {
|
---|
294 | g := Group{}
|
---|
295 | g.text = text
|
---|
296 | if capcount > 0 {
|
---|
297 | g.Index = caps[(capcount-1)*2]
|
---|
298 | g.Length = caps[(capcount*2)-1]
|
---|
299 | }
|
---|
300 | g.Name = name
|
---|
301 | g.Captures = make([]Capture, capcount)
|
---|
302 | for i := 0; i < capcount; i++ {
|
---|
303 | g.Captures[i] = Capture{
|
---|
304 | text: text,
|
---|
305 | Index: caps[i*2],
|
---|
306 | Length: caps[i*2+1],
|
---|
307 | }
|
---|
308 | }
|
---|
309 | //log.Printf("newGroup! capcount %v, %+v", capcount, g)
|
---|
310 |
|
---|
311 | return g
|
---|
312 | }
|
---|
313 |
|
---|
314 | func (m *Match) dump() string {
|
---|
315 | buf := &bytes.Buffer{}
|
---|
316 | buf.WriteRune('\n')
|
---|
317 | if len(m.sparseCaps) > 0 {
|
---|
318 | for k, v := range m.sparseCaps {
|
---|
319 | fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
|
---|
320 | }
|
---|
321 | }
|
---|
322 |
|
---|
323 | for i, g := range m.Groups() {
|
---|
324 | fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
|
---|
325 |
|
---|
326 | for _, c := range g.Captures {
|
---|
327 | fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String())
|
---|
328 | }
|
---|
329 | }
|
---|
330 | /*
|
---|
331 | for i := 0; i < len(m.matchcount); i++ {
|
---|
332 | fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
|
---|
333 |
|
---|
334 | for j := 0; j < m.matchcount[i]; j++ {
|
---|
335 | text := ""
|
---|
336 |
|
---|
337 | if m.matches[i][j*2] >= 0 {
|
---|
338 | start := m.matches[i][j*2]
|
---|
339 | text = m.text[start : start+m.matches[i][j*2+1]]
|
---|
340 | }
|
---|
341 |
|
---|
342 | fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
|
---|
343 | }
|
---|
344 | }
|
---|
345 | */
|
---|
346 | return buf.String()
|
---|
347 | }
|
---|