[67] | 1 | package regexp2
|
---|
| 2 |
|
---|
| 3 | import (
|
---|
| 4 | "bytes"
|
---|
| 5 | "fmt"
|
---|
| 6 | )
|
---|
| 7 |
|
---|
| 8 | // Match is a single regex result match that contains groups and repeated captures
|
---|
| 9 | // -Groups
|
---|
| 10 | // -Capture
|
---|
| 11 | type Match struct {
|
---|
| 12 | Group //embeded group 0
|
---|
| 13 |
|
---|
| 14 | regex *Regexp
|
---|
| 15 | otherGroups []Group
|
---|
| 16 |
|
---|
| 17 | // input to the match
|
---|
| 18 | textpos int
|
---|
| 19 | textstart int
|
---|
| 20 |
|
---|
| 21 | capcount int
|
---|
| 22 | caps []int
|
---|
| 23 | sparseCaps map[int]int
|
---|
| 24 |
|
---|
| 25 | // output from the match
|
---|
| 26 | matches [][]int
|
---|
| 27 | matchcount []int
|
---|
| 28 |
|
---|
| 29 | // whether we've done any balancing with this match. If we
|
---|
| 30 | // have done balancing, we'll need to do extra work in Tidy().
|
---|
| 31 | balancing bool
|
---|
| 32 | }
|
---|
| 33 |
|
---|
| 34 | // Group is an explicit or implit (group 0) matched group within the pattern
|
---|
| 35 | type Group struct {
|
---|
| 36 | Capture // the last capture of this group is embeded for ease of use
|
---|
| 37 |
|
---|
| 38 | Name string // group name
|
---|
| 39 | Captures []Capture // captures of this group
|
---|
| 40 | }
|
---|
| 41 |
|
---|
| 42 | // Capture is a single capture of text within the larger original string
|
---|
| 43 | type Capture struct {
|
---|
| 44 | // the original string
|
---|
| 45 | text []rune
|
---|
| 46 | // the position in the original string where the first character of
|
---|
| 47 | // captured substring was found.
|
---|
| 48 | Index int
|
---|
| 49 | // the length of the captured substring.
|
---|
| 50 | Length int
|
---|
| 51 | }
|
---|
| 52 |
|
---|
| 53 | // String returns the captured text as a String
|
---|
| 54 | func (c *Capture) String() string {
|
---|
| 55 | return string(c.text[c.Index : c.Index+c.Length])
|
---|
| 56 | }
|
---|
| 57 |
|
---|
| 58 | // Runes returns the captured text as a rune slice
|
---|
| 59 | func (c *Capture) Runes() []rune {
|
---|
| 60 | return c.text[c.Index : c.Index+c.Length]
|
---|
| 61 | }
|
---|
| 62 |
|
---|
| 63 | func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
|
---|
| 64 | m := Match{
|
---|
| 65 | regex: regex,
|
---|
| 66 | matchcount: make([]int, capcount),
|
---|
| 67 | matches: make([][]int, capcount),
|
---|
| 68 | textstart: startpos,
|
---|
| 69 | balancing: false,
|
---|
| 70 | }
|
---|
| 71 | m.Name = "0"
|
---|
| 72 | m.text = text
|
---|
| 73 | m.matches[0] = make([]int, 2)
|
---|
| 74 | return &m
|
---|
| 75 | }
|
---|
| 76 |
|
---|
| 77 | func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
|
---|
| 78 | m := newMatch(regex, capcount, text, startpos)
|
---|
| 79 | m.sparseCaps = caps
|
---|
| 80 | return m
|
---|
| 81 | }
|
---|
| 82 |
|
---|
| 83 | func (m *Match) reset(text []rune, textstart int) {
|
---|
| 84 | m.text = text
|
---|
| 85 | m.textstart = textstart
|
---|
| 86 | for i := 0; i < len(m.matchcount); i++ {
|
---|
| 87 | m.matchcount[i] = 0
|
---|
| 88 | }
|
---|
| 89 | m.balancing = false
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | func (m *Match) tidy(textpos int) {
|
---|
| 93 |
|
---|
| 94 | interval := m.matches[0]
|
---|
| 95 | m.Index = interval[0]
|
---|
| 96 | m.Length = interval[1]
|
---|
| 97 | m.textpos = textpos
|
---|
| 98 | m.capcount = m.matchcount[0]
|
---|
| 99 | //copy our root capture to the list
|
---|
| 100 | m.Group.Captures = []Capture{m.Group.Capture}
|
---|
| 101 |
|
---|
| 102 | if m.balancing {
|
---|
| 103 | // The idea here is that we want to compact all of our unbalanced captures. To do that we
|
---|
| 104 | // use j basically as a count of how many unbalanced captures we have at any given time
|
---|
| 105 | // (really j is an index, but j/2 is the count). First we skip past all of the real captures
|
---|
| 106 | // until we find a balance captures. Then we check each subsequent entry. If it's a balance
|
---|
| 107 | // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
|
---|
| 108 | // it down to the last free position.
|
---|
| 109 | for cap := 0; cap < len(m.matchcount); cap++ {
|
---|
| 110 | limit := m.matchcount[cap] * 2
|
---|
| 111 | matcharray := m.matches[cap]
|
---|
| 112 |
|
---|
| 113 | var i, j int
|
---|
| 114 |
|
---|
| 115 | for i = 0; i < limit; i++ {
|
---|
| 116 | if matcharray[i] < 0 {
|
---|
| 117 | break
|
---|
| 118 | }
|
---|
| 119 | }
|
---|
| 120 |
|
---|
| 121 | for j = i; i < limit; i++ {
|
---|
| 122 | if matcharray[i] < 0 {
|
---|
| 123 | // skip negative values
|
---|
| 124 | j--
|
---|
| 125 | } else {
|
---|
| 126 | // but if we find something positive (an actual capture), copy it back to the last
|
---|
| 127 | // unbalanced position.
|
---|
| 128 | if i != j {
|
---|
| 129 | matcharray[j] = matcharray[i]
|
---|
| 130 | }
|
---|
| 131 | j++
|
---|
| 132 | }
|
---|
| 133 | }
|
---|
| 134 |
|
---|
| 135 | m.matchcount[cap] = j / 2
|
---|
| 136 | }
|
---|
| 137 |
|
---|
| 138 | m.balancing = false
|
---|
| 139 | }
|
---|
| 140 | }
|
---|
| 141 |
|
---|
| 142 | // isMatched tells if a group was matched by capnum
|
---|
| 143 | func (m *Match) isMatched(cap int) bool {
|
---|
| 144 | return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1)
|
---|
| 145 | }
|
---|
| 146 |
|
---|
| 147 | // matchIndex returns the index of the last specified matched group by capnum
|
---|
| 148 | func (m *Match) matchIndex(cap int) int {
|
---|
| 149 | i := m.matches[cap][m.matchcount[cap]*2-2]
|
---|
| 150 | if i >= 0 {
|
---|
| 151 | return i
|
---|
| 152 | }
|
---|
| 153 |
|
---|
| 154 | return m.matches[cap][-3-i]
|
---|
| 155 | }
|
---|
| 156 |
|
---|
| 157 | // matchLength returns the length of the last specified matched group by capnum
|
---|
| 158 | func (m *Match) matchLength(cap int) int {
|
---|
| 159 | i := m.matches[cap][m.matchcount[cap]*2-1]
|
---|
| 160 | if i >= 0 {
|
---|
| 161 | return i
|
---|
| 162 | }
|
---|
| 163 |
|
---|
| 164 | return m.matches[cap][-3-i]
|
---|
| 165 | }
|
---|
| 166 |
|
---|
| 167 | // Nonpublic builder: add a capture to the group specified by "c"
|
---|
| 168 | func (m *Match) addMatch(c, start, l int) {
|
---|
| 169 |
|
---|
| 170 | if m.matches[c] == nil {
|
---|
| 171 | m.matches[c] = make([]int, 2)
|
---|
| 172 | }
|
---|
| 173 |
|
---|
| 174 | capcount := m.matchcount[c]
|
---|
| 175 |
|
---|
| 176 | if capcount*2+2 > len(m.matches[c]) {
|
---|
| 177 | oldmatches := m.matches[c]
|
---|
| 178 | newmatches := make([]int, capcount*8)
|
---|
| 179 | copy(newmatches, oldmatches[:capcount*2])
|
---|
| 180 | m.matches[c] = newmatches
|
---|
| 181 | }
|
---|
| 182 |
|
---|
| 183 | m.matches[c][capcount*2] = start
|
---|
| 184 | m.matches[c][capcount*2+1] = l
|
---|
| 185 | m.matchcount[c] = capcount + 1
|
---|
| 186 | //log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches)
|
---|
| 187 | }
|
---|
| 188 |
|
---|
| 189 | // Nonpublic builder: Add a capture to balance the specified group. This is used by the
|
---|
| 190 | // balanced match construct. (?<foo-foo2>...)
|
---|
| 191 | //
|
---|
| 192 | // If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
|
---|
| 193 | // However, since we have backtracking, we need to keep track of everything.
|
---|
| 194 | func (m *Match) balanceMatch(c int) {
|
---|
| 195 | m.balancing = true
|
---|
| 196 |
|
---|
| 197 | // we'll look at the last capture first
|
---|
| 198 | capcount := m.matchcount[c]
|
---|
| 199 | target := capcount*2 - 2
|
---|
| 200 |
|
---|
| 201 | // first see if it is negative, and therefore is a reference to the next available
|
---|
| 202 | // capture group for balancing. If it is, we'll reset target to point to that capture.
|
---|
| 203 | if m.matches[c][target] < 0 {
|
---|
| 204 | target = -3 - m.matches[c][target]
|
---|
| 205 | }
|
---|
| 206 |
|
---|
| 207 | // move back to the previous capture
|
---|
| 208 | target -= 2
|
---|
| 209 |
|
---|
| 210 | // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
|
---|
| 211 | if target >= 0 && m.matches[c][target] < 0 {
|
---|
| 212 | m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
|
---|
| 213 | } else {
|
---|
| 214 | m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
|
---|
| 215 | }
|
---|
| 216 | }
|
---|
| 217 |
|
---|
| 218 | // Nonpublic builder: removes a group match by capnum
|
---|
| 219 | func (m *Match) removeMatch(c int) {
|
---|
| 220 | m.matchcount[c]--
|
---|
| 221 | }
|
---|
| 222 |
|
---|
| 223 | // GroupCount returns the number of groups this match has matched
|
---|
| 224 | func (m *Match) GroupCount() int {
|
---|
| 225 | return len(m.matchcount)
|
---|
| 226 | }
|
---|
| 227 |
|
---|
| 228 | // GroupByName returns a group based on the name of the group, or nil if the group name does not exist
|
---|
| 229 | func (m *Match) GroupByName(name string) *Group {
|
---|
| 230 | num := m.regex.GroupNumberFromName(name)
|
---|
| 231 | if num < 0 {
|
---|
| 232 | return nil
|
---|
| 233 | }
|
---|
| 234 | return m.GroupByNumber(num)
|
---|
| 235 | }
|
---|
| 236 |
|
---|
| 237 | // GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist
|
---|
| 238 | func (m *Match) GroupByNumber(num int) *Group {
|
---|
| 239 | // check our sparse map
|
---|
| 240 | if m.sparseCaps != nil {
|
---|
| 241 | if newNum, ok := m.sparseCaps[num]; ok {
|
---|
| 242 | num = newNum
|
---|
| 243 | }
|
---|
| 244 | }
|
---|
| 245 | if num >= len(m.matchcount) || num < 0 {
|
---|
| 246 | return nil
|
---|
| 247 | }
|
---|
| 248 |
|
---|
| 249 | if num == 0 {
|
---|
| 250 | return &m.Group
|
---|
| 251 | }
|
---|
| 252 |
|
---|
| 253 | m.populateOtherGroups()
|
---|
| 254 |
|
---|
| 255 | return &m.otherGroups[num-1]
|
---|
| 256 | }
|
---|
| 257 |
|
---|
| 258 | // Groups returns all the capture groups, starting with group 0 (the full match)
|
---|
| 259 | func (m *Match) Groups() []Group {
|
---|
| 260 | m.populateOtherGroups()
|
---|
| 261 | g := make([]Group, len(m.otherGroups)+1)
|
---|
| 262 | g[0] = m.Group
|
---|
| 263 | copy(g[1:], m.otherGroups)
|
---|
| 264 | return g
|
---|
| 265 | }
|
---|
| 266 |
|
---|
| 267 | func (m *Match) populateOtherGroups() {
|
---|
| 268 | // Construct all the Group objects first time called
|
---|
| 269 | if m.otherGroups == nil {
|
---|
| 270 | m.otherGroups = make([]Group, len(m.matchcount)-1)
|
---|
| 271 | for i := 0; i < len(m.otherGroups); i++ {
|
---|
| 272 | m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1])
|
---|
| 273 | }
|
---|
| 274 | }
|
---|
| 275 | }
|
---|
| 276 |
|
---|
| 277 | func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
|
---|
| 278 | c := m.matchcount[groupnum]
|
---|
| 279 | if c == 0 {
|
---|
| 280 | return
|
---|
| 281 | }
|
---|
| 282 |
|
---|
| 283 | matches := m.matches[groupnum]
|
---|
| 284 |
|
---|
| 285 | index := matches[(c-1)*2]
|
---|
| 286 | last := index + matches[(c*2)-1]
|
---|
| 287 |
|
---|
| 288 | for ; index < last; index++ {
|
---|
| 289 | buf.WriteRune(m.text[index])
|
---|
| 290 | }
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | func newGroup(name string, text []rune, caps []int, capcount int) Group {
|
---|
| 294 | g := Group{}
|
---|
| 295 | g.text = text
|
---|
| 296 | if capcount > 0 {
|
---|
| 297 | g.Index = caps[(capcount-1)*2]
|
---|
| 298 | g.Length = caps[(capcount*2)-1]
|
---|
| 299 | }
|
---|
| 300 | g.Name = name
|
---|
| 301 | g.Captures = make([]Capture, capcount)
|
---|
| 302 | for i := 0; i < capcount; i++ {
|
---|
| 303 | g.Captures[i] = Capture{
|
---|
| 304 | text: text,
|
---|
| 305 | Index: caps[i*2],
|
---|
| 306 | Length: caps[i*2+1],
|
---|
| 307 | }
|
---|
| 308 | }
|
---|
| 309 | //log.Printf("newGroup! capcount %v, %+v", capcount, g)
|
---|
| 310 |
|
---|
| 311 | return g
|
---|
| 312 | }
|
---|
| 313 |
|
---|
| 314 | func (m *Match) dump() string {
|
---|
| 315 | buf := &bytes.Buffer{}
|
---|
| 316 | buf.WriteRune('\n')
|
---|
| 317 | if len(m.sparseCaps) > 0 {
|
---|
| 318 | for k, v := range m.sparseCaps {
|
---|
| 319 | fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
|
---|
| 320 | }
|
---|
| 321 | }
|
---|
| 322 |
|
---|
| 323 | for i, g := range m.Groups() {
|
---|
| 324 | fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
|
---|
| 325 |
|
---|
| 326 | for _, c := range g.Captures {
|
---|
| 327 | fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String())
|
---|
| 328 | }
|
---|
| 329 | }
|
---|
| 330 | /*
|
---|
| 331 | for i := 0; i < len(m.matchcount); i++ {
|
---|
| 332 | fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
|
---|
| 333 |
|
---|
| 334 | for j := 0; j < m.matchcount[i]; j++ {
|
---|
| 335 | text := ""
|
---|
| 336 |
|
---|
| 337 | if m.matches[i][j*2] >= 0 {
|
---|
| 338 | start := m.matches[i][j*2]
|
---|
| 339 | text = m.text[start : start+m.matches[i][j*2+1]]
|
---|
| 340 | }
|
---|
| 341 |
|
---|
| 342 | fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
|
---|
| 343 | }
|
---|
| 344 | }
|
---|
| 345 | */
|
---|
| 346 | return buf.String()
|
---|
| 347 | }
|
---|