source: code/trunk/vendor/github.com/dlclark/regexp2/regexp.go@ 67

Last change on this file since 67 was 67, checked in by Izuru Yakumo, 23 months ago

Use vendored modules

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 9.6 KB
Line 
1/*
Package regexp2 is a regexp package that has an interface similar to Go's framework regexp engine but uses a
more full-featured regex engine behind the scenes.
4
5It doesn't have constant time guarantees, but it allows backtracking and is compatible with Perl5 and .NET.
6You'll likely be better off with the RE2 engine from the regexp package and should only use this if you
7need to write very complex patterns or require compatibility with .NET.
8*/
9package regexp2
10
11import (
12 "errors"
13 "math"
14 "strconv"
15 "sync"
16 "time"
17
18 "github.com/dlclark/regexp2/syntax"
19)
20
// DefaultMatchTimeout is the MatchTimeout given to every Regexp returned by
// Compile. It is the largest representable duration, i.e. "forever".
var DefaultMatchTimeout time.Duration = math.MaxInt64
23
24// Regexp is the representation of a compiled regular expression.
25// A Regexp is safe for concurrent use by multiple goroutines.
26type Regexp struct {
27 //timeout when trying to find matches
28 MatchTimeout time.Duration
29
30 // read-only after Compile
31 pattern string // as passed to Compile
32 options RegexOptions // options
33
34 caps map[int]int // capnum->index
35 capnames map[string]int //capture group name -> index
36 capslist []string //sorted list of capture group names
37 capsize int // size of the capture array
38
39 code *syntax.Code // compiled program
40
41 // cache of machines for running regexp
42 muRun sync.Mutex
43 runner []*runner
44}
45
46// Compile parses a regular expression and returns, if successful,
47// a Regexp object that can be used to match against text.
48func Compile(expr string, opt RegexOptions) (*Regexp, error) {
49 // parse it
50 tree, err := syntax.Parse(expr, syntax.RegexOptions(opt))
51 if err != nil {
52 return nil, err
53 }
54
55 // translate it to code
56 code, err := syntax.Write(tree)
57 if err != nil {
58 return nil, err
59 }
60
61 // return it
62 return &Regexp{
63 pattern: expr,
64 options: opt,
65 caps: code.Caps,
66 capnames: tree.Capnames,
67 capslist: tree.Caplist,
68 capsize: code.Capsize,
69 code: code,
70 MatchTimeout: DefaultMatchTimeout,
71 }, nil
72}
73
74// MustCompile is like Compile but panics if the expression cannot be parsed.
75// It simplifies safe initialization of global variables holding compiled regular
76// expressions.
77func MustCompile(str string, opt RegexOptions) *Regexp {
78 regexp, error := Compile(str, opt)
79 if error != nil {
80 panic(`regexp2: Compile(` + quote(str) + `): ` + error.Error())
81 }
82 return regexp
83}
84
85// Escape adds backslashes to any special characters in the input string
86func Escape(input string) string {
87 return syntax.Escape(input)
88}
89
90// Unescape removes any backslashes from previously-escaped special characters in the input string
91func Unescape(input string) (string, error) {
92 return syntax.Unescape(input)
93}
94
95// String returns the source text used to compile the regular expression.
96func (re *Regexp) String() string {
97 return re.pattern
98}
99
// quote renders s as a Go string literal: a raw (backquoted) literal when
// strconv.CanBackquote allows it, otherwise an interpreted literal produced
// by strconv.Quote.
func quote(s string) string {
	if !strconv.CanBackquote(s) {
		return strconv.Quote(s)
	}
	return "`" + s + "`"
}
106
// RegexOptions impact the runtime and parsing behavior
// for each specific regex. They are settable in code as well
// as in the regex pattern itself.
type RegexOptions int32

// Option flags. None is the typed zero value; every other flag is a single
// distinct bit (0x0001 through 0x0200) declared as an untyped constant, so
// flags can be OR-ed together. The quoted letter next to a flag is the
// letter associated with that option.
const (
	None RegexOptions = 0x0

	IgnoreCase              = 1 << (iota - 1) // 0x0001 "i"
	Multiline                                 // 0x0002 "m"
	ExplicitCapture                           // 0x0004 "n"
	Compiled                                  // 0x0008 "c"
	Singleline                                // 0x0010 "s"
	IgnorePatternWhitespace                   // 0x0020 "x"
	RightToLeft                               // 0x0040 "r"
	Debug                                     // 0x0080 "d"
	ECMAScript                                // 0x0100 "e"
	RE2                                       // 0x0200 RE2 (regexp package) compatibility mode
)
125
126func (re *Regexp) RightToLeft() bool {
127 return re.options&RightToLeft != 0
128}
129
130func (re *Regexp) Debug() bool {
131 return re.options&Debug != 0
132}
133
134// Replace searches the input string and replaces each match found with the replacement text.
135// Count will limit the number of matches attempted and startAt will allow
136// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
137// Set startAt and count to -1 to go through the whole string
138func (re *Regexp) Replace(input, replacement string, startAt, count int) (string, error) {
139 data, err := syntax.NewReplacerData(replacement, re.caps, re.capsize, re.capnames, syntax.RegexOptions(re.options))
140 if err != nil {
141 return "", err
142 }
143 //TODO: cache ReplacerData
144
145 return replace(re, data, nil, input, startAt, count)
146}
147
148// ReplaceFunc searches the input string and replaces each match found using the string from the evaluator
149// Count will limit the number of matches attempted and startAt will allow
150// us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
151// Set startAt and count to -1 to go through the whole string.
152func (re *Regexp) ReplaceFunc(input string, evaluator MatchEvaluator, startAt, count int) (string, error) {
153 return replace(re, nil, evaluator, input, startAt, count)
154}
155
156// FindStringMatch searches the input string for a Regexp match
157func (re *Regexp) FindStringMatch(s string) (*Match, error) {
158 // convert string to runes
159 return re.run(false, -1, getRunes(s))
160}
161
162// FindRunesMatch searches the input rune slice for a Regexp match
163func (re *Regexp) FindRunesMatch(r []rune) (*Match, error) {
164 return re.run(false, -1, r)
165}
166
167// FindStringMatchStartingAt searches the input string for a Regexp match starting at the startAt index
168func (re *Regexp) FindStringMatchStartingAt(s string, startAt int) (*Match, error) {
169 if startAt > len(s) {
170 return nil, errors.New("startAt must be less than the length of the input string")
171 }
172 r, startAt := re.getRunesAndStart(s, startAt)
173 if startAt == -1 {
174 // we didn't find our start index in the string -- that's a problem
175 return nil, errors.New("startAt must align to the start of a valid rune in the input string")
176 }
177
178 return re.run(false, startAt, r)
179}
180
181// FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt index
182func (re *Regexp) FindRunesMatchStartingAt(r []rune, startAt int) (*Match, error) {
183 return re.run(false, startAt, r)
184}
185
186// FindNextMatch returns the next match in the same input string as the match parameter.
187// Will return nil if there is no next match or if given a nil match.
188func (re *Regexp) FindNextMatch(m *Match) (*Match, error) {
189 if m == nil {
190 return nil, nil
191 }
192
193 // If previous match was empty, advance by one before matching to prevent
194 // infinite loop
195 startAt := m.textpos
196 if m.Length == 0 {
197 if m.textpos == len(m.text) {
198 return nil, nil
199 }
200
201 if re.RightToLeft() {
202 startAt--
203 } else {
204 startAt++
205 }
206 }
207 return re.run(false, startAt, m.text)
208}
209
210// MatchString return true if the string matches the regex
211// error will be set if a timeout occurs
212func (re *Regexp) MatchString(s string) (bool, error) {
213 m, err := re.run(true, -1, getRunes(s))
214 if err != nil {
215 return false, err
216 }
217 return m != nil, nil
218}
219
220func (re *Regexp) getRunesAndStart(s string, startAt int) ([]rune, int) {
221 if startAt < 0 {
222 if re.RightToLeft() {
223 r := getRunes(s)
224 return r, len(r)
225 }
226 return getRunes(s), 0
227 }
228 ret := make([]rune, len(s))
229 i := 0
230 runeIdx := -1
231 for strIdx, r := range s {
232 if strIdx == startAt {
233 runeIdx = i
234 }
235 ret[i] = r
236 i++
237 }
238 if startAt == len(s) {
239 runeIdx = i
240 }
241 return ret[:i], runeIdx
242}
243
// getRunes converts s to a slice of runes.
func getRunes(s string) []rune {
	runes := []rune(s)
	return runes
}
247
248// MatchRunes return true if the runes matches the regex
249// error will be set if a timeout occurs
250func (re *Regexp) MatchRunes(r []rune) (bool, error) {
251 m, err := re.run(true, -1, r)
252 if err != nil {
253 return false, err
254 }
255 return m != nil, nil
256}
257
258// GetGroupNames Returns the set of strings used to name capturing groups in the expression.
259func (re *Regexp) GetGroupNames() []string {
260 var result []string
261
262 if re.capslist == nil {
263 result = make([]string, re.capsize)
264
265 for i := 0; i < len(result); i++ {
266 result[i] = strconv.Itoa(i)
267 }
268 } else {
269 result = make([]string, len(re.capslist))
270 copy(result, re.capslist)
271 }
272
273 return result
274}
275
276// GetGroupNumbers returns the integer group numbers corresponding to a group name.
277func (re *Regexp) GetGroupNumbers() []int {
278 var result []int
279
280 if re.caps == nil {
281 result = make([]int, re.capsize)
282
283 for i := 0; i < len(result); i++ {
284 result[i] = i
285 }
286 } else {
287 result = make([]int, len(re.caps))
288
289 for k, v := range re.caps {
290 result[v] = k
291 }
292 }
293
294 return result
295}
296
297// GroupNameFromNumber retrieves a group name that corresponds to a group number.
298// It will return "" for and unknown group number. Unnamed groups automatically
299// receive a name that is the decimal string equivalent of its number.
300func (re *Regexp) GroupNameFromNumber(i int) string {
301 if re.capslist == nil {
302 if i >= 0 && i < re.capsize {
303 return strconv.Itoa(i)
304 }
305
306 return ""
307 }
308
309 if re.caps != nil {
310 var ok bool
311 if i, ok = re.caps[i]; !ok {
312 return ""
313 }
314 }
315
316 if i >= 0 && i < len(re.capslist) {
317 return re.capslist[i]
318 }
319
320 return ""
321}
322
323// GroupNumberFromName returns a group number that corresponds to a group name.
324// Returns -1 if the name is not a recognized group name. Numbered groups
325// automatically get a group name that is the decimal string equivalent of its number.
326func (re *Regexp) GroupNumberFromName(name string) int {
327 // look up name if we have a hashtable of names
328 if re.capnames != nil {
329 if k, ok := re.capnames[name]; ok {
330 return k
331 }
332
333 return -1
334 }
335
336 // convert to an int if it looks like a number
337 result := 0
338 for i := 0; i < len(name); i++ {
339 ch := name[i]
340
341 if ch > '9' || ch < '0' {
342 return -1
343 }
344
345 result *= 10
346 result += int(ch - '0')
347 }
348
349 // return int if it's in range
350 if result >= 0 && result < re.capsize {
351 return result
352 }
353
354 return -1
355}
Note: See TracBrowser for help on using the repository browser.