source: code/trunk/vendor/github.com/google/shlex/shlex.go@ 822

Last change on this file since 822 was 822, checked in by yakumo.izuru, 22 months ago

Prefer immortal.run over runit and rc.d, use vendored modules
for convenience.

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 9.7 KB
RevLine 
[822]1/*
2Copyright 2012 Google Inc. All Rights Reserved.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17/*
18Package shlex implements a simple lexer which splits input in to tokens using
19shell-style rules for quoting and commenting.
20
21The basic use case uses the default ASCII lexer to split a string into sub-strings:
22
23 shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}
24
25To process a stream of strings:
26
 l := NewLexer(os.Stdin)
 for token, err := l.Next(); err == nil; token, err = l.Next() {
 	// process token
 }
31
32To access the raw token stream (which includes tokens for comments):
33
 t := NewTokenizer(os.Stdin)
 for token, err := t.Next(); err == nil; token, err = t.Next() {
 	// process token
 }
38
39*/
40package shlex
41
42import (
43 "bufio"
44 "fmt"
45 "io"
46 "strings"
47)
48
// TokenType is a top-level token classification: A word, space, comment, unknown.
type TokenType int

// runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
type runeTokenClass int

// the internal state used by the lexer state machine
type lexerState int

// Token is a (type, value) pair representing a lexical token.
type Token struct {
	tokenType TokenType
	value     string
}

// Equal reports whether tokens a, and b, are equal.
// Two tokens are equal if both their types and values are equal. A nil token can
// never be equal to another token.
func (a *Token) Equal(b *Token) bool {
	if a == nil || b == nil {
		return false
	}
	return a.tokenType == b.tokenType && a.value == b.value
}
76
// Named classes of UTF-8 runes
const (
	spaceRunes            = " \t\r\n" // runes that separate tokens
	escapingQuoteRunes    = `"`       // quotes inside which backslash-escapes still apply
	nonEscapingQuoteRunes = "'"       // quotes inside which everything is literal
	escapeRunes           = `\`       // rune that makes the following rune literal
	commentRunes          = "#"       // rune that starts a to-end-of-line comment
)

// Classes of rune token
const (
	unknownRuneClass runeTokenClass = iota // zero value: any unclassified rune (word characters)
	spaceRuneClass
	escapingQuoteRuneClass
	nonEscapingQuoteRuneClass
	escapeRuneClass
	commentRuneClass
	eofRuneClass // synthetic class injected when the reader returns io.EOF
)

// Classes of lexical token
const (
	UnknownToken TokenType = iota
	WordToken
	SpaceToken
	CommentToken
)

// Lexer state machine states
const (
	startState           lexerState = iota // no runes have been seen
	inWordState                            // processing regular runes in a word
	escapingState                          // we have just consumed an escape rune; the next rune is literal
	escapingQuotedState                    // we have just consumed an escape rune within a quoted string
	quotingEscapingState                   // we are within a quoted string that supports escaping ("...")
	quotingState                           // we are within a string that does not support escaping ('...')
	commentState                           // we are within a comment (everything following an unquoted or unescaped #
)
115
116// tokenClassifier is used for classifying rune characters.
117type tokenClassifier map[rune]runeTokenClass
118
119func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) {
120 for _, runeChar := range runes {
121 typeMap[runeChar] = tokenType
122 }
123}
124
125// newDefaultClassifier creates a new classifier for ASCII characters.
126func newDefaultClassifier() tokenClassifier {
127 t := tokenClassifier{}
128 t.addRuneClass(spaceRunes, spaceRuneClass)
129 t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
130 t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
131 t.addRuneClass(escapeRunes, escapeRuneClass)
132 t.addRuneClass(commentRunes, commentRuneClass)
133 return t
134}
135
// ClassifyRune classifies a rune.
// Runes not registered in the classifier map to unknownRuneClass (the map's
// zero value), which the lexer treats as ordinary word characters.
func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
	return t[runeVal]
}
140
141// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped.
142type Lexer Tokenizer
143
144// NewLexer creates a new lexer from an input stream.
145func NewLexer(r io.Reader) *Lexer {
146
147 return (*Lexer)(NewTokenizer(r))
148}
149
150// Next returns the next word, or an error. If there are no more words,
151// the error will be io.EOF.
152func (l *Lexer) Next() (string, error) {
153 for {
154 token, err := (*Tokenizer)(l).Next()
155 if err != nil {
156 return "", err
157 }
158 switch token.tokenType {
159 case WordToken:
160 return token.value, nil
161 case CommentToken:
162 // skip comments
163 default:
164 return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
165 }
166 }
167}
168
// Tokenizer turns an input stream into a sequence of typed tokens
type Tokenizer struct {
	input      bufio.Reader    // buffered source the runes are read from
	classifier tokenClassifier // maps each rune to its runeTokenClass
}
174
175// NewTokenizer creates a new tokenizer from an input stream.
176func NewTokenizer(r io.Reader) *Tokenizer {
177 input := bufio.NewReader(r)
178 classifier := newDefaultClassifier()
179 return &Tokenizer{
180 input: *input,
181 classifier: classifier}
182}
183
// scanStream scans the stream for the next token using the internal state machine.
//
// Return conventions visible in the code below:
//   - (nil, io.EOF) when the input is exhausted before any token begins;
//   - (token, nil) for a completed word or comment token;
//   - (partial token, error) when EOF cuts off an escape sequence or an
//     unterminated quoted string;
//   - (nil, error) for an unexpected internal state (it does not panic).
func (t *Tokenizer) scanStream() (*Token, error) {
	state := startState     // current state-machine state
	var tokenType TokenType // classification of the token being built
	var value []rune        // runes accumulated for the current token
	var nextRune rune
	var nextRuneType runeTokenClass
	var err error

	for {
		nextRune, _, err = t.input.ReadRune()
		nextRuneType = t.classifier.ClassifyRune(nextRune)

		// Map end-of-input onto a synthetic rune class so each state can
		// finalize (or reject) the token it is building.
		if err == io.EOF {
			nextRuneType = eofRuneClass
			err = nil
		} else if err != nil {
			return nil, err
		}

		switch state {
		case startState: // no runes read yet
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						return nil, io.EOF
					}
				case spaceRuneClass:
					{
						// leading whitespace is skipped
					}
				case escapingQuoteRuneClass:
					{
						tokenType = WordToken
						state = quotingEscapingState
					}
				case nonEscapingQuoteRuneClass:
					{
						tokenType = WordToken
						state = quotingState
					}
				case escapeRuneClass:
					{
						tokenType = WordToken
						state = escapingState
					}
				case commentRuneClass:
					{
						tokenType = CommentToken
						state = commentState
					}
				default:
					{
						// any other rune starts a plain word
						tokenType = WordToken
						value = append(value, nextRune)
						state = inWordState
					}
				}
			}
		case inWordState: // in a regular word
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case spaceRuneClass:
					{
						// whitespace terminates the word
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case escapingQuoteRuneClass:
					{
						state = quotingEscapingState
					}
				case nonEscapingQuoteRuneClass:
					{
						state = quotingState
					}
				case escapeRuneClass:
					{
						state = escapingState
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		case escapingState: // the rune after an escape character
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						// dangling escape at end of input: return the partial
						// token along with an error
						err = fmt.Errorf("EOF found after escape character")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				default:
					{
						// the escaped rune is taken literally
						state = inWordState
						value = append(value, nextRune)
					}
				}
			}
		case escapingQuotedState: // the next rune after an escape character, in double quotes
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found after escape character")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				default:
					{
						state = quotingEscapingState
						value = append(value, nextRune)
					}
				}
			}
		case quotingEscapingState: // in escaping double quotes
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found when expecting closing quote")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case escapingQuoteRuneClass:
					{
						// closing double quote: back to plain word context
						state = inWordState
					}
				case escapeRuneClass:
					{
						state = escapingQuotedState
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		case quotingState: // in non-escaping single quotes
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found when expecting closing quote")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case nonEscapingQuoteRuneClass:
					{
						// closing single quote: back to plain word context
						state = inWordState
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		case commentState: // in a comment
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case spaceRuneClass:
					{
						// only a newline ends the comment; other whitespace
						// belongs to the comment text
						if nextRune == '\n' {
							state = startState
							token := &Token{
								tokenType: tokenType,
								value:     string(value)}
							return token, err
						} else {
							value = append(value, nextRune)
						}
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		default:
			{
				return nil, fmt.Errorf("Unexpected state: %v", state)
			}
		}
	}
}
396
// Next returns the next token in the stream.
// When the input is exhausted before another token starts, the error is io.EOF.
func (t *Tokenizer) Next() (*Token, error) {
	return t.scanStream()
}
401
402// Split partitions a string into a slice of strings.
403func Split(s string) ([]string, error) {
404 l := NewLexer(strings.NewReader(s))
405 subStrings := make([]string, 0)
406 for {
407 word, err := l.Next()
408 if err != nil {
409 if err == io.EOF {
410 return subStrings, nil
411 }
412 return subStrings, err
413 }
414 subStrings = append(subStrings, word)
415 }
416}
Note: See TracBrowser for help on using the repository browser.