// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4 |
|
---|
5 | package proto
|
---|
6 |
|
---|
7 | import (
|
---|
8 | "encoding"
|
---|
9 | "errors"
|
---|
10 | "fmt"
|
---|
11 | "reflect"
|
---|
12 | "strconv"
|
---|
13 | "strings"
|
---|
14 | "unicode/utf8"
|
---|
15 |
|
---|
16 | "google.golang.org/protobuf/encoding/prototext"
|
---|
17 | protoV2 "google.golang.org/protobuf/proto"
|
---|
18 | "google.golang.org/protobuf/reflect/protoreflect"
|
---|
19 | "google.golang.org/protobuf/reflect/protoregistry"
|
---|
20 | )
|
---|
21 |
|
---|
// wrapTextUnmarshalV2 selects, at compile time, whether text unmarshaling is
// delegated to the prototext package (true) or handled by the parser in this
// file (false). It also controls how ParseError formats itself.
const wrapTextUnmarshalV2 = false

// ParseError is the error type reported by UnmarshalText for malformed input.
type ParseError struct {
	// Message is the human-readable description of the problem.
	Message string

	// Deprecated: Do not use.
	Line, Offset int
}

// Error implements the error interface.
//
// When the prototext wrapper is enabled only Message is returned. Otherwise
// the message is prefixed with the position: errors on the first line are
// rendered as "line 1.<offset>: <message>", all others as
// "line <line>: <message>".
func (e *ParseError) Error() string {
	switch {
	case wrapTextUnmarshalV2:
		return e.Message
	case e.Line == 1:
		return fmt.Sprintf("line 1.%d: %v", e.Offset, e.Message)
	default:
		return fmt.Sprintf("line %d: %v", e.Line, e.Message)
	}
}
41 |
|
---|
42 | // UnmarshalText parses a proto text formatted string into m.
|
---|
43 | func UnmarshalText(s string, m Message) error {
|
---|
44 | if u, ok := m.(encoding.TextUnmarshaler); ok {
|
---|
45 | return u.UnmarshalText([]byte(s))
|
---|
46 | }
|
---|
47 |
|
---|
48 | m.Reset()
|
---|
49 | mi := MessageV2(m)
|
---|
50 |
|
---|
51 | if wrapTextUnmarshalV2 {
|
---|
52 | err := prototext.UnmarshalOptions{
|
---|
53 | AllowPartial: true,
|
---|
54 | }.Unmarshal([]byte(s), mi)
|
---|
55 | if err != nil {
|
---|
56 | return &ParseError{Message: err.Error()}
|
---|
57 | }
|
---|
58 | return checkRequiredNotSet(mi)
|
---|
59 | } else {
|
---|
60 | if err := newTextParser(s).unmarshalMessage(mi.ProtoReflect(), ""); err != nil {
|
---|
61 | return err
|
---|
62 | }
|
---|
63 | return checkRequiredNotSet(mi)
|
---|
64 | }
|
---|
65 | }
|
---|
66 |
|
---|
// textParser is the combined tokenizer and parser state used to decode a
// text-format message. A parser is created by newTextParser and consumed by
// the unmarshal* methods.
type textParser struct {
	s            string // remaining input
	done         bool   // whether the parsing is finished (success or error)
	backed       bool   // whether back() was called
	offset, line int    // bytes consumed so far, and current line (starts at 1)
	cur          token  // most recently scanned token; returned again after back()
}
74 |
|
---|
// token is a single lexical element produced by the tokenizer, together with
// its position and any error raised while scanning or parsing at that point.
type token struct {
	value    string      // raw token text; quoted strings keep their quotes
	err      *ParseError // non-nil once an error has been recorded on this token
	line     int         // line number
	offset   int         // byte number from start of input, not start of line
	unquoted string      // the unquoted version of value, if it was a quoted string
}
82 |
|
---|
83 | func newTextParser(s string) *textParser {
|
---|
84 | p := new(textParser)
|
---|
85 | p.s = s
|
---|
86 | p.line = 1
|
---|
87 | p.cur.line = 1
|
---|
88 | return p
|
---|
89 | }
|
---|
90 |
|
---|
// unmarshalMessage parses a sequence of fields into m until the terminator
// token is read.
//
// terminator is the token that closes this message: "" when parsing the
// top-level input (end of input yields an empty token), or "}" / ">" for a
// nested message. The first tokenizer or parse error aborts parsing and is
// returned; nil is returned once the terminator has been consumed.
func (p *textParser) unmarshalMessage(m protoreflect.Message, terminator string) (err error) {
	md := m.Descriptor()
	fds := md.Fields()

	// A struct is a sequence of "name: value", terminated by one of
	// '>' or '}', or the end of the input.  A name may also be
	// "[extension]" or "[type/url]".
	//
	// The whole struct can also be an expanded Any message, like:
	// [type/url] < ... struct contents ... >
	//
	// seen records the numbers of the fields parsed so far so that a repeated
	// occurrence of a non-repeated field can be rejected.
	seen := make(map[protoreflect.FieldNumber]bool)
	for {
		tok := p.next()
		if tok.err != nil {
			return tok.err
		}
		if tok.value == terminator {
			break
		}
		if tok.value == "[" {
			// Bracketed name: an extension field or an expanded Any.
			if err := p.unmarshalExtensionOrAny(m, seen); err != nil {
				return err
			}
			continue
		}

		// This is a normal, non-extension field.
		name := protoreflect.Name(tok.value)
		fd := fds.ByName(name)
		switch {
		case fd == nil:
			// Groups are written using the group's message name; the field
			// is looked up under the lower-cased form of that name.
			gd := fds.ByName(protoreflect.Name(strings.ToLower(string(name))))
			if gd != nil && gd.Kind() == protoreflect.GroupKind && gd.Message().Name() == name {
				fd = gd
			}
		case fd.Kind() == protoreflect.GroupKind && fd.Message().Name() != name:
			// A group field may only be referenced by its message name.
			fd = nil
		case fd.IsWeak() && fd.Message().IsPlaceholder():
			// Weak field whose message type is only a placeholder: treat as unknown.
			fd = nil
		}
		if fd == nil {
			// Prefer the Go type name in the error when the message is backed
			// by a pointer type; otherwise use the proto full name.
			typeName := string(md.FullName())
			if m, ok := m.Interface().(Message); ok {
				t := reflect.TypeOf(m)
				if t.Kind() == reflect.Ptr {
					typeName = t.Elem().String()
				}
			}
			return p.errorf("unknown field name %q in %v", name, typeName)
		}
		if od := fd.ContainingOneof(); od != nil && m.WhichOneof(od) != nil {
			return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, od.Name())
		}
		if fd.Cardinality() != protoreflect.Repeated && seen[fd.Number()] {
			return p.errorf("non-repeated field %q was repeated", fd.Name())
		}
		seen[fd.Number()] = true

		// Consume any colon.
		if err := p.checkForColon(fd); err != nil {
			return err
		}

		// Parse into the field. Lists, maps and messages that are not yet
		// populated are obtained through Mutable so the parsed value is
		// written into a container owned by m.
		v := m.Get(fd)
		if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) {
			v = m.Mutable(fd)
		}
		if v, err = p.unmarshalValue(v, fd); err != nil {
			return err
		}
		m.Set(fd, v)

		// A trailing ';' or ',' after a field is tolerated.
		if err := p.consumeOptionalSeparator(); err != nil {
			return err
		}
	}
	return nil
}
170 |
|
---|
171 | func (p *textParser) unmarshalExtensionOrAny(m protoreflect.Message, seen map[protoreflect.FieldNumber]bool) error {
|
---|
172 | name, err := p.consumeExtensionOrAnyName()
|
---|
173 | if err != nil {
|
---|
174 | return err
|
---|
175 | }
|
---|
176 |
|
---|
177 | // If it contains a slash, it's an Any type URL.
|
---|
178 | if slashIdx := strings.LastIndex(name, "/"); slashIdx >= 0 {
|
---|
179 | tok := p.next()
|
---|
180 | if tok.err != nil {
|
---|
181 | return tok.err
|
---|
182 | }
|
---|
183 | // consume an optional colon
|
---|
184 | if tok.value == ":" {
|
---|
185 | tok = p.next()
|
---|
186 | if tok.err != nil {
|
---|
187 | return tok.err
|
---|
188 | }
|
---|
189 | }
|
---|
190 |
|
---|
191 | var terminator string
|
---|
192 | switch tok.value {
|
---|
193 | case "<":
|
---|
194 | terminator = ">"
|
---|
195 | case "{":
|
---|
196 | terminator = "}"
|
---|
197 | default:
|
---|
198 | return p.errorf("expected '{' or '<', found %q", tok.value)
|
---|
199 | }
|
---|
200 |
|
---|
201 | mt, err := protoregistry.GlobalTypes.FindMessageByURL(name)
|
---|
202 | if err != nil {
|
---|
203 | return p.errorf("unrecognized message %q in google.protobuf.Any", name[slashIdx+len("/"):])
|
---|
204 | }
|
---|
205 | m2 := mt.New()
|
---|
206 | if err := p.unmarshalMessage(m2, terminator); err != nil {
|
---|
207 | return err
|
---|
208 | }
|
---|
209 | b, err := protoV2.Marshal(m2.Interface())
|
---|
210 | if err != nil {
|
---|
211 | return p.errorf("failed to marshal message of type %q: %v", name[slashIdx+len("/"):], err)
|
---|
212 | }
|
---|
213 |
|
---|
214 | urlFD := m.Descriptor().Fields().ByName("type_url")
|
---|
215 | valFD := m.Descriptor().Fields().ByName("value")
|
---|
216 | if seen[urlFD.Number()] {
|
---|
217 | return p.errorf("Any message unpacked multiple times, or %q already set", urlFD.Name())
|
---|
218 | }
|
---|
219 | if seen[valFD.Number()] {
|
---|
220 | return p.errorf("Any message unpacked multiple times, or %q already set", valFD.Name())
|
---|
221 | }
|
---|
222 | m.Set(urlFD, protoreflect.ValueOfString(name))
|
---|
223 | m.Set(valFD, protoreflect.ValueOfBytes(b))
|
---|
224 | seen[urlFD.Number()] = true
|
---|
225 | seen[valFD.Number()] = true
|
---|
226 | return nil
|
---|
227 | }
|
---|
228 |
|
---|
229 | xname := protoreflect.FullName(name)
|
---|
230 | xt, _ := protoregistry.GlobalTypes.FindExtensionByName(xname)
|
---|
231 | if xt == nil && isMessageSet(m.Descriptor()) {
|
---|
232 | xt, _ = protoregistry.GlobalTypes.FindExtensionByName(xname.Append("message_set_extension"))
|
---|
233 | }
|
---|
234 | if xt == nil {
|
---|
235 | return p.errorf("unrecognized extension %q", name)
|
---|
236 | }
|
---|
237 | fd := xt.TypeDescriptor()
|
---|
238 | if fd.ContainingMessage().FullName() != m.Descriptor().FullName() {
|
---|
239 | return p.errorf("extension field %q does not extend message %q", name, m.Descriptor().FullName())
|
---|
240 | }
|
---|
241 |
|
---|
242 | if err := p.checkForColon(fd); err != nil {
|
---|
243 | return err
|
---|
244 | }
|
---|
245 |
|
---|
246 | v := m.Get(fd)
|
---|
247 | if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) {
|
---|
248 | v = m.Mutable(fd)
|
---|
249 | }
|
---|
250 | v, err = p.unmarshalValue(v, fd)
|
---|
251 | if err != nil {
|
---|
252 | return err
|
---|
253 | }
|
---|
254 | m.Set(fd, v)
|
---|
255 | return p.consumeOptionalSeparator()
|
---|
256 | }
|
---|
257 |
|
---|
258 | func (p *textParser) unmarshalValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
|
---|
259 | tok := p.next()
|
---|
260 | if tok.err != nil {
|
---|
261 | return v, tok.err
|
---|
262 | }
|
---|
263 | if tok.value == "" {
|
---|
264 | return v, p.errorf("unexpected EOF")
|
---|
265 | }
|
---|
266 |
|
---|
267 | switch {
|
---|
268 | case fd.IsList():
|
---|
269 | lv := v.List()
|
---|
270 | var err error
|
---|
271 | if tok.value == "[" {
|
---|
272 | // Repeated field with list notation, like [1,2,3].
|
---|
273 | for {
|
---|
274 | vv := lv.NewElement()
|
---|
275 | vv, err = p.unmarshalSingularValue(vv, fd)
|
---|
276 | if err != nil {
|
---|
277 | return v, err
|
---|
278 | }
|
---|
279 | lv.Append(vv)
|
---|
280 |
|
---|
281 | tok := p.next()
|
---|
282 | if tok.err != nil {
|
---|
283 | return v, tok.err
|
---|
284 | }
|
---|
285 | if tok.value == "]" {
|
---|
286 | break
|
---|
287 | }
|
---|
288 | if tok.value != "," {
|
---|
289 | return v, p.errorf("Expected ']' or ',' found %q", tok.value)
|
---|
290 | }
|
---|
291 | }
|
---|
292 | return v, nil
|
---|
293 | }
|
---|
294 |
|
---|
295 | // One value of the repeated field.
|
---|
296 | p.back()
|
---|
297 | vv := lv.NewElement()
|
---|
298 | vv, err = p.unmarshalSingularValue(vv, fd)
|
---|
299 | if err != nil {
|
---|
300 | return v, err
|
---|
301 | }
|
---|
302 | lv.Append(vv)
|
---|
303 | return v, nil
|
---|
304 | case fd.IsMap():
|
---|
305 | // The map entry should be this sequence of tokens:
|
---|
306 | // < key : KEY value : VALUE >
|
---|
307 | // However, implementations may omit key or value, and technically
|
---|
308 | // we should support them in any order.
|
---|
309 | var terminator string
|
---|
310 | switch tok.value {
|
---|
311 | case "<":
|
---|
312 | terminator = ">"
|
---|
313 | case "{":
|
---|
314 | terminator = "}"
|
---|
315 | default:
|
---|
316 | return v, p.errorf("expected '{' or '<', found %q", tok.value)
|
---|
317 | }
|
---|
318 |
|
---|
319 | keyFD := fd.MapKey()
|
---|
320 | valFD := fd.MapValue()
|
---|
321 |
|
---|
322 | mv := v.Map()
|
---|
323 | kv := keyFD.Default()
|
---|
324 | vv := mv.NewValue()
|
---|
325 | for {
|
---|
326 | tok := p.next()
|
---|
327 | if tok.err != nil {
|
---|
328 | return v, tok.err
|
---|
329 | }
|
---|
330 | if tok.value == terminator {
|
---|
331 | break
|
---|
332 | }
|
---|
333 | var err error
|
---|
334 | switch tok.value {
|
---|
335 | case "key":
|
---|
336 | if err := p.consumeToken(":"); err != nil {
|
---|
337 | return v, err
|
---|
338 | }
|
---|
339 | if kv, err = p.unmarshalSingularValue(kv, keyFD); err != nil {
|
---|
340 | return v, err
|
---|
341 | }
|
---|
342 | if err := p.consumeOptionalSeparator(); err != nil {
|
---|
343 | return v, err
|
---|
344 | }
|
---|
345 | case "value":
|
---|
346 | if err := p.checkForColon(valFD); err != nil {
|
---|
347 | return v, err
|
---|
348 | }
|
---|
349 | if vv, err = p.unmarshalSingularValue(vv, valFD); err != nil {
|
---|
350 | return v, err
|
---|
351 | }
|
---|
352 | if err := p.consumeOptionalSeparator(); err != nil {
|
---|
353 | return v, err
|
---|
354 | }
|
---|
355 | default:
|
---|
356 | p.back()
|
---|
357 | return v, p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
|
---|
358 | }
|
---|
359 | }
|
---|
360 | mv.Set(kv.MapKey(), vv)
|
---|
361 | return v, nil
|
---|
362 | default:
|
---|
363 | p.back()
|
---|
364 | return p.unmarshalSingularValue(v, fd)
|
---|
365 | }
|
---|
366 | }
|
---|
367 |
|
---|
// unmarshalSingularValue reads the next token and converts it to a single
// value of fd's kind.
//
// For message and group kinds, v must hold the message to populate; the
// nested fields are parsed into it and v is returned. For every other kind a
// new value is returned and v is only returned alongside an error. A token
// that cannot be converted yields an "invalid <kind>: <token>" ParseError.
// An unsupported kind panics.
func (p *textParser) unmarshalSingularValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
	tok := p.next()
	if tok.err != nil {
		return v, tok.err
	}
	if tok.value == "" {
		return v, p.errorf("unexpected EOF")
	}

	switch fd.Kind() {
	case protoreflect.BoolKind:
		switch tok.value {
		case "true", "1", "t", "True":
			return protoreflect.ValueOfBool(true), nil
		case "false", "0", "f", "False":
			return protoreflect.ValueOfBool(false), nil
		}
	case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
		// Base 0 lets strconv pick the base from the literal's prefix.
		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
			return protoreflect.ValueOfInt32(int32(x)), nil
		}

		// The C++ parser accepts large positive hex numbers that uses
		// two's complement arithmetic to represent negative numbers.
		// This feature is here for backwards compatibility with C++.
		if strings.HasPrefix(tok.value, "0x") {
			if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
				// Negating (^x + 1) reinterprets the unsigned bit pattern
				// as a signed value.
				return protoreflect.ValueOfInt32(int32(-(int64(^x) + 1))), nil
			}
		}
	case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
		if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
			return protoreflect.ValueOfInt64(int64(x)), nil
		}

		// The C++ parser accepts large positive hex numbers that uses
		// two's complement arithmetic to represent negative numbers.
		// This feature is here for backwards compatibility with C++.
		if strings.HasPrefix(tok.value, "0x") {
			if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
				// Same reinterpretation as the 32-bit case above.
				return protoreflect.ValueOfInt64(int64(-(int64(^x) + 1))), nil
			}
		}
	case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
		if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
			return protoreflect.ValueOfUint32(uint32(x)), nil
		}
	case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
		if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
			return protoreflect.ValueOfUint64(uint64(x)), nil
		}
	case protoreflect.FloatKind:
		// Ignore 'f' for compatibility with output generated by C++,
		// but don't remove 'f' when the value is "-inf" or "inf".
		// NOTE(review): other spellings ending in 'f' (e.g. "Inf", "+inf")
		// are stripped and then fail to parse — confirm whether intended.
		v := tok.value
		if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" {
			v = v[:len(v)-len("f")]
		}
		if x, err := strconv.ParseFloat(v, 32); err == nil {
			return protoreflect.ValueOfFloat32(float32(x)), nil
		}
	case protoreflect.DoubleKind:
		// Ignore 'f' for compatibility with output generated by C++,
		// but don't remove 'f' when the value is "-inf" or "inf".
		v := tok.value
		if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" {
			v = v[:len(v)-len("f")]
		}
		if x, err := strconv.ParseFloat(v, 64); err == nil {
			return protoreflect.ValueOfFloat64(float64(x)), nil
		}
	case protoreflect.StringKind:
		// Only quoted tokens are accepted; tok.value is non-empty here.
		if isQuote(tok.value[0]) {
			return protoreflect.ValueOfString(tok.unquoted), nil
		}
	case protoreflect.BytesKind:
		if isQuote(tok.value[0]) {
			return protoreflect.ValueOfBytes([]byte(tok.unquoted)), nil
		}
	case protoreflect.EnumKind:
		// Enums may be given by number or by value name; the number form
		// is tried first.
		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
			return protoreflect.ValueOfEnum(protoreflect.EnumNumber(x)), nil
		}
		vd := fd.Enum().Values().ByName(protoreflect.Name(tok.value))
		if vd != nil {
			return protoreflect.ValueOfEnum(vd.Number()), nil
		}
	case protoreflect.MessageKind, protoreflect.GroupKind:
		// The opening delimiter determines the matching closing one.
		var terminator string
		switch tok.value {
		case "{":
			terminator = "}"
		case "<":
			terminator = ">"
		default:
			return v, p.errorf("expected '{' or '<', found %q", tok.value)
		}
		err := p.unmarshalMessage(v.Message(), terminator)
		return v, err
	default:
		panic(fmt.Sprintf("invalid kind %v", fd.Kind()))
	}
	// Reached when the token did not match any accepted form for the kind.
	return v, p.errorf("invalid %v: %v", fd.Kind(), tok.value)
}
472 |
|
---|
473 | // Consume a ':' from the input stream (if the next token is a colon),
|
---|
474 | // returning an error if a colon is needed but not present.
|
---|
475 | func (p *textParser) checkForColon(fd protoreflect.FieldDescriptor) *ParseError {
|
---|
476 | tok := p.next()
|
---|
477 | if tok.err != nil {
|
---|
478 | return tok.err
|
---|
479 | }
|
---|
480 | if tok.value != ":" {
|
---|
481 | if fd.Message() == nil {
|
---|
482 | return p.errorf("expected ':', found %q", tok.value)
|
---|
483 | }
|
---|
484 | p.back()
|
---|
485 | }
|
---|
486 | return nil
|
---|
487 | }
|
---|
488 |
|
---|
489 | // consumeExtensionOrAnyName consumes an extension name or an Any type URL and
|
---|
490 | // the following ']'. It returns the name or URL consumed.
|
---|
491 | func (p *textParser) consumeExtensionOrAnyName() (string, error) {
|
---|
492 | tok := p.next()
|
---|
493 | if tok.err != nil {
|
---|
494 | return "", tok.err
|
---|
495 | }
|
---|
496 |
|
---|
497 | // If extension name or type url is quoted, it's a single token.
|
---|
498 | if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
|
---|
499 | name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
|
---|
500 | if err != nil {
|
---|
501 | return "", err
|
---|
502 | }
|
---|
503 | return name, p.consumeToken("]")
|
---|
504 | }
|
---|
505 |
|
---|
506 | // Consume everything up to "]"
|
---|
507 | var parts []string
|
---|
508 | for tok.value != "]" {
|
---|
509 | parts = append(parts, tok.value)
|
---|
510 | tok = p.next()
|
---|
511 | if tok.err != nil {
|
---|
512 | return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
|
---|
513 | }
|
---|
514 | if p.done && tok.value != "]" {
|
---|
515 | return "", p.errorf("unclosed type_url or extension name")
|
---|
516 | }
|
---|
517 | }
|
---|
518 | return strings.Join(parts, ""), nil
|
---|
519 | }
|
---|
520 |
|
---|
521 | // consumeOptionalSeparator consumes an optional semicolon or comma.
|
---|
522 | // It is used in unmarshalMessage to provide backward compatibility.
|
---|
523 | func (p *textParser) consumeOptionalSeparator() error {
|
---|
524 | tok := p.next()
|
---|
525 | if tok.err != nil {
|
---|
526 | return tok.err
|
---|
527 | }
|
---|
528 | if tok.value != ";" && tok.value != "," {
|
---|
529 | p.back()
|
---|
530 | }
|
---|
531 | return nil
|
---|
532 | }
|
---|
533 |
|
---|
534 | func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
|
---|
535 | pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
|
---|
536 | p.cur.err = pe
|
---|
537 | p.done = true
|
---|
538 | return pe
|
---|
539 | }
|
---|
540 |
|
---|
541 | func (p *textParser) skipWhitespace() {
|
---|
542 | i := 0
|
---|
543 | for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
|
---|
544 | if p.s[i] == '#' {
|
---|
545 | // comment; skip to end of line or input
|
---|
546 | for i < len(p.s) && p.s[i] != '\n' {
|
---|
547 | i++
|
---|
548 | }
|
---|
549 | if i == len(p.s) {
|
---|
550 | break
|
---|
551 | }
|
---|
552 | }
|
---|
553 | if p.s[i] == '\n' {
|
---|
554 | p.line++
|
---|
555 | }
|
---|
556 | i++
|
---|
557 | }
|
---|
558 | p.offset += i
|
---|
559 | p.s = p.s[i:len(p.s)]
|
---|
560 | if len(p.s) == 0 {
|
---|
561 | p.done = true
|
---|
562 | }
|
---|
563 | }
|
---|
564 |
|
---|
// advance scans the next token from the input into p.cur.
//
// Leading whitespace and comments are skipped first; if that exhausts the
// input the parser is already marked done and p.cur is left untouched. A
// token is one of: a single punctuation byte, a complete quoted string (with
// its unquoted form stored in p.cur.unquoted), or a run of identifier/number
// characters. On a lexical error the error is recorded via errorf and the
// input is not consumed.
func (p *textParser) advance() {
	// Skip whitespace
	p.skipWhitespace()
	if p.done {
		return
	}

	// Start of non-whitespace
	p.cur.err = nil
	p.cur.offset, p.cur.line = p.offset, p.line
	p.cur.unquoted = ""
	switch p.s[0] {
	case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
		// Single symbol
		p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
	case '"', '\'':
		// Quoted string: scan to the matching quote on the same line.
		i := 1
		for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
			if p.s[i] == '\\' && i+1 < len(p.s) {
				// skip escaped char
				i++
			}
			i++
		}
		// Either the input ended or a newline was hit before the closing quote.
		if i >= len(p.s) || p.s[i] != p.s[0] {
			p.errorf("unmatched quote")
			return
		}
		unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
		if err != nil {
			p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
			return
		}
		// The token value keeps the surrounding quotes.
		p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
		p.cur.unquoted = unq
	default:
		// Identifier or number: the longest run of permitted characters.
		i := 0
		for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
			i++
		}
		if i == 0 {
			p.errorf("unexpected byte %#x", p.s[0])
			return
		}
		p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
	}
	p.offset += len(p.cur.value)
}
614 |
|
---|
615 | // Back off the parser by one token. Can only be done between calls to next().
|
---|
616 | // It makes the next advance() a no-op.
|
---|
617 | func (p *textParser) back() { p.backed = true }
|
---|
618 |
|
---|
// next advances the parser and returns the new current token.
//
// If back() was called, or the parser is done, the existing current token is
// returned again without scanning. At end of input the returned token has an
// empty value. Adjacent quoted strings separated only by whitespace are
// merged into one token: the raw values are joined with a single space and
// the unquoted values are concatenated.
//
// The returned pointer refers to the parser's own token and is overwritten by
// later calls.
func (p *textParser) next() *token {
	if p.backed || p.done {
		p.backed = false
		return &p.cur
	}
	p.advance()
	if p.done {
		// End of input, or a lexical error: report an empty value.
		p.cur.value = ""
	} else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
		// Look for multiple quoted strings separated by whitespace,
		// and concatenate them.
		cat := p.cur
		for {
			p.skipWhitespace()
			if p.done || !isQuote(p.s[0]) {
				break
			}
			p.advance()
			if p.cur.err != nil {
				// A later string piece failed to scan; surface that error.
				return &p.cur
			}
			cat.value += " " + p.cur.value
			cat.unquoted += p.cur.unquoted
		}
		p.done = false // parser may have seen EOF, but we want to return cat
		p.cur = cat
	}
	return &p.cur
}
649 |
|
---|
650 | func (p *textParser) consumeToken(s string) error {
|
---|
651 | tok := p.next()
|
---|
652 | if tok.err != nil {
|
---|
653 | return tok.err
|
---|
654 | }
|
---|
655 | if tok.value != s {
|
---|
656 | p.back()
|
---|
657 | return p.errorf("expected %q, found %q", s, tok.value)
|
---|
658 | }
|
---|
659 | return nil
|
---|
660 | }
|
---|
661 |
|
---|
// errBadUTF8 is returned when a quoted string that needs unescaping contains
// an invalid UTF-8 sequence.
var errBadUTF8 = errors.New("proto: bad UTF-8")

// unquoteC decodes the escape sequences in s, the contents of a quoted string
// with the surrounding quotes already removed. quote is the delimiter that
// was used.
//
// This is based on C++'s tokenizer.cc. Despite its name, this is *not*
// parsing C syntax. For instance, "\0" is an invalid quoted string.
func unquoteC(s string, quote rune) (string, error) {
	// Fast path: with no backslash and no quote character there is nothing
	// to rewrite, so s is returned as is without allocating.
	needsUnescape := false
	for _, r := range s {
		if r == quote || r == '\\' {
			needsUnescape = true
			break
		}
	}
	if !needsUnescape {
		return s, nil
	}

	out := make([]byte, 0, 3*len(s)/2)
	for rest := s; len(rest) > 0; {
		r, size := utf8.DecodeRuneInString(rest)
		switch {
		case size == 1 && r == utf8.RuneError:
			return "", errBadUTF8
		case r == '\\':
			// Decode the escape that follows the backslash and continue
			// after whatever it consumed.
			ch, tail, err := unescape(rest[size:])
			if err != nil {
				return "", err
			}
			out = append(out, ch...)
			rest = tail
		case r < utf8.RuneSelf:
			out = append(out, byte(r))
			rest = rest[size:]
		default:
			out = append(out, string(r)...)
			rest = rest[size:]
		}
	}
	return string(out), nil
}

// unescape decodes one escape sequence. s is the text immediately after the
// backslash. It returns the decoded text in ch and the unconsumed remainder
// of s in tail.
//
// Supported forms: the single-character escapes a b f n r t v ? ' " \,
// exactly three octal digits, \x or \X with two hex digits (one raw byte),
// \u with four hex digits and \U with eight hex digits (one code point).
func unescape(s string) (ch string, tail string, err error) {
	r, size := utf8.DecodeRuneInString(s)
	if size == 1 && r == utf8.RuneError {
		return "", "", errBadUTF8
	}
	rest := s[size:]

	switch {
	case '0' <= r && r <= '7':
		// Octal: the leading digit plus exactly two more, fitting in a byte.
		if len(rest) < 2 {
			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
		}
		digits := string(r) + rest[:2]
		v, perr := strconv.ParseUint(digits, 8, 8)
		if perr != nil {
			return "", "", fmt.Errorf(`\%s contains non-octal digits`, digits)
		}
		return string([]byte{byte(v)}), rest[2:], nil

	case r == 'x' || r == 'X' || r == 'u' || r == 'U':
		// Hexadecimal: a fixed number of digits chosen by the escape letter.
		width := 2
		if r == 'u' {
			width = 4
		} else if r == 'U' {
			width = 8
		}
		if len(rest) < width {
			return "", "", fmt.Errorf(`\%c requires %d following digits`, r, width)
		}
		digits := rest[:width]
		v, perr := strconv.ParseUint(digits, 16, 64)
		if perr != nil {
			return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, digits)
		}
		if width == 2 {
			// \x and \X denote one raw byte, not a code point.
			return string([]byte{byte(v)}), rest[width:], nil
		}
		if v > utf8.MaxRune {
			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, digits)
		}
		return string(rune(v)), rest[width:], nil
	}

	// Single-character escapes.
	var lit string
	switch r {
	case 'a':
		lit = "\a"
	case 'b':
		lit = "\b"
	case 'f':
		lit = "\f"
	case 'n':
		lit = "\n"
	case 'r':
		lit = "\r"
	case 't':
		lit = "\t"
	case 'v':
		lit = "\v"
	case '?':
		lit = "?" // trigraph workaround
	case '\'', '"', '\\':
		lit = string(r)
	default:
		return "", "", fmt.Errorf(`unknown escape \%c`, r)
	}
	return lit, rest, nil
}
772 |
|
---|
// isIdentOrNumberChar reports whether c may appear in an identifier or number
// token: an ASCII letter, an ASCII digit, or one of '-', '+', '.', '_'.
func isIdentOrNumberChar(c byte) bool {
	isLetter := ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
	isDigit := '0' <= c && c <= '9'
	isPunct := c == '-' || c == '+' || c == '.' || c == '_'
	return isLetter || isDigit || isPunct
}
786 |
|
---|
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
794 |
|
---|
// isQuote reports whether c is a double or single quote character.
func isQuote(c byte) bool {
	return c == '"' || c == '\''
}