source: code/trunk/vendor/modernc.org/cc/v3/scanner.go@ 822

Last change on this file since 822 was 822, checked in by yakumo.izuru, 22 months ago

Prefer immortal.run over runit and rc.d, use vendored modules
for convenience.

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 29.2 KB
Line 
1// Copyright 2019 The CC Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package cc // import "modernc.org/cc/v3"
6
7import (
8 "bufio"
9 "bytes"
10 "fmt"
11 goscanner "go/scanner"
12 "io"
13 "path/filepath"
14 "strconv"
15 "strings"
16 "sync"
17 "unicode/utf8"
18
19 "modernc.org/mathutil"
20 "modernc.org/token"
21)
22
const (
	// Character classes returned by scanner.class for bytes that are not
	// plain ASCII: clsEOF marks end of input, clsOther any byte > maxASCII.
	// Values start above 0x7f so they cannot collide with ASCII bytes.
	clsEOF = iota + 0x80
	clsOther
)

// maxASCII is the highest 7-bit ASCII code point.
const maxASCII = 0x7f
29
var (
	// bom is the UTF-8 byte order mark that may prefix a source file.
	bom = []byte{0xEF, 0xBB, 0xBF}

	// Pre-interned spellings of the preprocessing directive names and
	// other frequently compared identifiers.
	idDefine      = dict.sid("define")
	idElif        = dict.sid("elif")
	idElse        = dict.sid("else")
	idEndif       = dict.sid("endif")
	idError       = dict.sid("error")
	idIf          = dict.sid("if")
	idIfdef       = dict.sid("ifdef")
	idIfndef      = dict.sid("ifndef")
	idInclude     = dict.sid("include")
	idIncludeNext = dict.sid("include_next")
	idLine        = dict.sid("line")
	idPragma      = dict.sid("pragma")
	idPragmaOp    = dict.sid("_Pragma")
	idSpace       = dict.sid(" ")
	idUndef       = dict.sid("undef")

	// trigraphPrefix is the two-character introducer shared by all
	// trigraph sequences; trigraphs maps each ??x sequence to its
	// single-character replacement ([0], 5.2.1.1).
	trigraphPrefix = []byte("??")
	trigraphs      = []struct{ from, to []byte }{
		{[]byte("??="), []byte{'#'}},
		{[]byte("??("), []byte{'['}},
		{[]byte("??/"), []byte{'\\'}},
		{[]byte("??)"), []byte{']'}},
		{[]byte("??'"), []byte{'^'}},
		{[]byte("??<"), []byte{'{'}},
		{[]byte("??!"), []byte{'|'}},
		{[]byte("??>"), []byte{'}'}},
		{[]byte("??-"), []byte{'~'}},
	}
)
62
// tokenFile wraps token.File with an RWMutex so position queries and
// line registration can be performed from concurrent goroutines.
type tokenFile struct {
	*token.File
	sync.RWMutex
}

// tokenNewFile returns a new tokenFile named name with size sz bytes.
func tokenNewFile(name string, sz int) *tokenFile { return &tokenFile{File: token.NewFile(name, sz)} }
69
70func (f *tokenFile) Position(pos token.Pos) (r token.Position) {
71 f.RLock()
72 r = f.File.Position(pos)
73 f.RUnlock()
74 return r
75}
76
77func (f *tokenFile) PositionFor(pos token.Pos, adjusted bool) (r token.Position) {
78 f.RLock()
79 r = f.File.PositionFor(pos, adjusted)
80 f.RUnlock()
81 return r
82}
83
84func (f *tokenFile) AddLine(off int) {
85 f.Lock()
86 f.File.AddLine(off)
87 f.Unlock()
88}
89
90func (f *tokenFile) AddLineInfo(off int, fn string, line int) {
91 f.Lock()
92 f.File.AddLineInfo(off, fn, line)
93 f.Unlock()
94}
95
// node is implemented by anything that has a position within a source file.
type node interface {
	Pos() token.Pos
}
99
// dictionary interns strings, assigning each distinct string a dense
// StringID. Access is guarded by mu; strings[id] recovers the text.
type dictionary struct {
	mu      sync.RWMutex
	m       map[string]StringID
	strings []string
}
105
106func newDictionary() (r *dictionary) {
107 r = &dictionary{m: map[string]StringID{}}
108 b := make([]byte, 1)
109 for i := 0; i < 128; i++ {
110 var s string
111 if i != 0 {
112 b[0] = byte(i)
113 s = string(b)
114 }
115 r.m[s] = StringID(i)
116 r.strings = append(r.strings, s)
117 dictStrings[i] = s
118 }
119 return r
120}
121
// id returns the StringID of key, interning it on first use. The empty
// key maps to 0 and a single non-zero ASCII byte maps to itself, both
// without taking the lock.
func (d *dictionary) id(key []byte) StringID {
	switch len(key) {
	case 0:
		return 0
	case 1:
		if c := key[0]; c != 0 && c < 128 {
			return StringID(c)
		}
	}

	d.mu.Lock()
	// string(key) used directly as a map index does not allocate.
	if n, ok := d.m[string(key)]; ok {
		d.mu.Unlock()
		return n
	}

	// First occurrence: assign the next sequential ID.
	n := StringID(len(d.strings))
	s := string(key)
	if int(n) < 256 {
		// Mirror small IDs into dictStrings (see newDictionary).
		dictStrings[n] = s
	}
	d.strings = append(d.strings, s)
	d.m[s] = n
	d.mu.Unlock()
	return n
}
148
// sid is the string-keyed variant of id: it returns the StringID of
// key, interning it on first use, with the same fast paths for the
// empty string and single non-zero ASCII bytes.
func (d *dictionary) sid(key string) StringID {
	switch len(key) {
	case 0:
		return 0
	case 1:
		if c := key[0]; c != 0 && c < 128 {
			return StringID(c)
		}
	}

	d.mu.Lock()
	if n, ok := d.m[key]; ok {
		d.mu.Unlock()
		return n
	}

	// First occurrence: assign the next sequential ID.
	n := StringID(len(d.strings))
	if int(n) < 256 {
		// Mirror small IDs into dictStrings (see newDictionary).
		dictStrings[n] = key
	}
	d.strings = append(d.strings, key)
	d.m[key] = n
	d.mu.Unlock()
	return n
}
174
// char is a single input byte annotated with its file position. A zero
// pos marks an invalid/absent character (see scanner.unget and
// scanner.next).
type char struct {
	pos int32
	c   byte
}
179
// token3 is produced by translation phase 3.
type token3 struct {
	char  rune     // Token kind: a character code or token constant; -1 is EOF (see lex).
	pos   int32    // Position within the token file.
	value StringID // Normalized token value (e.g. identifiers with UCNs decoded).
	src   StringID // Verbatim source text of the token.
	macro StringID // NOTE(review): not set in this file; presumably the macro the token came from — confirm.
}

func (t token3) Pos() token.Pos { return token.Pos(t.pos) }
func (t token3) String() string { return t.value.String() }
191
// scanner tokenizes one source file, implementing translation phases
// 1-3: trigraph mapping, line splicing and decomposition into
// preprocessing tokens.
type scanner struct {
	bomFix        int       // Length of a skipped UTF-8 BOM, folded into fileOffset on the first line read.
	bytesBuf      []byte    // Scratch buffer for normalized identifier values (see lex).
	charBuf       []char    // Characters consumed for the token being scanned.
	ctx           *context
	file          *tokenFile
	fileOffset    int       // File offset of the next physical line to read.
	firstPos      token.Pos // Position of the first character of the current token.
	lineBuf       []byte    // Unconsumed remainder of the current line.
	lookaheadChar char      // Next character; pos == 0 means invalid.
	lookaheadLine ppLine    // Next parsed line (phase 3 line parser).
	mark          int       // Checkpoint into charBuf for abort; -1 when inactive.
	pos           token.Pos // Position of the current read point.
	r             *bufio.Reader
	srcBuf        []byte   // Verbatim source bytes of the current token.
	tokenBuf      []token3 // Tokens of the line(s) being parsed; shared by scanToNonBlankToken/scanLineToEOL.
	ungetBuf      []char   // Stack of pushed-back characters (LIFO).

	tok token3 // Most recently lexed token.

	closed             bool // Set after a fatal error; subsequent reads report EOF.
	preserveWhiteSpace bool // Keep white-space runs verbatim (used while scanning #include).
}
215
// newScanner0 creates a scanner over r with the given read buffer size.
// BOM detection (init) runs only when r is non-nil; note that s.r
// itself is always non-nil since it wraps r in a bufio.Reader.
func newScanner0(ctx *context, r io.Reader, file *tokenFile, bufSize int) *scanner {
	s := &scanner{
		ctx:  ctx,
		file: file,
		r:    bufio.NewReaderSize(r, bufSize),
	}
	if r != nil {
		s.init()
	}
	return s
}
227
228func newScanner(ctx *context, r io.Reader, file *tokenFile) *scanner {
229 bufSize := 1 << 17 // emulate gcc
230 if n := ctx.cfg.MaxSourceLine; n > 4096 {
231 bufSize = n
232 }
233 return newScanner0(ctx, r, file, bufSize)
234}
235
// abort either rolls the scanner back to the active mark (returning
// 0, false) or, when no mark is set, resynchronizes after a failed
// match and returns the class of the character to restart at together
// with true.
func (s *scanner) abort() (r byte, b bool) {
	if s.mark >= 0 {
		// A mark is active: push everything consumed past the mark
		// back onto the unget stack and truncate charBuf to the mark.
		if len(s.charBuf) > s.mark {
			s.unget(s.lookaheadChar)
			for i := len(s.charBuf) - 1; i >= s.mark; i-- {
				s.unget(s.charBuf[i])
			}
		}
		s.charBuf = s.charBuf[:s.mark]
		return 0, false
	}

	switch n := len(s.charBuf); n {
	case 0: // [] z
		// Nothing consumed: emit the lookahead itself.
		c := s.lookaheadChar
		s.next()
		return s.class(c.c), true
	case 1: // [a] z
		return s.class(s.charBuf[0].c), true
	default: // [a, b, ...], z
		// Keep the first consumed character as the token, push the
		// tail back and make the second character the new lookahead.
		c := s.charBuf[0]        // a
		s.unget(s.lookaheadChar) // z
		for i := n - 1; i > 1; i-- {
			s.unget(s.charBuf[i]) // ...
		}
		s.lookaheadChar = s.charBuf[1] // b
		s.charBuf = s.charBuf[:1]
		return s.class(c.c), true
	}
}
266
267func (s *scanner) class(b byte) byte {
268 switch {
269 case b == 0:
270 return clsEOF
271 case b > maxASCII:
272 return clsOther
273 default:
274 return b
275 }
276}
277
// err reports msg, formatted with args, at the position of node n.
func (s *scanner) err(n node, msg string, args ...interface{}) { s.errPos(n.Pos(), msg, args...) }
279
// errLine reports msg prefixed with a one-line rendering of x, which
// must be nil or a ppLine; any other type is an internal error.
// New-line tokens are dropped and space tokens render as one space.
func (s *scanner) errLine(x interface{}, msg string, args ...interface{}) {
	var toks []token3
	switch x := x.(type) {
	case nil:
		// Use a single zero token so the error has a (zero) position.
		toks = []token3{{}}
	case ppLine:
		toks = x.getToks()
	default:
		panic(internalError())
	}
	var b strings.Builder
	for _, v := range toks {
		switch v.char {
		case '\n':
			// nop
		case ' ':
			b.WriteByte(' ')
		default:
			b.WriteString(v.String())
		}
	}
	// The rendered line is prepended to msg via the extra %s verb.
	s.err(toks[0], "%s"+msg, append([]interface{}{b.String()}, args...)...)
}
303
// errPos reports msg at pos via the context. When ctx.err reports true
// the scan is shut down: the reader is dropped and the scanner marked
// closed so further reads yield EOF.
func (s *scanner) errPos(pos token.Pos, msg string, args ...interface{}) {
	if s.ctx.err(s.file.Position(pos), msg, args...) {
		s.r.Reset(nil)
		s.closed = true
	}
}
310
// init skips a leading UTF-8 BOM, remembering in bomFix how many bytes
// were discarded so offsets can be adjusted on the first line read, and
// clears any leftover token buffer. It is a no-op without a reader.
func (s *scanner) init() *scanner {
	if s.r == nil {
		return s
	}

	b, err := s.r.Peek(3)
	if err == nil && bytes.Equal(b, bom) {
		s.bomFix, _ = s.r.Discard(3)
	}
	s.tokenBuf = nil
	return s
}
323
324func (s *scanner) initScan() (r byte) {
325 if s.lookaheadChar.pos == 0 {
326 s.next()
327 }
328 s.firstPos = token.Pos(s.lookaheadChar.pos)
329 s.mark = -1
330 if len(s.charBuf) > 1<<18 { //DONE benchmark tuned
331 s.bytesBuf = nil
332 s.charBuf = nil
333 s.srcBuf = nil
334 } else {
335 s.bytesBuf = s.bytesBuf[:0]
336 s.charBuf = s.charBuf[:0]
337 s.srcBuf = s.bytesBuf[:0]
338 }
339 return s.class(s.lookaheadChar.c)
340}
341
// lex scans the next preprocessing token into s.tok, recording its
// position, verbatim source text and normalized value.
func (s *scanner) lex() {
	s.tok.char = s.scan()
	s.tok.pos = int32(s.firstPos)
	// charBuf holds the characters consumed for this token; intern the
	// verbatim spelling as the token's src.
	for _, v := range s.charBuf {
		s.srcBuf = append(s.srcBuf, v.c)
	}
	s.tok.src = dict.id(s.srcBuf)
	switch {
	case s.tok.char == ' ' && !s.preserveWhiteSpace && !s.ctx.cfg.PreserveWhiteSpace:
		// White-space runs collapse to the canonical single space.
		s.tok.value = idSpace
	case s.tok.char == IDENTIFIER:
		// Decode universal character names (\uXXXX, \UXXXXXXXX) to
		// UTF-8 so the identifier's value is its normalized spelling.
		for i := 0; i < len(s.charBuf); {
			c := s.charBuf[i].c
			if c != '\\' {
				s.bytesBuf = append(s.bytesBuf, c)
				i++
				continue
			}

			i++ // Skip '\\'
			var n int
			switch s.charBuf[i].c {
			case 'u':
				n = 4
			case 'U':
				n = 8
			default:
				// Scanning admits only \u/\U escapes in identifiers;
				// anything else here is a scanner bug.
				panic(internalError())
			}
			i++ // Skip 'u' or 'U'
			l := len(s.bytesBuf)
			for i0 := i; i < i0+n; i++ {
				s.bytesBuf = append(s.bytesBuf, s.charBuf[i].c)
			}
			r, err := strconv.ParseUint(string(s.bytesBuf[l:l+n]), 16, 32)
			if err != nil {
				panic(internalError())
			}

			// Overwrite the appended hex digits with the encoded rune
			// (the rune needs at most as many bytes as the digits).
			n2 := utf8.EncodeRune(s.bytesBuf[l:], rune(r))
			s.bytesBuf = s.bytesBuf[:l+n2]
		}
		s.tok.value = dict.id(s.bytesBuf)
	default:
		s.tok.value = s.tok.src
	}
	switch s.tok.char {
	case clsEOF:
		// Translate the internal EOF class to the public -1 and place
		// it at the end of the file.
		s.tok.char = -1
		s.tok.pos = int32(s.file.Pos(s.file.Size()))
	}
	// dbg("lex %q %q", tokName(s.tok.char), s.tok.value)
}
395
// next advances the scanner by one character: the previous lookahead is
// appended to charBuf and the class of the new lookahead is returned.
// Input is read one physical line at a time, applying trigraph
// replacement and backslash/new-line splicing (translation phases 1-2).
func (s *scanner) next() (r byte) {
	if s.lookaheadChar.pos > 0 {
		s.charBuf = append(s.charBuf, s.lookaheadChar)
	}
	// Characters pushed back via unget take precedence over the input.
	if n := len(s.ungetBuf); n != 0 {
		s.lookaheadChar = s.ungetBuf[n-1]
		s.ungetBuf = s.ungetBuf[:n-1]
		return s.class(s.lookaheadChar.c)
	}

	if len(s.lineBuf) == 0 {
	more:
		if s.closed || s.fileOffset == s.file.Size() {
			// Hard EOF: invalidate the lookahead.
			s.lookaheadChar.c = 0
			s.lookaheadChar.pos = 0
			return clsEOF
		}

		b, err := s.r.ReadSlice('\n')
		if err != nil {
			if err != io.EOF {
				s.errPos(s.pos, "error while reading %s: %s", s.file.Name(), err)
			}
			if len(b) == 0 {
				return clsEOF
			}
		}

		s.file.AddLine(s.fileOffset)
		// Fold any skipped BOM bytes into the offset exactly once.
		s.fileOffset += s.bomFix
		s.bomFix = 0
		s.pos = token.Pos(s.fileOffset)
		s.fileOffset += len(b)

		// [0], 5.1.1.2, 1.1
		//
		// Physical source file multibyte characters are mapped, in an
		// implementation- defined manner, to the source character set
		// (introducing new-line characters for end-of-line indicators)
		// if necessary. Trigraph sequences are replaced by
		// corresponding single-character internal representations.
		if !s.ctx.cfg.DisableTrigraphs && bytes.Contains(b, trigraphPrefix) {
			for _, v := range trigraphs {
				b = bytes.Replace(b, v.from, v.to, -1)
			}
		}

		// [0], 5.1.1.2, 2
		//
		// Each instance of a backslash character (\) immediately
		// followed by a new-line character is deleted, splicing
		// physical source lines to form logical source lines. Only
		// the last backslash on any physical source line shall be
		// eligible for being part of such a splice. A source file that
		// is not empty shall end in a new-line character, which shall
		// not be immediately preceded by a backslash character before
		// any such splicing takes place.
		s.lineBuf = b
		n := len(b)
		switch {
		case b[n-1] != '\n':
			// Final line without a terminating new-line: diagnose if
			// configured and synthesize one.
			if s.ctx.cfg.RejectMissingFinalNewline {
				s.errPos(s.pos+token.Pos(n), "non empty source file shall end in a new-line character")
			}
			b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
		case n > 1 && b[n-2] == '\\':
			// Backslash splice: drop "\<newline>" and continue with the
			// next physical line.
			if n == 2 {
				goto more
			}

			b = b[:n-2]
			n = len(b)
			if s.fileOffset == s.file.Size() {
				if s.ctx.cfg.RejectFinalBackslash {
					s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
				}
				b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
			}
		case n > 2 && b[n-3] == '\\' && b[n-2] == '\r':
			// we've got a windows source that has \r\n line endings.
			if n == 3 {
				goto more
			}

			b = b[:n-3]
			n = len(b)
			if s.fileOffset == s.file.Size() {
				if s.ctx.cfg.RejectFinalBackslash {
					s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
				}
				b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
			}
		}
		s.lineBuf = b
	}
	s.pos++
	s.lookaheadChar = char{int32(s.pos), s.lineBuf[0]}
	s.lineBuf = s.lineBuf[1:]
	return s.class(s.lookaheadChar.c)
}
496
// unget pushes characters back onto the input; they are re-read in
// reverse order of the arguments (the unget buffer is a LIFO stack).
func (s *scanner) unget(c ...char) {
	s.ungetBuf = append(s.ungetBuf, c...)
	s.lookaheadChar.pos = 0 // Must invalidate lookahead.
}
501
// unterminatedComment reports a comment left open at end of file and
// recovers by pushing back the final new-line and yielding a single
// space in the comment's place (per translation phase 3).
func (s *scanner) unterminatedComment() rune {
	s.errPos(token.Pos(s.file.Size()), "unterminated comment")
	n := len(s.charBuf)
	s.unget(s.charBuf[n-1]) // \n
	s.charBuf = s.charBuf[:n-1]
	return ' '
}
509
// -------------------------------------------------------- Translation phase 3

// [0], 5.1.1.2, 3
//
// The source file is decomposed into preprocessing tokens and sequences of
// white-space characters (including comments). A source file shall not end in
// a partial preprocessing token or in a partial comment. Each comment is
// replaced by one space character. New-line characters are retained. Whether
// each nonempty sequence of white-space characters other than new-line is
// retained or replaced by one space character is implementation-defined.

// translationPhase3 scans the whole input into a ppFile: a tree of
// groups and preprocessing directives.
func (s *scanner) translationPhase3() *ppFile {
	r := &ppFile{file: s.file}
	if s.file.Size() == 0 {
		// Empty file: nothing to scan; release the reader.
		s.r.Reset(nil)
		return r
	}

	s.nextLine()
	r.groups = s.parseGroup()
	return r
}
531
// parseGroup collects consecutive group parts: plain groups are taken
// as-is, an if-group opener recurses into parseIfSection. It returns on
// the first line it does not own (e.g. #elif/#else/#endif) or at EOF
// (nil lookahead).
func (s *scanner) parseGroup() (r []ppGroup) {
	for {
		switch x := s.lookaheadLine.(type) {
		case ppGroup:
			r = append(r, x)
			s.nextLine()
		case ppIfGroupDirective:
			r = append(r, s.parseIfSection())
		default:
			return r
		}
	}
}
545
546func (s *scanner) parseIfSection() *ppIfSection {
547 return &ppIfSection{
548 ifGroup: s.parseIfGroup(),
549 elifGroups: s.parseElifGroup(),
550 elseGroup: s.parseElseGroup(),
551 endifLine: s.parseEndifLine(),
552 }
553}
554
// parseEndifLine consumes the #endif line closing an if-section. When
// the lookahead is something else an error is reported, the line is
// still consumed, and nil is returned.
func (s *scanner) parseEndifLine() *ppEndifDirective {
	switch x := s.lookaheadLine.(type) {
	case *ppEndifDirective:
		s.nextLine()
		return x
	default:
		s.errLine(x, fmt.Sprintf(": expected #endif (unexpected %T)", x))
		s.nextLine()
		return nil
	}
}
566
567func (s *scanner) parseElseGroup() *ppElseGroup {
568 switch x := s.lookaheadLine.(type) {
569 case *ppElseDirective:
570 r := &ppElseGroup{elseLine: x}
571 s.nextLine()
572 r.groups = s.parseGroup()
573 return r
574 default:
575 return nil
576 }
577}
578
579func (s *scanner) parseElifGroup() (r []*ppElifGroup) {
580 for {
581 var g ppElifGroup
582 switch x := s.lookaheadLine.(type) {
583 case *ppElifDirective:
584 g.elif = x
585 s.nextLine()
586 g.groups = s.parseGroup()
587 r = append(r, &g)
588 default:
589 return r
590 }
591 }
592}
593
// parseIfGroup parses an if-group: an #if/#ifdef/#ifndef opener line
// followed by its body groups. A wrong opener is reported but the line
// is consumed anyway so parsing can continue.
func (s *scanner) parseIfGroup() *ppIfGroup {
	r := &ppIfGroup{}
	switch x := s.lookaheadLine.(type) {
	case ppIfGroupDirective:
		r.directive = x
	default:
		s.errLine(x, fmt.Sprintf(": expected if-group (unexpected %T)", x))
	}
	s.nextLine()
	r.groups = s.parseGroup()
	return r
}
606
// nextLine discards the token buffer of the previous line and scans the
// next logical line into the lookahead.
func (s *scanner) nextLine() {
	s.tokenBuf = nil
	s.lookaheadLine = s.scanLine()
}
611
// scanLine scans and classifies one logical source line, returning its
// ppLine representation: a directive, a text line, or nil at EOF.
// _Pragma operators are rewritten into #pragma lines and rescanned.
func (s *scanner) scanLine() (r ppLine) {
again:
	toks := s.scanToNonBlankToken(nil)
	if len(toks) == 0 {
		// EOF.
		return nil
	}

	includeNext := false
	switch tok := toks[len(toks)-1]; tok.char {
	case '#':
		// Potential directive line: dispatch on the directive name.
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			return &ppEmptyDirective{toks: toks}
		case IDENTIFIER:
			switch tok.value {
			case idDefine:
				return s.parseDefine(toks)
			case idElif:
				return s.parseElif(toks)
			case idElse:
				return s.parseElse(toks)
			case idEndif:
				return s.parseEndif(toks)
			case idIf:
				return s.parseIf(toks)
			case idIfdef:
				return s.parseIfdef(toks)
			case idIfndef:
				return s.parseIfndef(toks)
			case idIncludeNext:
				includeNext = true
				fallthrough
			case idInclude:
				// # include pp-tokens new-line
				//
				// Prevent aliasing of eg. <foo bar.h> and <foo bar.h>.
				save := s.preserveWhiteSpace
				s.preserveWhiteSpace = true
				n := len(toks)
				toks := s.scanLineToEOL(toks)
				r := &ppIncludeDirective{arg: toks[n : len(toks)-1], toks: toks, includeNext: includeNext}
				s.preserveWhiteSpace = save
				return r
			case idUndef:
				return s.parseUndef(toks)
			case idLine:
				return s.parseLine(toks)
			case idError:
				// # error pp-tokens_opt new-line
				n := len(toks)
				toks := s.scanLineToEOL(toks)
				msg := toks[n : len(toks)-1]
				if len(msg) != 0 && msg[0].char == ' ' {
					msg = msg[1:]
				}
				return &ppErrorDirective{toks: toks, msg: msg}
			case idPragma:
				return s.parsePragma(toks)
			}
		}

		// # non-directive
		return &ppNonDirective{toks: s.scanLineToEOL(toks)}
	case '\n':
		// Blank line.
		return &ppTextLine{toks: toks}
	case IDENTIFIER:
		if tok.value == idPragmaOp {
			// _Pragma ( string-literal ): parse the operator form.
			toks = s.scanToNonBlankToken(toks)
			switch tok = toks[len(toks)-1]; tok.char {
			case '(':
				// ok
			default:
				s.err(tok, "expected (")
				return &ppTextLine{toks: toks}
			}

			var lit string
			toks = s.scanToNonBlankToken(toks)
			switch tok = toks[len(toks)-1]; tok.char {
			case STRINGLITERAL:
				lit = tok.String()
			case LONGSTRINGLITERAL:
				lit = tok.String()[1:] // [0], 6.9.10, 1
			default:
				s.err(tok, "expected string literal")
				return &ppTextLine{toks: toks}
			}

			pos := tok.pos
			toks = s.scanToNonBlankToken(toks)
			switch tok = toks[len(toks)-1]; tok.char {
			case ')':
				// ok
			default:
				s.err(tok, "expected )")
				return &ppTextLine{toks: toks}
			}

			s.unget(s.lookaheadChar)
			// [0], 6.9.10, 1: strip the quotes, undo string escaping,
			// and re-inject the text as a #pragma line.
			lit = lit[1 : len(lit)-1]
			lit = strings.ReplaceAll(lit, `\"`, `"`)
			lit = strings.ReplaceAll(lit, `\\`, `\`)
			lit = "#pragma " + lit + "\n"
			// Push back in reverse so the line is re-read front-first.
			for i := len(lit) - 1; i >= 0; i-- {
				s.unget(char{pos, lit[i]})
			}
			goto again
		}

		fallthrough
	default:
		return &ppTextLine{toks: s.scanLineToEOL(toks)}
	}
}
728
// parsePragma parses the remainder of a #pragma line. args receives the
// tail of the token list starting at the first non-blank token after
// "pragma" (or at the new-line for an empty pragma).
func (s *scanner) parsePragma(toks []token3) *ppPragmaDirective {
	toks = s.scanToNonBlankToken(toks)
	n := len(toks)
	if toks[n-1].char != '\n' {
		toks = s.scanLineToEOL(toks)
	}
	return &ppPragmaDirective{toks: toks, args: toks[n-1:]}
}
737
// # line pp-tokens new-line
//
// parseLine parses a #line directive. r.args holds the arguments with
// the '#', "line" and surrounding space tokens stripped; r.nextPos is
// the position just past the directive's last token.
func (s *scanner) parseLine(toks []token3) *ppLineDirective {
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		s.err(tok, "unexpected new-line")
		return &ppLineDirective{toks: toks}
	default:
		toks := s.scanLineToEOL(toks)
		last := toks[len(toks)-1]
		r := &ppLineDirective{toks: toks, nextPos: int(last.pos) + len(last.src.String())}
		toks = toks[:len(toks)-1] // sans new-line
		toks = ltrim3(toks)
		toks = toks[1:] // Skip '#'
		toks = ltrim3(toks)
		toks = toks[1:] // Skip "line"
		r.args = ltrim3(toks)
		return r
	}
}
758
759func ltrim3(toks []token3) []token3 {
760 for len(toks) != 0 && toks[0].char == ' ' {
761 toks = toks[1:]
762 }
763 return toks
764}
765
// # undef identifier new-line
//
// parseUndef parses an #undef line; name is the token of the undefined
// identifier, left zero when the line is malformed.
func (s *scanner) parseUndef(toks []token3) *ppUndefDirective {
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		s.err(&tok, "expected identifier")
		return &ppUndefDirective{toks: toks}
	case IDENTIFIER:
		name := tok
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			return &ppUndefDirective{name: name, toks: toks}
		default:
			// Trailing tokens are accepted unless configured otherwise.
			if s.ctx.cfg.RejectUndefExtraTokens {
				s.err(&tok, "extra tokens after #undef")
			}
			return &ppUndefDirective{name: name, toks: s.scanLineToEOL(toks)}
		}
	default:
		s.err(&tok, "expected identifier")
		return &ppUndefDirective{toks: s.scanLineToEOL(toks)}
	}
}
790
791func (s *scanner) scanLineToEOL(toks []token3) []token3 {
792 n := len(s.tokenBuf) - len(toks)
793 for {
794 s.lex()
795 s.tokenBuf = append(s.tokenBuf, s.tok)
796 if s.tok.char == '\n' {
797 return s.tokenBuf[n:]
798 }
799 }
800}
801
// # ifndef identifier new-line
//
// parseIfndef parses an #ifndef line; name is the tested identifier, or
// 0 when the line is malformed.
func (s *scanner) parseIfndef(toks []token3) *ppIfndefDirective {
	var name StringID
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case IDENTIFIER:
		name = tok.value
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			return &ppIfndefDirective{name: name, toks: toks}
		default:
			// Trailing tokens are accepted unless configured otherwise.
			if s.ctx.cfg.RejectIfndefExtraTokens {
				s.err(&tok, "extra tokens after #ifndef")
			}
			return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)}
		}
	case '\n':
		s.err(tok, "expected identifier")
		return &ppIfndefDirective{name: name, toks: toks}
	default:
		s.err(tok, "expected identifier")
		return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)}
	}
}
827
// # ifdef identifier new-line
//
// parseIfdef parses an #ifdef line; name is the tested identifier, or
// 0 when the line is malformed.
func (s *scanner) parseIfdef(toks []token3) *ppIfdefDirective {
	var name StringID
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case IDENTIFIER:
		name = tok.value
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			return &ppIfdefDirective{name: name, toks: toks}
		default:
			// Trailing tokens are accepted unless configured otherwise.
			if s.ctx.cfg.RejectIfdefExtraTokens {
				s.err(&tok, "extra tokens after #ifdef")
			}
			return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)}
		}
	case '\n':
		s.err(tok, "expected identifier")
		return &ppIfdefDirective{name: name, toks: toks}
	default:
		s.err(tok, "expected identifier")
		return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)}
	}
}
853
// # if constant-expression new-line
//
// parseIf parses an #if line; expr holds the constant-expression tokens
// without any leading space token and without the trailing new-line.
func (s *scanner) parseIf(toks []token3) *ppIfDirective {
	n := len(toks)
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		s.err(tok, "expected expression")
		return &ppIfDirective{toks: toks}
	default:
		toks = s.scanLineToEOL(toks)
		expr := toks[n:]
		if expr[0].char == ' ' { // sans leading space
			expr = expr[1:]
		}
		expr = expr[:len(expr)-1] // sans '\n'
		return &ppIfDirective{toks: toks, expr: expr}
	}
}
872
873// # endif new-line
874func (s *scanner) parseEndif(toks []token3) *ppEndifDirective {
875 toks = s.scanToNonBlankToken(toks)
876 switch tok := toks[len(toks)-1]; tok.char {
877 case '\n':
878 return &ppEndifDirective{toks}
879 default:
880 if s.ctx.cfg.RejectEndifExtraTokens {
881 s.err(&tok, "extra tokens after #else")
882 }
883 return &ppEndifDirective{s.scanLineToEOL(toks)}
884 }
885}
886
// # else new-line
//
// parseElse parses an #else line. Extra tokens after the directive are
// diagnosed only when cfg.RejectElseExtraTokens is set.
func (s *scanner) parseElse(toks []token3) *ppElseDirective {
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		return &ppElseDirective{toks}
	default:
		if s.ctx.cfg.RejectElseExtraTokens {
			s.err(&tok, "extra tokens after #else")
		}
		return &ppElseDirective{s.scanLineToEOL(toks)}
	}
}
900
// # elif constant-expression new-line
//
// parseElif parses an #elif line; expr holds the constant-expression
// tokens without any leading space token and trailing new-line.
func (s *scanner) parseElif(toks []token3) *ppElifDirective {
	n := len(toks)
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		s.err(tok, "expected expression")
		return &ppElifDirective{toks, nil}
	default:
		toks = s.scanLineToEOL(toks)
		expr := toks[n:]
		if expr[0].char == ' ' { // sans leading space
			expr = expr[1:]
		}
		expr = expr[:len(expr)-1] // sans '\n'
		return &ppElifDirective{toks, expr}
	}
}
919
// parseDefine parses the tail of a #define directive. The distinction
// between object-like and function-like macros hinges on whether '('
// immediately follows the macro name with no intervening white space.
func (s *scanner) parseDefine(toks []token3) ppLine {
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case IDENTIFIER:
		name := tok
		n := len(toks)
		toks = s.scanToNonBlankToken(toks)
		switch tok := toks[len(toks)-1]; tok.char {
		case '\n':
			// #define name — empty replacement list.
			return &ppDefineObjectMacroDirective{name: name, toks: toks}
		case '(':
			// A space before '(' makes this an object-like macro whose
			// replacement list begins with '('.
			if toks[n].char == ' ' {
				return s.parseDefineObjectMacro(n, name, toks)
			}

			return s.parseDefineFunctionMacro(name, toks)
		default:
			return s.parseDefineObjectMacro(n, name, toks)
		}
	case '\n':
		s.err(tok, "expected identifier")
		return &ppDefineObjectMacroDirective{toks: toks}
	default:
		s.err(tok, "expected identifier")
		return &ppDefineObjectMacroDirective{toks: s.scanLineToEOL(toks)}
	}
}
947
// # define identifier lparen identifier-list_opt ) replacement-list new-line
// # define identifier lparen ... ) replacement-list new-line
// # define identifier lparen identifier-list , ... ) replacement-list new-line
//
// parseDefineFunctionMacro parses the parameter list and replacement
// list of a function-like macro; the "#define name(" prefix has already
// been consumed and name is the macro name token.
func (s *scanner) parseDefineFunctionMacro(name token3, toks []token3) *ppDefineFunctionMacroDirective {
	// Parse parameters after "#define name(".
	var list []token3
	variadic := false
	namedVariadic := false // "name..." form instead of "name, ..."
again:
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case IDENTIFIER:
	more:
		list = append(list, tok)
		toks = s.scanToNonBlankToken(toks)
		switch tok = toks[len(toks)-1]; tok.char {
		case ',':
			toks = s.scanToNonBlankToken(toks)
			switch tok = toks[len(toks)-1]; tok.char {
			case IDENTIFIER:
				goto more
			case DDD:
				if toks, variadic = s.parseDDD(toks); !variadic {
					goto again
				}
			case ')':
				s.err(tok, "expected parameter name")
			default:
				s.err(tok, "unexpected %q", &tok)
			}
		case DDD:
			// Identifier immediately followed by "...": optionally
			// diagnosed as an invalid variadic form.
			namedVariadic = true
			if s.ctx.cfg.RejectInvalidVariadicMacros {
				s.err(tok, "expected comma")
			}
			if toks, variadic = s.parseDDD(toks); !variadic {
				goto again
			}
		case ')':
			// ok
		case '\n':
			s.err(tok, "unexpected new-line")
			return &ppDefineFunctionMacroDirective{toks: toks}
		case IDENTIFIER:
			// Two identifiers in a row: report, keep collecting.
			s.err(tok, "expected comma")
			goto more
		default:
			s.err(tok, "unexpected %q", &tok)
		}
	case DDD:
		if toks, variadic = s.parseDDD(toks); !variadic {
			goto again
		}
	case ',':
		s.err(tok, "expected parameter name")
		goto again
	case ')':
		// ok
	default:
		s.err(tok, "expected parameter name")
		goto again
	}
	// Parse replacement list.
	n := len(toks)
	toks = s.scanToNonBlankToken(toks)
	switch tok := toks[len(toks)-1]; tok.char {
	case '\n':
		if s.ctx.cfg.RejectFunctionMacroEmptyReplacementList {
			s.err(tok, "expected replacement list")
		}
		return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, variadic: variadic, namedVariadic: namedVariadic}
	default:
		toks = s.scanLineToEOL(toks)
		repl := toks[n:]          // sans the directive prefix up to ')'
		repl = repl[:len(repl)-1] // sans '\n'
		// 6.10.3, 7
		//
		// Any white-space characters preceding or following the
		// replacement list of preprocessing tokens are not considered
		// part of the replacement list for either form of macro.
		repl = trim3(repl)
		repl = normalizeHashes(repl)
		return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, replacementList: repl, variadic: variadic, namedVariadic: namedVariadic}
	}
}
1033
// isWhite reports whether char is one of the white-space characters
// recognized by the preprocessor: space, tab, new-line, vertical tab or
// form feed.
func isWhite(char rune) bool {
	return char == ' ' || char == '\t' || char == '\n' || char == '\v' || char == '\f'
}
1041
1042func trim3(toks []token3) []token3 {
1043 for len(toks) != 0 && isWhite(toks[0].char) {
1044 toks = toks[1:]
1045 }
1046 for len(toks) != 0 && isWhite(toks[len(toks)-1].char) {
1047 toks = toks[:len(toks)-1]
1048 }
1049 return toks
1050}
1051
// normalizeHashes compacts toks in place, dropping white-space tokens
// adjacent to the "##" (PPPASTE) operator and directly following '#',
// so replacement lists have a canonical shape.
func normalizeHashes(toks []token3) []token3 {
	w := 0 // Write index of the in-place filter.
	var last rune
	for _, v := range toks {
		switch {
		case v.char == PPPASTE:
			// Remove a white-space token already written before "##".
			if isWhite(last) {
				w--
			}
		case isWhite(v.char):
			// Skip white space directly after '#' or "##".
			if last == '#' || last == PPPASTE {
				continue
			}
		}
		last = v.char
		toks[w] = v
		w++
	}
	return toks[:w]
}
1072
1073func (s *scanner) parseDDD(toks []token3) ([]token3, bool) {
1074 toks = s.scanToNonBlankToken(toks)
1075 switch tok := toks[len(toks)-1]; tok.char {
1076 case ')':
1077 return toks, true
1078 default:
1079 s.err(tok, "expected right parenthesis")
1080 return toks, false
1081 }
1082}
1083
// # define identifier replacement-list new-line
//
// parseDefineObjectMacro parses the replacement list of an object-like
// macro. n is the index within toks just past "#define identifier".
func (s *scanner) parseDefineObjectMacro(n int, name token3, toks []token3) *ppDefineObjectMacroDirective {
	toks = s.scanLineToEOL(toks)
	repl := toks[n:]          // sans #define identifier
	repl = repl[:len(repl)-1] // sans '\n'
	// 6.10.3, 7
	//
	// Any white-space characters preceding or following the replacement
	// list of preprocessing tokens are not considered part of the
	// replacement list for either form of macro.
	repl = trim3(repl)
	repl = normalizeHashes(repl)
	return &ppDefineObjectMacroDirective{name: name, toks: toks, replacementList: repl}
}
1098
// Return {}, {x} or {' ', x}
//
// scanToNonBlankToken appends tokens to the shared s.tokenBuf until a
// non-space token is seen and returns the slice of s.tokenBuf that
// extends toks. EOF (negative char) terminates without being appended.
func (s *scanner) scanToNonBlankToken(toks []token3) []token3 {
	n := len(s.tokenBuf) - len(toks) // toks is a suffix of tokenBuf.
	for {
		s.lex()
		if s.tok.char < 0 {
			// EOF: do not append it.
			return s.tokenBuf[n:]
		}

		s.tokenBuf = append(s.tokenBuf, s.tok)
		if s.tok.char != ' ' {
			return s.tokenBuf[n:]
		}
	}
}
1114
1115// ---------------------------------------------------------------------- Cache
1116
// Translation phase4 source.
//
// source yields a file already taken through translation phase 3.
type source interface {
	ppFile() (*ppFile, error)
}

// cachedPPFile is a cache entry for one parsed source. The entry may
// still be in flight; readyCh is closed once pf, err and errs are final.
type cachedPPFile struct {
	err     error               // Failure producing the parse, if any.
	errs    goscanner.ErrorList // Errors collected by the parsing context.
	modTime int64               // time.Time.UnixNano()
	pf      *ppFile             // The parse result.
	readyCh chan struct{}       // Closed by ready when the entry is complete.
	size    int                 // File size used for cache validation.
}
1130
// ready marks the entry complete, releasing all waiters.
func (c *cachedPPFile) ready() *cachedPPFile { close(c.readyCh); return c }

// waitFor blocks until the entry is ready, returning it with any error.
func (c *cachedPPFile) waitFor() (*cachedPPFile, error) { <-c.readyCh; return c, c.err }
1133
1134func (c *cachedPPFile) ppFile() (*ppFile, error) {
1135 c.waitFor()
1136 if c.err == nil {
1137 return c.pf, nil
1138 }
1139
1140 return nil, c.err
1141}
1142
// cacheKey identifies a cached parse: the source name, whether it was
// resolved as a system include, the interned in-memory value (0 for
// file-backed sources) and the Config3 the parse depends on.
type cacheKey struct {
	name  StringID
	sys   bool
	value StringID
	Config3
}
1149
// ppCache caches translation-phase-3 parses keyed by cacheKey.
type ppCache struct {
	mu sync.RWMutex
	m  map[cacheKey]*cachedPPFile
}

// newPPCache returns an empty cache.
func newPPCache() *ppCache { return &ppCache{m: map[cacheKey]*cachedPPFile{}} }
1156
1157func (c *ppCache) get(ctx *context, src Source) (source, error) {
1158 if src.Value != "" {
1159 return c.getValue(ctx, src.Name, src.Value, false, src.DoNotCache)
1160 }
1161
1162 return c.getFile(ctx, src.Name, false, src.DoNotCache)
1163}
1164
// getFile returns the cached phase-3 parse of the file name, launching
// an asynchronous parse on a cache miss. Relative paths are parsed
// fresh and never cached; doNotCache suppresses cache insertion; sys is
// forwarded to ctx.statFile/ctx.openFile.
func (c *ppCache) getFile(ctx *context, name string, sys bool, doNotCache bool) (*cachedPPFile, error) {
	fi, err := ctx.statFile(name, sys)
	if err != nil {
		return nil, err
	}

	if !fi.Mode().IsRegular() {
		return nil, fmt.Errorf("%s is not a regular file", name)
	}

	if fi.Size() > mathutil.MaxInt {
		return nil, fmt.Errorf("%s: file too big", name)
	}

	size := int(fi.Size())
	if !filepath.IsAbs(name) { // Never cache relative paths
		f, err := ctx.openFile(name, sys)
		if err != nil {
			return nil, err
		}

		defer f.Close()

		// Parse synchronously and return a ready, uncached entry.
		tf := tokenNewFile(name, size)
		ppFile := newScanner(ctx, f, tf).translationPhase3()
		cf := &cachedPPFile{pf: ppFile, readyCh: make(chan struct{})}
		cf.ready()
		return cf, nil
	}

	modTime := fi.ModTime().UnixNano()
	key := cacheKey{dict.sid(name), sys, 0, ctx.cfg.Config3}
	c.mu.Lock()
	if cf, ok := c.m[key]; ok {
		// The hit is valid only if the file is unchanged since parsing.
		if modTime <= cf.modTime && size == cf.size {
			c.mu.Unlock()
			// NOTE(review): cf.err is read here before waiting on
			// readyCh; if the entry can still be in flight this races
			// with the parsing goroutine below — confirm.
			if cf.err != nil {
				return nil, cf.err
			}

			r, err := cf.waitFor()
			ctx.errs(cf.errs)
			return r, err
		}

		// Stale entry: drop it and re-parse below.
		delete(c.m, key)
	}

	tf := tokenNewFile(name, size)
	cf := &cachedPPFile{modTime: modTime, size: size, readyCh: make(chan struct{})}
	if !doNotCache {
		c.m[key] = cf
	}
	c.mu.Unlock()

	// Parse in a separate goroutine; the entry becomes ready (readyCh
	// closed) when the parse finishes.
	go func() {
		defer cf.ready()

		f, err := ctx.openFile(name, sys)
		if err != nil {
			cf.err = err
			return
		}

		defer f.Close()

		// Use a fresh context so the error list can be stored on the
		// entry and replayed to later cache consumers.
		ctx2 := newContext(ctx.cfg)
		cf.pf = newScanner(ctx2, f, tf).translationPhase3()
		cf.errs = ctx2.ErrorList
		ctx.errs(cf.errs)
	}()

	return cf.waitFor()
}
1239
// getValue returns the cached phase-3 parse of the in-memory source
// value registered under name, parsing it synchronously on a cache
// miss. doNotCache suppresses cache insertion; sys participates in the
// cache key only.
func (c *ppCache) getValue(ctx *context, name, value string, sys bool, doNotCache bool) (*cachedPPFile, error) {
	key := cacheKey{dict.sid(name), sys, dict.sid(value), ctx.cfg.Config3}
	c.mu.Lock()
	if cf, ok := c.m[key]; ok {
		c.mu.Unlock()
		// NOTE(review): cf.err is read before waitFor; if the entry can
		// still be in flight this races with its writer — confirm.
		if cf.err != nil {
			return nil, cf.err
		}

		r, err := cf.waitFor()
		ctx.errs(cf.errs)
		return r, err
	}

	tf := tokenNewFile(name, len(value))
	cf := &cachedPPFile{readyCh: make(chan struct{})}
	if !doNotCache {
		c.m[key] = cf
	}
	c.mu.Unlock()
	// Parse with a fresh context so the error list can be stored on the
	// entry and replayed to later cache consumers.
	ctx2 := newContext(ctx.cfg)
	cf.pf = newScanner(ctx2, strings.NewReader(value), tf).translationPhase3()
	cf.errs = ctx2.ErrorList
	ctx.errs(cf.errs)
	cf.ready()
	return cf.waitFor()
}
Note: See TracBrowser for help on using the repository browser.