summaryrefslogtreecommitdiffstats
path: root/vendor/modernc.org/cc/v3/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/modernc.org/cc/v3/scanner.go')
-rw-r--r--vendor/modernc.org/cc/v3/scanner.go1266
1 files changed, 1266 insertions, 0 deletions
diff --git a/vendor/modernc.org/cc/v3/scanner.go b/vendor/modernc.org/cc/v3/scanner.go
new file mode 100644
index 00000000..6217d7db
--- /dev/null
+++ b/vendor/modernc.org/cc/v3/scanner.go
@@ -0,0 +1,1266 @@
+// Copyright 2019 The CC Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cc // import "modernc.org/cc/v3"
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ goscanner "go/scanner"
+ "io"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "sync"
+ "unicode/utf8"
+
+ "modernc.org/mathutil"
+ "modernc.org/token"
+)
+
+// Character classes returned by (*scanner).class for bytes that are not
+// passed through unchanged.
+const (
+ // clsEOF is the class reported for a zero byte.
+ clsEOF = iota + 0x80
+ // clsOther is the class reported for any byte above maxASCII.
+ clsOther
+)
+
+// maxASCII is the largest byte value (*scanner).class returns unchanged.
+const maxASCII = 0x7f
+
+var (
+ // bom is the three byte prefix (*scanner).init looks for, and discards, at
+ // the start of the input.
+ bom = []byte{0xEF, 0xBB, 0xBF}
+
+ // Interned identifiers compared against token values when recognizing
+ // preprocessing directives and the _Pragma operator.
+ idDefine = dict.sid("define")
+ idElif = dict.sid("elif")
+ idElse = dict.sid("else")
+ idEndif = dict.sid("endif")
+ idError = dict.sid("error")
+ idIf = dict.sid("if")
+ idIfdef = dict.sid("ifdef")
+ idIfndef = dict.sid("ifndef")
+ idInclude = dict.sid("include")
+ idIncludeNext = dict.sid("include_next")
+ idLine = dict.sid("line")
+ idPragma = dict.sid("pragma")
+ idPragmaOp = dict.sid("_Pragma")
+ idSpace = dict.sid(" ")
+ idUndef = dict.sid("undef")
+
+ // trigraphPrefix is searched for first so that lines without any trigraph
+ // skip the replacement loop in (*scanner).next.
+ trigraphPrefix = []byte("??")
+ // trigraphs maps every trigraph sequence to its single character
+ // replacement.
+ trigraphs = []struct{ from, to []byte }{
+ {[]byte("??="), []byte{'#'}},
+ {[]byte("??("), []byte{'['}},
+ {[]byte("??/"), []byte{'\\'}},
+ {[]byte("??)"), []byte{']'}},
+ {[]byte("??'"), []byte{'^'}},
+ {[]byte("??<"), []byte{'{'}},
+ {[]byte("??!"), []byte{'|'}},
+ {[]byte("??>"), []byte{'}'}},
+ {[]byte("??-"), []byte{'~'}},
+ }
+)
+
+// tokenFile wraps a *token.File with a read/write mutex. The methods declared
+// on tokenFile in this file take that mutex around the corresponding
+// *token.File call.
+type tokenFile struct {
+ *token.File
+ sync.RWMutex
+}
+
+// tokenNewFile returns a tokenFile wrapping a fresh token.File created with
+// the given name and size.
+func tokenNewFile(name string, sz int) *tokenFile {
+	tf := new(tokenFile)
+	tf.File = token.NewFile(name, sz)
+	return tf
+}
+
+// Position returns f.File.Position(pos), evaluated while holding the read
+// lock.
+func (f *tokenFile) Position(pos token.Pos) (r token.Position) {
+	f.RLock()
+	defer f.RUnlock()
+	return f.File.Position(pos)
+}
+
+// PositionFor returns f.File.PositionFor(pos, adjusted), evaluated while
+// holding the read lock.
+func (f *tokenFile) PositionFor(pos token.Pos, adjusted bool) (r token.Position) {
+	f.RLock()
+	defer f.RUnlock()
+	return f.File.PositionFor(pos, adjusted)
+}
+
+// AddLine forwards to f.File.AddLine(off) while holding the write lock.
+func (f *tokenFile) AddLine(off int) {
+	f.Lock()
+	defer f.Unlock()
+	f.File.AddLine(off)
+}
+
+// AddLineInfo forwards to f.File.AddLineInfo(off, fn, line) while holding the
+// write lock.
+func (f *tokenFile) AddLineInfo(off int, fn string, line int) {
+	f.Lock()
+	defer f.Unlock()
+	f.File.AddLineInfo(off, fn, line)
+}
+
+// node is anything that can report a source position; (*scanner).err uses it
+// to locate the error being reported.
+type node interface {
+ Pos() token.Pos
+}
+
+// dictionary interns strings: every distinct string is assigned a StringID
+// that equals its index in strings.
+type dictionary struct {
+ mu sync.RWMutex // guards m and strings
+ m map[string]StringID // string -> id
+ strings []string // id -> string
+}
+
+func newDictionary() (r *dictionary) {
+ r = &dictionary{m: map[string]StringID{}}
+ b := make([]byte, 1)
+ for i := 0; i < 128; i++ {
+ var s string
+ if i != 0 {
+ b[0] = byte(i)
+ s = string(b)
+ }
+ r.m[s] = StringID(i)
+ r.strings = append(r.strings, s)
+ dictStrings[i] = s
+ }
+ return r
+}
+
+// id returns the StringID of key, interning it first when it is not known
+// yet. The empty key is id 0 and a single non-zero ASCII byte is its own id;
+// neither case takes the lock.
+func (d *dictionary) id(key []byte) StringID {
+	if len(key) == 0 {
+		return 0
+	}
+
+	if len(key) == 1 {
+		if b := key[0]; b != 0 && b < 128 {
+			return StringID(b)
+		}
+	}
+
+	d.mu.Lock()
+	n, known := d.m[string(key)]
+	if !known {
+		n = StringID(len(d.strings))
+		text := string(key)
+		if int(n) < 256 {
+			dictStrings[n] = text
+		}
+		d.strings = append(d.strings, text)
+		d.m[text] = n
+	}
+	d.mu.Unlock()
+	return n
+}
+
+// sid is the string keyed variant of id: it returns the StringID of key,
+// interning it first when it is not known yet. The empty key is id 0 and a
+// single non-zero ASCII byte is its own id; neither case takes the lock.
+func (d *dictionary) sid(key string) StringID {
+	if len(key) == 0 {
+		return 0
+	}
+
+	if len(key) == 1 {
+		if b := key[0]; b != 0 && b < 128 {
+			return StringID(b)
+		}
+	}
+
+	d.mu.Lock()
+	n, known := d.m[key]
+	if !known {
+		n = StringID(len(d.strings))
+		if int(n) < 256 {
+			dictStrings[n] = key
+		}
+		d.strings = append(d.strings, key)
+		d.m[key] = n
+	}
+	d.mu.Unlock()
+	return n
+}
+
+// char is a single source byte together with its position. The scanner
+// treats pos == 0 as "no valid character" (see unget and next).
+type char struct {
+ pos int32
+ c byte
+}
+
+// token3 is produced by translation phase 3.
+type token3 struct {
+ char rune // token kind; (*scanner).lex stores -1 for end of file
+ pos int32 // position of the token, see Pos
+ value StringID // interned token value, see String
+ src StringID // interned source text of the token as collected by lex
+ macro StringID // not set anywhere in this file
+}
+
+// Pos implements node. It returns the token's position as a token.Pos.
+func (t token3) Pos() token.Pos {
+	p := token.Pos(t.pos)
+	return p
+}
+// String implements fmt.Stringer. It returns the string of the token's value.
+func (t token3) String() string {
+	v := t.value
+	return v.String()
+}
+
+// scanner reads a source file line by line, splits it into token3 values and
+// groups those into preprocessing lines (translation phases 1-3).
+type scanner struct {
+ bomFix int // number of BOM bytes discarded by init; added to fileOffset by the first line read
+ bytesBuf []byte // scratch buffer lex uses to build identifier values
+ charBuf []char // characters consumed for the token being scanned
+ ctx *context // configuration and error reporting
+ file *tokenFile // receives line information; resolves positions
+ fileOffset int // advanced by the length of every line read
+ firstPos token.Pos // position of the first character of the current token
+ lineBuf []byte // bytes of the current line not yet handed out by next
+ lookaheadChar char // next character; pos == 0 means it is not valid
+ lookaheadLine ppLine // next preprocessing line, set by nextLine
+ mark int // reset to -1 by initScan; consulted by abort
+ pos token.Pos // position of the character most recently taken from lineBuf
+ r *bufio.Reader // input
+ srcBuf []byte // scratch buffer lex uses to build the token source text
+ tokenBuf []token3 // tokens of the line being assembled
+ ungetBuf []char // pushed back characters, handed out again by next, last in first out
+
+ tok token3 // token most recently produced by lex
+
+ closed bool // set by errPos; makes next report end of file
+ preserveWhiteSpace bool // when set, lex does not replace white space values by a single space
+}
+
+// newScanner0 returns a scanner reading r through a buffered reader of
+// bufSize bytes. The scanner is initialized only when r is not nil.
+func newScanner0(ctx *context, r io.Reader, file *tokenFile, bufSize int) *scanner {
+	sc := new(scanner)
+	sc.ctx = ctx
+	sc.file = file
+	sc.r = bufio.NewReaderSize(r, bufSize)
+	if r == nil {
+		return sc
+	}
+
+	sc.init()
+	return sc
+}
+
+func newScanner(ctx *context, r io.Reader, file *tokenFile) *scanner {
+ bufSize := 1 << 17 // emulate gcc
+ if n := ctx.cfg.MaxSourceLine; n > 4096 {
+ bufSize = n
+ }
+ return newScanner0(ctx, r, file, bufSize)
+}
+
+// abort backs out of the current scan attempt.
+//
+// If a mark is set (s.mark >= 0), the lookahead and the characters collected
+// from the mark on are pushed back, s.charBuf is truncated to the mark and
+// (0, false) is returned.
+//
+// Without a mark the token is reduced to exactly one character and the class
+// of that character is returned together with true. Characters collected
+// beyond the first one are pushed back for rescanning.
+func (s *scanner) abort() (r byte, b bool) {
+ if s.mark >= 0 {
+ if len(s.charBuf) > s.mark {
+ // Push back in reverse order: ungetBuf is consumed last in, first out.
+ s.unget(s.lookaheadChar)
+ for i := len(s.charBuf) - 1; i >= s.mark; i-- {
+ s.unget(s.charBuf[i])
+ }
+ }
+ s.charBuf = s.charBuf[:s.mark]
+ return 0, false
+ }
+
+ switch n := len(s.charBuf); n {
+ case 0: // [] z
+ // Nothing collected yet: the lookahead itself becomes the token.
+ c := s.lookaheadChar
+ s.next()
+ return s.class(c.c), true
+ case 1: // [a] z
+ return s.class(s.charBuf[0].c), true
+ default: // [a, b, ...], z
+ c := s.charBuf[0] // a
+ s.unget(s.lookaheadChar) // z
+ for i := n - 1; i > 1; i-- {
+ s.unget(s.charBuf[i]) // ...
+ }
+ // unget invalidated the lookahead; b becomes the new lookahead.
+ s.lookaheadChar = s.charBuf[1] // b
+ s.charBuf = s.charBuf[:1]
+ return s.class(c.c), true
+ }
+}
+
+// class maps a source byte to its character class: clsEOF for zero, clsOther
+// for anything above maxASCII and the byte itself otherwise.
+func (s *scanner) class(b byte) byte {
+	if b == 0 {
+		return clsEOF
+	}
+
+	if b > maxASCII {
+		return clsOther
+	}
+
+	return b
+}
+
+// err reports an error at the position of n.
+func (s *scanner) err(n node, msg string, args ...interface{}) {
+	pos := n.Pos()
+	s.errPos(pos, msg, args...)
+}
+
+// errLine reports an error about a whole preprocessing line. x must be nil or
+// a ppLine; anything else panics. The message is prefixed by the text of the
+// line, rebuilt from its tokens with new-lines dropped and white space tokens
+// written as a single space, and is reported at the line's first token.
+func (s *scanner) errLine(x interface{}, msg string, args ...interface{}) {
+	var toks []token3
+	if x == nil {
+		toks = []token3{{}}
+	} else if line, ok := x.(ppLine); ok {
+		toks = line.getToks()
+	} else {
+		panic(internalError())
+	}
+
+	var text strings.Builder
+	for i := range toks {
+		switch t := toks[i]; t.char {
+		case '\n':
+			// nop
+		case ' ':
+			text.WriteByte(' ')
+		default:
+			text.WriteString(t.String())
+		}
+	}
+
+	fmtArgs := make([]interface{}, 0, len(args)+1)
+	fmtArgs = append(fmtArgs, text.String())
+	fmtArgs = append(fmtArgs, args...)
+	s.err(toks[0], "%s"+msg, fmtArgs...)
+}
+
+// errPos reports an error at pos through the context. When the context's err
+// method returns true the scanner stops: its reader is reset to nil and it is
+// marked closed.
+func (s *scanner) errPos(pos token.Pos, msg string, args ...interface{}) {
+	stop := s.ctx.err(s.file.Position(pos), msg, args...)
+	if !stop {
+		return
+	}
+
+	s.r.Reset(nil)
+	s.closed = true
+}
+
+// init prepares the scanner for reading: a leading byte order mark, if any,
+// is discarded and the number of discarded bytes is remembered in s.bomFix;
+// the token buffer is cleared. A scanner without a reader is returned
+// untouched.
+func (s *scanner) init() *scanner {
+	if s.r != nil {
+		if head, err := s.r.Peek(len(bom)); err == nil && bytes.Equal(head, bom) {
+			s.bomFix, _ = s.r.Discard(len(bom))
+		}
+		s.tokenBuf = nil
+	}
+	return s
+}
+
+// initScan prepares the scanner for scanning the next token and returns the
+// class of the first character of that token.
+//
+// It fetches a lookahead character if there is none, records the token's
+// first position, clears the mark and resets the per-token buffers. The
+// buffers are normally truncated so their storage is reused; after an
+// unusually long token (more than 1<<18 characters) they are dropped instead
+// so the memory can be reclaimed.
+func (s *scanner) initScan() (r byte) {
+	if s.lookaheadChar.pos == 0 {
+		s.next()
+	}
+	s.firstPos = token.Pos(s.lookaheadChar.pos)
+	s.mark = -1
+	if len(s.charBuf) > 1<<18 { //DONE benchmark tuned
+		s.bytesBuf = nil
+		s.charBuf = nil
+		s.srcBuf = nil
+	} else {
+		s.bytesBuf = s.bytesBuf[:0]
+		s.charBuf = s.charBuf[:0]
+		// Truncate srcBuf itself. Re-slicing bytesBuf here made both buffers
+		// share one backing array and discarded whatever capacity srcBuf had
+		// grown to.
+		s.srcBuf = s.srcBuf[:0]
+	}
+	return s.class(s.lookaheadChar.c)
+}
+
+// lex scans the next token into s.tok.
+//
+// The token kind comes from s.scan, the position is the token's first
+// character and src is the interned text of all characters collected for the
+// token. The value is a single space for white space tokens (unless white
+// space is being preserved), the identifier with its universal character
+// names decoded for IDENTIFIER tokens, and equal to src otherwise. At end of
+// file the kind is set to -1 and the position to the end of the file.
+func (s *scanner) lex() {
+ s.tok.char = s.scan()
+ s.tok.pos = int32(s.firstPos)
+ for _, v := range s.charBuf {
+ s.srcBuf = append(s.srcBuf, v.c)
+ }
+ s.tok.src = dict.id(s.srcBuf)
+ switch {
+ case s.tok.char == ' ' && !s.preserveWhiteSpace && !s.ctx.cfg.PreserveWhiteSpace:
+ s.tok.value = idSpace
+ case s.tok.char == IDENTIFIER:
+ // Copy the identifier into bytesBuf, replacing every \uXXXX and
+ // \UXXXXXXXX by the UTF-8 encoding of the code point it names.
+ for i := 0; i < len(s.charBuf); {
+ c := s.charBuf[i].c
+ if c != '\\' {
+ s.bytesBuf = append(s.bytesBuf, c)
+ i++
+ continue
+ }
+
+ i++ // Skip '\\'
+ var n int
+ switch s.charBuf[i].c {
+ case 'u':
+ n = 4
+ case 'U':
+ n = 8
+ default:
+ panic(internalError())
+ }
+ i++ // Skip 'u' or 'U'
+ l := len(s.bytesBuf)
+ // Stage the n hex digits at the end of bytesBuf ...
+ for i0 := i; i < i0+n; i++ {
+ s.bytesBuf = append(s.bytesBuf, s.charBuf[i].c)
+ }
+ r, err := strconv.ParseUint(string(s.bytesBuf[l:l+n]), 16, 32)
+ if err != nil {
+ panic(internalError())
+ }
+
+ // ... and overwrite them in place by the encoded rune, which never
+ // needs more bytes than the digits it replaces.
+ n2 := utf8.EncodeRune(s.bytesBuf[l:], rune(r))
+ s.bytesBuf = s.bytesBuf[:l+n2]
+ }
+ s.tok.value = dict.id(s.bytesBuf)
+ default:
+ s.tok.value = s.tok.src
+ }
+ switch s.tok.char {
+ case clsEOF:
+ s.tok.char = -1
+ s.tok.pos = int32(s.file.Pos(s.file.Size()))
+ }
+ // dbg("lex %q %q", tokName(s.tok.char), s.tok.value)
+}
+
+// next advances to the next source character and returns its class. The
+// character itself is left in s.lookaheadChar.
+//
+// A valid previous lookahead is appended to s.charBuf first. Pushed back
+// characters are served before anything else. When the current line is
+// exhausted a new one is read, registered with s.file and run through
+// trigraph replacement and backslash/new-line splicing (translation phases 1
+// and 2).
+func (s *scanner) next() (r byte) {
+ if s.lookaheadChar.pos > 0 {
+ s.charBuf = append(s.charBuf, s.lookaheadChar)
+ }
+ if n := len(s.ungetBuf); n != 0 {
+ s.lookaheadChar = s.ungetBuf[n-1]
+ s.ungetBuf = s.ungetBuf[:n-1]
+ return s.class(s.lookaheadChar.c)
+ }
+
+ if len(s.lineBuf) == 0 {
+ more:
+ // End of input: the scanner was closed or the whole file was consumed.
+ if s.closed || s.fileOffset == s.file.Size() {
+ s.lookaheadChar.c = 0
+ s.lookaheadChar.pos = 0
+ return clsEOF
+ }
+
+ b, err := s.r.ReadSlice('\n')
+ if err != nil {
+ if err != io.EOF {
+ s.errPos(s.pos, "error while reading %s: %s", s.file.Name(), err)
+ }
+ if len(b) == 0 {
+ return clsEOF
+ }
+ }
+
+ s.file.AddLine(s.fileOffset)
+ // Account once for the bytes of a discarded byte order mark.
+ s.fileOffset += s.bomFix
+ s.bomFix = 0
+ s.pos = token.Pos(s.fileOffset)
+ s.fileOffset += len(b)
+
+ // [0], 5.1.1.2, 1.1
+ //
+ // Physical source file multibyte characters are mapped, in an
+ // implementation- defined manner, to the source character set
+ // (introducing new-line characters for end-of-line indicators)
+ // if necessary. Trigraph sequences are replaced by
+ // corresponding single-character internal representations.
+ if !s.ctx.cfg.DisableTrigraphs && bytes.Contains(b, trigraphPrefix) {
+ for _, v := range trigraphs {
+ b = bytes.Replace(b, v.from, v.to, -1)
+ }
+ }
+
+ // [0], 5.1.1.2, 2
+ //
+ // Each instance of a backslash character (\) immediately
+ // followed by a new-line character is deleted, splicing
+ // physical source lines to form logical source lines. Only
+ // the last backslash on any physical source line shall be
+ // eligible for being part of such a splice. A source file that
+ // is not empty shall end in a new-line character, which shall
+ // not be immediately preceded by a backslash character before
+ // any such splicing takes place.
+ s.lineBuf = b
+ n := len(b)
+ switch {
+ case b[n-1] != '\n':
+ // Last line of the file without a terminating new-line: supply one.
+ if s.ctx.cfg.RejectMissingFinalNewline {
+ s.errPos(s.pos+token.Pos(n), "non empty source file shall end in a new-line character")
+ }
+ b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
+ case n > 1 && b[n-2] == '\\':
+ // The line consists of the splice only: continue with the next line.
+ if n == 2 {
+ goto more
+ }
+
+ b = b[:n-2]
+ n = len(b)
+ if s.fileOffset == s.file.Size() {
+ if s.ctx.cfg.RejectFinalBackslash {
+ s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
+ }
+ b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
+ }
+ case n > 2 && b[n-3] == '\\' && b[n-2] == '\r':
+ // we've got a windows source that has \r\n line endings.
+ if n == 3 {
+ goto more
+ }
+
+ b = b[:n-3]
+ n = len(b)
+ if s.fileOffset == s.file.Size() {
+ if s.ctx.cfg.RejectFinalBackslash {
+ s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
+ }
+ b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
+ }
+ }
+ s.lineBuf = b
+ }
+ // Hand out the first byte of the line buffer as the new lookahead.
+ s.pos++
+ s.lookaheadChar = char{int32(s.pos), s.lineBuf[0]}
+ s.lineBuf = s.lineBuf[1:]
+ return s.class(s.lookaheadChar.c)
+}
+
+// unget pushes the given characters back onto the scanner; next hands them
+// out again starting with the one pushed last. The current lookahead becomes
+// invalid.
+func (s *scanner) unget(c ...char) {
+	s.lookaheadChar.pos = 0 // Must invalidate lookahead.
+	s.ungetBuf = append(s.ungetBuf, c...)
+}
+
+// unterminatedComment reports an unterminated comment at the end of the file,
+// pushes the last collected character back so it is scanned again and
+// returns the white space token kind.
+func (s *scanner) unterminatedComment() rune {
+	s.errPos(token.Pos(s.file.Size()), "unterminated comment")
+	last := len(s.charBuf) - 1
+	s.unget(s.charBuf[last]) // \n
+	s.charBuf = s.charBuf[:last]
+	return ' '
+}
+
+// -------------------------------------------------------- Translation phase 3
+
+// translationPhase3 implements [0], 5.1.1.2, 3:
+//
+// The source file is decomposed into preprocessing tokens and sequences of
+// white-space characters (including comments). A source file shall not end in
+// a partial preprocessing token or in a partial comment. Each comment is
+// replaced by one space character. New-line characters are retained. Whether
+// each nonempty sequence of white-space characters other than new-line is
+// retained or replaced by one space character is implementation-defined.
+//
+// The result is the ppFile of s.file. For an empty file the reader is reset
+// and the ppFile has no groups.
+func (s *scanner) translationPhase3() *ppFile {
+	pf := &ppFile{file: s.file}
+	if s.file.Size() != 0 {
+		s.nextLine()
+		pf.groups = s.parseGroup()
+		return pf
+	}
+
+	s.r.Reset(nil)
+	return pf
+}
+
+func (s *scanner) parseGroup() (r []ppGroup) {
+ for {
+ switch x := s.lookaheadLine.(type) {
+ case ppGroup:
+ r = append(r, x)
+ s.nextLine()
+ case ppIfGroupDirective:
+ r = append(r, s.parseIfSection())
+ default:
+ return r
+ }
+ }
+}
+
+// parseIfSection parses, in source order, an if-group, any elif-groups, an
+// optional else-group and the closing #endif line.
+func (s *scanner) parseIfSection() *ppIfSection {
+	sec := new(ppIfSection)
+	sec.ifGroup = s.parseIfGroup()
+	sec.elifGroups = s.parseElifGroup()
+	sec.elseGroup = s.parseElseGroup()
+	sec.endifLine = s.parseEndifLine()
+	return sec
+}
+
+func (s *scanner) parseEndifLine() *ppEndifDirective {
+ switch x := s.lookaheadLine.(type) {
+ case *ppEndifDirective:
+ s.nextLine()
+ return x
+ default:
+ s.errLine(x, fmt.Sprintf(": expected #endif (unexpected %T)", x))
+ s.nextLine()
+ return nil
+ }
+}
+
+func (s *scanner) parseElseGroup() *ppElseGroup {
+ switch x := s.lookaheadLine.(type) {
+ case *ppElseDirective:
+ r := &ppElseGroup{elseLine: x}
+ s.nextLine()
+ r.groups = s.parseGroup()
+ return r
+ default:
+ return nil
+ }
+}
+
+func (s *scanner) parseElifGroup() (r []*ppElifGroup) {
+ for {
+ var g ppElifGroup
+ switch x := s.lookaheadLine.(type) {
+ case *ppElifDirective:
+ g.elif = x
+ s.nextLine()
+ g.groups = s.parseGroup()
+ r = append(r, &g)
+ default:
+ return r
+ }
+ }
+}
+
+func (s *scanner) parseIfGroup() *ppIfGroup {
+ r := &ppIfGroup{}
+ switch x := s.lookaheadLine.(type) {
+ case ppIfGroupDirective:
+ r.directive = x
+ default:
+ s.errLine(x, fmt.Sprintf(": expected if-group (unexpected %T)", x))
+ }
+ s.nextLine()
+ r.groups = s.parseGroup()
+ return r
+}
+
+// nextLine starts a fresh token buffer and scans the next preprocessing line
+// into s.lookaheadLine.
+func (s *scanner) nextLine() {
+	s.tokenBuf = nil
+	line := s.scanLine()
+	s.lookaheadLine = line
+}
+
+// scanLine scans one preprocessing line and returns it as the matching ppLine
+// implementation: one of the directive types when the first non-blank token
+// is '#', a text line otherwise. It returns nil when no more tokens are
+// available.
+//
+// A text line starting with the _Pragma operator is rewritten: the operator's
+// string literal is destringized, pushed back as a "#pragma ..." line and
+// scanning starts over.
+func (s *scanner) scanLine() (r ppLine) {
+again:
+ toks := s.scanToNonBlankToken(nil)
+ if len(toks) == 0 {
+ return nil
+ }
+
+ includeNext := false
+ switch tok := toks[len(toks)-1]; tok.char {
+ case '#':
+ toks = s.scanToNonBlankToken(toks)
+ switch tok := toks[len(toks)-1]; tok.char {
+ case '\n':
+ return &ppEmptyDirective{toks: toks}
+ case IDENTIFIER:
+ switch tok.value {
+ case idDefine:
+ return s.parseDefine(toks)
+ case idElif:
+ return s.parseElif(toks)
+ case idElse:
+ return s.parseElse(toks)
+ case idEndif:
+ return s.parseEndif(toks)
+ case idIf:
+ return s.parseIf(toks)
+ case idIfdef:
+ return s.parseIfdef(toks)
+ case idIfndef:
+ return s.parseIfndef(toks)
+ case idIncludeNext:
+ includeNext = true
+ fallthrough
+ case idInclude:
+ // # include pp-tokens new-line
+ //
+ // White space is preserved while scanning the argument so that
+ // header names differing only in white space, e.g. <foo  bar.h>
+ // and <foo bar.h>, are not aliased.
+ save := s.preserveWhiteSpace
+ s.preserveWhiteSpace = true
+ n := len(toks)
+ toks := s.scanLineToEOL(toks)
+ r := &ppIncludeDirective{arg: toks[n : len(toks)-1], toks: toks, includeNext: includeNext}
+ s.preserveWhiteSpace = save
+ return r
+ case idUndef:
+ return s.parseUndef(toks)
+ case idLine:
+ return s.parseLine(toks)
+ case idError:
+ // # error pp-tokens_opt new-line
+ n := len(toks)
+ toks := s.scanLineToEOL(toks)
+ msg := toks[n : len(toks)-1]
+ if len(msg) != 0 && msg[0].char == ' ' {
+ msg = msg[1:]
+ }
+ return &ppErrorDirective{toks: toks, msg: msg}
+ case idPragma:
+ return s.parsePragma(toks)
+ }
+ }
+
+ // # non-directive
+ return &ppNonDirective{toks: s.scanLineToEOL(toks)}
+ case '\n':
+ return &ppTextLine{toks: toks}
+ case IDENTIFIER:
+ if tok.value == idPragmaOp {
+ // _Pragma ( string-literal )
+ toks = s.scanToNonBlankToken(toks)
+ switch tok = toks[len(toks)-1]; tok.char {
+ case '(':
+ // ok
+ default:
+ s.err(tok, "expected (")
+ return &ppTextLine{toks: toks}
+ }
+
+ var lit string
+ toks = s.scanToNonBlankToken(toks)
+ switch tok = toks[len(toks)-1]; tok.char {
+ case STRINGLITERAL:
+ lit = tok.String()
+ case LONGSTRINGLITERAL:
+ lit = tok.String()[1:] // [0], 6.9.10, 1
+ default:
+ s.err(tok, "expected string literal")
+ return &ppTextLine{toks: toks}
+ }
+
+ pos := tok.pos
+ toks = s.scanToNonBlankToken(toks)
+ switch tok = toks[len(toks)-1]; tok.char {
+ case ')':
+ // ok
+ default:
+ s.err(tok, "expected )")
+ return &ppTextLine{toks: toks}
+ }
+
+ // Push back the current lookahead first so it is read after the
+ // synthesized directive.
+ s.unget(s.lookaheadChar)
+ // [0], 6.9.10, 1
+ lit = lit[1 : len(lit)-1]
+ lit = strings.ReplaceAll(lit, `\"`, `"`)
+ lit = strings.ReplaceAll(lit, `\\`, `\`)
+ lit = "#pragma " + lit + "\n"
+ // Characters are pushed back last to first; all of them carry the
+ // position of the string literal.
+ for i := len(lit) - 1; i >= 0; i-- {
+ s.unget(char{pos, lit[i]})
+ }
+ goto again
+ }
+
+ fallthrough
+ default:
+ return &ppTextLine{toks: s.scanLineToEOL(toks)}
+ }
+}
+
+// parsePragma parses the remainder of a #pragma line. The arguments start at
+// the first non-blank token after "pragma" and run to the end of the line,
+// new-line included.
+func (s *scanner) parsePragma(toks []token3) *ppPragmaDirective {
+	toks = s.scanToNonBlankToken(toks)
+	argStart := len(toks) - 1
+	if toks[argStart].char != '\n' {
+		toks = s.scanLineToEOL(toks)
+	}
+	return &ppPragmaDirective{toks: toks, args: toks[argStart:]}
+}
+
+// parseLine parses the remainder of
+//
+//	# line pp-tokens new-line
+//
+// The arguments are the tokens after "line" with leading white space
+// removed and without the final new-line. nextPos is the position just past
+// the new-line token. A #line without arguments is reported as an error.
+func (s *scanner) parseLine(toks []token3) *ppLineDirective {
+	toks = s.scanToNonBlankToken(toks)
+	if first := toks[len(toks)-1]; first.char == '\n' {
+		s.err(first, "unexpected new-line")
+		return &ppLineDirective{toks: toks}
+	}
+
+	all := s.scanLineToEOL(toks)
+	eol := all[len(all)-1]
+	d := &ppLineDirective{toks: all, nextPos: int(eol.pos) + len(eol.src.String())}
+	rest := ltrim3(all[:len(all)-1]) // sans new-line
+	rest = ltrim3(rest[1:])          // Skip '#'
+	d.args = ltrim3(rest[1:])        // Skip "line"
+	return d
+}
+
+// ltrim3 returns toks without its leading white space (' ') tokens.
+func ltrim3(toks []token3) []token3 {
+	i := 0
+	for i < len(toks) && toks[i].char == ' ' {
+		i++
+	}
+	return toks[i:]
+}
+
+// parseUndef parses the remainder of
+//
+//	# undef identifier new-line
+//
+// A missing identifier is an error. Tokens following the identifier are an
+// error only when the configuration rejects them; they are kept in the
+// directive's tokens either way.
+func (s *scanner) parseUndef(toks []token3) *ppUndefDirective {
+	toks = s.scanToNonBlankToken(toks)
+	name := toks[len(toks)-1]
+	if name.char != IDENTIFIER {
+		s.err(&name, "expected identifier")
+		if name.char != '\n' {
+			toks = s.scanLineToEOL(toks)
+		}
+		return &ppUndefDirective{toks: toks}
+	}
+
+	toks = s.scanToNonBlankToken(toks)
+	if extra := toks[len(toks)-1]; extra.char != '\n' {
+		if s.ctx.cfg.RejectUndefExtraTokens {
+			s.err(&extra, "extra tokens after #undef")
+		}
+		toks = s.scanLineToEOL(toks)
+	}
+	return &ppUndefDirective{name: name, toks: toks}
+}
+
+// scanLineToEOL appends tokens to s.tokenBuf up to and including the next
+// new-line token and returns toks extended by them. toks must be the tail of
+// s.tokenBuf.
+//
+// The returned slice always ends in a new-line token. Should the input end
+// before a new-line is seen (the scanner was closed by errPos, or a read came
+// up short), a new-line token is synthesized at the end-of-file position.
+// Without that the loop would never terminate, because lex keeps reporting
+// end of file.
+func (s *scanner) scanLineToEOL(toks []token3) []token3 {
+	n := len(s.tokenBuf) - len(toks)
+	for {
+		s.lex()
+		if s.tok.char < 0 {
+			nl := s.tok
+			nl.char = '\n'
+			nl.value = dict.sid("\n")
+			nl.src = nl.value
+			s.tokenBuf = append(s.tokenBuf, nl)
+			return s.tokenBuf[n:]
+		}
+
+		s.tokenBuf = append(s.tokenBuf, s.tok)
+		if s.tok.char == '\n' {
+			return s.tokenBuf[n:]
+		}
+	}
+}
+
+// # ifndef identifier new-line
+func (s *scanner) parseIfndef(toks []token3) *ppIfndefDirective {
+ var name StringID
+ toks = s.scanToNonBlankToken(toks)
+ switch tok := toks[len(toks)-1]; tok.char {
+ case IDENTIFIER:
+ name = tok.value
+ toks = s.scanToNonBlankToken(toks)
+ switch tok := toks[len(toks)-1]; tok.char {
+ case '\n':
+ return &ppIfndefDirective{name: name, toks: toks}
+ default:
+ if s.ctx.cfg.RejectIfndefExtraTokens {
+ s.err(&tok, "extra tokens after #ifndef")
+ }
+ return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)}
+ }
+ case '\n':
+ s.err(tok, "expected identifier")
+ return &ppIfndefDirective{name: name, toks: toks}
+ default:
+ s.err(tok, "expected identifier")
+ return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)}
+ }
+}
+
+// # ifdef identifier new-line
+func (s *scanner) parseIfdef(toks []token3) *ppIfdefDirective {
+ var name StringID
+ toks = s.scanToNonBlankToken(toks)
+ switch tok := toks[len(toks)-1]; tok.char {
+ case IDENTIFIER:
+ name = tok.value
+ toks = s.scanToNonBlankToken(toks)
+ switch tok := toks[len(toks)-1]; tok.char {
+ case '\n':
+ return &ppIfdefDirective{name: name, toks: toks}
+ default:
+ if s.ctx.cfg.RejectIfdefExtraTokens {
+ s.err(&tok, "extra tokens after #ifdef")
+ }
+ return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)}
+ }
+ case '\n':
+ s.err(tok, "expected identifier")
+ return &ppIfdefDirective{name: name, toks: toks}
+ default:
+ s.err(tok, "expected identifier")
+ return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)}
+ }
+}
+
+// parseIf parses the remainder of
+//
+//	# if constant-expression new-line
+//
+// The expression is everything after "if" without one leading white space
+// token and without the final new-line. A missing expression is an error.
+func (s *scanner) parseIf(toks []token3) *ppIfDirective {
+	start := len(toks)
+	toks = s.scanToNonBlankToken(toks)
+	if first := toks[len(toks)-1]; first.char == '\n' {
+		s.err(first, "expected expression")
+		return &ppIfDirective{toks: toks}
+	}
+
+	toks = s.scanLineToEOL(toks)
+	expr := toks[start:]
+	if expr[0].char == ' ' { // sans leading space
+		expr = expr[1:]
+	}
+	expr = expr[:len(expr)-1] // sans '\n'
+	return &ppIfDirective{toks: toks, expr: expr}
+}
+
+// parseEndif parses the remainder of
+//
+//	# endif new-line
+//
+// Tokens following "endif" are an error only when the configuration rejects
+// them; they are kept in the directive's tokens either way.
+func (s *scanner) parseEndif(toks []token3) *ppEndifDirective {
+	toks = s.scanToNonBlankToken(toks)
+	switch tok := toks[len(toks)-1]; tok.char {
+	case '\n':
+		return &ppEndifDirective{toks}
+	default:
+		if s.ctx.cfg.RejectEndifExtraTokens {
+			// The message used to name #else, copied from parseElse.
+			s.err(&tok, "extra tokens after #endif")
+		}
+		return &ppEndifDirective{s.scanLineToEOL(toks)}
+	}
+}
+
+// parseElse parses the remainder of
+//
+//	# else new-line
+//
+// Tokens following "else" are an error only when the configuration rejects
+// them; they are kept in the directive's tokens either way.
+func (s *scanner) parseElse(toks []token3) *ppElseDirective {
+	toks = s.scanToNonBlankToken(toks)
+	if extra := toks[len(toks)-1]; extra.char != '\n' {
+		if s.ctx.cfg.RejectElseExtraTokens {
+			s.err(&extra, "extra tokens after #else")
+		}
+		toks = s.scanLineToEOL(toks)
+	}
+	return &ppElseDirective{toks}
+}
+
+// parseElif parses the remainder of
+//
+//	# elif constant-expression new-line
+//
+// The expression is everything after "elif" without one leading white space
+// token and without the final new-line. A missing expression is an error.
+func (s *scanner) parseElif(toks []token3) *ppElifDirective {
+	start := len(toks)
+	toks = s.scanToNonBlankToken(toks)
+	if first := toks[len(toks)-1]; first.char == '\n' {
+		s.err(first, "expected expression")
+		return &ppElifDirective{toks, nil}
+	}
+
+	toks = s.scanLineToEOL(toks)
+	expr := toks[start:]
+	if expr[0].char == ' ' { // sans leading space
+		expr = expr[1:]
+	}
+	expr = expr[:len(expr)-1] // sans '\n'
+	return &ppElifDirective{toks, expr}
+}
+
+// parseDefine parses the remainder of a #define line. A '(' directly after
+// the macro name, with no white space in between, starts a function-like
+// macro; anything else defines an object-like macro. A missing macro name is
+// an error.
+func (s *scanner) parseDefine(toks []token3) ppLine {
+	toks = s.scanToNonBlankToken(toks)
+	name := toks[len(toks)-1]
+	if name.char != IDENTIFIER {
+		s.err(name, "expected identifier")
+		if name.char != '\n' {
+			toks = s.scanLineToEOL(toks)
+		}
+		return &ppDefineObjectMacroDirective{toks: toks}
+	}
+
+	n := len(toks)
+	toks = s.scanToNonBlankToken(toks)
+	switch next := toks[len(toks)-1]; {
+	case next.char == '\n':
+		return &ppDefineObjectMacroDirective{name: name, toks: toks}
+	case next.char == '(' && toks[n].char != ' ':
+		return s.parseDefineFunctionMacro(name, toks)
+	default:
+		return s.parseDefineObjectMacro(n, name, toks)
+	}
+}
+
+// # define identifier lparen identifier-list_opt ) replacement-list new-line
+// # define identifier lparen ... ) replacement-list new-line
+// # define identifier lparen identifier-list , ... ) replacement-list new-line
+//
+// parseDefineFunctionMacro parses the parameter list and the replacement list
+// of a function-like macro. toks ends with the '(' following the macro name.
+// Errors in the parameter list are reported and parsing carries on with the
+// next token wherever possible.
+func (s *scanner) parseDefineFunctionMacro(name token3, toks []token3) *ppDefineFunctionMacroDirective {
+ // Parse parameters after "#define name(".
+ var list []token3
+ variadic := false
+ namedVariadic := false
+again:
+ toks = s.scanToNonBlankToken(toks)
+ switch tok := toks[len(toks)-1]; tok.char {
+ case IDENTIFIER:
+ more:
+ // tok is a parameter name; look at what follows it.
+ list = append(list, tok)
+ toks = s.scanToNonBlankToken(toks)
+ switch tok = toks[len(toks)-1]; tok.char {
+ case ',':
+ toks = s.scanToNonBlankToken(toks)
+ switch tok = toks[len(toks)-1]; tok.char {
+ case IDENTIFIER:
+ goto more
+ case DDD:
+ if toks, variadic = s.parseDDD(toks); !variadic {
+ goto again
+ }
+ case ')':
+ s.err(tok, "expected parameter name")
+ default:
+ s.err(tok, "unexpected %q", &tok)
+ }
+ case DDD:
+ // "name..." without a separating comma: the preceding parameter is
+ // the variadic one.
+ namedVariadic = true
+ if s.ctx.cfg.RejectInvalidVariadicMacros {
+ s.err(tok, "expected comma")
+ }
+ if toks, variadic = s.parseDDD(toks); !variadic {
+ goto again
+ }
+ case ')':
+ // ok
+ case '\n':
+ s.err(tok, "unexpected new-line")
+ return &ppDefineFunctionMacroDirective{toks: toks}
+ case IDENTIFIER:
+ s.err(tok, "expected comma")
+ goto more
+ default:
+ s.err(tok, "unexpected %q", &tok)
+ }
+ case DDD:
+ if toks, variadic = s.parseDDD(toks); !variadic {
+ goto again
+ }
+ case ',':
+ s.err(tok, "expected parameter name")
+ goto again
+ case ')':
+ // ok
+ default:
+ s.err(tok, "expected parameter name")
+ goto again
+ }
+ // Parse replacement list.
+ n := len(toks)
+ toks = s.scanToNonBlankToken(toks)
+ switch tok := toks[len(toks)-1]; tok.char {
+ case '\n':
+ if s.ctx.cfg.RejectFunctionMacroEmptyReplacementList {
+ s.err(tok, "expected replacement list")
+ }
+ return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, variadic: variadic, namedVariadic: namedVariadic}
+ default:
+ toks = s.scanLineToEOL(toks)
+ repl := toks[n:] // sans #define identifier
+ repl = repl[:len(repl)-1] // sans '\n'
+ // 6.10.3, 7
+ //
+ // Any white-space characters preceding or following the
+ // replacement list of preprocessing tokens are not considered
+ // part of the replacement list for either form of macro.
+ repl = trim3(repl)
+ repl = normalizeHashes(repl)
+ return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, replacementList: repl, variadic: variadic, namedVariadic: namedVariadic}
+ }
+}
+
+// isWhite reports whether char is a space, horizontal tab, new-line, vertical
+// tab or form feed.
+func isWhite(char rune) bool {
+	return char == ' ' || char == '\t' || char == '\n' || char == '\v' || char == '\f'
+}
+
+// trim3 returns toks without its leading and trailing white space tokens, as
+// classified by isWhite.
+func trim3(toks []token3) []token3 {
+	lo, hi := 0, len(toks)
+	for lo < hi && isWhite(toks[lo].char) {
+		lo++
+	}
+	for hi > lo && isWhite(toks[hi-1].char) {
+		hi--
+	}
+	return toks[lo:hi]
+}
+
+// normalizeHashes removes, in place, white space tokens directly following a
+// '#' or '##' token and the white space token directly preceding a '##'
+// token. The result shares its backing array with toks.
+func normalizeHashes(toks []token3) []token3 {
+	out := toks[:0]
+	var prev rune
+	for _, t := range toks {
+		if t.char == PPPASTE {
+			if isWhite(prev) {
+				// Drop the white space kept just before this '##'.
+				out = out[:len(out)-1]
+			}
+		} else if isWhite(t.char) && (prev == '#' || prev == PPPASTE) {
+			continue
+		}
+
+		prev = t.char
+		out = append(out, t)
+	}
+	return out
+}
+
+// parseDDD is called right after "..." in a macro parameter list. It scans
+// the next non-blank token and reports whether it is the closing ')';
+// anything else is an error. The extended token list is returned in both
+// cases.
+func (s *scanner) parseDDD(toks []token3) ([]token3, bool) {
+	toks = s.scanToNonBlankToken(toks)
+	closer := toks[len(toks)-1]
+	if closer.char == ')' {
+		return toks, true
+	}
+
+	s.err(closer, "expected right parenthesis")
+	return toks, false
+}
+
+// parseDefineObjectMacro parses the remainder of
+//
+//	# define identifier replacement-list new-line
+//
+// n is the number of tokens up to and including the macro name.
+func (s *scanner) parseDefineObjectMacro(n int, name token3, toks []token3) *ppDefineObjectMacroDirective {
+	toks = s.scanLineToEOL(toks)
+	// Everything after "#define identifier", sans the final '\n'.
+	body := toks[n : len(toks)-1]
+	// 6.10.3, 7
+	//
+	// Any white-space characters preceding or following the replacement
+	// list of preprocessing tokens are not considered part of the
+	// replacement list for either form of macro.
+	body = normalizeHashes(trim3(body))
+	return &ppDefineObjectMacroDirective{name: name, toks: toks, replacementList: body}
+}
+
+// scanToNonBlankToken appends tokens to s.tokenBuf up to and including the
+// first one that is not white space and returns toks extended by them. toks
+// must be the tail of s.tokenBuf. Scanning also stops at end of file, whose
+// token is not appended.
+//
+// Starting from empty toks the result is {}, {x} or {' ', x}.
+func (s *scanner) scanToNonBlankToken(toks []token3) []token3 {
+	start := len(s.tokenBuf) - len(toks)
+	for {
+		s.lex()
+		if s.tok.char >= 0 {
+			s.tokenBuf = append(s.tokenBuf, s.tok)
+			if s.tok.char == ' ' {
+				continue
+			}
+		}
+
+		return s.tokenBuf[start:]
+	}
+}
+
+// ---------------------------------------------------------------------- Cache
+
+// source is the input of translation phase 4: something that can provide the
+// result of translation phase 3, a *ppFile, or the error that prevented
+// producing it.
+type source interface {
+ ppFile() (*ppFile, error)
+}
+
+// cachedPPFile is a ppCache entry. Its result fields must be read only after
+// readyCh was closed, see waitFor.
+type cachedPPFile struct {
+ err error // set when the file could not be opened
+ errs goscanner.ErrorList // errors collected while scanning
+ modTime int64 // time.Time.UnixNano()
+ pf *ppFile // result of translation phase 3
+ readyCh chan struct{} // closed by ready once the entry is complete
+ size int // file size recorded when the entry was created
+}
+
+// ready marks the entry as complete, releasing everybody blocked in waitFor.
+// It must be called exactly once. It returns c.
+func (c *cachedPPFile) ready() *cachedPPFile {
+	close(c.readyCh)
+	return c
+}
+// waitFor blocks until ready was called and then returns c and c.err.
+func (c *cachedPPFile) waitFor() (*cachedPPFile, error) {
+	<-c.readyCh
+	return c, c.err
+}
+
+// ppFile implements source. It waits for the entry to become ready and
+// returns its *ppFile, or the entry's error.
+func (c *cachedPPFile) ppFile() (*ppFile, error) {
+	if _, err := c.waitFor(); err != nil {
+		return nil, err
+	}
+
+	return c.pf, nil
+}
+
+// cacheKey identifies a ppCache entry.
+type cacheKey struct {
+ name StringID // interned source name
+ sys bool // passed through by getFile/getValue callers
+ value StringID // interned source text for value backed sources, 0 for files
+ Config3 // configuration the entry was produced with
+}
+
+// ppCache caches the results of translation phase 3.
+type ppCache struct {
+ mu sync.RWMutex // guards m
+ m map[cacheKey]*cachedPPFile
+}
+
+// newPPCache returns an empty, ready to use ppCache.
+func newPPCache() *ppCache {
+	pc := new(ppCache)
+	pc.m = map[cacheKey]*cachedPPFile{}
+	return pc
+}
+
+func (c *ppCache) get(ctx *context, src Source) (source, error) {
+ if src.Value != "" {
+ return c.getValue(ctx, src.Name, src.Value, false, src.DoNotCache)
+ }
+
+ return c.getFile(ctx, src.Name, false, src.DoNotCache)
+}
+
+// getFile returns the cache entry for the file called name, scanning the file
+// when there is no usable entry yet.
+//
+// Only regular files are accepted. Files with a relative name are scanned on
+// every call and never cached. For absolute names an existing entry is
+// reused as long as the file is not newer than the entry and its size is
+// unchanged; otherwise the entry is replaced. New entries are not stored
+// when doNotCache is set.
+//
+// Errors collected while scanning are also reported to ctx.
+func (c *ppCache) getFile(ctx *context, name string, sys bool, doNotCache bool) (*cachedPPFile, error) {
+	fi, err := ctx.statFile(name, sys)
+	if err != nil {
+		return nil, err
+	}
+
+	if !fi.Mode().IsRegular() {
+		return nil, fmt.Errorf("%s is not a regular file", name)
+	}
+
+	if fi.Size() > mathutil.MaxInt {
+		return nil, fmt.Errorf("%s: file too big", name)
+	}
+
+	size := int(fi.Size())
+	if !filepath.IsAbs(name) { // Never cache relative paths
+		f, err := ctx.openFile(name, sys)
+		if err != nil {
+			return nil, err
+		}
+
+		defer f.Close()
+
+		tf := tokenNewFile(name, size)
+		ppFile := newScanner(ctx, f, tf).translationPhase3()
+		cf := &cachedPPFile{pf: ppFile, readyCh: make(chan struct{})}
+		cf.ready()
+		return cf, nil
+	}
+
+	modTime := fi.ModTime().UnixNano()
+	key := cacheKey{dict.sid(name), sys, 0, ctx.cfg.Config3}
+	c.mu.Lock()
+	if cf, ok := c.m[key]; ok {
+		if modTime <= cf.modTime && size == cf.size {
+			c.mu.Unlock()
+			// Wait before looking at cf.err or cf.errs: both are written by
+			// the scanning goroutine until it closes readyCh. Reading cf.err
+			// first, as this code used to do, is a data race.
+			if _, err := cf.waitFor(); err != nil {
+				return nil, err
+			}
+
+			ctx.errs(cf.errs)
+			return cf, nil
+		}
+
+		delete(c.m, key)
+	}
+
+	tf := tokenNewFile(name, size)
+	cf := &cachedPPFile{modTime: modTime, size: size, readyCh: make(chan struct{})}
+	if !doNotCache {
+		// Publish the entry before scanning so that concurrent callers wait
+		// for this scan instead of starting their own.
+		c.m[key] = cf
+	}
+	c.mu.Unlock()
+
+	go func() {
+		defer cf.ready()
+
+		f, err := ctx.openFile(name, sys)
+		if err != nil {
+			cf.err = err
+			return
+		}
+
+		defer f.Close()
+
+		// Scan with a private context so the errors of this file can be
+		// stored in the entry and replayed to later callers.
+		ctx2 := newContext(ctx.cfg)
+		cf.pf = newScanner(ctx2, f, tf).translationPhase3()
+		cf.errs = ctx2.ErrorList
+		ctx.errs(cf.errs)
+	}()
+
+	return cf.waitFor()
+}
+
+// getValue returns the cache entry for the in-memory source text value known
+// as name, scanning the text when there is no entry yet. New entries are not
+// stored when doNotCache is set.
+//
+// Errors collected while scanning are also reported to ctx.
+func (c *ppCache) getValue(ctx *context, name, value string, sys bool, doNotCache bool) (*cachedPPFile, error) {
+	key := cacheKey{name: dict.sid(name), sys: sys, value: dict.sid(value), Config3: ctx.cfg.Config3}
+	c.mu.Lock()
+	cached, hit := c.m[key]
+	if hit {
+		c.mu.Unlock()
+		if cached.err != nil {
+			return nil, cached.err
+		}
+
+		entry, err := cached.waitFor()
+		ctx.errs(cached.errs)
+		return entry, err
+	}
+
+	fresh := &cachedPPFile{readyCh: make(chan struct{})}
+	if !doNotCache {
+		c.m[key] = fresh
+	}
+	c.mu.Unlock()
+
+	// Scan with a private context so the errors can be kept in the entry.
+	tf := tokenNewFile(name, len(value))
+	scanCtx := newContext(ctx.cfg)
+	fresh.pf = newScanner(scanCtx, strings.NewReader(value), tf).translationPhase3()
+	fresh.errs = scanCtx.ErrorList
+	ctx.errs(fresh.errs)
+	return fresh.ready().waitFor()
+}