Diffstat (limited to 'vendor/modernc.org/cc/v3/scanner.go')
-rw-r--r-- | vendor/modernc.org/cc/v3/scanner.go | 1266 |
1 file changed, 1266 insertions, 0 deletions
diff --git a/vendor/modernc.org/cc/v3/scanner.go b/vendor/modernc.org/cc/v3/scanner.go
new file mode 100644
index 00000000..6217d7db
--- /dev/null
+++ b/vendor/modernc.org/cc/v3/scanner.go
@@ -0,0 +1,1266 @@
+// Copyright 2019 The CC Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cc // import "modernc.org/cc/v3"
+
+import (
+    "bufio"
+    "bytes"
+    "fmt"
+    goscanner "go/scanner"
+    "io"
+    "path/filepath"
+    "strconv"
+    "strings"
+    "sync"
+    "unicode/utf8"
+
+    "modernc.org/mathutil"
+    "modernc.org/token"
+)
+
+const (
+    clsEOF = iota + 0x80
+    clsOther
+)
+
+const maxASCII = 0x7f
+
+var (
+    bom = []byte{0xEF, 0xBB, 0xBF}
+
+    idDefine      = dict.sid("define")
+    idElif        = dict.sid("elif")
+    idElse        = dict.sid("else")
+    idEndif       = dict.sid("endif")
+    idError       = dict.sid("error")
+    idIf          = dict.sid("if")
+    idIfdef       = dict.sid("ifdef")
+    idIfndef      = dict.sid("ifndef")
+    idInclude     = dict.sid("include")
+    idIncludeNext = dict.sid("include_next")
+    idLine        = dict.sid("line")
+    idPragma      = dict.sid("pragma")
+    idPragmaOp    = dict.sid("_Pragma")
+    idSpace       = dict.sid(" ")
+    idUndef       = dict.sid("undef")
+
+    trigraphPrefix = []byte("??")
+    trigraphs      = []struct{ from, to []byte }{
+        {[]byte("??="), []byte{'#'}},
+        {[]byte("??("), []byte{'['}},
+        {[]byte("??/"), []byte{'\\'}},
+        {[]byte("??)"), []byte{']'}},
+        {[]byte("??'"), []byte{'^'}},
+        {[]byte("??<"), []byte{'{'}},
+        {[]byte("??!"), []byte{'|'}},
+        {[]byte("??>"), []byte{'}'}},
+        {[]byte("??-"), []byte{'~'}},
+    }
+)
+
+type tokenFile struct {
+    *token.File
+    sync.RWMutex
+}
+
+func tokenNewFile(name string, sz int) *tokenFile { return &tokenFile{File: token.NewFile(name, sz)} }
+
+func (f *tokenFile) Position(pos token.Pos) (r token.Position) {
+    f.RLock()
+    r = f.File.Position(pos)
+    f.RUnlock()
+    return r
+}
+
+func (f *tokenFile) PositionFor(pos token.Pos, adjusted bool) (r token.Position) {
+    f.RLock()
+    r = f.File.PositionFor(pos, adjusted)
+    f.RUnlock()
+    return r
+}
+
+func (f *tokenFile) AddLine(off int) {
+    f.Lock()
+    f.File.AddLine(off)
+    f.Unlock()
+}
+
+func (f *tokenFile) AddLineInfo(off int, fn string, line int) {
+    f.Lock()
+    f.File.AddLineInfo(off, fn, line)
+    f.Unlock()
+}
+
+type node interface {
+    Pos() token.Pos
+}
+
+type dictionary struct {
+    mu      sync.RWMutex
+    m       map[string]StringID
+    strings []string
+}
+
+func newDictionary() (r *dictionary) {
+    r = &dictionary{m: map[string]StringID{}}
+    b := make([]byte, 1)
+    for i := 0; i < 128; i++ {
+        var s string
+        if i != 0 {
+            b[0] = byte(i)
+            s = string(b)
+        }
+        r.m[s] = StringID(i)
+        r.strings = append(r.strings, s)
+        dictStrings[i] = s
+    }
+    return r
+}
+
+func (d *dictionary) id(key []byte) StringID {
+    switch len(key) {
+    case 0:
+        return 0
+    case 1:
+        if c := key[0]; c != 0 && c < 128 {
+            return StringID(c)
+        }
+    }
+
+    d.mu.Lock()
+    if n, ok := d.m[string(key)]; ok {
+        d.mu.Unlock()
+        return n
+    }
+
+    n := StringID(len(d.strings))
+    s := string(key)
+    if int(n) < 256 {
+        dictStrings[n] = s
+    }
+    d.strings = append(d.strings, s)
+    d.m[s] = n
+    d.mu.Unlock()
+    return n
+}
+
+func (d *dictionary) sid(key string) StringID {
+    switch len(key) {
+    case 0:
+        return 0
+    case 1:
+        if c := key[0]; c != 0 && c < 128 {
+            return StringID(c)
+        }
+    }
+
+    d.mu.Lock()
+    if n, ok := d.m[key]; ok {
+        d.mu.Unlock()
+        return n
+    }
+
+    n := StringID(len(d.strings))
+    if int(n) < 256 {
+        dictStrings[n] = key
+    }
+    d.strings = append(d.strings, key)
+    d.m[key] = n
+    d.mu.Unlock()
+    return n
+}
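The dictionary above interns every distinct token spelling once and hands out small integer IDs: the empty string is ID 0 and each single ASCII byte is its own ID, so one-character tokens never touch the lock. A minimal standalone sketch of the same scheme (hypothetical interner type, not this package's API):

    package main

    import (
        "fmt"
        "sync"
    )

    // ID is an interned-string handle; 0 is the empty string and IDs 1..127
    // are reserved for the corresponding single ASCII characters, mirroring
    // the fast paths in dictionary.id/sid above.
    type ID int

    type interner struct {
        mu      sync.Mutex
        m       map[string]ID
        strings []string
    }

    func newInterner() *interner {
        r := &interner{m: map[string]ID{}}
        for i := 0; i < 128; i++ {
            s := ""
            if i != 0 {
                s = string(rune(i))
            }
            r.m[s] = ID(i)
            r.strings = append(r.strings, s)
        }
        return r
    }

    func (d *interner) sid(key string) ID {
        if len(key) == 1 && key[0] != 0 && key[0] < 128 {
            return ID(key[0]) // ASCII fast path, no lock needed
        }
        d.mu.Lock()
        defer d.mu.Unlock()
        if n, ok := d.m[key]; ok {
            return n
        }
        n := ID(len(d.strings))
        d.strings = append(d.strings, key)
        d.m[key] = n
        return n
    }

    func main() {
        d := newInterner()
        fmt.Println(d.sid("define"), d.sid("define")) // same ID both times: 128 128
        fmt.Println(d.sid("a"))                       // 97, the byte value of 'a'
    }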
+type char struct {
+    pos int32
+    c   byte
+}
+
+// token3 is produced by translation phase 3.
+type token3 struct {
+    char  rune
+    pos   int32
+    value StringID
+    src   StringID
+    macro StringID
+}
+
+func (t token3) Pos() token.Pos { return token.Pos(t.pos) }
+func (t token3) String() string { return t.value.String() }
+
+type scanner struct {
+    bomFix        int
+    bytesBuf      []byte
+    charBuf       []char
+    ctx           *context
+    file          *tokenFile
+    fileOffset    int
+    firstPos      token.Pos
+    lineBuf       []byte
+    lookaheadChar char
+    lookaheadLine ppLine
+    mark          int
+    pos           token.Pos
+    r             *bufio.Reader
+    srcBuf        []byte
+    tokenBuf      []token3
+    ungetBuf      []char
+
+    tok token3
+
+    closed             bool
+    preserveWhiteSpace bool
+}
+
+func newScanner0(ctx *context, r io.Reader, file *tokenFile, bufSize int) *scanner {
+    s := &scanner{
+        ctx:  ctx,
+        file: file,
+        r:    bufio.NewReaderSize(r, bufSize),
+    }
+    if r != nil {
+        s.init()
+    }
+    return s
+}
+
+func newScanner(ctx *context, r io.Reader, file *tokenFile) *scanner {
+    bufSize := 1 << 17 // emulate gcc
+    if n := ctx.cfg.MaxSourceLine; n > 4096 {
+        bufSize = n
+    }
+    return newScanner0(ctx, r, file, bufSize)
+}
+
+func (s *scanner) abort() (r byte, b bool) {
+    if s.mark >= 0 {
+        if len(s.charBuf) > s.mark {
+            s.unget(s.lookaheadChar)
+            for i := len(s.charBuf) - 1; i >= s.mark; i-- {
+                s.unget(s.charBuf[i])
+            }
+        }
+        s.charBuf = s.charBuf[:s.mark]
+        return 0, false
+    }
+
+    switch n := len(s.charBuf); n {
+    case 0: // [] z
+        c := s.lookaheadChar
+        s.next()
+        return s.class(c.c), true
+    case 1: // [a] z
+        return s.class(s.charBuf[0].c), true
+    default: // [a, b, ...], z
+        c := s.charBuf[0]        // a
+        s.unget(s.lookaheadChar) // z
+        for i := n - 1; i > 1; i-- {
+            s.unget(s.charBuf[i]) // ...
+        }
+        s.lookaheadChar = s.charBuf[1] // b
+        s.charBuf = s.charBuf[:1]
+        return s.class(c.c), true
+    }
+}
+
+func (s *scanner) class(b byte) byte {
+    switch {
+    case b == 0:
+        return clsEOF
+    case b > maxASCII:
+        return clsOther
+    default:
+        return b
+    }
+}
+
+func (s *scanner) err(n node, msg string, args ...interface{}) { s.errPos(n.Pos(), msg, args...) }
+
+func (s *scanner) errLine(x interface{}, msg string, args ...interface{}) {
+    var toks []token3
+    switch x := x.(type) {
+    case nil:
+        toks = []token3{{}}
+    case ppLine:
+        toks = x.getToks()
+    default:
+        panic(internalError())
+    }
+    var b strings.Builder
+    for _, v := range toks {
+        switch v.char {
+        case '\n':
+            // nop
+        case ' ':
+            b.WriteByte(' ')
+        default:
+            b.WriteString(v.String())
+        }
+    }
+    s.err(toks[0], "%s"+msg, append([]interface{}{b.String()}, args...)...)
+}
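The mark/abort/unget machinery gives the token scanner a way to try a longer match and rewind when it fails: characters consumed by a tentative match are pushed back onto an unget stack and rescanned. A simplified sketch of that backtracking idea, with all names hypothetical and no claim to match scan's internals:

    package main

    import "fmt"

    // A minimal backtracking reader: next consumes from the unget stack
    // first, mark remembers how much input a tentative match has consumed,
    // and rollback pushes the consumed bytes back so scanning can resume
    // from the marked point.
    type reader struct {
        src      []byte
        consumed []byte // bytes accepted by the current tentative match
        unget    []byte // stack of characters pushed back
    }

    func (r *reader) next() (byte, bool) {
        if n := len(r.unget); n > 0 {
            c := r.unget[n-1]
            r.unget = r.unget[:n-1]
            r.consumed = append(r.consumed, c)
            return c, true
        }
        if len(r.src) == 0 {
            return 0, false
        }
        c := r.src[0]
        r.src = r.src[1:]
        r.consumed = append(r.consumed, c)
        return c, true
    }

    func (r *reader) mark() int { return len(r.consumed) }

    func (r *reader) rollback(mark int) {
        for i := len(r.consumed) - 1; i >= mark; i-- {
            r.unget = append(r.unget, r.consumed[i])
        }
        r.consumed = r.consumed[:mark]
    }

    func main() {
        r := &reader{src: []byte("<=>")}
        m := r.mark()
        r.next() // '<'
        if c, _ := r.next(); c != '<' {
            r.rollback(m) // "<=" is not "<<": rewind and rescan
        }
        c, _ := r.next()
        fmt.Printf("%c\n", c) // '<' again after the rollback
    }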
+func (s *scanner) errPos(pos token.Pos, msg string, args ...interface{}) {
+    if s.ctx.err(s.file.Position(pos), msg, args...) {
+        s.r.Reset(nil)
+        s.closed = true
+    }
+}
+
+func (s *scanner) init() *scanner {
+    if s.r == nil {
+        return s
+    }
+
+    b, err := s.r.Peek(3)
+    if err == nil && bytes.Equal(b, bom) {
+        s.bomFix, _ = s.r.Discard(3)
+    }
+    s.tokenBuf = nil
+    return s
+}
+
+func (s *scanner) initScan() (r byte) {
+    if s.lookaheadChar.pos == 0 {
+        s.next()
+    }
+    s.firstPos = token.Pos(s.lookaheadChar.pos)
+    s.mark = -1
+    if len(s.charBuf) > 1<<18 { //DONE benchmark tuned
+        s.bytesBuf = nil
+        s.charBuf = nil
+        s.srcBuf = nil
+    } else {
+        s.bytesBuf = s.bytesBuf[:0]
+        s.charBuf = s.charBuf[:0]
+        s.srcBuf = s.bytesBuf[:0]
+    }
+    return s.class(s.lookaheadChar.c)
+}
+
+func (s *scanner) lex() {
+    s.tok.char = s.scan()
+    s.tok.pos = int32(s.firstPos)
+    for _, v := range s.charBuf {
+        s.srcBuf = append(s.srcBuf, v.c)
+    }
+    s.tok.src = dict.id(s.srcBuf)
+    switch {
+    case s.tok.char == ' ' && !s.preserveWhiteSpace && !s.ctx.cfg.PreserveWhiteSpace:
+        s.tok.value = idSpace
+    case s.tok.char == IDENTIFIER:
+        for i := 0; i < len(s.charBuf); {
+            c := s.charBuf[i].c
+            if c != '\\' {
+                s.bytesBuf = append(s.bytesBuf, c)
+                i++
+                continue
+            }
+
+            i++ // Skip '\\'
+            var n int
+            switch s.charBuf[i].c {
+            case 'u':
+                n = 4
+            case 'U':
+                n = 8
+            default:
+                panic(internalError())
+            }
+            i++ // Skip 'u' or 'U'
+            l := len(s.bytesBuf)
+            for i0 := i; i < i0+n; i++ {
+                s.bytesBuf = append(s.bytesBuf, s.charBuf[i].c)
+            }
+            r, err := strconv.ParseUint(string(s.bytesBuf[l:l+n]), 16, 32)
+            if err != nil {
+                panic(internalError())
+            }
+
+            n2 := utf8.EncodeRune(s.bytesBuf[l:], rune(r))
+            s.bytesBuf = s.bytesBuf[:l+n2]
+        }
+        s.tok.value = dict.id(s.bytesBuf)
+    default:
+        s.tok.value = s.tok.src
+    }
+    switch s.tok.char {
+    case clsEOF:
+        s.tok.char = -1
+        s.tok.pos = int32(s.file.Pos(s.file.Size()))
+    }
+    // dbg("lex %q %q", tokName(s.tok.char), s.tok.value)
+}
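lex above decodes universal character names (\uXXXX and \UXXXXXXXX) inside identifiers into UTF-8 before interning the identifier's value. The same transformation as a runnable sketch (hypothetical decodeUCNs helper; no error recovery beyond the happy path):

    package main

    import (
        "fmt"
        "strconv"
        "unicode/utf8"
    )

    // decodeUCNs rewrites universal character names in an identifier to
    // UTF-8, mirroring the IDENTIFIER branch of lex: parse 4 or 8 hex
    // digits after \u or \U and emit the encoded rune.
    func decodeUCNs(src string) string {
        var out []byte
        for i := 0; i < len(src); {
            if src[i] != '\\' {
                out = append(out, src[i])
                i++
                continue
            }
            i++    // skip '\'
            n := 4 // \uXXXX
            if src[i] == 'U' {
                n = 8 // \UXXXXXXXX
            }
            i++ // skip 'u' or 'U'
            r, err := strconv.ParseUint(src[i:i+n], 16, 32)
            if err != nil {
                panic(err)
            }
            i += n
            out = utf8.AppendRune(out, rune(r))
        }
        return string(out)
    }

    func main() {
        fmt.Println(decodeUCNs(`na\u00EFve`)) // naïve
    }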
+func (s *scanner) next() (r byte) {
+    if s.lookaheadChar.pos > 0 {
+        s.charBuf = append(s.charBuf, s.lookaheadChar)
+    }
+    if n := len(s.ungetBuf); n != 0 {
+        s.lookaheadChar = s.ungetBuf[n-1]
+        s.ungetBuf = s.ungetBuf[:n-1]
+        return s.class(s.lookaheadChar.c)
+    }
+
+    if len(s.lineBuf) == 0 {
+    more:
+        if s.closed || s.fileOffset == s.file.Size() {
+            s.lookaheadChar.c = 0
+            s.lookaheadChar.pos = 0
+            return clsEOF
+        }
+
+        b, err := s.r.ReadSlice('\n')
+        if err != nil {
+            if err != io.EOF {
+                s.errPos(s.pos, "error while reading %s: %s", s.file.Name(), err)
+            }
+            if len(b) == 0 {
+                return clsEOF
+            }
+        }
+
+        s.file.AddLine(s.fileOffset)
+        s.fileOffset += s.bomFix
+        s.bomFix = 0
+        s.pos = token.Pos(s.fileOffset)
+        s.fileOffset += len(b)
+
+        // [0], 5.1.1.2, 1.1
+        //
+        // Physical source file multibyte characters are mapped, in an
+        // implementation-defined manner, to the source character set
+        // (introducing new-line characters for end-of-line indicators)
+        // if necessary. Trigraph sequences are replaced by
+        // corresponding single-character internal representations.
+        if !s.ctx.cfg.DisableTrigraphs && bytes.Contains(b, trigraphPrefix) {
+            for _, v := range trigraphs {
+                b = bytes.Replace(b, v.from, v.to, -1)
+            }
+        }
+
+        // [0], 5.1.1.2, 2
+        //
+        // Each instance of a backslash character (\) immediately
+        // followed by a new-line character is deleted, splicing
+        // physical source lines to form logical source lines. Only
+        // the last backslash on any physical source line shall be
+        // eligible for being part of such a splice. A source file that
+        // is not empty shall end in a new-line character, which shall
+        // not be immediately preceded by a backslash character before
+        // any such splicing takes place.
+        s.lineBuf = b
+        n := len(b)
+        switch {
+        case b[n-1] != '\n':
+            if s.ctx.cfg.RejectMissingFinalNewline {
+                s.errPos(s.pos+token.Pos(n), "non empty source file shall end in a new-line character")
+            }
+            b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
+        case n > 1 && b[n-2] == '\\':
+            if n == 2 {
+                goto more
+            }
+
+            b = b[:n-2]
+            n = len(b)
+            if s.fileOffset == s.file.Size() {
+                if s.ctx.cfg.RejectFinalBackslash {
+                    s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
+                }
+                b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
+            }
+        case n > 2 && b[n-3] == '\\' && b[n-2] == '\r':
+            // we've got a windows source that has \r\n line endings.
+            if n == 3 {
+                goto more
+            }
+
+            b = b[:n-3]
+            n = len(b)
+            if s.fileOffset == s.file.Size() {
+                if s.ctx.cfg.RejectFinalBackslash {
+                    s.errPos(s.pos+token.Pos(n+1), "source file final new-line character shall not be preceded by a backslash character")
+                }
+                b = append(b[:n:n], '\n') // bufio.Reader owns the bytes
+            }
+        }
+        s.lineBuf = b
+    }
+    s.pos++
+    s.lookaheadChar = char{int32(s.pos), s.lineBuf[0]}
+    s.lineBuf = s.lineBuf[1:]
+    return s.class(s.lookaheadChar.c)
+}
+
+func (s *scanner) unget(c ...char) {
+    s.ungetBuf = append(s.ungetBuf, c...)
+    s.lookaheadChar.pos = 0 // Must invalidate lookahead.
+}
+
+func (s *scanner) unterminatedComment() rune {
+    s.errPos(token.Pos(s.file.Size()), "unterminated comment")
+    n := len(s.charBuf)
+    s.unget(s.charBuf[n-1]) // \n
+    s.charBuf = s.charBuf[:n-1]
+    return ' '
+}
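next applies translation phases 1 and 2 one physical line at a time: trigraphs first, then backslash-newline splicing. A whole-buffer sketch of the two rewrites (deliberately simplified; the real code above also handles CRLF endings and files missing a final newline):

    package main

    import (
        "bytes"
        "fmt"
    )

    // phases12 in miniature: replace trigraphs, then delete every
    // backslash-newline pair to splice physical lines into logical lines.
    func phases12(src []byte) []byte {
        trigraphs := []struct{ from, to []byte }{
            {[]byte("??="), []byte("#")},
            {[]byte("??("), []byte("[")},
            {[]byte("??)"), []byte("]")},
            {[]byte("??<"), []byte("{")},
            {[]byte("??>"), []byte("}")},
        }
        if bytes.Contains(src, []byte("??")) {
            for _, v := range trigraphs {
                src = bytes.Replace(src, v.from, v.to, -1)
            }
        }
        return bytes.ReplaceAll(src, []byte("\\\n"), nil)
    }

    func main() {
        src := []byte("??=define TEN 1\\\n0\n")
        fmt.Printf("%s", phases12(src)) // #define TEN 10
    }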
+// -------------------------------------------------------- Translation phase 3
+
+// [0], 5.1.1.2, 3
+//
+// The source file is decomposed into preprocessing tokens and sequences of
+// white-space characters (including comments). A source file shall not end in
+// a partial preprocessing token or in a partial comment. Each comment is
+// replaced by one space character. New-line characters are retained. Whether
+// each nonempty sequence of white-space characters other than new-line is
+// retained or replaced by one space character is implementation-defined.
+func (s *scanner) translationPhase3() *ppFile {
+    r := &ppFile{file: s.file}
+    if s.file.Size() == 0 {
+        s.r.Reset(nil)
+        return r
+    }
+
+    s.nextLine()
+    r.groups = s.parseGroup()
+    return r
+}
+
+func (s *scanner) parseGroup() (r []ppGroup) {
+    for {
+        switch x := s.lookaheadLine.(type) {
+        case ppGroup:
+            r = append(r, x)
+            s.nextLine()
+        case ppIfGroupDirective:
+            r = append(r, s.parseIfSection())
+        default:
+            return r
+        }
+    }
+}
+
+func (s *scanner) parseIfSection() *ppIfSection {
+    return &ppIfSection{
+        ifGroup:    s.parseIfGroup(),
+        elifGroups: s.parseElifGroup(),
+        elseGroup:  s.parseElseGroup(),
+        endifLine:  s.parseEndifLine(),
+    }
+}
+
+func (s *scanner) parseEndifLine() *ppEndifDirective {
+    switch x := s.lookaheadLine.(type) {
+    case *ppEndifDirective:
+        s.nextLine()
+        return x
+    default:
+        s.errLine(x, fmt.Sprintf(": expected #endif (unexpected %T)", x))
+        s.nextLine()
+        return nil
+    }
+}
+
+func (s *scanner) parseElseGroup() *ppElseGroup {
+    switch x := s.lookaheadLine.(type) {
+    case *ppElseDirective:
+        r := &ppElseGroup{elseLine: x}
+        s.nextLine()
+        r.groups = s.parseGroup()
+        return r
+    default:
+        return nil
+    }
+}
+
+func (s *scanner) parseElifGroup() (r []*ppElifGroup) {
+    for {
+        var g ppElifGroup
+        switch x := s.lookaheadLine.(type) {
+        case *ppElifDirective:
+            g.elif = x
+            s.nextLine()
+            g.groups = s.parseGroup()
+            r = append(r, &g)
+        default:
+            return r
+        }
+    }
+}
+
+func (s *scanner) parseIfGroup() *ppIfGroup {
+    r := &ppIfGroup{}
+    switch x := s.lookaheadLine.(type) {
+    case ppIfGroupDirective:
+        r.directive = x
+    default:
+        s.errLine(x, fmt.Sprintf(": expected if-group (unexpected %T)", x))
+    }
+    s.nextLine()
+    r.groups = s.parseGroup()
+    return r
+}
+
+func (s *scanner) nextLine() {
+    s.tokenBuf = nil
+    s.lookaheadLine = s.scanLine()
+}
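These mutually recursive methods are a recursive-descent rendering of the conditional-inclusion grammar from the C standard (C99 6.10), restated here for orientation:

    if-section:
        if-group elif-groups_opt else-group_opt endif-line
    if-group:
        # if constant-expression new-line group_opt
        # ifdef identifier new-line group_opt
        # ifndef identifier new-line group_opt
    elif-groups:
        elif-group
        elif-groups elif-group
    elif-group:
        # elif constant-expression new-line group_opt
    else-group:
        # else new-line group_opt
    endif-line:
        # endif new-line

parseGroup handles the optional group productions, parseIfSection builds one ppIfSection per if-section, and the remaining methods map one-to-one onto the productions above.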
+func (s *scanner) scanLine() (r ppLine) {
+again:
+    toks := s.scanToNonBlankToken(nil)
+    if len(toks) == 0 {
+        return nil
+    }
+
+    includeNext := false
+    switch tok := toks[len(toks)-1]; tok.char {
+    case '#':
+        toks = s.scanToNonBlankToken(toks)
+        switch tok := toks[len(toks)-1]; tok.char {
+        case '\n':
+            return &ppEmptyDirective{toks: toks}
+        case IDENTIFIER:
+            switch tok.value {
+            case idDefine:
+                return s.parseDefine(toks)
+            case idElif:
+                return s.parseElif(toks)
+            case idElse:
+                return s.parseElse(toks)
+            case idEndif:
+                return s.parseEndif(toks)
+            case idIf:
+                return s.parseIf(toks)
+            case idIfdef:
+                return s.parseIfdef(toks)
+            case idIfndef:
+                return s.parseIfndef(toks)
+            case idIncludeNext:
+                includeNext = true
+                fallthrough
+            case idInclude:
+                // # include pp-tokens new-line
+                //
+                // Prevent aliasing of e.g. <foo  bar.h> and <foo bar.h>.
+                save := s.preserveWhiteSpace
+                s.preserveWhiteSpace = true
+                n := len(toks)
+                toks := s.scanLineToEOL(toks)
+                r := &ppIncludeDirective{arg: toks[n : len(toks)-1], toks: toks, includeNext: includeNext}
+                s.preserveWhiteSpace = save
+                return r
+            case idUndef:
+                return s.parseUndef(toks)
+            case idLine:
+                return s.parseLine(toks)
+            case idError:
+                // # error pp-tokens_opt new-line
+                n := len(toks)
+                toks := s.scanLineToEOL(toks)
+                msg := toks[n : len(toks)-1]
+                if len(msg) != 0 && msg[0].char == ' ' {
+                    msg = msg[1:]
+                }
+                return &ppErrorDirective{toks: toks, msg: msg}
+            case idPragma:
+                return s.parsePragma(toks)
+            }
+        }
+
+        // # non-directive
+        return &ppNonDirective{toks: s.scanLineToEOL(toks)}
+    case '\n':
+        return &ppTextLine{toks: toks}
+    case IDENTIFIER:
+        if tok.value == idPragmaOp {
+            toks = s.scanToNonBlankToken(toks)
+            switch tok = toks[len(toks)-1]; tok.char {
+            case '(':
+                // ok
+            default:
+                s.err(tok, "expected (")
+                return &ppTextLine{toks: toks}
+            }
+
+            var lit string
+            toks = s.scanToNonBlankToken(toks)
+            switch tok = toks[len(toks)-1]; tok.char {
+            case STRINGLITERAL:
+                lit = tok.String()
+            case LONGSTRINGLITERAL:
+                lit = tok.String()[1:] // [0], 6.9.10, 1
+            default:
+                s.err(tok, "expected string literal")
+                return &ppTextLine{toks: toks}
+            }
+
+            pos := tok.pos
+            toks = s.scanToNonBlankToken(toks)
+            switch tok = toks[len(toks)-1]; tok.char {
+            case ')':
+                // ok
+            default:
+                s.err(tok, "expected )")
+                return &ppTextLine{toks: toks}
+            }
+
+            s.unget(s.lookaheadChar)
+            // [0], 6.9.10, 1
+            lit = lit[1 : len(lit)-1]
+            lit = strings.ReplaceAll(lit, `\"`, `"`)
+            lit = strings.ReplaceAll(lit, `\\`, `\`)
+            lit = "#pragma " + lit + "\n"
+            for i := len(lit) - 1; i >= 0; i-- {
+                s.unget(char{pos, lit[i]})
+            }
+            goto again
+        }
+
+        fallthrough
+    default:
+        return &ppTextLine{toks: s.scanLineToEOL(toks)}
+    }
+}
+
+func (s *scanner) parsePragma(toks []token3) *ppPragmaDirective {
+    toks = s.scanToNonBlankToken(toks)
+    n := len(toks)
+    if toks[n-1].char != '\n' {
+        toks = s.scanLineToEOL(toks)
+    }
+    return &ppPragmaDirective{toks: toks, args: toks[n-1:]}
+}
+
+// # line pp-tokens new-line
+func (s *scanner) parseLine(toks []token3) *ppLineDirective {
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case '\n':
+        s.err(tok, "unexpected new-line")
+        return &ppLineDirective{toks: toks}
+    default:
+        toks := s.scanLineToEOL(toks)
+        last := toks[len(toks)-1]
+        r := &ppLineDirective{toks: toks, nextPos: int(last.pos) + len(last.src.String())}
+        toks = toks[:len(toks)-1] // sans new-line
+        toks = ltrim3(toks)
+        toks = toks[1:] // Skip '#'
+        toks = ltrim3(toks)
+        toks = toks[1:] // Skip "line"
+        r.args = ltrim3(toks)
+        return r
+    }
+}
+
+func ltrim3(toks []token3) []token3 {
+    for len(toks) != 0 && toks[0].char == ' ' {
+        toks = toks[1:]
+    }
+    return toks
+}
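When scanLine meets the _Pragma operator it destringizes the argument and pushes the equivalent #pragma line back into the input, then rescans. The destringizing step in isolation (hypothetical destring helper, same rewrites as above):

    package main

    import (
        "fmt"
        "strings"
    )

    // destring converts the string-literal argument of a _Pragma operator
    // into the equivalent #pragma directive text: drop the surrounding
    // quotes, then undo the escape sequences \" and \\.
    func destring(lit string) string {
        lit = lit[1 : len(lit)-1]
        lit = strings.ReplaceAll(lit, `\"`, `"`)
        lit = strings.ReplaceAll(lit, `\\`, `\`)
        return "#pragma " + lit + "\n"
    }

    func main() {
        fmt.Print(destring(`"STDC FP_CONTRACT OFF"`)) // #pragma STDC FP_CONTRACT OFF
    }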
+// # undef identifier new-line
+func (s *scanner) parseUndef(toks []token3) *ppUndefDirective {
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case '\n':
+        s.err(&tok, "expected identifier")
+        return &ppUndefDirective{toks: toks}
+    case IDENTIFIER:
+        name := tok
+        toks = s.scanToNonBlankToken(toks)
+        switch tok := toks[len(toks)-1]; tok.char {
+        case '\n':
+            return &ppUndefDirective{name: name, toks: toks}
+        default:
+            if s.ctx.cfg.RejectUndefExtraTokens {
+                s.err(&tok, "extra tokens after #undef")
+            }
+            return &ppUndefDirective{name: name, toks: s.scanLineToEOL(toks)}
+        }
+    default:
+        s.err(&tok, "expected identifier")
+        return &ppUndefDirective{toks: s.scanLineToEOL(toks)}
+    }
+}
+
+func (s *scanner) scanLineToEOL(toks []token3) []token3 {
+    n := len(s.tokenBuf) - len(toks)
+    for {
+        s.lex()
+        s.tokenBuf = append(s.tokenBuf, s.tok)
+        if s.tok.char == '\n' {
+            return s.tokenBuf[n:]
+        }
+    }
+}
+
+// # ifndef identifier new-line
+func (s *scanner) parseIfndef(toks []token3) *ppIfndefDirective {
+    var name StringID
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case IDENTIFIER:
+        name = tok.value
+        toks = s.scanToNonBlankToken(toks)
+        switch tok := toks[len(toks)-1]; tok.char {
+        case '\n':
+            return &ppIfndefDirective{name: name, toks: toks}
+        default:
+            if s.ctx.cfg.RejectIfndefExtraTokens {
+                s.err(&tok, "extra tokens after #ifndef")
+            }
+            return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)}
+        }
+    case '\n':
+        s.err(tok, "expected identifier")
+        return &ppIfndefDirective{name: name, toks: toks}
+    default:
+        s.err(tok, "expected identifier")
+        return &ppIfndefDirective{name: name, toks: s.scanLineToEOL(toks)}
+    }
+}
+
+// # ifdef identifier new-line
+func (s *scanner) parseIfdef(toks []token3) *ppIfdefDirective {
+    var name StringID
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case IDENTIFIER:
+        name = tok.value
+        toks = s.scanToNonBlankToken(toks)
+        switch tok := toks[len(toks)-1]; tok.char {
+        case '\n':
+            return &ppIfdefDirective{name: name, toks: toks}
+        default:
+            if s.ctx.cfg.RejectIfdefExtraTokens {
+                s.err(&tok, "extra tokens after #ifdef")
+            }
+            return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)}
+        }
+    case '\n':
+        s.err(tok, "expected identifier")
+        return &ppIfdefDirective{name: name, toks: toks}
+    default:
+        s.err(tok, "expected identifier")
+        return &ppIfdefDirective{name: name, toks: s.scanLineToEOL(toks)}
+    }
+}
+
+// # if constant-expression new-line
+func (s *scanner) parseIf(toks []token3) *ppIfDirective {
+    n := len(toks)
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case '\n':
+        s.err(tok, "expected expression")
+        return &ppIfDirective{toks: toks}
+    default:
+        toks = s.scanLineToEOL(toks)
+        expr := toks[n:]
+        if expr[0].char == ' ' { // sans leading space
+            expr = expr[1:]
+        }
+        expr = expr[:len(expr)-1] // sans '\n'
+        return &ppIfDirective{toks: toks, expr: expr}
+    }
+}
+
+// # endif new-line
+func (s *scanner) parseEndif(toks []token3) *ppEndifDirective {
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case '\n':
+        return &ppEndifDirective{toks}
+    default:
+        if s.ctx.cfg.RejectEndifExtraTokens {
+            s.err(&tok, "extra tokens after #endif")
+        }
+        return &ppEndifDirective{s.scanLineToEOL(toks)}
+    }
+}
+
+// # else new-line
+func (s *scanner) parseElse(toks []token3) *ppElseDirective {
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case '\n':
+        return &ppElseDirective{toks}
+    default:
+        if s.ctx.cfg.RejectElseExtraTokens {
+            s.err(&tok, "extra tokens after #else")
+        }
+        return &ppElseDirective{s.scanLineToEOL(toks)}
+    }
+}
+
+// # elif constant-expression new-line
+func (s *scanner) parseElif(toks []token3) *ppElifDirective {
+    n := len(toks)
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case '\n':
+        s.err(tok, "expected expression")
+        return &ppElifDirective{toks, nil}
+    default:
+        toks = s.scanLineToEOL(toks)
+        expr := toks[n:]
+        if expr[0].char == ' ' { // sans leading space
+            expr = expr[1:]
+        }
+        expr = expr[:len(expr)-1] // sans '\n'
+        return &ppElifDirective{toks, expr}
+    }
+}
+func (s *scanner) parseDefine(toks []token3) ppLine {
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case IDENTIFIER:
+        name := tok
+        n := len(toks)
+        toks = s.scanToNonBlankToken(toks)
+        switch tok := toks[len(toks)-1]; tok.char {
+        case '\n':
+            return &ppDefineObjectMacroDirective{name: name, toks: toks}
+        case '(':
+            if toks[n].char == ' ' {
+                return s.parseDefineObjectMacro(n, name, toks)
+            }
+
+            return s.parseDefineFunctionMacro(name, toks)
+        default:
+            return s.parseDefineObjectMacro(n, name, toks)
+        }
+    case '\n':
+        s.err(tok, "expected identifier")
+        return &ppDefineObjectMacroDirective{toks: toks}
+    default:
+        s.err(tok, "expected identifier")
+        return &ppDefineObjectMacroDirective{toks: s.scanLineToEOL(toks)}
+    }
+}
+
+// # define identifier lparen identifier-list_opt ) replacement-list new-line
+// # define identifier lparen ... ) replacement-list new-line
+// # define identifier lparen identifier-list , ... ) replacement-list new-line
+func (s *scanner) parseDefineFunctionMacro(name token3, toks []token3) *ppDefineFunctionMacroDirective {
+    // Parse parameters after "#define name(".
+    var list []token3
+    variadic := false
+    namedVariadic := false
+again:
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case IDENTIFIER:
+    more:
+        list = append(list, tok)
+        toks = s.scanToNonBlankToken(toks)
+        switch tok = toks[len(toks)-1]; tok.char {
+        case ',':
+            toks = s.scanToNonBlankToken(toks)
+            switch tok = toks[len(toks)-1]; tok.char {
+            case IDENTIFIER:
+                goto more
+            case DDD:
+                if toks, variadic = s.parseDDD(toks); !variadic {
+                    goto again
+                }
+            case ')':
+                s.err(tok, "expected parameter name")
+            default:
+                s.err(tok, "unexpected %q", &tok)
+            }
+        case DDD:
+            namedVariadic = true
+            if s.ctx.cfg.RejectInvalidVariadicMacros {
+                s.err(tok, "expected comma")
+            }
+            if toks, variadic = s.parseDDD(toks); !variadic {
+                goto again
+            }
+        case ')':
+            // ok
+        case '\n':
+            s.err(tok, "unexpected new-line")
+            return &ppDefineFunctionMacroDirective{toks: toks}
+        case IDENTIFIER:
+            s.err(tok, "expected comma")
+            goto more
+        default:
+            s.err(tok, "unexpected %q", &tok)
+        }
+    case DDD:
+        if toks, variadic = s.parseDDD(toks); !variadic {
+            goto again
+        }
+    case ',':
+        s.err(tok, "expected parameter name")
+        goto again
+    case ')':
+        // ok
+    default:
+        s.err(tok, "expected parameter name")
+        goto again
+    }
+    // Parse replacement list.
+    n := len(toks)
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case '\n':
+        if s.ctx.cfg.RejectFunctionMacroEmptyReplacementList {
+            s.err(tok, "expected replacement list")
+        }
+        return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, variadic: variadic, namedVariadic: namedVariadic}
+    default:
+        toks = s.scanLineToEOL(toks)
+        repl := toks[n:]          // sans #define identifier
+        repl = repl[:len(repl)-1] // sans '\n'
+        // 6.10.3, 7
+        //
+        // Any white-space characters preceding or following the
+        // replacement list of preprocessing tokens are not considered
+        // part of the replacement list for either form of macro.
+        repl = trim3(repl)
+        repl = normalizeHashes(repl)
+        return &ppDefineFunctionMacroDirective{name: name, identifierList: list, toks: toks, replacementList: repl, variadic: variadic, namedVariadic: namedVariadic}
+    }
+}
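parseDefine's toks[n].char == ' ' test implements the standard's rule that only a '(' immediately following the macro name opens a parameter list; with intervening white space the definition is an object-like macro whose replacement list happens to begin with '('. A toy classifier illustrating the distinction (hypothetical helper, string-based, not this package's API):

    package main

    import (
        "fmt"
        "strings"
    )

    // classify reports whether a #define introduces a function-like macro:
    // the first ' ' or '(' after the name decides. Assumes its input starts
    // with "#define ".
    func classify(line string) string {
        rest := strings.TrimPrefix(line, "#define ")
        i := strings.IndexAny(rest, " (")
        if i >= 0 && rest[i] == '(' {
            return "function-like"
        }
        return "object-like"
    }

    func main() {
        fmt.Println(classify("#define f(x) x"))  // function-like
        fmt.Println(classify("#define f (x) x")) // object-like, replacement "(x) x"
    }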
+func isWhite(char rune) bool {
+    switch char {
+    case ' ', '\t', '\n', '\v', '\f':
+        return true
+    }
+    return false
+}
+
+func trim3(toks []token3) []token3 {
+    for len(toks) != 0 && isWhite(toks[0].char) {
+        toks = toks[1:]
+    }
+    for len(toks) != 0 && isWhite(toks[len(toks)-1].char) {
+        toks = toks[:len(toks)-1]
+    }
+    return toks
+}
+
+func normalizeHashes(toks []token3) []token3 {
+    w := 0
+    var last rune
+    for _, v := range toks {
+        switch {
+        case v.char == PPPASTE:
+            if isWhite(last) {
+                w--
+            }
+        case isWhite(v.char):
+            if last == '#' || last == PPPASTE {
+                continue
+            }
+        }
+        last = v.char
+        toks[w] = v
+        w++
+    }
+    return toks[:w]
+}
+
+func (s *scanner) parseDDD(toks []token3) ([]token3, bool) {
+    toks = s.scanToNonBlankToken(toks)
+    switch tok := toks[len(toks)-1]; tok.char {
+    case ')':
+        return toks, true
+    default:
+        s.err(tok, "expected right parenthesis")
+        return toks, false
+    }
+}
+
+// # define identifier replacement-list new-line
+func (s *scanner) parseDefineObjectMacro(n int, name token3, toks []token3) *ppDefineObjectMacroDirective {
+    toks = s.scanLineToEOL(toks)
+    repl := toks[n:]          // sans #define identifier
+    repl = repl[:len(repl)-1] // sans '\n'
+    // 6.10.3, 7
+    //
+    // Any white-space characters preceding or following the replacement
+    // list of preprocessing tokens are not considered part of the
+    // replacement list for either form of macro.
+    repl = trim3(repl)
+    repl = normalizeHashes(repl)
+    return &ppDefineObjectMacroDirective{name: name, toks: toks, replacementList: repl}
+}
+
+// Return {}, {x} or {' ', x}
+func (s *scanner) scanToNonBlankToken(toks []token3) []token3 {
+    n := len(s.tokenBuf) - len(toks)
+    for {
+        s.lex()
+        if s.tok.char < 0 {
+            return s.tokenBuf[n:]
+        }
+
+        s.tokenBuf = append(s.tokenBuf, s.tok)
+        if s.tok.char != ' ' {
+            return s.tokenBuf[n:]
+        }
+    }
+}
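normalizeHashes drops white-space tokens around the ## paste operator (and after #) in a replacement list, so later phases see the operator in a canonical shape regardless of how the definition spelled it. The same pass over plain strings instead of token3 values, as a runnable sketch (" " stands for a white-space token):

    package main

    import "fmt"

    // normalize removes the space before ## and the space after # or ##,
    // compacting the slice in place, like normalizeHashes above.
    func normalize(toks []string) []string {
        w := 0
        last := ""
        for _, v := range toks {
            switch {
            case v == "##":
                if last == " " {
                    w-- // drop the space already emitted before ##
                }
            case v == " ":
                if last == "#" || last == "##" {
                    continue // drop the space after # or ##
                }
            }
            last = v
            toks[w] = v
            w++
        }
        return toks[:w]
    }

    func main() {
        fmt.Println(normalize([]string{"a", " ", "##", " ", "b"})) // [a ## b]
    }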
+// ---------------------------------------------------------------------- Cache
+
+// Translation phase 4 source.
+type source interface {
+    ppFile() (*ppFile, error)
+}
+
+type cachedPPFile struct {
+    err     error
+    errs    goscanner.ErrorList
+    modTime int64 // time.Time.UnixNano()
+    pf      *ppFile
+    readyCh chan struct{}
+    size    int
+}
+
+func (c *cachedPPFile) ready() *cachedPPFile            { close(c.readyCh); return c }
+func (c *cachedPPFile) waitFor() (*cachedPPFile, error) { <-c.readyCh; return c, c.err }
+
+func (c *cachedPPFile) ppFile() (*ppFile, error) {
+    c.waitFor()
+    if c.err == nil {
+        return c.pf, nil
+    }
+
+    return nil, c.err
+}
+
+type cacheKey struct {
+    name  StringID
+    sys   bool
+    value StringID
+    Config3
+}
+
+type ppCache struct {
+    mu sync.RWMutex
+    m  map[cacheKey]*cachedPPFile
+}
+
+func newPPCache() *ppCache { return &ppCache{m: map[cacheKey]*cachedPPFile{}} }
+
+func (c *ppCache) get(ctx *context, src Source) (source, error) {
+    if src.Value != "" {
+        return c.getValue(ctx, src.Name, src.Value, false, src.DoNotCache)
+    }
+
+    return c.getFile(ctx, src.Name, false, src.DoNotCache)
+}
+
+func (c *ppCache) getFile(ctx *context, name string, sys bool, doNotCache bool) (*cachedPPFile, error) {
+    fi, err := ctx.statFile(name, sys)
+    if err != nil {
+        return nil, err
+    }
+
+    if !fi.Mode().IsRegular() {
+        return nil, fmt.Errorf("%s is not a regular file", name)
+    }
+
+    if fi.Size() > mathutil.MaxInt {
+        return nil, fmt.Errorf("%s: file too big", name)
+    }
+
+    size := int(fi.Size())
+    if !filepath.IsAbs(name) { // Never cache relative paths
+        f, err := ctx.openFile(name, sys)
+        if err != nil {
+            return nil, err
+        }
+
+        defer f.Close()
+
+        tf := tokenNewFile(name, size)
+        ppFile := newScanner(ctx, f, tf).translationPhase3()
+        cf := &cachedPPFile{pf: ppFile, readyCh: make(chan struct{})}
+        cf.ready()
+        return cf, nil
+    }
+
+    modTime := fi.ModTime().UnixNano()
+    key := cacheKey{dict.sid(name), sys, 0, ctx.cfg.Config3}
+    c.mu.Lock()
+    if cf, ok := c.m[key]; ok {
+        if modTime <= cf.modTime && size == cf.size {
+            c.mu.Unlock()
+            if cf.err != nil {
+                return nil, cf.err
+            }
+
+            r, err := cf.waitFor()
+            ctx.errs(cf.errs)
+            return r, err
+        }
+
+        delete(c.m, key)
+    }
+
+    tf := tokenNewFile(name, size)
+    cf := &cachedPPFile{modTime: modTime, size: size, readyCh: make(chan struct{})}
+    if !doNotCache {
+        c.m[key] = cf
+    }
+    c.mu.Unlock()
+
+    go func() {
+        defer cf.ready()
+
+        f, err := ctx.openFile(name, sys)
+        if err != nil {
+            cf.err = err
+            return
+        }
+
+        defer f.Close()
+
+        ctx2 := newContext(ctx.cfg)
+        cf.pf = newScanner(ctx2, f, tf).translationPhase3()
+        cf.errs = ctx2.ErrorList
+        ctx.errs(cf.errs)
+    }()
+
+    return cf.waitFor()
+}
+
+func (c *ppCache) getValue(ctx *context, name, value string, sys bool, doNotCache bool) (*cachedPPFile, error) {
+    key := cacheKey{dict.sid(name), sys, dict.sid(value), ctx.cfg.Config3}
+    c.mu.Lock()
+    if cf, ok := c.m[key]; ok {
+        c.mu.Unlock()
+        if cf.err != nil {
+            return nil, cf.err
+        }
+
+        r, err := cf.waitFor()
+        ctx.errs(cf.errs)
+        return r, err
+    }
+
+    tf := tokenNewFile(name, len(value))
+    cf := &cachedPPFile{readyCh: make(chan struct{})}
+    if !doNotCache {
+        c.m[key] = cf
+    }
+    c.mu.Unlock()
+    ctx2 := newContext(ctx.cfg)
+    cf.pf = newScanner(ctx2, strings.NewReader(value), tf).translationPhase3()
+    cf.errs = ctx2.ErrorList
+    ctx.errs(cf.errs)
+    cf.ready()
+    return cf.waitFor()
+}
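cachedPPFile implements a compute-once cache entry: the goroutine that creates the entry closes readyCh when parsing finishes, and every other caller blocks in waitFor until then. The pattern reduced to a standalone sketch (hypothetical cache with string keys and values):

    package main

    import (
        "fmt"
        "sync"
    )

    type entry struct {
        val     string
        err     error
        readyCh chan struct{}
    }

    type cache struct {
        mu sync.Mutex
        m  map[string]*entry
    }

    // get computes each key's value at most once. The first caller installs
    // an entry under the lock and computes outside it; later callers block
    // on readyCh. The channel close publishes val/err to the waiters, so the
    // entry is effectively immutable afterwards.
    func (c *cache) get(key string, compute func() (string, error)) (string, error) {
        c.mu.Lock()
        if e, ok := c.m[key]; ok {
            c.mu.Unlock()
            <-e.readyCh // wait for whoever is computing it
            return e.val, e.err
        }
        e := &entry{readyCh: make(chan struct{})}
        c.m[key] = e
        c.mu.Unlock()

        e.val, e.err = compute()
        close(e.readyCh) // wake all waiters
        return e.val, e.err
    }

    func main() {
        c := &cache{m: map[string]*entry{}}
        v, _ := c.get("a.h", func() (string, error) { return "parsed a.h", nil })
        fmt.Println(v)
        v, _ = c.get("a.h", func() (string, error) { panic("not recomputed") })
        fmt.Println(v)
    }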