summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/d5/tengo/compiler/scanner/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/d5/tengo/compiler/scanner/scanner.go')
-rw-r--r--vendor/github.com/d5/tengo/compiler/scanner/scanner.go680
1 files changed, 0 insertions, 680 deletions
diff --git a/vendor/github.com/d5/tengo/compiler/scanner/scanner.go b/vendor/github.com/d5/tengo/compiler/scanner/scanner.go
deleted file mode 100644
index 387cd8ee..00000000
--- a/vendor/github.com/d5/tengo/compiler/scanner/scanner.go
+++ /dev/null
@@ -1,680 +0,0 @@
-/*
- Scanner reads the Tengo source text and tokenize them.
-
- Scanner is a modified version of Go's scanner implementation.
-
- Copyright 2009 The Go Authors. All rights reserved.
- Use of this source code is governed by a BSD-style
- license that can be found in the LICENSE file.
-*/
-
-package scanner
-
-import (
- "fmt"
- "unicode"
- "unicode/utf8"
-
- "github.com/d5/tengo/compiler/source"
- "github.com/d5/tengo/compiler/token"
-)
-
-// byte order mark
-const bom = 0xFEFF
-
-// Scanner reads the Tengo source text.
-type Scanner struct {
- file *source.File // source file handle
- src []byte // source
- ch rune // current character
- offset int // character offset
- readOffset int // reading offset (position after current character)
- lineOffset int // current line offset
- insertSemi bool // insert a semicolon before next newline
- errorHandler ErrorHandler // error reporting; or nil
- errorCount int // number of errors encountered
- mode Mode
-}
-
-// NewScanner creates a Scanner.
-func NewScanner(file *source.File, src []byte, errorHandler ErrorHandler, mode Mode) *Scanner {
- if file.Size != len(src) {
- panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size, len(src)))
- }
-
- s := &Scanner{
- file: file,
- src: src,
- errorHandler: errorHandler,
- ch: ' ',
- mode: mode,
- }
-
- s.next()
- if s.ch == bom {
- s.next() // ignore BOM at file beginning
- }
-
- return s
-}
-
-// ErrorCount returns the number of errors.
-func (s *Scanner) ErrorCount() int {
- return s.errorCount
-}
-
-// Scan returns a token, token literal and its position.
-func (s *Scanner) Scan() (tok token.Token, literal string, pos source.Pos) {
- s.skipWhitespace()
-
- pos = s.file.FileSetPos(s.offset)
-
- insertSemi := false
-
- // determine token value
- switch ch := s.ch; {
- case isLetter(ch):
- literal = s.scanIdentifier()
- tok = token.Lookup(literal)
- switch tok {
- case token.Ident, token.Break, token.Continue, token.Return, token.Export, token.True, token.False, token.Undefined:
- insertSemi = true
- }
- case '0' <= ch && ch <= '9':
- insertSemi = true
- tok, literal = s.scanNumber(false)
- default:
- s.next() // always make progress
-
- switch ch {
- case -1: // EOF
- if s.insertSemi {
- s.insertSemi = false // EOF consumed
- return token.Semicolon, "\n", pos
- }
- tok = token.EOF
- case '\n':
- // we only reach here if s.insertSemi was set in the first place
- s.insertSemi = false // newline consumed
- return token.Semicolon, "\n", pos
- case '"':
- insertSemi = true
- tok = token.String
- literal = s.scanString()
- case '\'':
- insertSemi = true
- tok = token.Char
- literal = s.scanRune()
- case '`':
- insertSemi = true
- tok = token.String
- literal = s.scanRawString()
- case ':':
- tok = s.switch2(token.Colon, token.Define)
- case '.':
- if '0' <= s.ch && s.ch <= '9' {
- insertSemi = true
- tok, literal = s.scanNumber(true)
- } else {
- tok = token.Period
- if s.ch == '.' && s.peek() == '.' {
- s.next()
- s.next() // consume last '.'
- tok = token.Ellipsis
- }
- }
- case ',':
- tok = token.Comma
- case '?':
- tok = token.Question
- case ';':
- tok = token.Semicolon
- literal = ";"
- case '(':
- tok = token.LParen
- case ')':
- insertSemi = true
- tok = token.RParen
- case '[':
- tok = token.LBrack
- case ']':
- insertSemi = true
- tok = token.RBrack
- case '{':
- tok = token.LBrace
- case '}':
- insertSemi = true
- tok = token.RBrace
- case '+':
- tok = s.switch3(token.Add, token.AddAssign, '+', token.Inc)
- if tok == token.Inc {
- insertSemi = true
- }
- case '-':
- tok = s.switch3(token.Sub, token.SubAssign, '-', token.Dec)
- if tok == token.Dec {
- insertSemi = true
- }
- case '*':
- tok = s.switch2(token.Mul, token.MulAssign)
- case '/':
- if s.ch == '/' || s.ch == '*' {
- // comment
- if s.insertSemi && s.findLineEnd() {
- // reset position to the beginning of the comment
- s.ch = '/'
- s.offset = s.file.Offset(pos)
- s.readOffset = s.offset + 1
- s.insertSemi = false // newline consumed
- return token.Semicolon, "\n", pos
- }
- comment := s.scanComment()
- if s.mode&ScanComments == 0 {
- // skip comment
- s.insertSemi = false // newline consumed
- return s.Scan()
- }
- tok = token.Comment
- literal = comment
- } else {
- tok = s.switch2(token.Quo, token.QuoAssign)
- }
- case '%':
- tok = s.switch2(token.Rem, token.RemAssign)
- case '^':
- tok = s.switch2(token.Xor, token.XorAssign)
- case '<':
- tok = s.switch4(token.Less, token.LessEq, '<', token.Shl, token.ShlAssign)
- case '>':
- tok = s.switch4(token.Greater, token.GreaterEq, '>', token.Shr, token.ShrAssign)
- case '=':
- tok = s.switch2(token.Assign, token.Equal)
- case '!':
- tok = s.switch2(token.Not, token.NotEqual)
- case '&':
- if s.ch == '^' {
- s.next()
- tok = s.switch2(token.AndNot, token.AndNotAssign)
- } else {
- tok = s.switch3(token.And, token.AndAssign, '&', token.LAnd)
- }
- case '|':
- tok = s.switch3(token.Or, token.OrAssign, '|', token.LOr)
- default:
- // next reports unexpected BOMs - don't repeat
- if ch != bom {
- s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
- }
- insertSemi = s.insertSemi // preserve insertSemi info
- tok = token.Illegal
- literal = string(ch)
- }
- }
-
- if s.mode&DontInsertSemis == 0 {
- s.insertSemi = insertSemi
- }
-
- return
-}
-
-func (s *Scanner) next() {
- if s.readOffset < len(s.src) {
- s.offset = s.readOffset
- if s.ch == '\n' {
- s.lineOffset = s.offset
- s.file.AddLine(s.offset)
- }
- r, w := rune(s.src[s.readOffset]), 1
- switch {
- case r == 0:
- s.error(s.offset, "illegal character NUL")
- case r >= utf8.RuneSelf:
- // not ASCII
- r, w = utf8.DecodeRune(s.src[s.readOffset:])
- if r == utf8.RuneError && w == 1 {
- s.error(s.offset, "illegal UTF-8 encoding")
- } else if r == bom && s.offset > 0 {
- s.error(s.offset, "illegal byte order mark")
- }
- }
- s.readOffset += w
- s.ch = r
- } else {
- s.offset = len(s.src)
- if s.ch == '\n' {
- s.lineOffset = s.offset
- s.file.AddLine(s.offset)
- }
- s.ch = -1 // eof
- }
-}
-
-func (s *Scanner) peek() byte {
- if s.readOffset < len(s.src) {
- return s.src[s.readOffset]
- }
-
- return 0
-}
-
-func (s *Scanner) error(offset int, msg string) {
- if s.errorHandler != nil {
- s.errorHandler(s.file.Position(s.file.FileSetPos(offset)), msg)
- }
-
- s.errorCount++
-}
-
-func (s *Scanner) scanComment() string {
- // initial '/' already consumed; s.ch == '/' || s.ch == '*'
- offs := s.offset - 1 // position of initial '/'
- var numCR int
-
- if s.ch == '/' {
- //-style comment
- // (the final '\n' is not considered part of the comment)
- s.next()
- for s.ch != '\n' && s.ch >= 0 {
- if s.ch == '\r' {
- numCR++
- }
- s.next()
- }
- goto exit
- }
-
- /*-style comment */
- s.next()
- for s.ch >= 0 {
- ch := s.ch
- if ch == '\r' {
- numCR++
- }
- s.next()
- if ch == '*' && s.ch == '/' {
- s.next()
- goto exit
- }
- }
-
- s.error(offs, "comment not terminated")
-
-exit:
- lit := s.src[offs:s.offset]
-
- // On Windows, a (//-comment) line may end in "\r\n".
- // Remove the final '\r' before analyzing the text for line directives (matching the compiler).
- // Remove any other '\r' afterwards (matching the pre-existing behavior of the scanner).
- if numCR > 0 && len(lit) >= 2 && lit[1] == '/' && lit[len(lit)-1] == '\r' {
- lit = lit[:len(lit)-1]
- numCR--
- }
-
- if numCR > 0 {
- lit = StripCR(lit, lit[1] == '*')
- }
-
- return string(lit)
-}
-
-func (s *Scanner) findLineEnd() bool {
- // initial '/' already consumed
-
- defer func(offs int) {
- // reset scanner state to where it was upon calling findLineEnd
- s.ch = '/'
- s.offset = offs
- s.readOffset = offs + 1
- s.next() // consume initial '/' again
- }(s.offset - 1)
-
- // read ahead until a newline, EOF, or non-comment tok is found
- for s.ch == '/' || s.ch == '*' {
- if s.ch == '/' {
- //-style comment always contains a newline
- return true
- }
- /*-style comment: look for newline */
- s.next()
- for s.ch >= 0 {
- ch := s.ch
- if ch == '\n' {
- return true
- }
- s.next()
- if ch == '*' && s.ch == '/' {
- s.next()
- break
- }
- }
- s.skipWhitespace() // s.insertSemi is set
- if s.ch < 0 || s.ch == '\n' {
- return true
- }
- if s.ch != '/' {
- // non-comment tok
- return false
- }
- s.next() // consume '/'
- }
-
- return false
-}
-
-func (s *Scanner) scanIdentifier() string {
- offs := s.offset
- for isLetter(s.ch) || isDigit(s.ch) {
- s.next()
- }
-
- return string(s.src[offs:s.offset])
-}
-
-func (s *Scanner) scanMantissa(base int) {
- for digitVal(s.ch) < base {
- s.next()
- }
-}
-
-func (s *Scanner) scanNumber(seenDecimalPoint bool) (tok token.Token, lit string) {
- // digitVal(s.ch) < 10
- offs := s.offset
- tok = token.Int
-
- defer func() {
- lit = string(s.src[offs:s.offset])
- }()
-
- if seenDecimalPoint {
- offs--
- tok = token.Float
- s.scanMantissa(10)
- goto exponent
- }
-
- if s.ch == '0' {
- // int or float
- offs := s.offset
- s.next()
- if s.ch == 'x' || s.ch == 'X' {
- // hexadecimal int
- s.next()
- s.scanMantissa(16)
- if s.offset-offs <= 2 {
- // only scanned "0x" or "0X"
- s.error(offs, "illegal hexadecimal number")
- }
- } else {
- // octal int or float
- seenDecimalDigit := false
- s.scanMantissa(8)
- if s.ch == '8' || s.ch == '9' {
- // illegal octal int or float
- seenDecimalDigit = true
- s.scanMantissa(10)
- }
- if s.ch == '.' || s.ch == 'e' || s.ch == 'E' || s.ch == 'i' {
- goto fraction
- }
- // octal int
- if seenDecimalDigit {
- s.error(offs, "illegal octal number")
- }
- }
-
- return
- }
-
- // decimal int or float
- s.scanMantissa(10)
-
-fraction:
- if s.ch == '.' {
- tok = token.Float
- s.next()
- s.scanMantissa(10)
- }
-
-exponent:
- if s.ch == 'e' || s.ch == 'E' {
- tok = token.Float
- s.next()
- if s.ch == '-' || s.ch == '+' {
- s.next()
- }
- if digitVal(s.ch) < 10 {
- s.scanMantissa(10)
- } else {
- s.error(offs, "illegal floating-point exponent")
- }
- }
-
- return
-}
-
-func (s *Scanner) scanEscape(quote rune) bool {
- offs := s.offset
-
- var n int
- var base, max uint32
- switch s.ch {
- case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
- s.next()
- return true
- case '0', '1', '2', '3', '4', '5', '6', '7':
- n, base, max = 3, 8, 255
- case 'x':
- s.next()
- n, base, max = 2, 16, 255
- case 'u':
- s.next()
- n, base, max = 4, 16, unicode.MaxRune
- case 'U':
- s.next()
- n, base, max = 8, 16, unicode.MaxRune
- default:
- msg := "unknown escape sequence"
- if s.ch < 0 {
- msg = "escape sequence not terminated"
- }
- s.error(offs, msg)
- return false
- }
-
- var x uint32
- for n > 0 {
- d := uint32(digitVal(s.ch))
- if d >= base {
- msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
- if s.ch < 0 {
- msg = "escape sequence not terminated"
- }
- s.error(s.offset, msg)
- return false
- }
- x = x*base + d
- s.next()
- n--
- }
-
- if x > max || 0xD800 <= x && x < 0xE000 {
- s.error(offs, "escape sequence is invalid Unicode code point")
- return false
- }
-
- return true
-}
-
-func (s *Scanner) scanRune() string {
- offs := s.offset - 1 // '\'' opening already consumed
-
- valid := true
- n := 0
- for {
- ch := s.ch
- if ch == '\n' || ch < 0 {
- // only report error if we don't have one already
- if valid {
- s.error(offs, "rune literal not terminated")
- valid = false
- }
- break
- }
- s.next()
- if ch == '\'' {
- break
- }
- n++
- if ch == '\\' {
- if !s.scanEscape('\'') {
- valid = false
- }
- // continue to read to closing quote
- }
- }
-
- if valid && n != 1 {
- s.error(offs, "illegal rune literal")
- }
-
- return string(s.src[offs:s.offset])
-}
-
-func (s *Scanner) scanString() string {
- offs := s.offset - 1 // '"' opening already consumed
-
- for {
- ch := s.ch
- if ch == '\n' || ch < 0 {
- s.error(offs, "string literal not terminated")
- break
- }
- s.next()
- if ch == '"' {
- break
- }
- if ch == '\\' {
- s.scanEscape('"')
- }
- }
-
- return string(s.src[offs:s.offset])
-}
-
-func (s *Scanner) scanRawString() string {
- offs := s.offset - 1 // '`' opening already consumed
-
- hasCR := false
- for {
- ch := s.ch
- if ch < 0 {
- s.error(offs, "raw string literal not terminated")
- break
- }
-
- s.next()
-
- if ch == '`' {
- break
- }
-
- if ch == '\r' {
- hasCR = true
- }
- }
-
- lit := s.src[offs:s.offset]
- if hasCR {
- lit = StripCR(lit, false)
- }
-
- return string(lit)
-}
-
-// StripCR removes carriage return characters.
-func StripCR(b []byte, comment bool) []byte {
- c := make([]byte, len(b))
-
- i := 0
- for j, ch := range b {
- // In a /*-style comment, don't strip \r from *\r/ (incl. sequences of \r from *\r\r...\r/)
- // since the resulting */ would terminate the comment too early unless the \r is immediately
- // following the opening /* in which case it's ok because /*/ is not closed yet.
- if ch != '\r' || comment && i > len("/*") && c[i-1] == '*' && j+1 < len(b) && b[j+1] == '/' {
- c[i] = ch
- i++
- }
- }
-
- return c[:i]
-}
-
-func (s *Scanner) skipWhitespace() {
- for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' {
- s.next()
- }
-}
-
-func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
- if s.ch == '=' {
- s.next()
- return tok1
- }
-
- return tok0
-}
-
-func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
- if s.ch == '=' {
- s.next()
- return tok1
- }
-
- if s.ch == ch2 {
- s.next()
- return tok2
- }
-
- return tok0
-}
-
-func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
- if s.ch == '=' {
- s.next()
- return tok1
- }
-
- if s.ch == ch2 {
- s.next()
- if s.ch == '=' {
- s.next()
- return tok3
- }
-
- return tok2
- }
-
- return tok0
-}
-
-func isLetter(ch rune) bool {
- return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
-}
-
-func isDigit(ch rune) bool {
- return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
-}
-
-func digitVal(ch rune) int {
- switch {
- case '0' <= ch && ch <= '9':
- return int(ch - '0')
- case 'a' <= ch && ch <= 'f':
- return int(ch - 'a' + 10)
- case 'A' <= ch && ch <= 'F':
- return int(ch - 'A' + 10)
- }
-
- return 16 // larger than any legal digit val
-}