diff options
Diffstat (limited to 'vendor/github.com/BurntSushi/toml/lex.go')
-rw-r--r-- | vendor/github.com/BurntSushi/toml/lex.go | 953 |
1 files changed, 0 insertions, 953 deletions
diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go deleted file mode 100644 index f1f4b2de..00000000 --- a/vendor/github.com/BurntSushi/toml/lex.go +++ /dev/null @@ -1,953 +0,0 @@ -package toml - -import ( - "fmt" - "strings" - "unicode" - "unicode/utf8" -) - -type itemType int - -const ( - itemError itemType = iota - itemNIL // used in the parser to indicate no type - itemEOF - itemText - itemString - itemRawString - itemMultilineString - itemRawMultilineString - itemBool - itemInteger - itemFloat - itemDatetime - itemArray // the start of an array - itemArrayEnd - itemTableStart - itemTableEnd - itemArrayTableStart - itemArrayTableEnd - itemKeyStart - itemCommentStart - itemInlineTableStart - itemInlineTableEnd -) - -const ( - eof = 0 - comma = ',' - tableStart = '[' - tableEnd = ']' - arrayTableStart = '[' - arrayTableEnd = ']' - tableSep = '.' - keySep = '=' - arrayStart = '[' - arrayEnd = ']' - commentStart = '#' - stringStart = '"' - stringEnd = '"' - rawStringStart = '\'' - rawStringEnd = '\'' - inlineTableStart = '{' - inlineTableEnd = '}' -) - -type stateFn func(lx *lexer) stateFn - -type lexer struct { - input string - start int - pos int - line int - state stateFn - items chan item - - // Allow for backing up up to three runes. - // This is necessary because TOML contains 3-rune tokens (""" and '''). - prevWidths [3]int - nprev int // how many of prevWidths are in use - // If we emit an eof, we can still back up, but it is not OK to call - // next again. - atEOF bool - - // A stack of state functions used to maintain context. - // The idea is to reuse parts of the state machine in various places. - // For example, values can appear at the top level or within arbitrarily - // nested arrays. The last state on the stack is used after a value has - // been lexed. Similarly for comments. - stack []stateFn -} - -type item struct { - typ itemType - val string - line int -} - -func (lx *lexer) nextItem() item { - for { - select { - case item := <-lx.items: - return item - default: - lx.state = lx.state(lx) - } - } -} - -func lex(input string) *lexer { - lx := &lexer{ - input: input, - state: lexTop, - line: 1, - items: make(chan item, 10), - stack: make([]stateFn, 0, 10), - } - return lx -} - -func (lx *lexer) push(state stateFn) { - lx.stack = append(lx.stack, state) -} - -func (lx *lexer) pop() stateFn { - if len(lx.stack) == 0 { - return lx.errorf("BUG in lexer: no states to pop") - } - last := lx.stack[len(lx.stack)-1] - lx.stack = lx.stack[0 : len(lx.stack)-1] - return last -} - -func (lx *lexer) current() string { - return lx.input[lx.start:lx.pos] -} - -func (lx *lexer) emit(typ itemType) { - lx.items <- item{typ, lx.current(), lx.line} - lx.start = lx.pos -} - -func (lx *lexer) emitTrim(typ itemType) { - lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line} - lx.start = lx.pos -} - -func (lx *lexer) next() (r rune) { - if lx.atEOF { - panic("next called after EOF") - } - if lx.pos >= len(lx.input) { - lx.atEOF = true - return eof - } - - if lx.input[lx.pos] == '\n' { - lx.line++ - } - lx.prevWidths[2] = lx.prevWidths[1] - lx.prevWidths[1] = lx.prevWidths[0] - if lx.nprev < 3 { - lx.nprev++ - } - r, w := utf8.DecodeRuneInString(lx.input[lx.pos:]) - lx.prevWidths[0] = w - lx.pos += w - return r -} - -// ignore skips over the pending input before this point. -func (lx *lexer) ignore() { - lx.start = lx.pos -} - -// backup steps back one rune. Can be called only twice between calls to next. -func (lx *lexer) backup() { - if lx.atEOF { - lx.atEOF = false - return - } - if lx.nprev < 1 { - panic("backed up too far") - } - w := lx.prevWidths[0] - lx.prevWidths[0] = lx.prevWidths[1] - lx.prevWidths[1] = lx.prevWidths[2] - lx.nprev-- - lx.pos -= w - if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' { - lx.line-- - } -} - -// accept consumes the next rune if it's equal to `valid`. -func (lx *lexer) accept(valid rune) bool { - if lx.next() == valid { - return true - } - lx.backup() - return false -} - -// peek returns but does not consume the next rune in the input. -func (lx *lexer) peek() rune { - r := lx.next() - lx.backup() - return r -} - -// skip ignores all input that matches the given predicate. -func (lx *lexer) skip(pred func(rune) bool) { - for { - r := lx.next() - if pred(r) { - continue - } - lx.backup() - lx.ignore() - return - } -} - -// errorf stops all lexing by emitting an error and returning `nil`. -// Note that any value that is a character is escaped if it's a special -// character (newlines, tabs, etc.). -func (lx *lexer) errorf(format string, values ...interface{}) stateFn { - lx.items <- item{ - itemError, - fmt.Sprintf(format, values...), - lx.line, - } - return nil -} - -// lexTop consumes elements at the top level of TOML data. -func lexTop(lx *lexer) stateFn { - r := lx.next() - if isWhitespace(r) || isNL(r) { - return lexSkip(lx, lexTop) - } - switch r { - case commentStart: - lx.push(lexTop) - return lexCommentStart - case tableStart: - return lexTableStart - case eof: - if lx.pos > lx.start { - return lx.errorf("unexpected EOF") - } - lx.emit(itemEOF) - return nil - } - - // At this point, the only valid item can be a key, so we back up - // and let the key lexer do the rest. - lx.backup() - lx.push(lexTopEnd) - return lexKeyStart -} - -// lexTopEnd is entered whenever a top-level item has been consumed. (A value -// or a table.) It must see only whitespace, and will turn back to lexTop -// upon a newline. If it sees EOF, it will quit the lexer successfully. -func lexTopEnd(lx *lexer) stateFn { - r := lx.next() - switch { - case r == commentStart: - // a comment will read to a newline for us. - lx.push(lexTop) - return lexCommentStart - case isWhitespace(r): - return lexTopEnd - case isNL(r): - lx.ignore() - return lexTop - case r == eof: - lx.emit(itemEOF) - return nil - } - return lx.errorf("expected a top-level item to end with a newline, "+ - "comment, or EOF, but got %q instead", r) -} - -// lexTable lexes the beginning of a table. Namely, it makes sure that -// it starts with a character other than '.' and ']'. -// It assumes that '[' has already been consumed. -// It also handles the case that this is an item in an array of tables. -// e.g., '[[name]]'. -func lexTableStart(lx *lexer) stateFn { - if lx.peek() == arrayTableStart { - lx.next() - lx.emit(itemArrayTableStart) - lx.push(lexArrayTableEnd) - } else { - lx.emit(itemTableStart) - lx.push(lexTableEnd) - } - return lexTableNameStart -} - -func lexTableEnd(lx *lexer) stateFn { - lx.emit(itemTableEnd) - return lexTopEnd -} - -func lexArrayTableEnd(lx *lexer) stateFn { - if r := lx.next(); r != arrayTableEnd { - return lx.errorf("expected end of table array name delimiter %q, "+ - "but got %q instead", arrayTableEnd, r) - } - lx.emit(itemArrayTableEnd) - return lexTopEnd -} - -func lexTableNameStart(lx *lexer) stateFn { - lx.skip(isWhitespace) - switch r := lx.peek(); { - case r == tableEnd || r == eof: - return lx.errorf("unexpected end of table name " + - "(table names cannot be empty)") - case r == tableSep: - return lx.errorf("unexpected table separator " + - "(table names cannot be empty)") - case r == stringStart || r == rawStringStart: - lx.ignore() - lx.push(lexTableNameEnd) - return lexValue // reuse string lexing - default: - return lexBareTableName - } -} - -// lexBareTableName lexes the name of a table. It assumes that at least one -// valid character for the table has already been read. -func lexBareTableName(lx *lexer) stateFn { - r := lx.next() - if isBareKeyChar(r) { - return lexBareTableName - } - lx.backup() - lx.emit(itemText) - return lexTableNameEnd -} - -// lexTableNameEnd reads the end of a piece of a table name, optionally -// consuming whitespace. -func lexTableNameEnd(lx *lexer) stateFn { - lx.skip(isWhitespace) - switch r := lx.next(); { - case isWhitespace(r): - return lexTableNameEnd - case r == tableSep: - lx.ignore() - return lexTableNameStart - case r == tableEnd: - return lx.pop() - default: - return lx.errorf("expected '.' or ']' to end table name, "+ - "but got %q instead", r) - } -} - -// lexKeyStart consumes a key name up until the first non-whitespace character. -// lexKeyStart will ignore whitespace. -func lexKeyStart(lx *lexer) stateFn { - r := lx.peek() - switch { - case r == keySep: - return lx.errorf("unexpected key separator %q", keySep) - case isWhitespace(r) || isNL(r): - lx.next() - return lexSkip(lx, lexKeyStart) - case r == stringStart || r == rawStringStart: - lx.ignore() - lx.emit(itemKeyStart) - lx.push(lexKeyEnd) - return lexValue // reuse string lexing - default: - lx.ignore() - lx.emit(itemKeyStart) - return lexBareKey - } -} - -// lexBareKey consumes the text of a bare key. Assumes that the first character -// (which is not whitespace) has not yet been consumed. -func lexBareKey(lx *lexer) stateFn { - switch r := lx.next(); { - case isBareKeyChar(r): - return lexBareKey - case isWhitespace(r): - lx.backup() - lx.emit(itemText) - return lexKeyEnd - case r == keySep: - lx.backup() - lx.emit(itemText) - return lexKeyEnd - default: - return lx.errorf("bare keys cannot contain %q", r) - } -} - -// lexKeyEnd consumes the end of a key and trims whitespace (up to the key -// separator). -func lexKeyEnd(lx *lexer) stateFn { - switch r := lx.next(); { - case r == keySep: - return lexSkip(lx, lexValue) - case isWhitespace(r): - return lexSkip(lx, lexKeyEnd) - default: - return lx.errorf("expected key separator %q, but got %q instead", - keySep, r) - } -} - -// lexValue starts the consumption of a value anywhere a value is expected. -// lexValue will ignore whitespace. -// After a value is lexed, the last state on the next is popped and returned. -func lexValue(lx *lexer) stateFn { - // We allow whitespace to precede a value, but NOT newlines. - // In array syntax, the array states are responsible for ignoring newlines. - r := lx.next() - switch { - case isWhitespace(r): - return lexSkip(lx, lexValue) - case isDigit(r): - lx.backup() // avoid an extra state and use the same as above - return lexNumberOrDateStart - } - switch r { - case arrayStart: - lx.ignore() - lx.emit(itemArray) - return lexArrayValue - case inlineTableStart: - lx.ignore() - lx.emit(itemInlineTableStart) - return lexInlineTableValue - case stringStart: - if lx.accept(stringStart) { - if lx.accept(stringStart) { - lx.ignore() // Ignore """ - return lexMultilineString - } - lx.backup() - } - lx.ignore() // ignore the '"' - return lexString - case rawStringStart: - if lx.accept(rawStringStart) { - if lx.accept(rawStringStart) { - lx.ignore() // Ignore """ - return lexMultilineRawString - } - lx.backup() - } - lx.ignore() // ignore the "'" - return lexRawString - case '+', '-': - return lexNumberStart - case '.': // special error case, be kind to users - return lx.errorf("floats must start with a digit, not '.'") - } - if unicode.IsLetter(r) { - // Be permissive here; lexBool will give a nice error if the - // user wrote something like - // x = foo - // (i.e. not 'true' or 'false' but is something else word-like.) - lx.backup() - return lexBool - } - return lx.errorf("expected value but found %q instead", r) -} - -// lexArrayValue consumes one value in an array. It assumes that '[' or ',' -// have already been consumed. All whitespace and newlines are ignored. -func lexArrayValue(lx *lexer) stateFn { - r := lx.next() - switch { - case isWhitespace(r) || isNL(r): - return lexSkip(lx, lexArrayValue) - case r == commentStart: - lx.push(lexArrayValue) - return lexCommentStart - case r == comma: - return lx.errorf("unexpected comma") - case r == arrayEnd: - // NOTE(caleb): The spec isn't clear about whether you can have - // a trailing comma or not, so we'll allow it. - return lexArrayEnd - } - - lx.backup() - lx.push(lexArrayValueEnd) - return lexValue -} - -// lexArrayValueEnd consumes everything between the end of an array value and -// the next value (or the end of the array): it ignores whitespace and newlines -// and expects either a ',' or a ']'. -func lexArrayValueEnd(lx *lexer) stateFn { - r := lx.next() - switch { - case isWhitespace(r) || isNL(r): - return lexSkip(lx, lexArrayValueEnd) - case r == commentStart: - lx.push(lexArrayValueEnd) - return lexCommentStart - case r == comma: - lx.ignore() - return lexArrayValue // move on to the next value - case r == arrayEnd: - return lexArrayEnd - } - return lx.errorf( - "expected a comma or array terminator %q, but got %q instead", - arrayEnd, r, - ) -} - -// lexArrayEnd finishes the lexing of an array. -// It assumes that a ']' has just been consumed. -func lexArrayEnd(lx *lexer) stateFn { - lx.ignore() - lx.emit(itemArrayEnd) - return lx.pop() -} - -// lexInlineTableValue consumes one key/value pair in an inline table. -// It assumes that '{' or ',' have already been consumed. Whitespace is ignored. -func lexInlineTableValue(lx *lexer) stateFn { - r := lx.next() - switch { - case isWhitespace(r): - return lexSkip(lx, lexInlineTableValue) - case isNL(r): - return lx.errorf("newlines not allowed within inline tables") - case r == commentStart: - lx.push(lexInlineTableValue) - return lexCommentStart - case r == comma: - return lx.errorf("unexpected comma") - case r == inlineTableEnd: - return lexInlineTableEnd - } - lx.backup() - lx.push(lexInlineTableValueEnd) - return lexKeyStart -} - -// lexInlineTableValueEnd consumes everything between the end of an inline table -// key/value pair and the next pair (or the end of the table): -// it ignores whitespace and expects either a ',' or a '}'. -func lexInlineTableValueEnd(lx *lexer) stateFn { - r := lx.next() - switch { - case isWhitespace(r): - return lexSkip(lx, lexInlineTableValueEnd) - case isNL(r): - return lx.errorf("newlines not allowed within inline tables") - case r == commentStart: - lx.push(lexInlineTableValueEnd) - return lexCommentStart - case r == comma: - lx.ignore() - return lexInlineTableValue - case r == inlineTableEnd: - return lexInlineTableEnd - } - return lx.errorf("expected a comma or an inline table terminator %q, "+ - "but got %q instead", inlineTableEnd, r) -} - -// lexInlineTableEnd finishes the lexing of an inline table. -// It assumes that a '}' has just been consumed. -func lexInlineTableEnd(lx *lexer) stateFn { - lx.ignore() - lx.emit(itemInlineTableEnd) - return lx.pop() -} - -// lexString consumes the inner contents of a string. It assumes that the -// beginning '"' has already been consumed and ignored. -func lexString(lx *lexer) stateFn { - r := lx.next() - switch { - case r == eof: - return lx.errorf("unexpected EOF") - case isNL(r): - return lx.errorf("strings cannot contain newlines") - case r == '\\': - lx.push(lexString) - return lexStringEscape - case r == stringEnd: - lx.backup() - lx.emit(itemString) - lx.next() - lx.ignore() - return lx.pop() - } - return lexString -} - -// lexMultilineString consumes the inner contents of a string. It assumes that -// the beginning '"""' has already been consumed and ignored. -func lexMultilineString(lx *lexer) stateFn { - switch lx.next() { - case eof: - return lx.errorf("unexpected EOF") - case '\\': - return lexMultilineStringEscape - case stringEnd: - if lx.accept(stringEnd) { - if lx.accept(stringEnd) { - lx.backup() - lx.backup() - lx.backup() - lx.emit(itemMultilineString) - lx.next() - lx.next() - lx.next() - lx.ignore() - return lx.pop() - } - lx.backup() - } - } - return lexMultilineString -} - -// lexRawString consumes a raw string. Nothing can be escaped in such a string. -// It assumes that the beginning "'" has already been consumed and ignored. -func lexRawString(lx *lexer) stateFn { - r := lx.next() - switch { - case r == eof: - return lx.errorf("unexpected EOF") - case isNL(r): - return lx.errorf("strings cannot contain newlines") - case r == rawStringEnd: - lx.backup() - lx.emit(itemRawString) - lx.next() - lx.ignore() - return lx.pop() - } - return lexRawString -} - -// lexMultilineRawString consumes a raw string. Nothing can be escaped in such -// a string. It assumes that the beginning "'''" has already been consumed and -// ignored. -func lexMultilineRawString(lx *lexer) stateFn { - switch lx.next() { - case eof: - return lx.errorf("unexpected EOF") - case rawStringEnd: - if lx.accept(rawStringEnd) { - if lx.accept(rawStringEnd) { - lx.backup() - lx.backup() - lx.backup() - lx.emit(itemRawMultilineString) - lx.next() - lx.next() - lx.next() - lx.ignore() - return lx.pop() - } - lx.backup() - } - } - return lexMultilineRawString -} - -// lexMultilineStringEscape consumes an escaped character. It assumes that the -// preceding '\\' has already been consumed. -func lexMultilineStringEscape(lx *lexer) stateFn { - // Handle the special case first: - if isNL(lx.next()) { - return lexMultilineString - } - lx.backup() - lx.push(lexMultilineString) - return lexStringEscape(lx) -} - -func lexStringEscape(lx *lexer) stateFn { - r := lx.next() - switch r { - case 'b': - fallthrough - case 't': - fallthrough - case 'n': - fallthrough - case 'f': - fallthrough - case 'r': - fallthrough - case '"': - fallthrough - case '\\': - return lx.pop() - case 'u': - return lexShortUnicodeEscape - case 'U': - return lexLongUnicodeEscape - } - return lx.errorf("invalid escape character %q; only the following "+ - "escape characters are allowed: "+ - `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r) -} - -func lexShortUnicodeEscape(lx *lexer) stateFn { - var r rune - for i := 0; i < 4; i++ { - r = lx.next() - if !isHexadecimal(r) { - return lx.errorf(`expected four hexadecimal digits after '\u', `+ - "but got %q instead", lx.current()) - } - } - return lx.pop() -} - -func lexLongUnicodeEscape(lx *lexer) stateFn { - var r rune - for i := 0; i < 8; i++ { - r = lx.next() - if !isHexadecimal(r) { - return lx.errorf(`expected eight hexadecimal digits after '\U', `+ - "but got %q instead", lx.current()) - } - } - return lx.pop() -} - -// lexNumberOrDateStart consumes either an integer, a float, or datetime. -func lexNumberOrDateStart(lx *lexer) stateFn { - r := lx.next() - if isDigit(r) { - return lexNumberOrDate - } - switch r { - case '_': - return lexNumber - case 'e', 'E': - return lexFloat - case '.': - return lx.errorf("floats must start with a digit, not '.'") - } - return lx.errorf("expected a digit but got %q", r) -} - -// lexNumberOrDate consumes either an integer, float or datetime. -func lexNumberOrDate(lx *lexer) stateFn { - r := lx.next() - if isDigit(r) { - return lexNumberOrDate - } - switch r { - case '-': - return lexDatetime - case '_': - return lexNumber - case '.', 'e', 'E': - return lexFloat - } - - lx.backup() - lx.emit(itemInteger) - return lx.pop() -} - -// lexDatetime consumes a Datetime, to a first approximation. -// The parser validates that it matches one of the accepted formats. -func lexDatetime(lx *lexer) stateFn { - r := lx.next() - if isDigit(r) { - return lexDatetime - } - switch r { - case '-', 'T', ':', '.', 'Z': - return lexDatetime - } - - lx.backup() - lx.emit(itemDatetime) - return lx.pop() -} - -// lexNumberStart consumes either an integer or a float. It assumes that a sign -// has already been read, but that *no* digits have been consumed. -// lexNumberStart will move to the appropriate integer or float states. -func lexNumberStart(lx *lexer) stateFn { - // We MUST see a digit. Even floats have to start with a digit. - r := lx.next() - if !isDigit(r) { - if r == '.' { - return lx.errorf("floats must start with a digit, not '.'") - } - return lx.errorf("expected a digit but got %q", r) - } - return lexNumber -} - -// lexNumber consumes an integer or a float after seeing the first digit. -func lexNumber(lx *lexer) stateFn { - r := lx.next() - if isDigit(r) { - return lexNumber - } - switch r { - case '_': - return lexNumber - case '.', 'e', 'E': - return lexFloat - } - - lx.backup() - lx.emit(itemInteger) - return lx.pop() -} - -// lexFloat consumes the elements of a float. It allows any sequence of -// float-like characters, so floats emitted by the lexer are only a first -// approximation and must be validated by the parser. -func lexFloat(lx *lexer) stateFn { - r := lx.next() - if isDigit(r) { - return lexFloat - } - switch r { - case '_', '.', '-', '+', 'e', 'E': - return lexFloat - } - - lx.backup() - lx.emit(itemFloat) - return lx.pop() -} - -// lexBool consumes a bool string: 'true' or 'false. -func lexBool(lx *lexer) stateFn { - var rs []rune - for { - r := lx.next() - if r == eof || isWhitespace(r) || isNL(r) { - lx.backup() - break - } - rs = append(rs, r) - } - s := string(rs) - switch s { - case "true", "false": - lx.emit(itemBool) - return lx.pop() - } - return lx.errorf("expected value but found %q instead", s) -} - -// lexCommentStart begins the lexing of a comment. It will emit -// itemCommentStart and consume no characters, passing control to lexComment. -func lexCommentStart(lx *lexer) stateFn { - lx.ignore() - lx.emit(itemCommentStart) - return lexComment -} - -// lexComment lexes an entire comment. It assumes that '#' has been consumed. -// It will consume *up to* the first newline character, and pass control -// back to the last state on the stack. -func lexComment(lx *lexer) stateFn { - r := lx.peek() - if isNL(r) || r == eof { - lx.emit(itemText) - return lx.pop() - } - lx.next() - return lexComment -} - -// lexSkip ignores all slurped input and moves on to the next state. -func lexSkip(lx *lexer, nextState stateFn) stateFn { - return func(lx *lexer) stateFn { - lx.ignore() - return nextState - } -} - -// isWhitespace returns true if `r` is a whitespace character according -// to the spec. -func isWhitespace(r rune) bool { - return r == '\t' || r == ' ' -} - -func isNL(r rune) bool { - return r == '\n' || r == '\r' -} - -func isDigit(r rune) bool { - return r >= '0' && r <= '9' -} - -func isHexadecimal(r rune) bool { - return (r >= '0' && r <= '9') || - (r >= 'a' && r <= 'f') || - (r >= 'A' && r <= 'F') -} - -func isBareKeyChar(r rune) bool { - return (r >= 'A' && r <= 'Z') || - (r >= 'a' && r <= 'z') || - (r >= '0' && r <= '9') || - r == '_' || - r == '-' -} - -func (itype itemType) String() string { - switch itype { - case itemError: - return "Error" - case itemNIL: - return "NIL" - case itemEOF: - return "EOF" - case itemText: - return "Text" - case itemString, itemRawString, itemMultilineString, itemRawMultilineString: - return "String" - case itemBool: - return "Bool" - case itemInteger: - return "Integer" - case itemFloat: - return "Float" - case itemDatetime: - return "DateTime" - case itemTableStart: - return "TableStart" - case itemTableEnd: - return "TableEnd" - case itemKeyStart: - return "KeyStart" - case itemArray: - return "Array" - case itemArrayEnd: - return "ArrayEnd" - case itemCommentStart: - return "CommentStart" - } - panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype))) -} - -func (item item) String() string { - return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val) -} |