diff options
Diffstat (limited to 'vendor/google.golang.org/protobuf/internal/encoding/text')
6 files changed, 1685 insertions, 0 deletions
diff --git a/vendor/google.golang.org/protobuf/internal/encoding/text/decode.go b/vendor/google.golang.org/protobuf/internal/encoding/text/decode.go new file mode 100644 index 00000000..eb10ea10 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/encoding/text/decode.go @@ -0,0 +1,665 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package text + +import ( + "bytes" + "fmt" + "io" + "regexp" + "strconv" + "unicode/utf8" + + "google.golang.org/protobuf/internal/errors" +) + +// Decoder is a token-based textproto decoder. +type Decoder struct { + // lastCall is last method called, either readCall or peekCall. + // Initial value is readCall. + lastCall call + + // lastToken contains the last read token. + lastToken Token + + // lastErr contains the last read error. + lastErr error + + // openStack is a stack containing the byte characters for MessageOpen and + // ListOpen kinds. The top of stack represents the message or the list that + // the current token is nested in. An empty stack means the current token is + // at the top level message. The characters '{' and '<' both represent the + // MessageOpen kind. + openStack []byte + + // orig is used in reporting line and column. + orig []byte + // in contains the unconsumed input. + in []byte +} + +// NewDecoder returns a Decoder to read the given []byte. +func NewDecoder(b []byte) *Decoder { + return &Decoder{orig: b, in: b} +} + +// ErrUnexpectedEOF means that EOF was encountered in the middle of the input. +var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF) + +// call specifies which Decoder method was invoked. +type call uint8 + +const ( + readCall call = iota + peekCall +) + +// Peek looks ahead and returns the next token and error without advancing a read. +func (d *Decoder) Peek() (Token, error) { + defer func() { d.lastCall = peekCall }() + if d.lastCall == readCall { + d.lastToken, d.lastErr = d.Read() + } + return d.lastToken, d.lastErr +} + +// Read returns the next token. +// It will return an error if there is no valid token. +func (d *Decoder) Read() (Token, error) { + defer func() { d.lastCall = readCall }() + if d.lastCall == peekCall { + return d.lastToken, d.lastErr + } + + tok, err := d.parseNext(d.lastToken.kind) + if err != nil { + return Token{}, err + } + + switch tok.kind { + case comma, semicolon: + tok, err = d.parseNext(tok.kind) + if err != nil { + return Token{}, err + } + } + d.lastToken = tok + return tok, nil +} + +const ( + mismatchedFmt = "mismatched close character %q" + unexpectedFmt = "unexpected character %q" +) + +// parseNext parses the next Token based on given last kind. +func (d *Decoder) parseNext(lastKind Kind) (Token, error) { + // Trim leading spaces. + d.consume(0) + isEOF := false + if len(d.in) == 0 { + isEOF = true + } + + switch lastKind { + case EOF: + return d.consumeToken(EOF, 0, 0), nil + + case bof: + // Start of top level message. Next token can be EOF or Name. + if isEOF { + return d.consumeToken(EOF, 0, 0), nil + } + return d.parseFieldName() + + case Name: + // Next token can be MessageOpen, ListOpen or Scalar. + if isEOF { + return Token{}, ErrUnexpectedEOF + } + switch ch := d.in[0]; ch { + case '{', '<': + d.pushOpenStack(ch) + return d.consumeToken(MessageOpen, 1, 0), nil + case '[': + d.pushOpenStack(ch) + return d.consumeToken(ListOpen, 1, 0), nil + default: + return d.parseScalar() + } + + case Scalar: + openKind, closeCh := d.currentOpenKind() + switch openKind { + case bof: + // Top level message. + // Next token can be EOF, comma, semicolon or Name. + if isEOF { + return d.consumeToken(EOF, 0, 0), nil + } + switch d.in[0] { + case ',': + return d.consumeToken(comma, 1, 0), nil + case ';': + return d.consumeToken(semicolon, 1, 0), nil + default: + return d.parseFieldName() + } + + case MessageOpen: + // Next token can be MessageClose, comma, semicolon or Name. + if isEOF { + return Token{}, ErrUnexpectedEOF + } + switch ch := d.in[0]; ch { + case closeCh: + d.popOpenStack() + return d.consumeToken(MessageClose, 1, 0), nil + case otherCloseChar[closeCh]: + return Token{}, d.newSyntaxError(mismatchedFmt, ch) + case ',': + return d.consumeToken(comma, 1, 0), nil + case ';': + return d.consumeToken(semicolon, 1, 0), nil + default: + return d.parseFieldName() + } + + case ListOpen: + // Next token can be ListClose or comma. + if isEOF { + return Token{}, ErrUnexpectedEOF + } + switch ch := d.in[0]; ch { + case ']': + d.popOpenStack() + return d.consumeToken(ListClose, 1, 0), nil + case ',': + return d.consumeToken(comma, 1, 0), nil + default: + return Token{}, d.newSyntaxError(unexpectedFmt, ch) + } + } + + case MessageOpen: + // Next token can be MessageClose or Name. + if isEOF { + return Token{}, ErrUnexpectedEOF + } + _, closeCh := d.currentOpenKind() + switch ch := d.in[0]; ch { + case closeCh: + d.popOpenStack() + return d.consumeToken(MessageClose, 1, 0), nil + case otherCloseChar[closeCh]: + return Token{}, d.newSyntaxError(mismatchedFmt, ch) + default: + return d.parseFieldName() + } + + case MessageClose: + openKind, closeCh := d.currentOpenKind() + switch openKind { + case bof: + // Top level message. + // Next token can be EOF, comma, semicolon or Name. + if isEOF { + return d.consumeToken(EOF, 0, 0), nil + } + switch ch := d.in[0]; ch { + case ',': + return d.consumeToken(comma, 1, 0), nil + case ';': + return d.consumeToken(semicolon, 1, 0), nil + default: + return d.parseFieldName() + } + + case MessageOpen: + // Next token can be MessageClose, comma, semicolon or Name. + if isEOF { + return Token{}, ErrUnexpectedEOF + } + switch ch := d.in[0]; ch { + case closeCh: + d.popOpenStack() + return d.consumeToken(MessageClose, 1, 0), nil + case otherCloseChar[closeCh]: + return Token{}, d.newSyntaxError(mismatchedFmt, ch) + case ',': + return d.consumeToken(comma, 1, 0), nil + case ';': + return d.consumeToken(semicolon, 1, 0), nil + default: + return d.parseFieldName() + } + + case ListOpen: + // Next token can be ListClose or comma + if isEOF { + return Token{}, ErrUnexpectedEOF + } + switch ch := d.in[0]; ch { + case closeCh: + d.popOpenStack() + return d.consumeToken(ListClose, 1, 0), nil + case ',': + return d.consumeToken(comma, 1, 0), nil + default: + return Token{}, d.newSyntaxError(unexpectedFmt, ch) + } + } + + case ListOpen: + // Next token can be ListClose, MessageStart or Scalar. + if isEOF { + return Token{}, ErrUnexpectedEOF + } + switch ch := d.in[0]; ch { + case ']': + d.popOpenStack() + return d.consumeToken(ListClose, 1, 0), nil + case '{', '<': + d.pushOpenStack(ch) + return d.consumeToken(MessageOpen, 1, 0), nil + default: + return d.parseScalar() + } + + case ListClose: + openKind, closeCh := d.currentOpenKind() + switch openKind { + case bof: + // Top level message. + // Next token can be EOF, comma, semicolon or Name. + if isEOF { + return d.consumeToken(EOF, 0, 0), nil + } + switch ch := d.in[0]; ch { + case ',': + return d.consumeToken(comma, 1, 0), nil + case ';': + return d.consumeToken(semicolon, 1, 0), nil + default: + return d.parseFieldName() + } + + case MessageOpen: + // Next token can be MessageClose, comma, semicolon or Name. + if isEOF { + return Token{}, ErrUnexpectedEOF + } + switch ch := d.in[0]; ch { + case closeCh: + d.popOpenStack() + return d.consumeToken(MessageClose, 1, 0), nil + case otherCloseChar[closeCh]: + return Token{}, d.newSyntaxError(mismatchedFmt, ch) + case ',': + return d.consumeToken(comma, 1, 0), nil + case ';': + return d.consumeToken(semicolon, 1, 0), nil + default: + return d.parseFieldName() + } + + default: + // It is not possible to have this case. Let it panic below. + } + + case comma, semicolon: + openKind, closeCh := d.currentOpenKind() + switch openKind { + case bof: + // Top level message. Next token can be EOF or Name. + if isEOF { + return d.consumeToken(EOF, 0, 0), nil + } + return d.parseFieldName() + + case MessageOpen: + // Next token can be MessageClose or Name. + if isEOF { + return Token{}, ErrUnexpectedEOF + } + switch ch := d.in[0]; ch { + case closeCh: + d.popOpenStack() + return d.consumeToken(MessageClose, 1, 0), nil + case otherCloseChar[closeCh]: + return Token{}, d.newSyntaxError(mismatchedFmt, ch) + default: + return d.parseFieldName() + } + + case ListOpen: + if lastKind == semicolon { + // It is not be possible to have this case as logic here + // should not have produced a semicolon Token when inside a + // list. Let it panic below. + break + } + // Next token can be MessageOpen or Scalar. + if isEOF { + return Token{}, ErrUnexpectedEOF + } + switch ch := d.in[0]; ch { + case '{', '<': + d.pushOpenStack(ch) + return d.consumeToken(MessageOpen, 1, 0), nil + default: + return d.parseScalar() + } + } + } + + line, column := d.Position(len(d.orig) - len(d.in)) + panic(fmt.Sprintf("Decoder.parseNext: bug at handling line %d:%d with lastKind=%v", line, column, lastKind)) +} + +var otherCloseChar = map[byte]byte{ + '}': '>', + '>': '}', +} + +// currentOpenKind indicates whether current position is inside a message, list +// or top-level message by returning MessageOpen, ListOpen or bof respectively. +// If the returned kind is either a MessageOpen or ListOpen, it also returns the +// corresponding closing character. +func (d *Decoder) currentOpenKind() (Kind, byte) { + if len(d.openStack) == 0 { + return bof, 0 + } + openCh := d.openStack[len(d.openStack)-1] + switch openCh { + case '{': + return MessageOpen, '}' + case '<': + return MessageOpen, '>' + case '[': + return ListOpen, ']' + } + panic(fmt.Sprintf("Decoder: openStack contains invalid byte %s", string(openCh))) +} + +func (d *Decoder) pushOpenStack(ch byte) { + d.openStack = append(d.openStack, ch) +} + +func (d *Decoder) popOpenStack() { + d.openStack = d.openStack[:len(d.openStack)-1] +} + +// parseFieldName parses field name and separator. +func (d *Decoder) parseFieldName() (tok Token, err error) { + defer func() { + if err == nil && d.tryConsumeChar(':') { + tok.attrs |= hasSeparator + } + }() + + // Extension or Any type URL. + if d.in[0] == '[' { + return d.parseTypeName() + } + + // Identifier. + if size := parseIdent(d.in, false); size > 0 { + return d.consumeToken(Name, size, uint8(IdentName)), nil + } + + // Field number. Identify if input is a valid number that is not negative + // and is decimal integer within 32-bit range. + if num := parseNumber(d.in); num.size > 0 { + if !num.neg && num.kind == numDec { + if _, err := strconv.ParseInt(string(d.in[:num.size]), 10, 32); err == nil { + return d.consumeToken(Name, num.size, uint8(FieldNumber)), nil + } + } + return Token{}, d.newSyntaxError("invalid field number: %s", d.in[:num.size]) + } + + return Token{}, d.newSyntaxError("invalid field name: %s", errRegexp.Find(d.in)) +} + +// parseTypeName parses Any type URL or extension field name. The name is +// enclosed in [ and ] characters. The C++ parser does not handle many legal URL +// strings. This implementation is more liberal and allows for the pattern +// ^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`). Whitespaces and comments are allowed +// in between [ ], '.', '/' and the sub names. +func (d *Decoder) parseTypeName() (Token, error) { + startPos := len(d.orig) - len(d.in) + // Use alias s to advance first in order to use d.in for error handling. + // Caller already checks for [ as first character. + s := consume(d.in[1:], 0) + if len(s) == 0 { + return Token{}, ErrUnexpectedEOF + } + + var name []byte + for len(s) > 0 && isTypeNameChar(s[0]) { + name = append(name, s[0]) + s = s[1:] + } + s = consume(s, 0) + + var closed bool + for len(s) > 0 && !closed { + switch { + case s[0] == ']': + s = s[1:] + closed = true + + case s[0] == '/', s[0] == '.': + if len(name) > 0 && (name[len(name)-1] == '/' || name[len(name)-1] == '.') { + return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s", + d.orig[startPos:len(d.orig)-len(s)+1]) + } + name = append(name, s[0]) + s = s[1:] + s = consume(s, 0) + for len(s) > 0 && isTypeNameChar(s[0]) { + name = append(name, s[0]) + s = s[1:] + } + s = consume(s, 0) + + default: + return Token{}, d.newSyntaxError( + "invalid type URL/extension field name: %s", d.orig[startPos:len(d.orig)-len(s)+1]) + } + } + + if !closed { + return Token{}, ErrUnexpectedEOF + } + + // First character cannot be '.'. Last character cannot be '.' or '/'. + size := len(name) + if size == 0 || name[0] == '.' || name[size-1] == '.' || name[size-1] == '/' { + return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s", + d.orig[startPos:len(d.orig)-len(s)]) + } + + d.in = s + endPos := len(d.orig) - len(d.in) + d.consume(0) + + return Token{ + kind: Name, + attrs: uint8(TypeName), + pos: startPos, + raw: d.orig[startPos:endPos], + str: string(name), + }, nil +} + +func isTypeNameChar(b byte) bool { + return (b == '-' || b == '_' || + ('0' <= b && b <= '9') || + ('a' <= b && b <= 'z') || + ('A' <= b && b <= 'Z')) +} + +func isWhiteSpace(b byte) bool { + switch b { + case ' ', '\n', '\r', '\t': + return true + default: + return false + } +} + +// parseIdent parses an unquoted proto identifier and returns size. +// If allowNeg is true, it allows '-' to be the first character in the +// identifier. This is used when parsing literal values like -infinity, etc. +// Regular expression matches an identifier: `^[_a-zA-Z][_a-zA-Z0-9]*` +func parseIdent(input []byte, allowNeg bool) int { + var size int + + s := input + if len(s) == 0 { + return 0 + } + + if allowNeg && s[0] == '-' { + s = s[1:] + size++ + if len(s) == 0 { + return 0 + } + } + + switch { + case s[0] == '_', + 'a' <= s[0] && s[0] <= 'z', + 'A' <= s[0] && s[0] <= 'Z': + s = s[1:] + size++ + default: + return 0 + } + + for len(s) > 0 && (s[0] == '_' || + 'a' <= s[0] && s[0] <= 'z' || + 'A' <= s[0] && s[0] <= 'Z' || + '0' <= s[0] && s[0] <= '9') { + s = s[1:] + size++ + } + + if len(s) > 0 && !isDelim(s[0]) { + return 0 + } + + return size +} + +// parseScalar parses for a string, literal or number value. +func (d *Decoder) parseScalar() (Token, error) { + if d.in[0] == '"' || d.in[0] == '\'' { + return d.parseStringValue() + } + + if tok, ok := d.parseLiteralValue(); ok { + return tok, nil + } + + if tok, ok := d.parseNumberValue(); ok { + return tok, nil + } + + return Token{}, d.newSyntaxError("invalid scalar value: %s", errRegexp.Find(d.in)) +} + +// parseLiteralValue parses a literal value. A literal value is used for +// bools, special floats and enums. This function simply identifies that the +// field value is a literal. +func (d *Decoder) parseLiteralValue() (Token, bool) { + size := parseIdent(d.in, true) + if size == 0 { + return Token{}, false + } + return d.consumeToken(Scalar, size, literalValue), true +} + +// consumeToken constructs a Token for given Kind from d.in and consumes given +// size-length from it. +func (d *Decoder) consumeToken(kind Kind, size int, attrs uint8) Token { + // Important to compute raw and pos before consuming. + tok := Token{ + kind: kind, + attrs: attrs, + pos: len(d.orig) - len(d.in), + raw: d.in[:size], + } + d.consume(size) + return tok +} + +// newSyntaxError returns a syntax error with line and column information for +// current position. +func (d *Decoder) newSyntaxError(f string, x ...interface{}) error { + e := errors.New(f, x...) + line, column := d.Position(len(d.orig) - len(d.in)) + return errors.New("syntax error (line %d:%d): %v", line, column, e) +} + +// Position returns line and column number of given index of the original input. +// It will panic if index is out of range. +func (d *Decoder) Position(idx int) (line int, column int) { + b := d.orig[:idx] + line = bytes.Count(b, []byte("\n")) + 1 + if i := bytes.LastIndexByte(b, '\n'); i >= 0 { + b = b[i+1:] + } + column = utf8.RuneCount(b) + 1 // ignore multi-rune characters + return line, column +} + +func (d *Decoder) tryConsumeChar(c byte) bool { + if len(d.in) > 0 && d.in[0] == c { + d.consume(1) + return true + } + return false +} + +// consume consumes n bytes of input and any subsequent whitespace or comments. +func (d *Decoder) consume(n int) { + d.in = consume(d.in, n) + return +} + +// consume consumes n bytes of input and any subsequent whitespace or comments. +func consume(b []byte, n int) []byte { + b = b[n:] + for len(b) > 0 { + switch b[0] { + case ' ', '\n', '\r', '\t': + b = b[1:] + case '#': + if i := bytes.IndexByte(b, '\n'); i >= 0 { + b = b[i+len("\n"):] + } else { + b = nil + } + default: + return b + } + } + return b +} + +// Any sequence that looks like a non-delimiter (for error reporting). +var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9\/]+|.)`) + +// isDelim returns true if given byte is a delimiter character. +func isDelim(c byte) bool { + return !(c == '-' || c == '+' || c == '.' || c == '_' || + ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + ('0' <= c && c <= '9')) +} diff --git a/vendor/google.golang.org/protobuf/internal/encoding/text/decode_number.go b/vendor/google.golang.org/protobuf/internal/encoding/text/decode_number.go new file mode 100644 index 00000000..f2d90b78 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/encoding/text/decode_number.go @@ -0,0 +1,190 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package text + +// parseNumberValue parses a number from the input and returns a Token object. +func (d *Decoder) parseNumberValue() (Token, bool) { + in := d.in + num := parseNumber(in) + if num.size == 0 { + return Token{}, false + } + numAttrs := num.kind + if num.neg { + numAttrs |= isNegative + } + strSize := num.size + last := num.size - 1 + if num.kind == numFloat && (d.in[last] == 'f' || d.in[last] == 'F') { + strSize = last + } + tok := Token{ + kind: Scalar, + attrs: numberValue, + pos: len(d.orig) - len(d.in), + raw: d.in[:num.size], + str: string(d.in[:strSize]), + numAttrs: numAttrs, + } + d.consume(num.size) + return tok, true +} + +const ( + numDec uint8 = (1 << iota) / 2 + numHex + numOct + numFloat +) + +// number is the result of parsing out a valid number from parseNumber. It +// contains data for doing float or integer conversion via the strconv package +// in conjunction with the input bytes. +type number struct { + kind uint8 + neg bool + size int +} + +// parseNumber constructs a number object from given input. It allows for the +// following patterns: +// integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*) +// float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?) +// It also returns the number of parsed bytes for the given number, 0 if it is +// not a number. +func parseNumber(input []byte) number { + kind := numDec + var size int + var neg bool + + s := input + if len(s) == 0 { + return number{} + } + + // Optional - + if s[0] == '-' { + neg = true + s = s[1:] + size++ + if len(s) == 0 { + return number{} + } + } + + // C++ allows for whitespace and comments in between the negative sign and + // the rest of the number. This logic currently does not but is consistent + // with v1. + + switch { + case s[0] == '0': + if len(s) > 1 { + switch { + case s[1] == 'x' || s[1] == 'X': + // Parse as hex number. + kind = numHex + n := 2 + s = s[2:] + for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') || + ('a' <= s[0] && s[0] <= 'f') || + ('A' <= s[0] && s[0] <= 'F')) { + s = s[1:] + n++ + } + if n == 2 { + return number{} + } + size += n + + case '0' <= s[1] && s[1] <= '7': + // Parse as octal number. + kind = numOct + n := 2 + s = s[2:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '7' { + s = s[1:] + n++ + } + size += n + } + + if kind&(numHex|numOct) > 0 { + if len(s) > 0 && !isDelim(s[0]) { + return number{} + } + return number{kind: kind, neg: neg, size: size} + } + } + s = s[1:] + size++ + + case '1' <= s[0] && s[0] <= '9': + n := 1 + s = s[1:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + n++ + } + size += n + + case s[0] == '.': + // Set kind to numFloat to signify the intent to parse as float. And + // that it needs to have other digits after '.'. + kind = numFloat + + default: + return number{} + } + + // . followed by 0 or more digits. + if len(s) > 0 && s[0] == '.' { + n := 1 + s = s[1:] + // If decimal point was before any digits, it should be followed by + // other digits. + if len(s) == 0 && kind == numFloat { + return number{} + } + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + n++ + } + size += n + kind = numFloat + } + + // e or E followed by an optional - or + and 1 or more digits. + if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { + kind = numFloat + s = s[1:] + n := 1 + if s[0] == '+' || s[0] == '-' { + s = s[1:] + n++ + if len(s) == 0 { + return number{} + } + } + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + n++ + } + size += n + } + + // Optional suffix f or F for floats. + if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') { + kind = numFloat + s = s[1:] + size++ + } + + // Check that next byte is a delimiter or it is at the end. + if len(s) > 0 && !isDelim(s[0]) { + return number{} + } + + return number{kind: kind, neg: neg, size: size} +} diff --git a/vendor/google.golang.org/protobuf/internal/encoding/text/decode_string.go b/vendor/google.golang.org/protobuf/internal/encoding/text/decode_string.go new file mode 100644 index 00000000..d4d34902 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/encoding/text/decode_string.go @@ -0,0 +1,161 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package text + +import ( + "bytes" + "strconv" + "strings" + "unicode" + "unicode/utf16" + "unicode/utf8" + + "google.golang.org/protobuf/internal/strs" +) + +// parseStringValue parses string field token. +// This differs from parseString since the text format allows +// multiple back-to-back string literals where they are semantically treated +// as a single large string with all values concatenated. +// +// E.g., `"foo" "bar" "baz"` => "foobarbaz" +func (d *Decoder) parseStringValue() (Token, error) { + // Note that the ending quote is sufficient to unambiguously mark the end + // of a string. Thus, the text grammar does not require intervening + // whitespace or control characters in-between strings. + // Thus, the following is valid: + // `"foo"'bar'"baz"` => "foobarbaz" + in0 := d.in + var ss []string + for len(d.in) > 0 && (d.in[0] == '"' || d.in[0] == '\'') { + s, err := d.parseString() + if err != nil { + return Token{}, err + } + ss = append(ss, s) + } + // d.in already points to the end of the value at this point. + return Token{ + kind: Scalar, + attrs: stringValue, + pos: len(d.orig) - len(in0), + raw: in0[:len(in0)-len(d.in)], + str: strings.Join(ss, ""), + }, nil +} + +// parseString parses a string value enclosed in " or '. +func (d *Decoder) parseString() (string, error) { + in := d.in + if len(in) == 0 { + return "", ErrUnexpectedEOF + } + quote := in[0] + in = in[1:] + i := indexNeedEscapeInBytes(in) + in, out := in[i:], in[:i:i] // set cap to prevent mutations + for len(in) > 0 { + switch r, n := utf8.DecodeRune(in); { + case r == utf8.RuneError && n == 1: + return "", d.newSyntaxError("invalid UTF-8 detected") + case r == 0 || r == '\n': + return "", d.newSyntaxError("invalid character %q in string", r) + case r == rune(quote): + in = in[1:] + d.consume(len(d.in) - len(in)) + return string(out), nil + case r == '\\': + if len(in) < 2 { + return "", ErrUnexpectedEOF + } + switch r := in[1]; r { + case '"', '\'', '\\', '?': + in, out = in[2:], append(out, r) + case 'a': + in, out = in[2:], append(out, '\a') + case 'b': + in, out = in[2:], append(out, '\b') + case 'n': + in, out = in[2:], append(out, '\n') + case 'r': + in, out = in[2:], append(out, '\r') + case 't': + in, out = in[2:], append(out, '\t') + case 'v': + in, out = in[2:], append(out, '\v') + case 'f': + in, out = in[2:], append(out, '\f') + case '0', '1', '2', '3', '4', '5', '6', '7': + // One, two, or three octal characters. + n := len(in[1:]) - len(bytes.TrimLeft(in[1:], "01234567")) + if n > 3 { + n = 3 + } + v, err := strconv.ParseUint(string(in[1:1+n]), 8, 8) + if err != nil { + return "", d.newSyntaxError("invalid octal escape code %q in string", in[:1+n]) + } + in, out = in[1+n:], append(out, byte(v)) + case 'x': + // One or two hexadecimal characters. + n := len(in[2:]) - len(bytes.TrimLeft(in[2:], "0123456789abcdefABCDEF")) + if n > 2 { + n = 2 + } + v, err := strconv.ParseUint(string(in[2:2+n]), 16, 8) + if err != nil { + return "", d.newSyntaxError("invalid hex escape code %q in string", in[:2+n]) + } + in, out = in[2+n:], append(out, byte(v)) + case 'u', 'U': + // Four or eight hexadecimal characters + n := 6 + if r == 'U' { + n = 10 + } + if len(in) < n { + return "", ErrUnexpectedEOF + } + v, err := strconv.ParseUint(string(in[2:n]), 16, 32) + if utf8.MaxRune < v || err != nil { + return "", d.newSyntaxError("invalid Unicode escape code %q in string", in[:n]) + } + in = in[n:] + + r := rune(v) + if utf16.IsSurrogate(r) { + if len(in) < 6 { + return "", ErrUnexpectedEOF + } + v, err := strconv.ParseUint(string(in[2:6]), 16, 16) + r = utf16.DecodeRune(r, rune(v)) + if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil { + return "", d.newSyntaxError("invalid Unicode escape code %q in string", in[:6]) + } + in = in[6:] + } + out = append(out, string(r)...) + default: + return "", d.newSyntaxError("invalid escape code %q in string", in[:2]) + } + default: + i := indexNeedEscapeInBytes(in[n:]) + in, out = in[n+i:], append(out, in[:n+i]...) + } + } + return "", ErrUnexpectedEOF +} + +// indexNeedEscapeInString returns the index of the character that needs +// escaping. If no characters need escaping, this returns the input length. +func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) } + +// UnmarshalString returns an unescaped string given a textproto string value. +// String value needs to contain single or double quotes. This is only used by +// internal/encoding/defval package for unmarshaling bytes. +func UnmarshalString(s string) (string, error) { + d := NewDecoder([]byte(s)) + return d.parseString() +} diff --git a/vendor/google.golang.org/protobuf/internal/encoding/text/decode_token.go b/vendor/google.golang.org/protobuf/internal/encoding/text/decode_token.go new file mode 100644 index 00000000..83d2b0d5 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/encoding/text/decode_token.go @@ -0,0 +1,373 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package text + +import ( + "bytes" + "fmt" + "math" + "strconv" + "strings" + + "google.golang.org/protobuf/internal/flags" +) + +// Kind represents a token kind expressible in the textproto format. +type Kind uint8 + +// Kind values. +const ( + Invalid Kind = iota + EOF + Name // Name indicates the field name. + Scalar // Scalar are scalar values, e.g. "string", 47, ENUM_LITERAL, true. + MessageOpen + MessageClose + ListOpen + ListClose + + // comma and semi-colon are only for parsing in between values and should not be exposed. + comma + semicolon + + // bof indicates beginning of file, which is the default token + // kind at the beginning of parsing. + bof = Invalid +) + +func (t Kind) String() string { + switch t { + case Invalid: + return "<invalid>" + case EOF: + return "eof" + case Scalar: + return "scalar" + case Name: + return "name" + case MessageOpen: + return "{" + case MessageClose: + return "}" + case ListOpen: + return "[" + case ListClose: + return "]" + case comma: + return "," + case semicolon: + return ";" + default: + return fmt.Sprintf("<invalid:%v>", uint8(t)) + } +} + +// NameKind represents different types of field names. +type NameKind uint8 + +// NameKind values. +const ( + IdentName NameKind = iota + 1 + TypeName + FieldNumber +) + +func (t NameKind) String() string { + switch t { + case IdentName: + return "IdentName" + case TypeName: + return "TypeName" + case FieldNumber: + return "FieldNumber" + default: + return fmt.Sprintf("<invalid:%v>", uint8(t)) + } +} + +// Bit mask in Token.attrs to indicate if a Name token is followed by the +// separator char ':'. The field name separator char is optional for message +// field or repeated message field, but required for all other types. Decoder +// simply indicates whether a Name token is followed by separator or not. It is +// up to the prototext package to validate. +const hasSeparator = 1 << 7 + +// Scalar value types. +const ( + numberValue = iota + 1 + stringValue + literalValue +) + +// Bit mask in Token.numAttrs to indicate that the number is a negative. +const isNegative = 1 << 7 + +// Token provides a parsed token kind and value. Values are provided by the +// different accessor methods. +type Token struct { + // Kind of the Token object. + kind Kind + // attrs contains metadata for the following Kinds: + // Name: hasSeparator bit and one of NameKind. + // Scalar: one of numberValue, stringValue, literalValue. + attrs uint8 + // numAttrs contains metadata for numberValue: + // - highest bit is whether negative or positive. + // - lower bits indicate one of numDec, numHex, numOct, numFloat. + numAttrs uint8 + // pos provides the position of the token in the original input. + pos int + // raw bytes of the serialized token. + // This is a subslice into the original input. + raw []byte + // str contains parsed string for the following: + // - stringValue of Scalar kind + // - numberValue of Scalar kind + // - TypeName of Name kind + str string +} + +// Kind returns the token kind. +func (t Token) Kind() Kind { + return t.kind +} + +// RawString returns the read value in string. +func (t Token) RawString() string { + return string(t.raw) +} + +// Pos returns the token position from the input. +func (t Token) Pos() int { + return t.pos +} + +// NameKind returns IdentName, TypeName or FieldNumber. +// It panics if type is not Name. +func (t Token) NameKind() NameKind { + if t.kind == Name { + return NameKind(t.attrs &^ hasSeparator) + } + panic(fmt.Sprintf("Token is not a Name type: %s", t.kind)) +} + +// HasSeparator returns true if the field name is followed by the separator char +// ':', else false. It panics if type is not Name. +func (t Token) HasSeparator() bool { + if t.kind == Name { + return t.attrs&hasSeparator != 0 + } + panic(fmt.Sprintf("Token is not a Name type: %s", t.kind)) +} + +// IdentName returns the value for IdentName type. +func (t Token) IdentName() string { + if t.kind == Name && t.attrs&uint8(IdentName) != 0 { + return string(t.raw) + } + panic(fmt.Sprintf("Token is not an IdentName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator))) +} + +// TypeName returns the value for TypeName type. +func (t Token) TypeName() string { + if t.kind == Name && t.attrs&uint8(TypeName) != 0 { + return t.str + } + panic(fmt.Sprintf("Token is not a TypeName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator))) +} + +// FieldNumber returns the value for FieldNumber type. It returns a +// non-negative int32 value. Caller will still need to validate for the correct +// field number range. +func (t Token) FieldNumber() int32 { + if t.kind != Name || t.attrs&uint8(FieldNumber) == 0 { + panic(fmt.Sprintf("Token is not a FieldNumber: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator))) + } + // Following should not return an error as it had already been called right + // before this Token was constructed. + num, _ := strconv.ParseInt(string(t.raw), 10, 32) + return int32(num) +} + +// String returns the string value for a Scalar type. +func (t Token) String() (string, bool) { + if t.kind != Scalar || t.attrs != stringValue { + return "", false + } + return t.str, true +} + +// Enum returns the literal value for a Scalar type for use as enum literals. +func (t Token) Enum() (string, bool) { + if t.kind != Scalar || t.attrs != literalValue || (len(t.raw) > 0 && t.raw[0] == '-') { + return "", false + } + return string(t.raw), true +} + +// Bool returns the bool value for a Scalar type. +func (t Token) Bool() (bool, bool) { + if t.kind != Scalar { + return false, false + } + switch t.attrs { + case literalValue: + if b, ok := boolLits[string(t.raw)]; ok { + return b, true + } + case numberValue: + // Unsigned integer representation of 0 or 1 is permitted: 00, 0x0, 01, + // 0x1, etc. + n, err := strconv.ParseUint(t.str, 0, 64) + if err == nil { + switch n { + case 0: + return false, true + case 1: + return true, true + } + } + } + return false, false +} + +// These exact boolean literals are the ones supported in C++. +var boolLits = map[string]bool{ + "t": true, + "true": true, + "True": true, + "f": false, + "false": false, + "False": false, +} + +// Uint64 returns the uint64 value for a Scalar type. +func (t Token) Uint64() (uint64, bool) { + if t.kind != Scalar || t.attrs != numberValue || + t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 { + return 0, false + } + n, err := strconv.ParseUint(t.str, 0, 64) + if err != nil { + return 0, false + } + return n, true +} + +// Uint32 returns the uint32 value for a Scalar type. +func (t Token) Uint32() (uint32, bool) { + if t.kind != Scalar || t.attrs != numberValue || + t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 { + return 0, false + } + n, err := strconv.ParseUint(t.str, 0, 32) + if err != nil { + return 0, false + } + return uint32(n), true +} + +// Int64 returns the int64 value for a Scalar type. +func (t Token) Int64() (int64, bool) { + if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 { + return 0, false + } + if n, err := strconv.ParseInt(t.str, 0, 64); err == nil { + return n, true + } + // C++ accepts large positive hex numbers as negative values. + // This feature is here for proto1 backwards compatibility purposes. + if flags.ProtoLegacy && (t.numAttrs == numHex) { + if n, err := strconv.ParseUint(t.str, 0, 64); err == nil { + return int64(n), true + } + } + return 0, false +} + +// Int32 returns the int32 value for a Scalar type. +func (t Token) Int32() (int32, bool) { + if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 { + return 0, false + } + if n, err := strconv.ParseInt(t.str, 0, 32); err == nil { + return int32(n), true + } + // C++ accepts large positive hex numbers as negative values. + // This feature is here for proto1 backwards compatibility purposes. + if flags.ProtoLegacy && (t.numAttrs == numHex) { + if n, err := strconv.ParseUint(t.str, 0, 32); err == nil { + return int32(n), true + } + } + return 0, false +} + +// Float64 returns the float64 value for a Scalar type. +func (t Token) Float64() (float64, bool) { + if t.kind != Scalar { + return 0, false + } + switch t.attrs { + case literalValue: + if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok { + return f, true + } + case numberValue: + n, err := strconv.ParseFloat(t.str, 64) + if err == nil { + return n, true + } + nerr := err.(*strconv.NumError) + if nerr.Err == strconv.ErrRange { + return n, true + } + } + return 0, false +} + +// Float32 returns the float32 value for a Scalar type. +func (t Token) Float32() (float32, bool) { + if t.kind != Scalar { + return 0, false + } + switch t.attrs { + case literalValue: + if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok { + return float32(f), true + } + case numberValue: + n, err := strconv.ParseFloat(t.str, 64) + if err == nil { + // Overflows are treated as (-)infinity. + return float32(n), true + } + nerr := err.(*strconv.NumError) + if nerr.Err == strconv.ErrRange { + return float32(n), true + } + } + return 0, false +} + +// These are the supported float literals which C++ permits case-insensitive +// variants of these. +var floatLits = map[string]float64{ + "nan": math.NaN(), + "inf": math.Inf(1), + "infinity": math.Inf(1), + "-inf": math.Inf(-1), + "-infinity": math.Inf(-1), +} + +// TokenEquals returns true if given Tokens are equal, else false. +func TokenEquals(x, y Token) bool { + return x.kind == y.kind && + x.attrs == y.attrs && + x.numAttrs == y.numAttrs && + x.pos == y.pos && + bytes.Equal(x.raw, y.raw) && + x.str == y.str +} diff --git a/vendor/google.golang.org/protobuf/internal/encoding/text/doc.go b/vendor/google.golang.org/protobuf/internal/encoding/text/doc.go new file mode 100644 index 00000000..0ce8d6fb --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/encoding/text/doc.go @@ -0,0 +1,29 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package text implements the text format for protocol buffers. +// This package has no semantic understanding for protocol buffers and is only +// a parser and composer for the format. +// +// There is no formal specification for the protobuf text format, as such the +// C++ implementation (see google::protobuf::TextFormat) is the reference +// implementation of the text format. +// +// This package is neither a superset nor a subset of the C++ implementation. +// This implementation permits a more liberal grammar in some cases to be +// backwards compatible with the historical Go implementation. +// Future parsings unique to Go should not be added. +// Some grammars allowed by the C++ implementation are deliberately +// not implemented here because they are considered a bug by the protobuf team +// and should not be replicated. +// +// The Go implementation should implement a sufficient amount of the C++ +// grammar such that the default text serialization by C++ can be parsed by Go. +// However, just because the C++ parser accepts some input does not mean that +// the Go implementation should as well. +// +// The text format is almost a superset of JSON except: +// * message keys are not quoted strings, but identifiers +// * the top-level value must be a message without the delimiters +package text diff --git a/vendor/google.golang.org/protobuf/internal/encoding/text/encode.go b/vendor/google.golang.org/protobuf/internal/encoding/text/encode.go new file mode 100644 index 00000000..c4ba1c59 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/encoding/text/encode.go @@ -0,0 +1,267 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package text + +import ( + "math" + "math/bits" + "strconv" + "strings" + "unicode/utf8" + + "google.golang.org/protobuf/internal/detrand" + "google.golang.org/protobuf/internal/errors" +) + +// encType represents an encoding type. +type encType uint8 + +const ( + _ encType = (1 << iota) / 2 + name + scalar + messageOpen + messageClose +) + +// Encoder provides methods to write out textproto constructs and values. The user is +// responsible for producing valid sequences of constructs and values. +type Encoder struct { + encoderState + + indent string + newline string // set to "\n" if len(indent) > 0 + delims [2]byte + outputASCII bool +} + +type encoderState struct { + lastType encType + indents []byte + out []byte +} + +// NewEncoder returns an Encoder. +// +// If indent is a non-empty string, it causes every entry in a List or Message +// to be preceded by the indent and trailed by a newline. +// +// If delims is not the zero value, it controls the delimiter characters used +// for messages (e.g., "{}" vs "<>"). +// +// If outputASCII is true, strings will be serialized in such a way that +// multi-byte UTF-8 sequences are escaped. This property ensures that the +// overall output is ASCII (as opposed to UTF-8). +func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) { + e := &Encoder{} + if len(indent) > 0 { + if strings.Trim(indent, " \t") != "" { + return nil, errors.New("indent may only be composed of space and tab characters") + } + e.indent = indent + e.newline = "\n" + } + switch delims { + case [2]byte{0, 0}: + e.delims = [2]byte{'{', '}'} + case [2]byte{'{', '}'}, [2]byte{'<', '>'}: + e.delims = delims + default: + return nil, errors.New("delimiters may only be \"{}\" or \"<>\"") + } + e.outputASCII = outputASCII + + return e, nil +} + +// Bytes returns the content of the written bytes. +func (e *Encoder) Bytes() []byte { + return e.out +} + +// StartMessage writes out the '{' or '<' symbol. +func (e *Encoder) StartMessage() { + e.prepareNext(messageOpen) + e.out = append(e.out, e.delims[0]) +} + +// EndMessage writes out the '}' or '>' symbol. +func (e *Encoder) EndMessage() { + e.prepareNext(messageClose) + e.out = append(e.out, e.delims[1]) +} + +// WriteName writes out the field name and the separator ':'. +func (e *Encoder) WriteName(s string) { + e.prepareNext(name) + e.out = append(e.out, s...) + e.out = append(e.out, ':') +} + +// WriteBool writes out the given boolean value. +func (e *Encoder) WriteBool(b bool) { + if b { + e.WriteLiteral("true") + } else { + e.WriteLiteral("false") + } +} + +// WriteString writes out the given string value. +func (e *Encoder) WriteString(s string) { + e.prepareNext(scalar) + e.out = appendString(e.out, s, e.outputASCII) +} + +func appendString(out []byte, in string, outputASCII bool) []byte { + out = append(out, '"') + i := indexNeedEscapeInString(in) + in, out = in[i:], append(out, in[:i]...) + for len(in) > 0 { + switch r, n := utf8.DecodeRuneInString(in); { + case r == utf8.RuneError && n == 1: + // We do not report invalid UTF-8 because strings in the text format + // are used to represent both the proto string and bytes type. + r = rune(in[0]) + fallthrough + case r < ' ' || r == '"' || r == '\\': + out = append(out, '\\') + switch r { + case '"', '\\': + out = append(out, byte(r)) + case '\n': + out = append(out, 'n') + case '\r': + out = append(out, 'r') + case '\t': + out = append(out, 't') + default: + out = append(out, 'x') + out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...) + out = strconv.AppendUint(out, uint64(r), 16) + } + in = in[n:] + case outputASCII && r >= utf8.RuneSelf: + out = append(out, '\\') + if r <= math.MaxUint16 { + out = append(out, 'u') + out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...) + out = strconv.AppendUint(out, uint64(r), 16) + } else { + out = append(out, 'U') + out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...) + out = strconv.AppendUint(out, uint64(r), 16) + } + in = in[n:] + default: + i := indexNeedEscapeInString(in[n:]) + in, out = in[n+i:], append(out, in[:n+i]...) + } + } + out = append(out, '"') + return out +} + +// indexNeedEscapeInString returns the index of the character that needs +// escaping. If no characters need escaping, this returns the input length. +func indexNeedEscapeInString(s string) int { + for i := 0; i < len(s); i++ { + if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf { + return i + } + } + return len(s) +} + +// WriteFloat writes out the given float value for given bitSize. +func (e *Encoder) WriteFloat(n float64, bitSize int) { + e.prepareNext(scalar) + e.out = appendFloat(e.out, n, bitSize) +} + +func appendFloat(out []byte, n float64, bitSize int) []byte { + switch { + case math.IsNaN(n): + return append(out, "nan"...) + case math.IsInf(n, +1): + return append(out, "inf"...) + case math.IsInf(n, -1): + return append(out, "-inf"...) + default: + return strconv.AppendFloat(out, n, 'g', -1, bitSize) + } +} + +// WriteInt writes out the given signed integer value. +func (e *Encoder) WriteInt(n int64) { + e.prepareNext(scalar) + e.out = append(e.out, strconv.FormatInt(n, 10)...) +} + +// WriteUint writes out the given unsigned integer value. +func (e *Encoder) WriteUint(n uint64) { + e.prepareNext(scalar) + e.out = append(e.out, strconv.FormatUint(n, 10)...) +} + +// WriteLiteral writes out the given string as a literal value without quotes. +// This is used for writing enum literal strings. +func (e *Encoder) WriteLiteral(s string) { + e.prepareNext(scalar) + e.out = append(e.out, s...) +} + +// prepareNext adds possible space and indentation for the next value based +// on last encType and indent option. It also updates e.lastType to next. +func (e *Encoder) prepareNext(next encType) { + defer func() { + e.lastType = next + }() + + // Single line. + if len(e.indent) == 0 { + // Add space after each field before the next one. + if e.lastType&(scalar|messageClose) != 0 && next == name { + e.out = append(e.out, ' ') + // Add a random extra space to make output unstable. + if detrand.Bool() { + e.out = append(e.out, ' ') + } + } + return + } + + // Multi-line. + switch { + case e.lastType == name: + e.out = append(e.out, ' ') + // Add a random extra space after name: to make output unstable. + if detrand.Bool() { + e.out = append(e.out, ' ') + } + + case e.lastType == messageOpen && next != messageClose: + e.indents = append(e.indents, e.indent...) + e.out = append(e.out, '\n') + e.out = append(e.out, e.indents...) + + case e.lastType&(scalar|messageClose) != 0: + if next == messageClose { + e.indents = e.indents[:len(e.indents)-len(e.indent)] + } + e.out = append(e.out, '\n') + e.out = append(e.out, e.indents...) + } +} + +// Snapshot returns the current snapshot for use in Reset. +func (e *Encoder) Snapshot() encoderState { + return e.encoderState +} + +// Reset resets the Encoder to the given encoderState from a Snapshot. +func (e *Encoder) Reset(es encoderState) { + e.encoderState = es +} |