summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/pelletier/go-toml/v2/unstable/parser.go')
-rw-r--r--vendor/github.com/pelletier/go-toml/v2/unstable/parser.go1147
1 files changed, 1147 insertions, 0 deletions
diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go b/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go
new file mode 100644
index 00000000..52db88e7
--- /dev/null
+++ b/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go
@@ -0,0 +1,1147 @@
+package unstable
+
+import (
+ "bytes"
+ "fmt"
+ "unicode"
+
+ "github.com/pelletier/go-toml/v2/internal/characters"
+ "github.com/pelletier/go-toml/v2/internal/danger"
+)
+
+// ParserError describes an error relative to the content of the document.
+//
+// It cannot outlive the instance of Parser it refers to, and may cause panics
+// if the parser is reset.
+type ParserError struct {
+ Highlight []byte
+ Message string
+ Key []string // optional
+}
+
+// Error is the implementation of the error interface.
+func (e *ParserError) Error() string {
+ return e.Message
+}
+
+// NewParserError is a convenience function to create a ParserError
+//
+// Warning: Highlight needs to be a subslice of Parser.data, so only slices
+// returned by Parser.Raw are valid candidates.
+func NewParserError(highlight []byte, format string, args ...interface{}) error {
+ return &ParserError{
+ Highlight: highlight,
+ Message: fmt.Errorf(format, args...).Error(),
+ }
+}
+
+// Parser scans over a TOML-encoded document and generates an iterative AST.
+//
+// To prime the Parser, first reset it with the contents of a TOML document.
+// Then, process all top-level expressions sequentially. See Example.
+//
+// Don't forget to check Error() after you're done parsing.
+//
+// Each top-level expression needs to be fully processed before calling
+// NextExpression() again. Otherwise, calls to various Node methods may panic if
+// the parser has moved on the next expression.
+//
+// For performance reasons, go-toml doesn't make a copy of the input bytes to
+// the parser. Make sure to copy all the bytes you need to outlive the slice
+// given to the parser.
+//
+// The parser doesn't provide nodes for comments yet, nor for whitespace.
+type Parser struct {
+ data []byte
+ builder builder
+ ref reference
+ left []byte
+ err error
+ first bool
+}
+
+// Data returns the slice provided to the last call to Reset.
+func (p *Parser) Data() []byte {
+ return p.data
+}
+
+// Range returns a range description that corresponds to a given slice of the
+// input. If the argument is not a subslice of the parser input, this function
+// panics.
+func (p *Parser) Range(b []byte) Range {
+ return Range{
+ Offset: uint32(danger.SubsliceOffset(p.data, b)),
+ Length: uint32(len(b)),
+ }
+}
+
+// Raw returns the slice corresponding to the bytes in the given range.
+func (p *Parser) Raw(raw Range) []byte {
+ return p.data[raw.Offset : raw.Offset+raw.Length]
+}
+
+// Reset brings the parser to its initial state for a given input. It wipes an
+// reuses internal storage to reduce allocation.
+func (p *Parser) Reset(b []byte) {
+ p.builder.Reset()
+ p.ref = invalidReference
+ p.data = b
+ p.left = b
+ p.err = nil
+ p.first = true
+}
+
+// NextExpression parses the next top-level expression. If an expression was
+// successfully parsed, it returns true. If the parser is at the end of the
+// document or an error occurred, it returns false.
+//
+// Retrieve the parsed expression with Expression().
+func (p *Parser) NextExpression() bool {
+ if len(p.left) == 0 || p.err != nil {
+ return false
+ }
+
+ p.builder.Reset()
+ p.ref = invalidReference
+
+ for {
+ if len(p.left) == 0 || p.err != nil {
+ return false
+ }
+
+ if !p.first {
+ p.left, p.err = p.parseNewline(p.left)
+ }
+
+ if len(p.left) == 0 || p.err != nil {
+ return false
+ }
+
+ p.ref, p.left, p.err = p.parseExpression(p.left)
+
+ if p.err != nil {
+ return false
+ }
+
+ p.first = false
+
+ if p.ref.Valid() {
+ return true
+ }
+ }
+}
+
+// Expression returns a pointer to the node representing the last successfully
+// parsed expresion.
+func (p *Parser) Expression() *Node {
+ return p.builder.NodeAt(p.ref)
+}
+
+// Error returns any error that has occured during parsing.
+func (p *Parser) Error() error {
+ return p.err
+}
+
+func (p *Parser) parseNewline(b []byte) ([]byte, error) {
+ if b[0] == '\n' {
+ return b[1:], nil
+ }
+
+ if b[0] == '\r' {
+ _, rest, err := scanWindowsNewline(b)
+ return rest, err
+ }
+
+ return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
+}
+
+func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
+ // expression = ws [ comment ]
+ // expression =/ ws keyval ws [ comment ]
+ // expression =/ ws table ws [ comment ]
+ ref := invalidReference
+
+ b = p.parseWhitespace(b)
+
+ if len(b) == 0 {
+ return ref, b, nil
+ }
+
+ if b[0] == '#' {
+ _, rest, err := scanComment(b)
+ return ref, rest, err
+ }
+
+ if b[0] == '\n' || b[0] == '\r' {
+ return ref, b, nil
+ }
+
+ var err error
+ if b[0] == '[' {
+ ref, b, err = p.parseTable(b)
+ } else {
+ ref, b, err = p.parseKeyval(b)
+ }
+
+ if err != nil {
+ return ref, nil, err
+ }
+
+ b = p.parseWhitespace(b)
+
+ if len(b) > 0 && b[0] == '#' {
+ _, rest, err := scanComment(b)
+ return ref, rest, err
+ }
+
+ return ref, b, nil
+}
+
+func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
+ // table = std-table / array-table
+ if len(b) > 1 && b[1] == '[' {
+ return p.parseArrayTable(b)
+ }
+
+ return p.parseStdTable(b)
+}
+
+func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
+ // array-table = array-table-open key array-table-close
+ // array-table-open = %x5B.5B ws ; [[ Double left square bracket
+ // array-table-close = ws %x5D.5D ; ]] Double right square bracket
+ ref := p.builder.Push(Node{
+ Kind: ArrayTable,
+ })
+
+ b = b[2:]
+ b = p.parseWhitespace(b)
+
+ k, b, err := p.parseKey(b)
+ if err != nil {
+ return ref, nil, err
+ }
+
+ p.builder.AttachChild(ref, k)
+ b = p.parseWhitespace(b)
+
+ b, err = expect(']', b)
+ if err != nil {
+ return ref, nil, err
+ }
+
+ b, err = expect(']', b)
+
+ return ref, b, err
+}
+
+func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
+ // std-table = std-table-open key std-table-close
+ // std-table-open = %x5B ws ; [ Left square bracket
+ // std-table-close = ws %x5D ; ] Right square bracket
+ ref := p.builder.Push(Node{
+ Kind: Table,
+ })
+
+ b = b[1:]
+ b = p.parseWhitespace(b)
+
+ key, b, err := p.parseKey(b)
+ if err != nil {
+ return ref, nil, err
+ }
+
+ p.builder.AttachChild(ref, key)
+
+ b = p.parseWhitespace(b)
+
+ b, err = expect(']', b)
+
+ return ref, b, err
+}
+
+func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
+ // keyval = key keyval-sep val
+ ref := p.builder.Push(Node{
+ Kind: KeyValue,
+ })
+
+ key, b, err := p.parseKey(b)
+ if err != nil {
+ return invalidReference, nil, err
+ }
+
+ // keyval-sep = ws %x3D ws ; =
+
+ b = p.parseWhitespace(b)
+
+ if len(b) == 0 {
+ return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
+ }
+
+ b, err = expect('=', b)
+ if err != nil {
+ return invalidReference, nil, err
+ }
+
+ b = p.parseWhitespace(b)
+
+ valRef, b, err := p.parseVal(b)
+ if err != nil {
+ return ref, b, err
+ }
+
+ p.builder.Chain(valRef, key)
+ p.builder.AttachChild(ref, valRef)
+
+ return ref, b, err
+}
+
+//nolint:cyclop,funlen
+func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
+ // val = string / boolean / array / inline-table / date-time / float / integer
+ ref := invalidReference
+
+ if len(b) == 0 {
+ return ref, nil, NewParserError(b, "expected value, not eof")
+ }
+
+ var err error
+ c := b[0]
+
+ switch c {
+ case '"':
+ var raw []byte
+ var v []byte
+ if scanFollowsMultilineBasicStringDelimiter(b) {
+ raw, v, b, err = p.parseMultilineBasicString(b)
+ } else {
+ raw, v, b, err = p.parseBasicString(b)
+ }
+
+ if err == nil {
+ ref = p.builder.Push(Node{
+ Kind: String,
+ Raw: p.Range(raw),
+ Data: v,
+ })
+ }
+
+ return ref, b, err
+ case '\'':
+ var raw []byte
+ var v []byte
+ if scanFollowsMultilineLiteralStringDelimiter(b) {
+ raw, v, b, err = p.parseMultilineLiteralString(b)
+ } else {
+ raw, v, b, err = p.parseLiteralString(b)
+ }
+
+ if err == nil {
+ ref = p.builder.Push(Node{
+ Kind: String,
+ Raw: p.Range(raw),
+ Data: v,
+ })
+ }
+
+ return ref, b, err
+ case 't':
+ if !scanFollowsTrue(b) {
+ return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
+ }
+
+ ref = p.builder.Push(Node{
+ Kind: Bool,
+ Data: b[:4],
+ })
+
+ return ref, b[4:], nil
+ case 'f':
+ if !scanFollowsFalse(b) {
+ return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
+ }
+
+ ref = p.builder.Push(Node{
+ Kind: Bool,
+ Data: b[:5],
+ })
+
+ return ref, b[5:], nil
+ case '[':
+ return p.parseValArray(b)
+ case '{':
+ return p.parseInlineTable(b)
+ default:
+ return p.parseIntOrFloatOrDateTime(b)
+ }
+}
+
+func atmost(b []byte, n int) []byte {
+ if n >= len(b) {
+ return b
+ }
+
+ return b[:n]
+}
+
+func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
+ v, rest, err := scanLiteralString(b)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ return v, v[1 : len(v)-1], rest, nil
+}
+
+func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
+ // inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
+ // inline-table-open = %x7B ws ; {
+ // inline-table-close = ws %x7D ; }
+ // inline-table-sep = ws %x2C ws ; , Comma
+ // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
+ parent := p.builder.Push(Node{
+ Kind: InlineTable,
+ })
+
+ first := true
+
+ var child reference
+
+ b = b[1:]
+
+ var err error
+
+ for len(b) > 0 {
+ previousB := b
+ b = p.parseWhitespace(b)
+
+ if len(b) == 0 {
+ return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
+ }
+
+ if b[0] == '}' {
+ break
+ }
+
+ if !first {
+ b, err = expect(',', b)
+ if err != nil {
+ return parent, nil, err
+ }
+ b = p.parseWhitespace(b)
+ }
+
+ var kv reference
+
+ kv, b, err = p.parseKeyval(b)
+ if err != nil {
+ return parent, nil, err
+ }
+
+ if first {
+ p.builder.AttachChild(parent, kv)
+ } else {
+ p.builder.Chain(child, kv)
+ }
+ child = kv
+
+ first = false
+ }
+
+ rest, err := expect('}', b)
+
+ return parent, rest, err
+}
+
+//nolint:funlen,cyclop
+func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
+ // array = array-open [ array-values ] ws-comment-newline array-close
+ // array-open = %x5B ; [
+ // array-close = %x5D ; ]
+ // array-values = ws-comment-newline val ws-comment-newline array-sep array-values
+ // array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
+ // array-sep = %x2C ; , Comma
+ // ws-comment-newline = *( wschar / [ comment ] newline )
+ arrayStart := b
+ b = b[1:]
+
+ parent := p.builder.Push(Node{
+ Kind: Array,
+ })
+
+ first := true
+
+ var lastChild reference
+
+ var err error
+ for len(b) > 0 {
+ b, err = p.parseOptionalWhitespaceCommentNewline(b)
+ if err != nil {
+ return parent, nil, err
+ }
+
+ if len(b) == 0 {
+ return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
+ }
+
+ if b[0] == ']' {
+ break
+ }
+
+ if b[0] == ',' {
+ if first {
+ return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
+ }
+ b = b[1:]
+
+ b, err = p.parseOptionalWhitespaceCommentNewline(b)
+ if err != nil {
+ return parent, nil, err
+ }
+ } else if !first {
+ return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
+ }
+
+ // TOML allows trailing commas in arrays.
+ if len(b) > 0 && b[0] == ']' {
+ break
+ }
+
+ var valueRef reference
+ valueRef, b, err = p.parseVal(b)
+ if err != nil {
+ return parent, nil, err
+ }
+
+ if first {
+ p.builder.AttachChild(parent, valueRef)
+ } else {
+ p.builder.Chain(lastChild, valueRef)
+ }
+ lastChild = valueRef
+
+ b, err = p.parseOptionalWhitespaceCommentNewline(b)
+ if err != nil {
+ return parent, nil, err
+ }
+ first = false
+ }
+
+ rest, err := expect(']', b)
+
+ return parent, rest, err
+}
+
+func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) {
+ for len(b) > 0 {
+ var err error
+ b = p.parseWhitespace(b)
+
+ if len(b) > 0 && b[0] == '#' {
+ _, b, err = scanComment(b)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if len(b) == 0 {
+ break
+ }
+
+ if b[0] == '\n' || b[0] == '\r' {
+ b, err = p.parseNewline(b)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ break
+ }
+ }
+
+ return b, nil
+}
+
+func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
+ token, rest, err := scanMultilineLiteralString(b)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ i := 3
+
+ // skip the immediate new line
+ if token[i] == '\n' {
+ i++
+ } else if token[i] == '\r' && token[i+1] == '\n' {
+ i += 2
+ }
+
+ return token, token[i : len(token)-3], rest, err
+}
+
+//nolint:funlen,gocognit,cyclop
+func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
+ // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
+ // ml-basic-string-delim
+ // ml-basic-string-delim = 3quotation-mark
+ // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
+ //
+ // mlb-content = mlb-char / newline / mlb-escaped-nl
+ // mlb-char = mlb-unescaped / escaped
+ // mlb-quotes = 1*2quotation-mark
+ // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
+ // mlb-escaped-nl = escape ws newline *( wschar / newline )
+ token, escaped, rest, err := scanMultilineBasicString(b)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ i := 3
+
+ // skip the immediate new line
+ if token[i] == '\n' {
+ i++
+ } else if token[i] == '\r' && token[i+1] == '\n' {
+ i += 2
+ }
+
+ // fast path
+ startIdx := i
+ endIdx := len(token) - len(`"""`)
+
+ if !escaped {
+ str := token[startIdx:endIdx]
+ verr := characters.Utf8TomlValidAlreadyEscaped(str)
+ if verr.Zero() {
+ return token, str, rest, nil
+ }
+ return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
+ }
+
+ var builder bytes.Buffer
+
+ // The scanner ensures that the token starts and ends with quotes and that
+ // escapes are balanced.
+ for i < len(token)-3 {
+ c := token[i]
+
+ //nolint:nestif
+ if c == '\\' {
+ // When the last non-whitespace character on a line is an unescaped \,
+ // it will be trimmed along with all whitespace (including newlines) up
+ // to the next non-whitespace character or closing delimiter.
+
+ isLastNonWhitespaceOnLine := false
+ j := 1
+ findEOLLoop:
+ for ; j < len(token)-3-i; j++ {
+ switch token[i+j] {
+ case ' ', '\t':
+ continue
+ case '\r':
+ if token[i+j+1] == '\n' {
+ continue
+ }
+ case '\n':
+ isLastNonWhitespaceOnLine = true
+ }
+ break findEOLLoop
+ }
+ if isLastNonWhitespaceOnLine {
+ i += j
+ for ; i < len(token)-3; i++ {
+ c := token[i]
+ if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
+ i--
+ break
+ }
+ }
+ i++
+ continue
+ }
+
+ // handle escaping
+ i++
+ c = token[i]
+
+ switch c {
+ case '"', '\\':
+ builder.WriteByte(c)
+ case 'b':
+ builder.WriteByte('\b')
+ case 'f':
+ builder.WriteByte('\f')
+ case 'n':
+ builder.WriteByte('\n')
+ case 'r':
+ builder.WriteByte('\r')
+ case 't':
+ builder.WriteByte('\t')
+ case 'e':
+ builder.WriteByte(0x1B)
+ case 'u':
+ x, err := hexToRune(atmost(token[i+1:], 4), 4)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+ builder.WriteRune(x)
+ i += 4
+ case 'U':
+ x, err := hexToRune(atmost(token[i+1:], 8), 8)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ builder.WriteRune(x)
+ i += 8
+ default:
+ return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
+ }
+ i++
+ } else {
+ size := characters.Utf8ValidNext(token[i:])
+ if size == 0 {
+ return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
+ }
+ builder.Write(token[i : i+size])
+ i += size
+ }
+ }
+
+ return token, builder.Bytes(), rest, nil
+}
+
+func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
+ // key = simple-key / dotted-key
+ // simple-key = quoted-key / unquoted-key
+ //
+ // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
+ // quoted-key = basic-string / literal-string
+ // dotted-key = simple-key 1*( dot-sep simple-key )
+ //
+ // dot-sep = ws %x2E ws ; . Period
+ raw, key, b, err := p.parseSimpleKey(b)
+ if err != nil {
+ return invalidReference, nil, err
+ }
+
+ ref := p.builder.Push(Node{
+ Kind: Key,
+ Raw: p.Range(raw),
+ Data: key,
+ })
+
+ for {
+ b = p.parseWhitespace(b)
+ if len(b) > 0 && b[0] == '.' {
+ b = p.parseWhitespace(b[1:])
+
+ raw, key, b, err = p.parseSimpleKey(b)
+ if err != nil {
+ return ref, nil, err
+ }
+
+ p.builder.PushAndChain(Node{
+ Kind: Key,
+ Raw: p.Range(raw),
+ Data: key,
+ })
+ } else {
+ break
+ }
+ }
+
+ return ref, b, nil
+}
+
+func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
+ if len(b) == 0 {
+ return nil, nil, nil, NewParserError(b, "expected key but found none")
+ }
+
+ // simple-key = quoted-key / unquoted-key
+ // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
+ // quoted-key = basic-string / literal-string
+ switch {
+ case b[0] == '\'':
+ return p.parseLiteralString(b)
+ case b[0] == '"':
+ return p.parseBasicString(b)
+ case isUnquotedKeyChar(b[0]):
+ key, rest = scanUnquotedKey(b)
+ return key, key, rest, nil
+ default:
+ return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
+ }
+}
+
+//nolint:funlen,cyclop
+func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
+ // basic-string = quotation-mark *basic-char quotation-mark
+ // quotation-mark = %x22 ; "
+ // basic-char = basic-unescaped / escaped
+ // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
+ // escaped = escape escape-seq-char
+ // escape-seq-char = %x22 ; " quotation mark U+0022
+ // escape-seq-char =/ %x5C ; \ reverse solidus U+005C
+ // escape-seq-char =/ %x62 ; b backspace U+0008
+ // escape-seq-char =/ %x66 ; f form feed U+000C
+ // escape-seq-char =/ %x6E ; n line feed U+000A
+ // escape-seq-char =/ %x72 ; r carriage return U+000D
+ // escape-seq-char =/ %x74 ; t tab U+0009
+ // escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
+ // escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
+ token, escaped, rest, err := scanBasicString(b)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ startIdx := len(`"`)
+ endIdx := len(token) - len(`"`)
+
+ // Fast path. If there is no escape sequence, the string should just be
+ // an UTF-8 encoded string, which is the same as Go. In that case,
+ // validate the string and return a direct reference to the buffer.
+ if !escaped {
+ str := token[startIdx:endIdx]
+ verr := characters.Utf8TomlValidAlreadyEscaped(str)
+ if verr.Zero() {
+ return token, str, rest, nil
+ }
+ return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
+ }
+
+ i := startIdx
+
+ var builder bytes.Buffer
+
+ // The scanner ensures that the token starts and ends with quotes and that
+ // escapes are balanced.
+ for i < len(token)-1 {
+ c := token[i]
+ if c == '\\' {
+ i++
+ c = token[i]
+
+ switch c {
+ case '"', '\\':
+ builder.WriteByte(c)
+ case 'b':
+ builder.WriteByte('\b')
+ case 'f':
+ builder.WriteByte('\f')
+ case 'n':
+ builder.WriteByte('\n')
+ case 'r':
+ builder.WriteByte('\r')
+ case 't':
+ builder.WriteByte('\t')
+ case 'e':
+ builder.WriteByte(0x1B)
+ case 'u':
+ x, err := hexToRune(token[i+1:len(token)-1], 4)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ builder.WriteRune(x)
+ i += 4
+ case 'U':
+ x, err := hexToRune(token[i+1:len(token)-1], 8)
+ if err != nil {
+ return nil, nil, nil, err
+ }
+
+ builder.WriteRune(x)
+ i += 8
+ default:
+ return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
+ }
+ i++
+ } else {
+ size := characters.Utf8ValidNext(token[i:])
+ if size == 0 {
+ return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
+ }
+ builder.Write(token[i : i+size])
+ i += size
+ }
+ }
+
+ return token, builder.Bytes(), rest, nil
+}
+
+func hexToRune(b []byte, length int) (rune, error) {
+ if len(b) < length {
+ return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
+ }
+ b = b[:length]
+
+ var r uint32
+ for i, c := range b {
+ d := uint32(0)
+ switch {
+ case '0' <= c && c <= '9':
+ d = uint32(c - '0')
+ case 'a' <= c && c <= 'f':
+ d = uint32(c - 'a' + 10)
+ case 'A' <= c && c <= 'F':
+ d = uint32(c - 'A' + 10)
+ default:
+ return -1, NewParserError(b[i:i+1], "non-hex character")
+ }
+ r = r*16 + d
+ }
+
+ if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
+ return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
+ }
+
+ return rune(r), nil
+}
+
+func (p *Parser) parseWhitespace(b []byte) []byte {
+ // ws = *wschar
+ // wschar = %x20 ; Space
+ // wschar =/ %x09 ; Horizontal tab
+ _, rest := scanWhitespace(b)
+
+ return rest
+}
+
+//nolint:cyclop
+func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
+ switch b[0] {
+ case 'i':
+ if !scanFollowsInf(b) {
+ return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
+ }
+
+ return p.builder.Push(Node{
+ Kind: Float,
+ Data: b[:3],
+ }), b[3:], nil
+ case 'n':
+ if !scanFollowsNan(b) {
+ return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
+ }
+
+ return p.builder.Push(Node{
+ Kind: Float,
+ Data: b[:3],
+ }), b[3:], nil
+ case '+', '-':
+ return p.scanIntOrFloat(b)
+ }
+
+ if len(b) < 3 {
+ return p.scanIntOrFloat(b)
+ }
+
+ s := 5
+ if len(b) < s {
+ s = len(b)
+ }
+
+ for idx, c := range b[:s] {
+ if isDigit(c) {
+ continue
+ }
+
+ if idx == 2 && c == ':' || (idx == 4 && c == '-') {
+ return p.scanDateTime(b)
+ }
+
+ break
+ }
+
+ return p.scanIntOrFloat(b)
+}
+
+func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
+ // scans for contiguous characters in [0-9T:Z.+-], and up to one space if
+ // followed by a digit.
+ hasDate := false
+ hasTime := false
+ hasTz := false
+ seenSpace := false
+
+ i := 0
+byteLoop:
+ for ; i < len(b); i++ {
+ c := b[i]
+
+ switch {
+ case isDigit(c):
+ case c == '-':
+ hasDate = true
+ const minOffsetOfTz = 8
+ if i >= minOffsetOfTz {
+ hasTz = true
+ }
+ case c == 'T' || c == 't' || c == ':' || c == '.':
+ hasTime = true
+ case c == '+' || c == '-' || c == 'Z' || c == 'z':
+ hasTz = true
+ case c == ' ':
+ if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
+ i += 2
+ // Avoid reaching past the end of the document in case the time
+ // is malformed. See TestIssue585.
+ if i >= len(b) {
+ i--
+ }
+ seenSpace = true
+ hasTime = true
+ } else {
+ break byteLoop
+ }
+ default:
+ break byteLoop
+ }
+ }
+
+ var kind Kind
+
+ if hasTime {
+ if hasDate {
+ if hasTz {
+ kind = DateTime
+ } else {
+ kind = LocalDateTime
+ }
+ } else {
+ kind = LocalTime
+ }
+ } else {
+ kind = LocalDate
+ }
+
+ return p.builder.Push(Node{
+ Kind: kind,
+ Data: b[:i],
+ }), b[i:], nil
+}
+
+//nolint:funlen,gocognit,cyclop
+func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
+ i := 0
+
+ if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
+ var isValidRune validRuneFn
+
+ switch b[1] {
+ case 'x':
+ isValidRune = isValidHexRune
+ case 'o':
+ isValidRune = isValidOctalRune
+ case 'b':
+ isValidRune = isValidBinaryRune
+ default:
+ i++
+ }
+
+ if isValidRune != nil {
+ i += 2
+ for ; i < len(b); i++ {
+ if !isValidRune(b[i]) {
+ break
+ }
+ }
+ }
+
+ return p.builder.Push(Node{
+ Kind: Integer,
+ Data: b[:i],
+ }), b[i:], nil
+ }
+
+ isFloat := false
+
+ for ; i < len(b); i++ {
+ c := b[i]
+
+ if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
+ continue
+ }
+
+ if c == '.' || c == 'e' || c == 'E' {
+ isFloat = true
+
+ continue
+ }
+
+ if c == 'i' {
+ if scanFollowsInf(b[i:]) {
+ return p.builder.Push(Node{
+ Kind: Float,
+ Data: b[:i+3],
+ }), b[i+3:], nil
+ }
+
+ return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
+ }
+
+ if c == 'n' {
+ if scanFollowsNan(b[i:]) {
+ return p.builder.Push(Node{
+ Kind: Float,
+ Data: b[:i+3],
+ }), b[i+3:], nil
+ }
+
+ return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
+ }
+
+ break
+ }
+
+ if i == 0 {
+ return invalidReference, b, NewParserError(b, "incomplete number")
+ }
+
+ kind := Integer
+
+ if isFloat {
+ kind = Float
+ }
+
+ return p.builder.Push(Node{
+ Kind: kind,
+ Data: b[:i],
+ }), b[i:], nil
+}
+
+func isDigit(r byte) bool {
+ return r >= '0' && r <= '9'
+}
+
+type validRuneFn func(r byte) bool
+
+func isValidHexRune(r byte) bool {
+ return r >= 'a' && r <= 'f' ||
+ r >= 'A' && r <= 'F' ||
+ r >= '0' && r <= '9' ||
+ r == '_'
+}
+
+func isValidOctalRune(r byte) bool {
+ return r >= '0' && r <= '7' || r == '_'
+}
+
+func isValidBinaryRune(r byte) bool {
+ return r == '0' || r == '1' || r == '_'
+}
+
+func expect(x byte, b []byte) ([]byte, error) {
+ if len(b) == 0 {
+ return nil, NewParserError(b, "expected character %c but the document ended here", x)
+ }
+
+ if b[0] != x {
+ return nil, NewParserError(b[0:1], "expected character %c", x)
+ }
+
+ return b[1:], nil
+}