Diffstat (limited to 'vendor/github.com/klauspost/compress/zstd')
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/README.md | 2
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/blockdec.go | 19
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/bytebuf.go | 16
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/decodeheader.go | 9
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/decoder.go | 140
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/decoder_options.go | 68
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/dict.go | 51
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/enc_base.go | 28
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/enc_best.go | 64
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/enc_better.go | 35
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/enc_dfast.go | 23
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/enc_fast.go | 20
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/encoder.go | 39
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/encoder_options.go | 38
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/framedec.go | 104
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go | 7
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s | 1
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/history.go | 21
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md | 49
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go | 47
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s | 336
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s | 140
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go | 2
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go | 19
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/seqdec.go | 22
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go | 43
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s | 489
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/seqdec_generic.go | 4
-rw-r--r--  vendor/github.com/klauspost/compress/zstd/zstd.go | 31
29 files changed, 1072 insertions(+), 795 deletions(-)
diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md
index beb7fa87..65b38abe 100644
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -12,6 +12,8 @@ The `zstd` package is provided as open source software using a Go standard licen
Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors.
+For seekable zstd streams, see [this excellent package](https://github.com/SaveTheRbtz/zstd-seekable-format-go).
+
## Installation
Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
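For context, a minimal round-trip sketch using the package's stateless API (illustrative, not part of the diff):

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	enc, _ := zstd.NewWriter(nil) // nil writer: only EncodeAll is used
	defer enc.Close()
	compressed := enc.EncodeAll([]byte("hello zstd"), nil)

	dec, _ := zstd.NewReader(nil) // nil reader: only DecodeAll is used
	defer dec.Close()
	plain, _ := dec.DecodeAll(compressed, nil)
	fmt.Printf("%s\n", plain)
}
```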
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index 7eed729b..2445bb4f 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -10,7 +10,6 @@ import (
"errors"
"fmt"
"io"
- "io/ioutil"
"os"
"path/filepath"
"sync"
@@ -83,8 +82,9 @@ type blockDec struct {
err error
- // Check against this crc
- checkCRC []byte
+ // Check against this crc, if hasCRC is true.
+ checkCRC uint32
+ hasCRC bool
// Frame to use for singlethreaded decoding.
// Should not be used by the decoder itself since parent may be another frame.
@@ -192,16 +192,14 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
}
// Read block data.
- if cap(b.dataStorage) < cSize {
+ if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize {
+ // byteBuf doesn't need a destination buffer.
if b.lowMem || cSize > maxCompressedBlockSize {
b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
} else {
b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
}
}
- if cap(b.dst) <= maxSize {
- b.dst = make([]byte, 0, maxSize+1)
- }
b.data, err = br.readBig(cSize, b.dataStorage)
if err != nil {
if debugDecoder {
@@ -210,6 +208,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
}
return err
}
+ if cap(b.dst) <= maxSize {
+ b.dst = make([]byte, 0, maxSize+1)
+ }
return nil
}
@@ -233,7 +234,7 @@ func (b *blockDec) decodeBuf(hist *history) error {
if b.lowMem {
b.dst = make([]byte, b.RLESize)
} else {
- b.dst = make([]byte, maxBlockSize)
+ b.dst = make([]byte, maxCompressedBlockSize)
}
}
b.dst = b.dst[:b.RLESize]
@@ -651,7 +652,7 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
buf.Write(in)
- ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
+ os.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
}
return nil
diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
index 4493baa7..176788f2 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -7,7 +7,6 @@ package zstd
import (
"fmt"
"io"
- "io/ioutil"
)
type byteBuffer interface {
@@ -23,7 +22,7 @@ type byteBuffer interface {
readByte() (byte, error)
// Skip n bytes.
- skipN(n int) error
+ skipN(n int64) error
}
// in-memory buffer
@@ -62,9 +61,12 @@ func (b *byteBuf) readByte() (byte, error) {
return r, nil
}
-func (b *byteBuf) skipN(n int) error {
+func (b *byteBuf) skipN(n int64) error {
bb := *b
- if len(bb) < n {
+ if n < 0 {
+ return fmt.Errorf("negative skip (%d) requested", n)
+ }
+ if int64(len(bb)) < n {
return io.ErrUnexpectedEOF
}
*b = bb[n:]
@@ -120,9 +122,9 @@ func (r *readerWrapper) readByte() (byte, error) {
return r.tmp[0], nil
}
-func (r *readerWrapper) skipN(n int) error {
- n2, err := io.CopyN(ioutil.Discard, r.r, int64(n))
- if n2 != int64(n) {
+func (r *readerWrapper) skipN(n int64) error {
+ n2, err := io.CopyN(io.Discard, r.r, n)
+ if n2 != n {
err = io.ErrUnexpectedEOF
}
return err
diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
index 5022e71c..f6a24097 100644
--- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go
+++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
@@ -4,7 +4,6 @@
package zstd
import (
- "bytes"
"encoding/binary"
"errors"
"io"
@@ -102,8 +101,8 @@ func (h *Header) Decode(in []byte) error {
}
h.HeaderSize += 4
b, in := in[:4], in[4:]
- if !bytes.Equal(b, frameMagic) {
- if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
+ if string(b) != frameMagic {
+ if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch
}
if len(in) < 4 {
@@ -153,7 +152,7 @@ func (h *Header) Decode(in []byte) error {
}
b, in = in[:size], in[size:]
h.HeaderSize += int(size)
- switch size {
+ switch len(b) {
case 1:
h.DictionaryID = uint32(b[0])
case 2:
@@ -183,7 +182,7 @@ func (h *Header) Decode(in []byte) error {
}
b, in = in[:fcsSize], in[fcsSize:]
h.HeaderSize += int(fcsSize)
- switch fcsSize {
+ switch len(b) {
case 1:
h.FrameContentSize = uint64(b[0])
case 2:
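The hunks above parse the frame header. A sketch of using the exported `Header` type for inspection, reading only fields assigned in this diff:

```go
package main

import (
	"fmt"
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	enc, _ := zstd.NewWriter(nil)
	frame := enc.EncodeAll([]byte("some payload"), nil)
	enc.Close()

	// Decode only the frame header; no decompression happens.
	var h zstd.Header
	if err := h.Decode(frame); err != nil {
		log.Fatal(err)
	}
	fmt.Println("header bytes:", h.HeaderSize,
		"dict ID:", h.DictionaryID,
		"content size:", h.FrameContentSize)
}
```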
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index 286c8f9d..7113e69e 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -5,7 +5,6 @@
package zstd
import (
- "bytes"
"context"
"encoding/binary"
"io"
@@ -35,13 +34,13 @@ type Decoder struct {
br readerWrapper
enabled bool
inFrame bool
+ dstBuf []byte
}
frame *frameDec
// Custom dictionaries.
- // Always uses copies.
- dicts map[uint32]dict
+ dicts map[uint32]*dict
// streamWg is the waitgroup for all streams
streamWg sync.WaitGroup
@@ -103,7 +102,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
}
// Transfer option dicts.
- d.dicts = make(map[uint32]dict, len(d.o.dicts))
+ d.dicts = make(map[uint32]*dict, len(d.o.dicts))
for _, dc := range d.o.dicts {
d.dicts[dc.id] = dc
}
@@ -187,21 +186,23 @@ func (d *Decoder) Reset(r io.Reader) error {
}
// If bytes buffer and < 5MB, do sync decoding anyway.
- if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
+ if bb, ok := r.(byter); ok && bb.Len() < d.o.decodeBufsBelow && !d.o.limitToCap {
bb2 := bb
if debugDecoder {
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
}
b := bb2.Bytes()
var dst []byte
- if cap(d.current.b) > 0 {
- dst = d.current.b
+ if cap(d.syncStream.dstBuf) > 0 {
+ dst = d.syncStream.dstBuf[:0]
}
- dst, err := d.DecodeAll(b, dst[:0])
+ dst, err := d.DecodeAll(b, dst)
if err == nil {
err = io.EOF
}
+ // Save output buffer
+ d.syncStream.dstBuf = dst
d.current.b = dst
d.current.err = err
d.current.flushed = true
@@ -216,6 +217,7 @@ func (d *Decoder) Reset(r io.Reader) error {
d.current.err = nil
d.current.flushed = false
d.current.d = nil
+ d.syncStream.dstBuf = nil
// Ensure no-one else is still running...
d.streamWg.Wait()
@@ -312,6 +314,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
// Grab a block decoder and frame decoder.
block := <-d.decoders
frame := block.localFrame
+ initialSize := len(dst)
defer func() {
if debugDecoder {
printf("re-adding decoder: %p", block)
@@ -337,21 +340,26 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
return dst, err
}
- if frame.DictionaryID != nil {
- dict, ok := d.dicts[*frame.DictionaryID]
- if !ok {
- return nil, ErrUnknownDictionary
- }
- if debugDecoder {
- println("setting dict", frame.DictionaryID)
- }
- frame.history.setDict(&dict)
+ if err = d.setDict(frame); err != nil {
+ return nil, err
}
if frame.WindowSize > d.o.maxWindowSize {
+ if debugDecoder {
+ println("window size exceeded:", frame.WindowSize, ">", d.o.maxWindowSize)
+ }
return dst, ErrWindowSizeExceeded
}
if frame.FrameContentSize != fcsUnknown {
- if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
+ if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)-initialSize) {
+ if debugDecoder {
+ println("decoder size exceeded; fcs:", frame.FrameContentSize, "> mcs:", d.o.maxDecodedSize-uint64(len(dst)-initialSize), "len:", len(dst))
+ }
+ return dst, ErrDecoderSizeExceeded
+ }
+ if d.o.limitToCap && frame.FrameContentSize > uint64(cap(dst)-len(dst)) {
+ if debugDecoder {
+ println("decoder size exceeded; fcs:", frame.FrameContentSize, "> (cap-len)", cap(dst)-len(dst))
+ }
return dst, ErrDecoderSizeExceeded
}
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
@@ -361,7 +369,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
}
- if cap(dst) == 0 {
+ if cap(dst) == 0 && !d.o.limitToCap {
// Allocate len(input) * 2 by default if nothing is provided
// and we didn't get frame content size.
size := len(input) * 2
@@ -379,6 +387,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
if err != nil {
return dst, err
}
+ if uint64(len(dst)-initialSize) > d.o.maxDecodedSize {
+ return dst, ErrDecoderSizeExceeded
+ }
if len(frame.bBuf) == 0 {
if debugDecoder {
println("frame dbuf empty")
@@ -439,7 +450,11 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
}
- if !d.o.ignoreChecksum && len(next.b) > 0 {
+ if d.o.ignoreChecksum {
+ return true
+ }
+
+ if len(next.b) > 0 {
n, err := d.current.crc.Write(next.b)
if err == nil {
if n != len(next.b) {
@@ -447,18 +462,16 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
}
}
}
- if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
- got := d.current.crc.Sum64()
- var tmp [4]byte
- binary.LittleEndian.PutUint32(tmp[:], uint32(got))
- if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) {
+ if next.err == nil && next.d != nil && next.d.hasCRC {
+ got := uint32(d.current.crc.Sum64())
+ if got != next.d.checkCRC {
if debugDecoder {
- println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
+ printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC)
}
d.current.err = ErrCRCMismatch
} else {
if debugDecoder {
- println("CRC ok", tmp[:])
+ printf("CRC ok %08x\n", got)
}
}
}
@@ -474,18 +487,12 @@ func (d *Decoder) nextBlockSync() (ok bool) {
if !d.syncStream.inFrame {
d.frame.history.reset()
d.current.err = d.frame.reset(&d.syncStream.br)
+ if d.current.err == nil {
+ d.current.err = d.setDict(d.frame)
+ }
if d.current.err != nil {
return false
}
- if d.frame.DictionaryID != nil {
- dict, ok := d.dicts[*d.frame.DictionaryID]
- if !ok {
- d.current.err = ErrUnknownDictionary
- return false
- } else {
- d.frame.history.setDict(&dict)
- }
- }
if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize {
d.current.err = ErrDecoderSizeExceeded
return false
@@ -664,6 +671,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
if debugDecoder {
println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
}
+ hist.reset()
hist.decoders = block.async.newHist.decoders
hist.recentOffsets = block.async.newHist.recentOffsets
hist.windowSize = block.async.newHist.windowSize
@@ -695,6 +703,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
seqExecute <- block
}
close(seqExecute)
+ hist.reset()
}()
var wg sync.WaitGroup
@@ -718,6 +727,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
if debugDecoder {
println("Async 2: new history")
}
+ hist.reset()
hist.windowSize = block.async.newHist.windowSize
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
if block.async.newHist.dict != nil {
@@ -747,7 +757,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
if block.lowMem {
block.dst = make([]byte, block.RLESize)
} else {
- block.dst = make([]byte, maxBlockSize)
+ block.dst = make([]byte, maxCompressedBlockSize)
}
}
block.dst = block.dst[:block.RLESize]
@@ -799,13 +809,14 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
if debugDecoder {
println("decoder goroutines finished")
}
+ hist.reset()
}()
+ var hist history
decodeStream:
for {
- var hist history
var hasErr bool
-
+ hist.reset()
decodeBlock := func(block *blockDec) {
if hasErr {
if block != nil {
@@ -840,15 +851,14 @@ decodeStream:
if debugDecoder && err != nil {
println("Frame decoder returned", err)
}
- if err == nil && frame.DictionaryID != nil {
- dict, ok := d.dicts[*frame.DictionaryID]
- if !ok {
- err = ErrUnknownDictionary
- } else {
- frame.history.setDict(&dict)
- }
+ if err == nil {
+ err = d.setDict(frame)
}
if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
+ if debugDecoder {
+ println("decoder size exceeded, fws:", d.frame.WindowSize, "> mws:", d.o.maxWindowSize)
+ }
+
err = ErrDecoderSizeExceeded
}
if err != nil {
@@ -890,18 +900,22 @@ decodeStream:
println("next block returned error:", err)
}
dec.err = err
- dec.checkCRC = nil
+ dec.hasCRC = false
if dec.Last && frame.HasCheckSum && err == nil {
crc, err := frame.rawInput.readSmall(4)
- if err != nil {
+ if len(crc) < 4 {
+ if err == nil {
+ err = io.ErrUnexpectedEOF
+ }
println("CRC missing?", err)
dec.err = err
- }
- var tmp [4]byte
- copy(tmp[:], crc)
- dec.checkCRC = tmp[:]
- if debugDecoder {
- println("found crc to check:", dec.checkCRC)
+ } else {
+ dec.checkCRC = binary.LittleEndian.Uint32(crc)
+ dec.hasCRC = true
+ if debugDecoder {
+ printf("found crc to check: %08x\n", dec.checkCRC)
+ }
}
}
err = dec.err
@@ -917,5 +931,23 @@ decodeStream:
}
close(seqDecode)
wg.Wait()
+ hist.reset()
d.frame.history.b = frameHistCache
}
+
+func (d *Decoder) setDict(frame *frameDec) (err error) {
+ dict, ok := d.dicts[frame.DictionaryID]
+ if ok {
+ if debugDecoder {
+ println("setting dict", frame.DictionaryID)
+ }
+ frame.history.setDict(dict)
+ } else if frame.DictionaryID != 0 {
+ // A zero or missing dictionary id is ambiguous:
+ // either dictionary zero, or no dictionary. In particular,
+ // zstd --patch-from uses this id for the source file,
+ // so only return an error if the dictionary id is not zero.
+ err = ErrUnknownDictionary
+ }
+ return err
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
index c70e6fa0..07a90dd7 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
@@ -6,6 +6,8 @@ package zstd
import (
"errors"
+ "fmt"
+ "math/bits"
"runtime"
)
@@ -14,20 +16,23 @@ type DOption func(*decoderOptions) error
// options retains accumulated state of multiple options.
type decoderOptions struct {
- lowMem bool
- concurrent int
- maxDecodedSize uint64
- maxWindowSize uint64
- dicts []dict
- ignoreChecksum bool
+ lowMem bool
+ concurrent int
+ maxDecodedSize uint64
+ maxWindowSize uint64
+ dicts []*dict
+ ignoreChecksum bool
+ limitToCap bool
+ decodeBufsBelow int
}
func (o *decoderOptions) setDefault() {
*o = decoderOptions{
// use less ram: true for now, but may change.
- lowMem: true,
- concurrent: runtime.GOMAXPROCS(0),
- maxWindowSize: MaxWindowSize,
+ lowMem: true,
+ concurrent: runtime.GOMAXPROCS(0),
+ maxWindowSize: MaxWindowSize,
+ decodeBufsBelow: 128 << 10,
}
if o.concurrent > 4 {
o.concurrent = 4
@@ -82,7 +87,13 @@ func WithDecoderMaxMemory(n uint64) DOption {
}
// WithDecoderDicts allows to register one or more dictionaries for the decoder.
-// If several dictionaries with the same ID is provided the last one will be used.
+//
+// Each slice in dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
+// If several dictionaries with the same ID are provided, the last one will be used.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
func WithDecoderDicts(dicts ...[]byte) DOption {
return func(o *decoderOptions) error {
for _, b := range dicts {
@@ -90,12 +101,24 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
if err != nil {
return err
}
- o.dicts = append(o.dicts, *d)
+ o.dicts = append(o.dicts, d)
}
return nil
}
}
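A sketch of registering trained dictionaries with a decoder; the `my.dict` path is hypothetical, and the file must be in the `zstd --train` format referenced above:

```go
package main

import (
	"log"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Hypothetical dictionary produced by `zstd --train`.
	dictData, err := os.ReadFile("my.dict")
	if err != nil {
		log.Fatal(err)
	}
	dec, err := zstd.NewReader(nil, zstd.WithDecoderDicts(dictData))
	if err != nil {
		log.Fatal(err)
	}
	defer dec.Close()
	// DecodeAll will now resolve frames whose dictionary ID
	// matches the registered dictionary.
}
```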
+// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
+// The slice content can be arbitrary data.
+func WithDecoderDictRaw(id uint32, content []byte) DOption {
+ return func(o *decoderOptions) error {
+ if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+ return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
+ }
+ o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}})
+ return nil
+ }
+}
+
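A sketch of the raw variant; the ID and content are arbitrary values both sides must agree on:

```go
package main

import (
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Arbitrary raw content; must match the encoder's raw dictionary.
	history := []byte("any bytes agreed on out of band")
	dec, err := zstd.NewReader(nil, zstd.WithDecoderDictRaw(1, history))
	if err != nil {
		log.Fatal(err)
	}
	defer dec.Close()
	// dec can now decode frames produced with WithEncoderDictRaw(1, history);
	// see the encoder-side example later in this diff.
}
```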
// WithDecoderMaxWindow allows to set a maximum window size for decodes.
// This allows rejecting packets that will cause big memory usage.
// The Decoder will likely allocate more memory based on the WithDecoderLowmem setting.
@@ -114,6 +137,29 @@ func WithDecoderMaxWindow(size uint64) DOption {
}
}
+// WithDecodeAllCapLimit will limit DecodeAll to decoding cap(dst)-len(dst) bytes,
+// or any size set in WithDecoderMaxMemory.
+// This can be used to limit decoding to a specific maximum output size.
+// Disabled by default.
+func WithDecodeAllCapLimit(b bool) DOption {
+ return func(o *decoderOptions) error {
+ o.limitToCap = b
+ return nil
+ }
+}
+
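A sketch of the cap limit in action; sizes are illustrative:

```go
package main

import (
	"errors"
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	enc, _ := zstd.NewWriter(nil)
	frame := enc.EncodeAll(make([]byte, 1<<20), nil) // 1 MiB of zeros
	enc.Close()

	dec, _ := zstd.NewReader(nil, zstd.WithDecodeAllCapLimit(true))
	defer dec.Close()

	dst := make([]byte, 0, 64<<10) // allow at most 64 KiB of output
	_, err := dec.DecodeAll(frame, dst)
	// true: the 1 MiB output would exceed cap(dst).
	fmt.Println(errors.Is(err, zstd.ErrDecoderSizeExceeded))
}
```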
+// WithDecodeBuffersBelow will fully decode readers that have a
+// `Bytes() []byte` and `Len() int` interface similar to bytes.Buffer.
+// This typically uses less allocations but will have the full decompressed object in memory.
+// Note that WithDecodeAllCapLimit will disable this, as will giving a size of 0 or less.
+// Default is 128KiB.
+func WithDecodeBuffersBelow(size int) DOption {
+ return func(o *decoderOptions) error {
+ o.decodeBufsBelow = size
+ return nil
+ }
+}
+
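A sketch of the buffer path this threshold controls; the 1 MiB threshold is illustrative:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	enc, _ := zstd.NewWriter(nil)
	frame := enc.EncodeAll([]byte("small payload"), nil)
	enc.Close()

	dec, _ := zstd.NewReader(nil, zstd.WithDecodeBuffersBelow(1<<20))
	defer dec.Close()

	// bytes.Buffer exposes Bytes()/Len(), and the frame is below the
	// threshold, so Reset takes the synchronous DecodeAll path.
	if err := dec.Reset(bytes.NewBuffer(frame)); err != nil {
		log.Fatal(err)
	}
	out, err := io.ReadAll(dec)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s\n", out)
}
```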
// IgnoreChecksum allows to forcibly ignore checksum checking.
func IgnoreChecksum(b bool) DOption {
return func(o *decoderOptions) error {
diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go
index a36ae83e..ca095145 100644
--- a/vendor/github.com/klauspost/compress/zstd/dict.go
+++ b/vendor/github.com/klauspost/compress/zstd/dict.go
@@ -1,7 +1,6 @@
package zstd
import (
- "bytes"
"encoding/binary"
"errors"
"fmt"
@@ -20,7 +19,10 @@ type dict struct {
content []byte
}
-var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
+const dictMagic = "\x37\xa4\x30\xec"
+
+// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB.
+const dictMaxLength = 1 << 31
// ID returns the dictionary id or 0 if d is nil.
func (d *dict) ID() uint32 {
@@ -30,14 +32,38 @@ func (d *dict) ID() uint32 {
return d.id
}
-// DictContentSize returns the dictionary content size or 0 if d is nil.
-func (d *dict) DictContentSize() int {
+// ContentSize returns the dictionary content size or 0 if d is nil.
+func (d *dict) ContentSize() int {
if d == nil {
return 0
}
return len(d.content)
}
+// Content returns the dictionary content.
+func (d *dict) Content() []byte {
+ if d == nil {
+ return nil
+ }
+ return d.content
+}
+
+// Offsets returns the initial offsets.
+func (d *dict) Offsets() [3]int {
+ if d == nil {
+ return [3]int{}
+ }
+ return d.offsets
+}
+
+// LitEncoder returns the literal encoder.
+func (d *dict) LitEncoder() *huff0.Scratch {
+ if d == nil {
+ return nil
+ }
+ return d.litEnc
+}
+
// Load a dictionary as described in
// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
func loadDict(b []byte) (*dict, error) {
@@ -50,7 +76,7 @@ func loadDict(b []byte) (*dict, error) {
ofDec: sequenceDec{fse: &fseDecoder{}},
mlDec: sequenceDec{fse: &fseDecoder{}},
}
- if !bytes.Equal(b[:4], dictMagic[:]) {
+ if string(b[:4]) != dictMagic {
return nil, ErrMagicMismatch
}
d.id = binary.LittleEndian.Uint32(b[4:8])
@@ -62,7 +88,7 @@ func loadDict(b []byte) (*dict, error) {
var err error
d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("loading literal table: %w", err)
}
d.litEnc.Reuse = huff0.ReusePolicyMust
@@ -120,3 +146,16 @@ func loadDict(b []byte) (*dict, error) {
return &d, nil
}
+
+// InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
+func InspectDictionary(b []byte) (interface {
+ ID() uint32
+ ContentSize() int
+ Content() []byte
+ Offsets() [3]int
+ LitEncoder() *huff0.Scratch
+}, error) {
+ initPredefined()
+ d, err := loadDict(b)
+ return d, err
+}
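A sketch of the new inspection helper; the dictionary path is hypothetical:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	b, err := os.ReadFile("my.dict") // hypothetical trained dictionary
	if err != nil {
		log.Fatal(err)
	}
	d, err := zstd.InspectDictionary(b)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("ID:", d.ID(),
		"content bytes:", d.ContentSize(),
		"offsets:", d.Offsets())
}
```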
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go
index 15ae8ee8..e008b992 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_base.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@@ -16,6 +16,7 @@ type fastBase struct {
cur int32
// maximum offset. Should be at least 2x block size.
maxMatchOff int32
+ bufferReset int32
hist []byte
crc *xxhash.Digest
tmp [8]byte
@@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc {
}
func (e *fastBase) addBlock(src []byte) int32 {
- if debugAsserts && e.cur > bufferReset {
- panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+ if debugAsserts && e.cur > e.bufferReset {
+ panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
}
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
@@ -126,24 +127,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
}
}
- a := src[s:]
- b := src[t:]
- b = b[:len(a)]
- end := int32((len(a) >> 3) << 3)
- for i := int32(0); i < end; i += 8 {
- if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
- return i + int32(bits.TrailingZeros64(diff)>>3)
- }
- }
-
- a = a[end:]
- b = b[end:]
- for i := range a {
- if a[i] != b[i] {
- return int32(i) + end
- }
- }
- return int32(len(a)) + end
+ return int32(matchLen(src[s:], src[t:]))
}
// Reset the encoding table.
@@ -165,13 +149,13 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
if singleBlock {
e.lowMem = true
}
- e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
+ e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
e.lowMem = low
}
// We offset current position so everything will be out of reach.
// If above reset line, history will be purged.
- if e.cur < bufferReset {
+ if e.cur < e.bufferReset {
e.cur += e.maxMatchOff + int32(len(e.hist))
}
e.hist = e.hist[:0]
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go
index 96028ecd..830f5ba7 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@@ -32,6 +32,7 @@ type match struct {
length int32
rep int32
est int32
+ _ [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes
}
const highScore = 25000
@@ -84,14 +85,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = prevEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = prevEntry{}
- }
+ e.table = [bestShortTableSize]prevEntry{}
+ e.longTable = [bestLongTableSize]prevEntry{}
e.cur = e.maxMatchOff
break
}
@@ -192,8 +189,8 @@ encodeLoop:
panic("offset0 was 0")
}
- bestOf := func(a, b match) match {
- if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
+ bestOf := func(a, b *match) *match {
+ if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 {
return a
}
return b
@@ -219,22 +216,26 @@ encodeLoop:
return m
}
- best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+ m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
+ m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
+ m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
+ m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)
+ best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4))
if canRepeat && best.length < goodEnough {
cv32 := uint32(cv >> 8)
spp := s + 1
- best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
- best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
- best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+ m1 := matchAt(spp-offset1, spp, cv32, 1)
+ m2 := matchAt(spp-offset2, spp, cv32, 2)
+ m3 := matchAt(spp-offset3, spp, cv32, 3)
+ best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
if best.length > 0 {
cv32 = uint32(cv >> 24)
spp += 2
- best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
- best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
- best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+ m1 := matchAt(spp-offset1, spp, cv32, 1)
+ m2 := matchAt(spp-offset2, spp, cv32, 2)
+ m3 := matchAt(spp-offset3, spp, cv32, 3)
+ best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
}
}
// Load next and check...
@@ -261,26 +262,33 @@ encodeLoop:
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
// Short at s+1
- best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+ m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
// Long at s+1, s+2
- best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
- best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+ m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
+ m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
+ m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
+ m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
+ best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5))
if false {
// Short at s+3.
// Too often worse...
- best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
+ m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
+ best = bestOf(best, &m)
}
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
candidateEnd := e.longTable[nextHashL]
- if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
- bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
- if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
- bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+ // Start check at a fixed offset to allow for a few mismatches.
+ // For this compression level 2 yields the best results.
+ const skipBeginning = 2
+ if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
+ m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+ bestEnd := bestOf(best, &m)
+ if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
+ m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+ bestEnd = bestOf(bestEnd, &m)
}
best = bestEnd
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
index c769f694..8582f31a 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -62,14 +62,10 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = prevEntry{}
- }
+ e.table = [betterShortTableSize]tableEntry{}
+ e.longTable = [betterLongTableSize]prevEntry{}
e.cur = e.maxMatchOff
break
}
@@ -416,15 +412,23 @@ encodeLoop:
// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
+ // Allow some bytes at the beginning to mismatch.
+ // Sweet spot is around 3 bytes, but depends on input.
+ // The skipped bytes are tested in Extend backwards,
+ // and still picked up as part of the match if they do.
+ const skipBeginning = 3
+
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
- cv := load3232(src, s)
+ s2 := s + skipBeginning
+ cv := load3232(src, s2)
candidateL := e.longTable[nextHashL]
- coffsetL := candidateL.offset - e.cur - matched
- if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ coffsetL := candidateL.offset - e.cur - matched + skipBeginning
+ if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
- matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
+ s = s2
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
@@ -434,12 +438,13 @@ encodeLoop:
// Check prev long...
if true {
- coffsetL = candidateL.prev - e.cur - matched
- if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ coffsetL = candidateL.prev - e.cur - matched + skipBeginning
+ if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
- matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
+ s = s2
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
@@ -578,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index 7ff0c64f..7d425109 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -44,14 +44,10 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = tableEntry{}
- }
+ e.table = [dFastShortTableSize]tableEntry{}
+ e.longTable = [dFastLongTableSize]tableEntry{}
e.cur = e.maxMatchOff
break
}
@@ -388,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
)
// Protect against e.cur wraparound.
- if e.cur >= bufferReset {
+ if e.cur >= e.bufferReset {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
@@ -685,7 +681,7 @@ encodeLoop:
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
- if e.cur < bufferReset {
+ if e.cur < e.bufferReset {
e.cur += int32(len(src))
}
}
@@ -700,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
@@ -1103,7 +1099,8 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
}
if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
- copy(e.longTable[:], e.dictLongTable)
+ //copy(e.longTable[:], e.dictLongTable)
+ e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
for i := range e.longTableShardDirty {
e.longTableShardDirty[i] = false
}
@@ -1114,7 +1111,9 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
continue
}
- copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
+ // copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
+ *(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])
+
e.longTableShardDirty[i] = false
}
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index f51ab529..315b1a8f 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -43,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
@@ -304,13 +304,13 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugEncoder {
- if len(src) > maxBlockSize {
+ if len(src) > maxCompressedBlockSize {
panic("src too big")
}
}
// Protect against e.cur wraparound.
- if e.cur >= bufferReset {
+ if e.cur >= e.bufferReset {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
@@ -538,7 +538,7 @@ encodeLoop:
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
- if e.cur < bufferReset {
+ if e.cur < e.bufferReset {
e.cur += int32(len(src))
}
}
@@ -555,11 +555,9 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
return
}
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = tableEntry{}
- }
+ e.table = [tableSize]tableEntry{}
e.cur = e.maxMatchOff
break
}
@@ -871,7 +869,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
const shardCnt = tableShardCnt
const shardSize = tableShardSize
if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
- copy(e.table[:], e.dictTable)
+ //copy(e.table[:], e.dictTable)
+ e.table = *(*[tableSize]tableEntry)(e.dictTable)
for i := range e.tableShardDirty {
e.tableShardDirty[i] = false
}
@@ -883,7 +882,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
continue
}
- copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+ //copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+ *(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
e.tableShardDirty[i] = false
}
e.allDirty = false
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index e6b1d01c..65c6c36d 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -8,6 +8,7 @@ import (
"crypto/rand"
"fmt"
"io"
+ "math"
rdebug "runtime/debug"
"sync"
@@ -528,8 +529,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
// If a non-single block is needed the encoder will reset again.
e.encoders <- enc
}()
- // Use single segments when above minimum window and below 1MB.
- single := len(src) < 1<<20 && len(src) > MinWindowSize
+ // Use single segments when above minimum window and below window size.
+ single := len(src) <= e.o.windowSize && len(src) > MinWindowSize
if e.o.single != nil {
single = *e.o.single
}
@@ -639,3 +640,37 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
return dst
}
+
+// MaxEncodedSize returns the expected maximum
+// size of an encoded block or stream.
+func (e *Encoder) MaxEncodedSize(size int) int {
+ frameHeader := 4 + 2 // magic + frame header & window descriptor
+ if e.o.dict != nil {
+ frameHeader += 4
+ }
+ // Frame content size:
+ if size < 256 {
+ frameHeader++
+ } else if size < 65536+256 {
+ frameHeader += 2
+ } else if size < math.MaxInt32 {
+ frameHeader += 4
+ } else {
+ frameHeader += 8
+ }
+ // Final crc
+ if e.o.crc {
+ frameHeader += 4
+ }
+
+ // Max overhead is 3 bytes/block.
+ // There cannot be 0 blocks.
+ blocks := (size + e.o.blockSize) / e.o.blockSize
+
+ // Combine, add padding.
+ maxSz := frameHeader + 3*blocks + size
+ if e.o.pad > 1 {
+ maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad))
+ }
+ return maxSz
+}
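A sketch of pre-sizing the destination with the new bound so `EncodeAll` never reallocates:

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	enc, _ := zstd.NewWriter(nil)
	defer enc.Close()

	src := make([]byte, 1<<20)
	// Worst-case bound: frame header + 3 bytes per block + payload.
	dst := make([]byte, 0, enc.MaxEncodedSize(len(src)))
	dst = enc.EncodeAll(src, dst)
	fmt.Println("bound:", cap(dst), "actual:", len(dst))
}
```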
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
index 44d8dbd1..8e15be2f 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@@ -3,6 +3,8 @@ package zstd
import (
"errors"
"fmt"
+ "math"
+ "math/bits"
"runtime"
"strings"
)
@@ -47,22 +49,22 @@ func (o encoderOptions) encoder() encoder {
switch o.level {
case SpeedFastest:
if o.dict != nil {
- return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
}
- return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
case SpeedDefault:
if o.dict != nil {
- return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+ return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
}
- return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
case SpeedBetterCompression:
if o.dict != nil {
- return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
}
- return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
case SpeedBestCompression:
- return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
}
panic("unknown compression level")
}
@@ -283,7 +285,7 @@ func WithNoEntropyCompression(b bool) EOption {
// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
-// If this is not specified, block encodes will automatically choose this based on the input size.
+// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
// This setting has no effect on streamed encodes.
func WithSingleSegment(b bool) EOption {
return func(o *encoderOptions) error {
@@ -304,7 +306,13 @@ func WithLowerEncoderMem(b bool) EOption {
}
// WithEncoderDict allows to register a dictionary that will be used for the encode.
+//
+// The slice dict must be in the [dictionary format] produced by
+// "zstd --train" from the Zstandard reference implementation.
+//
// The encoder *may* choose to use no dictionary instead for certain payloads.
+//
+// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
func WithEncoderDict(dict []byte) EOption {
return func(o *encoderOptions) error {
d, err := loadDict(dict)
@@ -315,3 +323,17 @@ func WithEncoderDict(dict []byte) EOption {
return nil
}
}
+
+// WithEncoderDictRaw registers a dictionary that may be used by the encoder.
+//
+// The slice content may contain arbitrary data. It will be used as an initial
+// history.
+func WithEncoderDictRaw(id uint32, content []byte) EOption {
+ return func(o *encoderOptions) error {
+ if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
+ return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
+ }
+ o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
+ return nil
+ }
+}
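A sketch of a raw-dictionary round trip, pairing this option with `WithDecoderDictRaw` from earlier in this diff; the ID and shared bytes are arbitrary:

```go
package main

import (
	"fmt"
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	shared := []byte("a shared history both sides know") // arbitrary raw content
	const id = 1

	enc, err := zstd.NewWriter(nil, zstd.WithEncoderDictRaw(id, shared))
	if err != nil {
		log.Fatal(err)
	}
	frame := enc.EncodeAll([]byte("a shared history both sides know, plus news"), nil)
	enc.Close()

	dec, err := zstd.NewReader(nil, zstd.WithDecoderDictRaw(id, shared))
	if err != nil {
		log.Fatal(err)
	}
	defer dec.Close()

	plain, err := dec.DecodeAll(frame, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s\n", plain)
}
```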
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index fa0a633f..d8e8a05b 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -5,7 +5,7 @@
package zstd
import (
- "bytes"
+ "encoding/binary"
"encoding/hex"
"errors"
"io"
@@ -29,7 +29,7 @@ type frameDec struct {
FrameContentSize uint64
- DictionaryID *uint32
+ DictionaryID uint32
HasCheckSum bool
SingleSegment bool
}
@@ -43,9 +43,9 @@ const (
MaxWindowSize = 1 << 29
)
-var (
- frameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd}
- skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
+const (
+ frameMagic = "\x28\xb5\x2f\xfd"
+ skippableFrameMagic = "\x2a\x4d\x18"
)
func newFrameDec(o decoderOptions) *frameDec {
@@ -89,9 +89,9 @@ func (d *frameDec) reset(br byteBuffer) error {
copy(signature[1:], b)
}
- if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
+ if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
if debugDecoder {
- println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
+ println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic)))
}
// Break if not skippable frame.
break
@@ -106,7 +106,7 @@ func (d *frameDec) reset(br byteBuffer) error {
}
n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
println("Skipping frame with", n, "bytes.")
- err = br.skipN(int(n))
+ err = br.skipN(int64(n))
if err != nil {
if debugDecoder {
println("Reading discarded frame", err)
@@ -114,9 +114,9 @@ func (d *frameDec) reset(br byteBuffer) error {
return err
}
}
- if !bytes.Equal(signature[:], frameMagic) {
+ if string(signature[:]) != frameMagic {
if debugDecoder {
- println("Got magic numbers: ", signature, "want:", frameMagic)
+ println("Got magic numbers: ", signature, "want:", []byte(frameMagic))
}
return ErrMagicMismatch
}
@@ -155,7 +155,7 @@ func (d *frameDec) reset(br byteBuffer) error {
// Read Dictionary_ID
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
- d.DictionaryID = nil
+ d.DictionaryID = 0
if size := fhd & 3; size != 0 {
if size == 3 {
size = 4
@@ -167,7 +167,7 @@ func (d *frameDec) reset(br byteBuffer) error {
return err
}
var id uint32
- switch size {
+ switch len(b) {
case 1:
id = uint32(b[0])
case 2:
@@ -178,11 +178,7 @@ func (d *frameDec) reset(br byteBuffer) error {
if debugDecoder {
println("Dict size", size, "ID:", id)
}
- if id > 0 {
- // ID 0 means "sorry, no dictionary anyway".
- // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
- d.DictionaryID = &id
- }
+ d.DictionaryID = id
}
// Read Frame_Content_Size
@@ -204,7 +200,7 @@ func (d *frameDec) reset(br byteBuffer) error {
println("Reading Frame content", err)
return err
}
- switch fcsSize {
+ switch len(b) {
case 1:
d.FrameContentSize = uint64(b[0])
case 2:
@@ -231,20 +227,27 @@ func (d *frameDec) reset(br byteBuffer) error {
d.crc.Reset()
}
+ if d.WindowSize > d.o.maxWindowSize {
+ if debugDecoder {
+ printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
+ }
+ return ErrWindowSizeExceeded
+ }
+
if d.WindowSize == 0 && d.SingleSegment {
// We may not need window in this case.
d.WindowSize = d.FrameContentSize
if d.WindowSize < MinWindowSize {
d.WindowSize = MinWindowSize
}
- }
-
- if d.WindowSize > uint64(d.o.maxWindowSize) {
- if debugDecoder {
- printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
+ if d.WindowSize > d.o.maxDecodedSize {
+ if debugDecoder {
+ printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
+ }
+ return ErrDecoderSizeExceeded
}
- return ErrWindowSizeExceeded
}
+
// The minimum Window_Size is 1 KB.
if d.WindowSize < MinWindowSize {
if debugDecoder {
@@ -254,11 +257,16 @@ func (d *frameDec) reset(br byteBuffer) error {
}
d.history.windowSize = int(d.WindowSize)
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
- // Alloc 2x window size if not low-mem, or very small window size.
+ // Alloc 2x window size if not low-mem, or window size below 2MB.
d.history.allocFrameBuffer = d.history.windowSize * 2
} else {
- // Alloc with one additional block
- d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
+ if d.o.lowMem {
+ // Alloc with 1MB extra.
+ d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2
+ } else {
+ // Alloc with 2MB extra.
+ d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
+ }
}
if debugDecoder {
@@ -293,7 +301,7 @@ func (d *frameDec) checkCRC() error {
}
// We can overwrite upper tmp now
- want, err := d.rawInput.readSmall(4)
+ buf, err := d.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
return err
@@ -303,22 +311,17 @@ func (d *frameDec) checkCRC() error {
return nil
}
- var tmp [4]byte
- got := d.crc.Sum64()
- // Flip to match file order.
- tmp[0] = byte(got >> 0)
- tmp[1] = byte(got >> 8)
- tmp[2] = byte(got >> 16)
- tmp[3] = byte(got >> 24)
+ want := binary.LittleEndian.Uint32(buf[:4])
+ got := uint32(d.crc.Sum64())
- if !bytes.Equal(tmp[:], want) {
+ if got != want {
if debugDecoder {
- println("CRC Check Failed:", tmp[:], "!=", want)
+ printf("CRC check failed: got %08x, want %08x\n", got, want)
}
return ErrCRCMismatch
}
if debugDecoder {
- println("CRC ok", tmp[:])
+ printf("CRC ok %08x\n", got)
}
return nil
}
@@ -336,7 +339,7 @@ func (d *frameDec) consumeCRC() error {
return nil
}
-// runDecoder will create a sync decoder that will decode a block of data.
+// runDecoder will run the decoder for the remainder of the frame.
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
saved := d.history.b
@@ -346,12 +349,23 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
// Store input length, so we only check new data.
crcStart := len(dst)
d.history.decoders.maxSyncLen = 0
+ if d.o.limitToCap {
+ d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst))
+ }
if d.FrameContentSize != fcsUnknown {
- d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
+ if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen {
+ d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
+ }
if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
+ if debugDecoder {
+ println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize)
+ }
return dst, ErrDecoderSizeExceeded
}
- if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
+ if debugDecoder {
+ println("maxSyncLen:", d.history.decoders.maxSyncLen)
+ }
+ if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen {
// Alloc for output
dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
copy(dst2, dst)
@@ -371,7 +385,13 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
if err != nil {
break
}
- if uint64(len(d.history.b)) > d.o.maxDecodedSize {
+ if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize {
+ println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize)
+ err = ErrDecoderSizeExceeded
+ break
+ }
+ if d.o.limitToCap && len(d.history.b) > cap(dst) {
+ println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst))
err = ErrDecoderSizeExceeded
break
}
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
index e74df436..d04a829b 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
@@ -21,7 +21,8 @@ type buildDtableAsmContext struct {
// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable.
// Function returns non-zero exit code on error.
-// go:noescape
+//
+//go:noescape
func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
// please keep in sync with _generate/gen_fse.go
@@ -34,8 +35,8 @@ const (
// buildDtable will build the decoding table.
func (s *fseDecoder) buildDtable() error {
ctx := buildDtableAsmContext{
- stateTable: (*uint16)(&s.stateTable[0]),
- norm: (*int16)(&s.norm[0]),
+ stateTable: &s.stateTable[0],
+ norm: &s.norm[0],
dt: (*uint64)(&s.dt[0]),
}
code := buildDtable_asm(s, &ctx)
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
index da32b442..bcde3986 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
@@ -1,7 +1,6 @@
// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
-// +build !appengine,!noasm,gc,!noasm
// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
TEXT ·buildDtable_asm(SB), $0-24
diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go
index 28b40153..09164856 100644
--- a/vendor/github.com/klauspost/compress/zstd/history.go
+++ b/vendor/github.com/klauspost/compress/zstd/history.go
@@ -37,24 +37,21 @@ func (h *history) reset() {
h.ignoreBuffer = 0
h.error = false
h.recentOffsets = [3]int{1, 4, 8}
- if f := h.decoders.litLengths.fse; f != nil && !f.preDefined {
- fseDecoderPool.Put(f)
- }
- if f := h.decoders.offsets.fse; f != nil && !f.preDefined {
- fseDecoderPool.Put(f)
- }
- if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined {
- fseDecoderPool.Put(f)
- }
+ h.decoders.freeDecoders()
h.decoders = sequenceDecs{br: h.decoders.br}
+ h.freeHuffDecoder()
+ h.huffTree = nil
+ h.dict = nil
+ //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
+}
+
+func (h *history) freeHuffDecoder() {
if h.huffTree != nil {
if h.dict == nil || h.dict.litEnc != h.huffTree {
huffDecoderPool.Put(h.huffTree)
+ h.huffTree = nil
}
}
- h.huffTree = nil
- h.dict = nil
- //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
}
func (h *history) setDict(dict *dict) {
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
index 69aa3bb5..777290d4 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
@@ -2,12 +2,7 @@
VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package.
-
-[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash)
-[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash)
-
-xxhash is a Go implementation of the 64-bit
-[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
+xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
high-quality hashing algorithm that is much faster than anything in the Go
standard library.
@@ -28,31 +23,49 @@ func (*Digest) WriteString(string) (int, error)
func (*Digest) Sum64() uint64
```
-This implementation provides a fast pure-Go implementation and an even faster
-assembly implementation for amd64.
+The package is written with optimized pure Go and also contains even faster
+assembly implementations for amd64 and arm64. If desired, the `purego` build tag
+opts into using the Go code even on those architectures.
+
+[xxHash]: http://cyan4973.github.io/xxHash/
+
+## Compatibility
+
+This package is in a module and the latest code is in version 2 of the module.
+You need a version of Go with at least "minimal module compatibility" to use
+github.com/cespare/xxhash/v2:
+
+* 1.9.7+ for Go 1.9
+* 1.10.3+ for Go 1.10
+* Go 1.11 or later
+
+I recommend using the latest release of Go.
## Benchmarks
Here are some quick benchmarks comparing the pure-Go and assembly
implementations of Sum64.
-| input size | purego | asm |
-| --- | --- | --- |
-| 5 B | 979.66 MB/s | 1291.17 MB/s |
-| 100 B | 7475.26 MB/s | 7973.40 MB/s |
-| 4 KB | 17573.46 MB/s | 17602.65 MB/s |
-| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
+| input size | purego | asm |
+| ---------- | --------- | --------- |
+| 4 B | 1.3 GB/s | 1.2 GB/s |
+| 16 B | 2.9 GB/s | 3.5 GB/s |
+| 100 B | 6.9 GB/s | 8.1 GB/s |
+| 4 KB | 11.7 GB/s | 16.7 GB/s |
+| 10 MB | 12.0 GB/s | 17.3 GB/s |
-These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
-the following commands under Go 1.11.2:
+These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
+CPU using the following commands under Go 1.19.2:
```
-$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
-$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
+benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
```
## Projects using this package
- [InfluxDB](https://github.com/influxdata/influxdb)
- [Prometheus](https://github.com/prometheus/prometheus)
+- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
- [FreeCache](https://github.com/coocood/freecache)
+- [FastCache](https://github.com/VictoriaMetrics/fastcache)
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
index 2c112a0a..fc40c820 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
@@ -18,19 +18,11 @@ const (
prime5 uint64 = 2870177450012600261
)
-// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
-// possible in the Go code is worth a small (but measurable) performance boost
-// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
-// convenience in the Go code in a few places where we need to intentionally
-// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
-// result overflows a uint64).
-var (
- prime1v = prime1
- prime2v = prime2
- prime3v = prime3
- prime4v = prime4
- prime5v = prime5
-)
+// Store the primes in an array as well.
+//
+// The consts are used when possible in Go code to avoid MOVs but we need a
+// contiguous array for the assembly code.
+var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
// Digest implements hash.Hash64.
type Digest struct {
@@ -52,10 +44,10 @@ func New() *Digest {
// Reset clears the Digest's state so that it can be reused.
func (d *Digest) Reset() {
- d.v1 = prime1v + prime2
+ d.v1 = primes[0] + prime2
d.v2 = prime2
d.v3 = 0
- d.v4 = -prime1v
+ d.v4 = -primes[0]
d.total = 0
d.n = 0
}
@@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) {
n = len(b)
d.total += uint64(n)
+ memleft := d.mem[d.n&(len(d.mem)-1):]
+
if d.n+n < 32 {
// This new data doesn't even fill the current block.
- copy(d.mem[d.n:], b)
+ copy(memleft, b)
d.n += n
return
}
if d.n > 0 {
// Finish off the partial block.
- copy(d.mem[d.n:], b)
+ c := copy(memleft, b)
d.v1 = round(d.v1, u64(d.mem[0:8]))
d.v2 = round(d.v2, u64(d.mem[8:16]))
d.v3 = round(d.v3, u64(d.mem[16:24]))
d.v4 = round(d.v4, u64(d.mem[24:32]))
- b = b[32-d.n:]
+ b = b[c:]
d.n = 0
}
@@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 {
h += d.total
- i, end := 0, d.n
- for ; i+8 <= end; i += 8 {
- k1 := round(0, u64(d.mem[i:i+8]))
+ b := d.mem[:d.n&(len(d.mem)-1)]
+ for ; len(b) >= 8; b = b[8:] {
+ k1 := round(0, u64(b[:8]))
h ^= k1
h = rol27(h)*prime1 + prime4
}
- if i+4 <= end {
- h ^= uint64(u32(d.mem[i:i+4])) * prime1
+ if len(b) >= 4 {
+ h ^= uint64(u32(b[:4])) * prime1
h = rol23(h)*prime2 + prime3
- i += 4
+ b = b[4:]
}
- for i < end {
- h ^= uint64(d.mem[i]) * prime5
+ for ; len(b) > 0; b = b[1:] {
+ h ^= uint64(b[0]) * prime5
h = rol11(h) * prime1
- i++
}
h ^= h >> 33
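The rewrite above is behavior-preserving at the API level. A short usage sketch, assuming the upstream module path (this vendored copy is internal and not importable):

package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2"
)

func main() {
	// One-shot hashing.
	fmt.Println(xxhash.Sum64([]byte("hello world")))

	// Streaming: Write buffers partial 32-byte blocks in d.mem, so split
	// writes hash identically to the one-shot call.
	d := xxhash.New()
	d.Write([]byte("hello "))
	d.Write([]byte("world"))
	fmt.Println(d.Sum64()) // same value as above
}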
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
index cea17856..ddb63aa9 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
@@ -1,3 +1,4 @@
+//go:build !appengine && gc && !purego && !noasm
// +build !appengine
// +build gc
// +build !purego
@@ -5,212 +6,205 @@
#include "textflag.h"
-// Register allocation:
-// AX h
-// SI pointer to advance through b
-// DX n
-// BX loop end
-// R8 v1, k1
-// R9 v2
-// R10 v3
-// R11 v4
-// R12 tmp
-// R13 prime1v
-// R14 prime2v
-// DI prime4v
-
-// round reads from and advances the buffer pointer in SI.
-// It assumes that R13 has prime1v and R14 has prime2v.
-#define round(r) \
- MOVQ (SI), R12 \
- ADDQ $8, SI \
- IMULQ R14, R12 \
- ADDQ R12, r \
- ROLQ $31, r \
- IMULQ R13, r
-
-// mergeRound applies a merge round on the two registers acc and val.
-// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
-#define mergeRound(acc, val) \
- IMULQ R14, val \
- ROLQ $31, val \
- IMULQ R13, val \
- XORQ val, acc \
- IMULQ R13, acc \
- ADDQ DI, acc
+// Registers:
+#define h AX
+#define d AX
+#define p SI // pointer to advance through b
+#define n DX
+#define end BX // loop end
+#define v1 R8
+#define v2 R9
+#define v3 R10
+#define v4 R11
+#define x R12
+#define prime1 R13
+#define prime2 R14
+#define prime4 DI
+
+#define round(acc, x) \
+ IMULQ prime2, x \
+ ADDQ x, acc \
+ ROLQ $31, acc \
+ IMULQ prime1, acc
+
+// round0 performs the operation x = round(0, x).
+#define round0(x) \
+ IMULQ prime2, x \
+ ROLQ $31, x \
+ IMULQ prime1, x
+
+// mergeRound applies a merge round on the two registers acc and x.
+// It assumes that prime1, prime2, and prime4 have been loaded.
+#define mergeRound(acc, x) \
+ round0(x) \
+ XORQ x, acc \
+ IMULQ prime1, acc \
+ ADDQ prime4, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that there is at least one block
+// to process.
+#define blockLoop() \
+loop: \
+ MOVQ +0(p), x \
+ round(v1, x) \
+ MOVQ +8(p), x \
+ round(v2, x) \
+ MOVQ +16(p), x \
+ round(v3, x) \
+ MOVQ +24(p), x \
+ round(v4, x) \
+ ADDQ $32, p \
+ CMPQ p, end \
+ JLE loop
// func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOSPLIT, $0-32
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
// Load fixed primes.
- MOVQ ·prime1v(SB), R13
- MOVQ ·prime2v(SB), R14
- MOVQ ·prime4v(SB), DI
+ MOVQ ·primes+0(SB), prime1
+ MOVQ ·primes+8(SB), prime2
+ MOVQ ·primes+24(SB), prime4
// Load slice.
- MOVQ b_base+0(FP), SI
- MOVQ b_len+8(FP), DX
- LEAQ (SI)(DX*1), BX
+ MOVQ b_base+0(FP), p
+ MOVQ b_len+8(FP), n
+ LEAQ (p)(n*1), end
// The first loop limit will be len(b)-32.
- SUBQ $32, BX
+ SUBQ $32, end
// Check whether we have at least one block.
- CMPQ DX, $32
+ CMPQ n, $32
JLT noBlocks
// Set up initial state (v1, v2, v3, v4).
- MOVQ R13, R8
- ADDQ R14, R8
- MOVQ R14, R9
- XORQ R10, R10
- XORQ R11, R11
- SUBQ R13, R11
-
- // Loop until SI > BX.
-blockLoop:
- round(R8)
- round(R9)
- round(R10)
- round(R11)
-
- CMPQ SI, BX
- JLE blockLoop
-
- MOVQ R8, AX
- ROLQ $1, AX
- MOVQ R9, R12
- ROLQ $7, R12
- ADDQ R12, AX
- MOVQ R10, R12
- ROLQ $12, R12
- ADDQ R12, AX
- MOVQ R11, R12
- ROLQ $18, R12
- ADDQ R12, AX
-
- mergeRound(AX, R8)
- mergeRound(AX, R9)
- mergeRound(AX, R10)
- mergeRound(AX, R11)
+ MOVQ prime1, v1
+ ADDQ prime2, v1
+ MOVQ prime2, v2
+ XORQ v3, v3
+ XORQ v4, v4
+ SUBQ prime1, v4
+
+ blockLoop()
+
+ MOVQ v1, h
+ ROLQ $1, h
+ MOVQ v2, x
+ ROLQ $7, x
+ ADDQ x, h
+ MOVQ v3, x
+ ROLQ $12, x
+ ADDQ x, h
+ MOVQ v4, x
+ ROLQ $18, x
+ ADDQ x, h
+
+ mergeRound(h, v1)
+ mergeRound(h, v2)
+ mergeRound(h, v3)
+ mergeRound(h, v4)
JMP afterBlocks
noBlocks:
- MOVQ ·prime5v(SB), AX
+ MOVQ ·primes+32(SB), h
afterBlocks:
- ADDQ DX, AX
-
- // Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
- ADDQ $24, BX
-
- CMPQ SI, BX
- JG fourByte
-
-wordLoop:
- // Calculate k1.
- MOVQ (SI), R8
- ADDQ $8, SI
- IMULQ R14, R8
- ROLQ $31, R8
- IMULQ R13, R8
-
- XORQ R8, AX
- ROLQ $27, AX
- IMULQ R13, AX
- ADDQ DI, AX
-
- CMPQ SI, BX
- JLE wordLoop
-
-fourByte:
- ADDQ $4, BX
- CMPQ SI, BX
- JG singles
-
- MOVL (SI), R8
- ADDQ $4, SI
- IMULQ R13, R8
- XORQ R8, AX
-
- ROLQ $23, AX
- IMULQ R14, AX
- ADDQ ·prime3v(SB), AX
-
-singles:
- ADDQ $4, BX
- CMPQ SI, BX
+ ADDQ n, h
+
+ ADDQ $24, end
+ CMPQ p, end
+ JG try4
+
+loop8:
+ MOVQ (p), x
+ ADDQ $8, p
+ round0(x)
+ XORQ x, h
+ ROLQ $27, h
+ IMULQ prime1, h
+ ADDQ prime4, h
+
+ CMPQ p, end
+ JLE loop8
+
+try4:
+ ADDQ $4, end
+ CMPQ p, end
+ JG try1
+
+ MOVL (p), x
+ ADDQ $4, p
+ IMULQ prime1, x
+ XORQ x, h
+
+ ROLQ $23, h
+ IMULQ prime2, h
+ ADDQ ·primes+16(SB), h
+
+try1:
+ ADDQ $4, end
+ CMPQ p, end
JGE finalize
-singlesLoop:
- MOVBQZX (SI), R12
- ADDQ $1, SI
- IMULQ ·prime5v(SB), R12
- XORQ R12, AX
+loop1:
+ MOVBQZX (p), x
+ ADDQ $1, p
+ IMULQ ·primes+32(SB), x
+ XORQ x, h
+ ROLQ $11, h
+ IMULQ prime1, h
- ROLQ $11, AX
- IMULQ R13, AX
-
- CMPQ SI, BX
- JL singlesLoop
+ CMPQ p, end
+ JL loop1
finalize:
- MOVQ AX, R12
- SHRQ $33, R12
- XORQ R12, AX
- IMULQ R14, AX
- MOVQ AX, R12
- SHRQ $29, R12
- XORQ R12, AX
- IMULQ ·prime3v(SB), AX
- MOVQ AX, R12
- SHRQ $32, R12
- XORQ R12, AX
-
- MOVQ AX, ret+24(FP)
+ MOVQ h, x
+ SHRQ $33, x
+ XORQ x, h
+ IMULQ prime2, h
+ MOVQ h, x
+ SHRQ $29, x
+ XORQ x, h
+ IMULQ ·primes+16(SB), h
+ MOVQ h, x
+ SHRQ $32, x
+ XORQ x, h
+
+ MOVQ h, ret+24(FP)
RET
-// writeBlocks uses the same registers as above except that it uses AX to store
-// the d pointer.
-
// func writeBlocks(d *Digest, b []byte) int
-TEXT ·writeBlocks(SB), NOSPLIT, $0-40
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
// Load fixed primes needed for round.
- MOVQ ·prime1v(SB), R13
- MOVQ ·prime2v(SB), R14
+ MOVQ ·primes+0(SB), prime1
+ MOVQ ·primes+8(SB), prime2
// Load slice.
- MOVQ b_base+8(FP), SI
- MOVQ b_len+16(FP), DX
- LEAQ (SI)(DX*1), BX
- SUBQ $32, BX
+ MOVQ b_base+8(FP), p
+ MOVQ b_len+16(FP), n
+ LEAQ (p)(n*1), end
+ SUBQ $32, end
// Load vN from d.
- MOVQ d+0(FP), AX
- MOVQ 0(AX), R8 // v1
- MOVQ 8(AX), R9 // v2
- MOVQ 16(AX), R10 // v3
- MOVQ 24(AX), R11 // v4
+ MOVQ s+0(FP), d
+ MOVQ 0(d), v1
+ MOVQ 8(d), v2
+ MOVQ 16(d), v3
+ MOVQ 24(d), v4
// We don't need to check the loop condition here; this function is
// always called with at least one block of data to process.
-blockLoop:
- round(R8)
- round(R9)
- round(R10)
- round(R11)
-
- CMPQ SI, BX
- JLE blockLoop
+ blockLoop()
// Copy vN back to d.
- MOVQ R8, 0(AX)
- MOVQ R9, 8(AX)
- MOVQ R10, 16(AX)
- MOVQ R11, 24(AX)
-
- // The number of bytes written is SI minus the old base pointer.
- SUBQ b_base+8(FP), SI
- MOVQ SI, ret+32(FP)
+ MOVQ v1, 0(d)
+ MOVQ v2, 8(d)
+ MOVQ v3, 16(d)
+ MOVQ v4, 24(d)
+
+ // The number of bytes written is p minus the old base pointer.
+ SUBQ b_base+8(FP), p
+ MOVQ p, ret+32(FP)
RET
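For readers cross-checking the register renames, the round, round0, and mergeRound macros compute the same arithmetic as this Go sketch; the prime constants are the ones stored in the primes array, and the helper names are ours:

package main

import (
	"fmt"
	"math/bits"
)

const (
	prime1 uint64 = 11400714785074694791
	prime2 uint64 = 14029467366897019727
	prime4 uint64 = 9650029242287828579
)

// round mirrors round(acc, x): fold x into acc with multiply, rotate, multiply.
func round(acc, x uint64) uint64 {
	acc += x * prime2
	acc = bits.RotateLeft64(acc, 31)
	return acc * prime1
}

// mergeRound mirrors mergeRound(acc, x); round0 is round with a zero accumulator.
func mergeRound(acc, x uint64) uint64 {
	x = round(0, x)
	acc ^= x
	return acc*prime1 + prime4
}

func main() {
	fmt.Println(mergeRound(round(1, 2), 3))
}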
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
index 4d64a17d..17901e08 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
@@ -1,13 +1,17 @@
-// +build gc,!purego,!noasm
+//go:build !appengine && gc && !purego && !noasm
+// +build !appengine
+// +build gc
+// +build !purego
+// +build !noasm
#include "textflag.h"
-// Register allocation.
+// Registers:
#define digest R1
-#define h R2 // Return value.
-#define p R3 // Input pointer.
-#define len R4
-#define nblocks R5 // len / 32.
+#define h R2 // return value
+#define p R3 // input pointer
+#define n R4 // input length
+#define nblocks R5 // n / 32
#define prime1 R7
#define prime2 R8
#define prime3 R9
@@ -25,60 +29,52 @@
#define round(acc, x) \
MADD prime2, acc, x, acc \
ROR $64-31, acc \
- MUL prime1, acc \
+ MUL prime1, acc
-// x = round(0, x).
+// round0 performs the operation x = round(0, x).
#define round0(x) \
MUL prime2, x \
ROR $64-31, x \
- MUL prime1, x \
-
-#define mergeRound(x) \
- round0(x) \
- EOR x, h \
- MADD h, prime4, prime1, h \
-
-// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
-#define blocksLoop() \
- LSR $5, len, nblocks \
- PCALIGN $16 \
- loop: \
- LDP.P 32(p), (x1, x2) \
- round(v1, x1) \
- LDP -16(p), (x3, x4) \
- round(v2, x2) \
- SUB $1, nblocks \
- round(v3, x3) \
- round(v4, x4) \
- CBNZ nblocks, loop \
-
-// The primes are repeated here to ensure that they're stored
-// in a contiguous array, so we can load them with LDP.
-DATA primes<> +0(SB)/8, $11400714785074694791
-DATA primes<> +8(SB)/8, $14029467366897019727
-DATA primes<>+16(SB)/8, $1609587929392839161
-DATA primes<>+24(SB)/8, $9650029242287828579
-DATA primes<>+32(SB)/8, $2870177450012600261
-GLOBL primes<>(SB), NOPTR+RODATA, $40
+ MUL prime1, x
+
+#define mergeRound(acc, x) \
+ round0(x) \
+ EOR x, acc \
+ MADD acc, prime4, prime1, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that n >= 32.
+#define blockLoop() \
+ LSR $5, n, nblocks \
+ PCALIGN $16 \
+ loop: \
+ LDP.P 16(p), (x1, x2) \
+ LDP.P 16(p), (x3, x4) \
+ round(v1, x1) \
+ round(v2, x2) \
+ round(v3, x3) \
+ round(v4, x4) \
+ SUB $1, nblocks \
+ CBNZ nblocks, loop
// func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
- LDP b_base+0(FP), (p, len)
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
+ LDP b_base+0(FP), (p, n)
- LDP primes<> +0(SB), (prime1, prime2)
- LDP primes<>+16(SB), (prime3, prime4)
- MOVD primes<>+32(SB), prime5
+ LDP ·primes+0(SB), (prime1, prime2)
+ LDP ·primes+16(SB), (prime3, prime4)
+ MOVD ·primes+32(SB), prime5
- CMP $32, len
- CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 }
- BLO afterLoop
+ CMP $32, n
+ CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
+ BLT afterLoop
ADD prime1, prime2, v1
MOVD prime2, v2
MOVD $0, v3
NEG prime1, v4
- blocksLoop()
+ blockLoop()
ROR $64-1, v1, x1
ROR $64-7, v2, x2
@@ -88,71 +84,75 @@ TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
ADD x3, x4
ADD x2, x4, h
- mergeRound(v1)
- mergeRound(v2)
- mergeRound(v3)
- mergeRound(v4)
+ mergeRound(h, v1)
+ mergeRound(h, v2)
+ mergeRound(h, v3)
+ mergeRound(h, v4)
afterLoop:
- ADD len, h
+ ADD n, h
- TBZ $4, len, try8
+ TBZ $4, n, try8
LDP.P 16(p), (x1, x2)
round0(x1)
+
+ // NOTE: here and below, sequencing the EOR after the ROR (using a
+ // rotated register) is worth a small but measurable speedup for small
+ // inputs.
ROR $64-27, h
EOR x1 @> 64-27, h, h
MADD h, prime4, prime1, h
round0(x2)
ROR $64-27, h
- EOR x2 @> 64-27, h
+ EOR x2 @> 64-27, h, h
MADD h, prime4, prime1, h
try8:
- TBZ $3, len, try4
+ TBZ $3, n, try4
MOVD.P 8(p), x1
round0(x1)
ROR $64-27, h
- EOR x1 @> 64-27, h
+ EOR x1 @> 64-27, h, h
MADD h, prime4, prime1, h
try4:
- TBZ $2, len, try2
+ TBZ $2, n, try2
MOVWU.P 4(p), x2
MUL prime1, x2
ROR $64-23, h
- EOR x2 @> 64-23, h
+ EOR x2 @> 64-23, h, h
MADD h, prime3, prime2, h
try2:
- TBZ $1, len, try1
+ TBZ $1, n, try1
MOVHU.P 2(p), x3
AND $255, x3, x1
LSR $8, x3, x2
MUL prime5, x1
ROR $64-11, h
- EOR x1 @> 64-11, h
+ EOR x1 @> 64-11, h, h
MUL prime1, h
MUL prime5, x2
ROR $64-11, h
- EOR x2 @> 64-11, h
+ EOR x2 @> 64-11, h, h
MUL prime1, h
try1:
- TBZ $0, len, end
+ TBZ $0, n, finalize
MOVBU (p), x4
MUL prime5, x4
ROR $64-11, h
- EOR x4 @> 64-11, h
+ EOR x4 @> 64-11, h, h
MUL prime1, h
-end:
+finalize:
EOR h >> 33, h
MUL prime2, h
EOR h >> 29, h
@@ -163,24 +163,22 @@ end:
RET
// func writeBlocks(d *Digest, b []byte) int
-//
-// Assumes len(b) >= 32.
-TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
- LDP primes<>(SB), (prime1, prime2)
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
+ LDP ·primes+0(SB), (prime1, prime2)
// Load state. Assume v[1-4] are stored contiguously.
MOVD d+0(FP), digest
LDP 0(digest), (v1, v2)
LDP 16(digest), (v3, v4)
- LDP b_base+8(FP), (p, len)
+ LDP b_base+8(FP), (p, n)
- blocksLoop()
+ blockLoop()
// Store updated state.
STP (v1, v2), 0(digest)
STP (v3, v4), 16(digest)
- BIC $31, len
- MOVD len, ret+32(FP)
+ BIC $31, n
+ MOVD n, ret+32(FP)
RET
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
index 1a1fac9c..d4221edf 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
@@ -13,4 +13,4 @@ package xxhash
func Sum64(b []byte) uint64
//go:noescape
-func writeBlocks(d *Digest, b []byte) int
+func writeBlocks(s *Digest, b []byte) int
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
index 209cb4a9..0be16cef 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
@@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 {
var h uint64
if n >= 32 {
- v1 := prime1v + prime2
+ v1 := primes[0] + prime2
v2 := prime2
v3 := uint64(0)
- v4 := -prime1v
+ v4 := -primes[0]
for len(b) >= 32 {
v1 = round(v1, u64(b[0:8:len(b)]))
v2 = round(v2, u64(b[8:16:len(b)]))
@@ -37,19 +37,18 @@ func Sum64(b []byte) uint64 {
h += uint64(n)
- i, end := 0, len(b)
- for ; i+8 <= end; i += 8 {
- k1 := round(0, u64(b[i:i+8:len(b)]))
+ for ; len(b) >= 8; b = b[8:] {
+ k1 := round(0, u64(b[:8]))
h ^= k1
h = rol27(h)*prime1 + prime4
}
- if i+4 <= end {
- h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
+ if len(b) >= 4 {
+ h ^= uint64(u32(b[:4])) * prime1
h = rol23(h)*prime2 + prime3
- i += 4
+ b = b[4:]
}
- for ; i < end; i++ {
- h ^= uint64(b[i]) * prime5
+ for ; len(b) > 0; b = b[1:] {
+ h ^= uint64(b[0]) * prime5
h = rol11(h) * prime1
}
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
index df044720..f833d154 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -99,6 +99,21 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro
return nil
}
+func (s *sequenceDecs) freeDecoders() {
+ if f := s.litLengths.fse; f != nil && !f.preDefined {
+ fseDecoderPool.Put(f)
+ s.litLengths.fse = nil
+ }
+ if f := s.offsets.fse; f != nil && !f.preDefined {
+ fseDecoderPool.Put(f)
+ s.offsets.fse = nil
+ }
+ if f := s.matchLengths.fse; f != nil && !f.preDefined {
+ fseDecoderPool.Put(f)
+ s.matchLengths.fse = nil
+ }
+}
+
// execute will execute the decoded sequence with the provided history.
// The sequence must be evaluated before being sent.
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
@@ -299,7 +314,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
}
size := ll + ml + len(out)
if size-startSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
if size > cap(out) {
// Not enough size, which can happen under high volume block streaming conditions
@@ -411,7 +429,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
// Check if space for literals
if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
// Add final literals
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
index 847b322a..191384ad 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
@@ -32,18 +32,22 @@ type decodeSyncAsmContext struct {
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
+//
//go:noescape
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
+//
//go:noescape
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_amd64 does the same as above, but never writes beyond the output buffer.
+//
//go:noescape
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but never writes beyond the output buffer.
+//
//go:noescape
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
@@ -55,16 +59,22 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
return false, nil
}
- useSafe := false
- if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
- useSafe = true
- }
- if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
- useSafe = true
- }
- if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
- useSafe = true
- }
+
+ // FIXME: Using unsafe memory copies leads to rare, random crashes
+ // with fuzz testing. It is therefore disabled for now.
+ const useSafe = true
+ /*
+ useSafe := false
+ if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
+ useSafe = true
+ }
+ if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
+ useSafe = true
+ }
+ if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
+ useSafe = true
+ }
+ */
br := s.br
@@ -129,7 +139,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
if debugDecoder {
println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
}
- return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
+ return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
default:
return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
@@ -137,7 +147,8 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
- return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
+
}
err := br.close()
if err != nil {
@@ -195,20 +206,24 @@ const errorNotEnoughSpace = 5
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
+//
//go:noescape
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
+//
//go:noescape
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
+//
//go:noescape
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
+//
//go:noescape
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
@@ -275,7 +290,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
err := br.close()
if err != nil {
@@ -302,10 +317,12 @@ type executeAsmContext struct {
// Returns false if a match offset is too big.
//
// Please refer to seqdec_generic.go for the reference implementation.
+//
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Same as above, but with safe memcopies
+//
//go:noescape
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
index 71e64e06..b94993a0 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@@ -1,7 +1,6 @@
// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT.
//go:build !appengine && !noasm && gc
-// +build !appengine,!noasm,gc,!noasm
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
@@ -52,34 +51,46 @@ sequenceDecs_decode_amd64_fill_byte_by_byte:
sequenceDecs_decode_amd64_fill_end:
// Update offset
- MOVQ R9, AX
- MOVQ BX, CX
- MOVQ DX, R15
- SHLQ CL, R15
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R15
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R15
- ADDQ R15, AX
- MOVQ AX, 16(R10)
+ MOVQ R9, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decode_amd64_of_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decode_amd64_of_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decode_amd64_of_update_zero
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, AX
+
+sequenceDecs_decode_amd64_of_update_zero:
+ MOVQ AX, 16(R10)
// Update match length
- MOVQ R8, AX
- MOVQ BX, CX
- MOVQ DX, R15
- SHLQ CL, R15
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R15
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R15
- ADDQ R15, AX
- MOVQ AX, 8(R10)
+ MOVQ R8, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decode_amd64_ml_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decode_amd64_ml_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decode_amd64_ml_update_zero
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, AX
+
+sequenceDecs_decode_amd64_ml_update_zero:
+ MOVQ AX, 8(R10)
// Fill bitreader to have enough for the remaining
CMPQ SI, $0x08
@@ -107,19 +118,25 @@ sequenceDecs_decode_amd64_fill_2_byte_by_byte:
sequenceDecs_decode_amd64_fill_2_end:
// Update literal length
- MOVQ DI, AX
- MOVQ BX, CX
- MOVQ DX, R15
- SHLQ CL, R15
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R15
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R15
- ADDQ R15, AX
- MOVQ AX, (R10)
+ MOVQ DI, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decode_amd64_ll_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decode_amd64_ll_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decode_amd64_ll_update_zero
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, AX
+
+sequenceDecs_decode_amd64_ll_update_zero:
+ MOVQ AX, (R10)
// Fill bitreader for state updates
MOVQ R14, (SP)
@@ -198,7 +215,7 @@ sequenceDecs_decode_amd64_skip_update:
MOVQ R12, R13
MOVQ R11, R12
MOVQ CX, R11
- JMP sequenceDecs_decode_amd64_adjust_end
+ JMP sequenceDecs_decode_amd64_after_adjust
sequenceDecs_decode_amd64_adjust_offsetB_1_or_0:
CMPQ (R10), $0x00000000
@@ -210,7 +227,7 @@ sequenceDecs_decode_amd64_adjust_offset_maybezero:
TESTQ CX, CX
JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero
MOVQ R11, CX
- JMP sequenceDecs_decode_amd64_adjust_end
+ JMP sequenceDecs_decode_amd64_after_adjust
sequenceDecs_decode_amd64_adjust_offset_nonzero:
CMPQ CX, $0x01
@@ -247,7 +264,7 @@ sequenceDecs_decode_amd64_adjust_temp_valid:
MOVQ AX, R11
MOVQ AX, CX
-sequenceDecs_decode_amd64_adjust_end:
+sequenceDecs_decode_amd64_after_adjust:
MOVQ CX, 16(R10)
// Check values
@@ -303,10 +320,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
- // Return with not enough output space error
- MOVQ $0x00000005, ret+24(FP)
- RET
-
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
@@ -356,49 +369,67 @@ sequenceDecs_decode_56_amd64_fill_byte_by_byte:
sequenceDecs_decode_56_amd64_fill_end:
// Update offset
- MOVQ R9, AX
- MOVQ BX, CX
- MOVQ DX, R15
- SHLQ CL, R15
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R15
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R15
- ADDQ R15, AX
- MOVQ AX, 16(R10)
+ MOVQ R9, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decode_56_amd64_of_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decode_56_amd64_of_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decode_56_amd64_of_update_zero
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, AX
+
+sequenceDecs_decode_56_amd64_of_update_zero:
+ MOVQ AX, 16(R10)
// Update match length
- MOVQ R8, AX
- MOVQ BX, CX
- MOVQ DX, R15
- SHLQ CL, R15
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R15
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R15
- ADDQ R15, AX
- MOVQ AX, 8(R10)
+ MOVQ R8, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decode_56_amd64_ml_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decode_56_amd64_ml_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decode_56_amd64_ml_update_zero
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, AX
+
+sequenceDecs_decode_56_amd64_ml_update_zero:
+ MOVQ AX, 8(R10)
// Update literal length
- MOVQ DI, AX
- MOVQ BX, CX
- MOVQ DX, R15
- SHLQ CL, R15
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R15
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R15
- ADDQ R15, AX
- MOVQ AX, (R10)
+ MOVQ DI, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decode_56_amd64_ll_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decode_56_amd64_ll_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decode_56_amd64_ll_update_zero
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, AX
+
+sequenceDecs_decode_56_amd64_ll_update_zero:
+ MOVQ AX, (R10)
// Fill bitreader for state updates
MOVQ R14, (SP)
@@ -477,7 +508,7 @@ sequenceDecs_decode_56_amd64_skip_update:
MOVQ R12, R13
MOVQ R11, R12
MOVQ CX, R11
- JMP sequenceDecs_decode_56_amd64_adjust_end
+ JMP sequenceDecs_decode_56_amd64_after_adjust
sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0:
CMPQ (R10), $0x00000000
@@ -489,7 +520,7 @@ sequenceDecs_decode_56_amd64_adjust_offset_maybezero:
TESTQ CX, CX
JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero
MOVQ R11, CX
- JMP sequenceDecs_decode_56_amd64_adjust_end
+ JMP sequenceDecs_decode_56_amd64_after_adjust
sequenceDecs_decode_56_amd64_adjust_offset_nonzero:
CMPQ CX, $0x01
@@ -526,7 +557,7 @@ sequenceDecs_decode_56_amd64_adjust_temp_valid:
MOVQ AX, R11
MOVQ AX, CX
-sequenceDecs_decode_56_amd64_adjust_end:
+sequenceDecs_decode_56_amd64_after_adjust:
MOVQ CX, 16(R10)
// Check values
@@ -582,10 +613,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
- // Return with not enough output space error
- MOVQ $0x00000005, ret+24(FP)
- RET
-
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
@@ -757,7 +784,7 @@ sequenceDecs_decode_bmi2_skip_update:
MOVQ R11, R12
MOVQ R10, R11
MOVQ CX, R10
- JMP sequenceDecs_decode_bmi2_adjust_end
+ JMP sequenceDecs_decode_bmi2_after_adjust
sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0:
CMPQ (R9), $0x00000000
@@ -769,7 +796,7 @@ sequenceDecs_decode_bmi2_adjust_offset_maybezero:
TESTQ CX, CX
JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero
MOVQ R10, CX
- JMP sequenceDecs_decode_bmi2_adjust_end
+ JMP sequenceDecs_decode_bmi2_after_adjust
sequenceDecs_decode_bmi2_adjust_offset_nonzero:
CMPQ CX, $0x01
@@ -806,7 +833,7 @@ sequenceDecs_decode_bmi2_adjust_temp_valid:
MOVQ R13, R10
MOVQ R13, CX
-sequenceDecs_decode_bmi2_adjust_end:
+sequenceDecs_decode_bmi2_after_adjust:
MOVQ CX, 16(R9)
// Check values
@@ -862,10 +889,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
- // Return with not enough output space error
- MOVQ $0x00000005, ret+24(FP)
- RET
-
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
@@ -1012,7 +1035,7 @@ sequenceDecs_decode_56_bmi2_skip_update:
MOVQ R11, R12
MOVQ R10, R11
MOVQ CX, R10
- JMP sequenceDecs_decode_56_bmi2_adjust_end
+ JMP sequenceDecs_decode_56_bmi2_after_adjust
sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0:
CMPQ (R9), $0x00000000
@@ -1024,7 +1047,7 @@ sequenceDecs_decode_56_bmi2_adjust_offset_maybezero:
TESTQ CX, CX
JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero
MOVQ R10, CX
- JMP sequenceDecs_decode_56_bmi2_adjust_end
+ JMP sequenceDecs_decode_56_bmi2_after_adjust
sequenceDecs_decode_56_bmi2_adjust_offset_nonzero:
CMPQ CX, $0x01
@@ -1061,7 +1084,7 @@ sequenceDecs_decode_56_bmi2_adjust_temp_valid:
MOVQ R13, R10
MOVQ R13, CX
-sequenceDecs_decode_56_bmi2_adjust_end:
+sequenceDecs_decode_56_bmi2_after_adjust:
MOVQ CX, 16(R9)
// Check values
@@ -1117,10 +1140,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
- // Return with not enough output space error
- MOVQ $0x00000005, ret+24(FP)
- RET
-
// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Requires: SSE
TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
@@ -1354,8 +1373,7 @@ loop_finished:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
- MOVQ 80(AX), CX
- SUBQ CX, SI
+ SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET
@@ -1367,8 +1385,7 @@ error_match_off_too_big:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
- MOVQ 80(AX), CX
- SUBQ CX, SI
+ SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET
@@ -1712,8 +1729,7 @@ loop_finished:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
- MOVQ 80(AX), CX
- SUBQ CX, SI
+ SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET
@@ -1725,8 +1741,7 @@ error_match_off_too_big:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
- MOVQ 80(AX), CX
- SUBQ CX, SI
+ SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET
@@ -1749,6 +1764,10 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
MOVQ 72(AX), DI
MOVQ 80(AX), R8
MOVQ 88(AX), R9
+ XORQ CX, CX
+ MOVQ CX, 8(SP)
+ MOVQ CX, 16(SP)
+ MOVQ CX, 24(SP)
MOVQ 112(AX), R10
MOVQ 128(AX), CX
MOVQ CX, 32(SP)
@@ -1798,34 +1817,46 @@ sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
sequenceDecs_decodeSync_amd64_fill_end:
// Update offset
- MOVQ R9, AX
- MOVQ BX, CX
- MOVQ DX, R14
- SHLQ CL, R14
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R14
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R14
- ADDQ R14, AX
- MOVQ AX, 8(SP)
+ MOVQ R9, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decodeSync_amd64_of_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decodeSync_amd64_of_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decodeSync_amd64_of_update_zero
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, AX
+
+sequenceDecs_decodeSync_amd64_of_update_zero:
+ MOVQ AX, 8(SP)
// Update match length
- MOVQ R8, AX
- MOVQ BX, CX
- MOVQ DX, R14
- SHLQ CL, R14
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R14
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R14
- ADDQ R14, AX
- MOVQ AX, 16(SP)
+ MOVQ R8, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decodeSync_amd64_ml_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decodeSync_amd64_ml_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decodeSync_amd64_ml_update_zero
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, AX
+
+sequenceDecs_decodeSync_amd64_ml_update_zero:
+ MOVQ AX, 16(SP)
// Fill bitreader to have enough for the remaining
CMPQ SI, $0x08
@@ -1853,19 +1884,25 @@ sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
sequenceDecs_decodeSync_amd64_fill_2_end:
// Update literal length
- MOVQ DI, AX
- MOVQ BX, CX
- MOVQ DX, R14
- SHLQ CL, R14
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R14
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R14
- ADDQ R14, AX
- MOVQ AX, 24(SP)
+ MOVQ DI, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decodeSync_amd64_ll_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decodeSync_amd64_ll_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decodeSync_amd64_ll_update_zero
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, AX
+
+sequenceDecs_decodeSync_amd64_ll_update_zero:
+ MOVQ AX, 24(SP)
// Fill bitreader for state updates
MOVQ R13, (SP)
@@ -1945,7 +1982,7 @@ sequenceDecs_decodeSync_amd64_skip_update:
MOVUPS 144(CX), X0
MOVQ R13, 144(CX)
MOVUPS X0, 152(CX)
- JMP sequenceDecs_decodeSync_amd64_adjust_end
+ JMP sequenceDecs_decodeSync_amd64_after_adjust
sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0:
CMPQ 24(SP), $0x00000000
@@ -1957,7 +1994,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_maybezero:
TESTQ R13, R13
JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero
MOVQ 144(CX), R13
- JMP sequenceDecs_decodeSync_amd64_adjust_end
+ JMP sequenceDecs_decodeSync_amd64_after_adjust
sequenceDecs_decodeSync_amd64_adjust_offset_nonzero:
MOVQ R13, AX
@@ -1966,8 +2003,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero:
CMPQ R13, $0x03
CMOVQEQ R14, AX
CMOVQEQ R15, R14
- LEAQ 144(CX), R15
- ADDQ (R15)(AX*8), R14
+ ADDQ 144(CX)(AX*8), R14
JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid
MOVQ $0x00000001, R14
@@ -1983,7 +2019,7 @@ sequenceDecs_decodeSync_amd64_adjust_skip:
MOVQ R14, 144(CX)
MOVQ R14, R13
-sequenceDecs_decodeSync_amd64_adjust_end:
+sequenceDecs_decodeSync_amd64_after_adjust:
MOVQ R13, 8(SP)
// Check values
@@ -2280,6 +2316,10 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
MOVQ 72(CX), SI
MOVQ 80(CX), DI
MOVQ 88(CX), R8
+ XORQ R9, R9
+ MOVQ R9, 8(SP)
+ MOVQ R9, 16(SP)
+ MOVQ R9, 24(SP)
MOVQ 112(CX), R9
MOVQ 128(CX), R10
MOVQ R10, 32(SP)
@@ -2452,7 +2492,7 @@ sequenceDecs_decodeSync_bmi2_skip_update:
MOVUPS 144(CX), X0
MOVQ R13, 144(CX)
MOVUPS X0, 152(CX)
- JMP sequenceDecs_decodeSync_bmi2_adjust_end
+ JMP sequenceDecs_decodeSync_bmi2_after_adjust
sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0:
CMPQ 24(SP), $0x00000000
@@ -2464,7 +2504,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero:
TESTQ R13, R13
JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero
MOVQ 144(CX), R13
- JMP sequenceDecs_decodeSync_bmi2_adjust_end
+ JMP sequenceDecs_decodeSync_bmi2_after_adjust
sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero:
MOVQ R13, R12
@@ -2473,8 +2513,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero:
CMPQ R13, $0x03
CMOVQEQ R14, R12
CMOVQEQ R15, R14
- LEAQ 144(CX), R15
- ADDQ (R15)(R12*8), R14
+ ADDQ 144(CX)(R12*8), R14
JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid
MOVQ $0x00000001, R14
@@ -2490,7 +2529,7 @@ sequenceDecs_decodeSync_bmi2_adjust_skip:
MOVQ R14, 144(CX)
MOVQ R14, R13
-sequenceDecs_decodeSync_bmi2_adjust_end:
+sequenceDecs_decodeSync_bmi2_after_adjust:
MOVQ R13, 8(SP)
// Check values
@@ -2787,6 +2826,10 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
MOVQ 72(AX), DI
MOVQ 80(AX), R8
MOVQ 88(AX), R9
+ XORQ CX, CX
+ MOVQ CX, 8(SP)
+ MOVQ CX, 16(SP)
+ MOVQ CX, 24(SP)
MOVQ 112(AX), R10
MOVQ 128(AX), CX
MOVQ CX, 32(SP)
@@ -2836,34 +2879,46 @@ sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
sequenceDecs_decodeSync_safe_amd64_fill_end:
// Update offset
- MOVQ R9, AX
- MOVQ BX, CX
- MOVQ DX, R14
- SHLQ CL, R14
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R14
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R14
- ADDQ R14, AX
- MOVQ AX, 8(SP)
+ MOVQ R9, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decodeSync_safe_amd64_of_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decodeSync_safe_amd64_of_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decodeSync_safe_amd64_of_update_zero
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, AX
+
+sequenceDecs_decodeSync_safe_amd64_of_update_zero:
+ MOVQ AX, 8(SP)
// Update match length
- MOVQ R8, AX
- MOVQ BX, CX
- MOVQ DX, R14
- SHLQ CL, R14
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R14
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R14
- ADDQ R14, AX
- MOVQ AX, 16(SP)
+ MOVQ R8, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decodeSync_safe_amd64_ml_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decodeSync_safe_amd64_ml_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decodeSync_safe_amd64_ml_update_zero
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, AX
+
+sequenceDecs_decodeSync_safe_amd64_ml_update_zero:
+ MOVQ AX, 16(SP)
// Fill bitreader to have enough for the remaining
CMPQ SI, $0x08
@@ -2891,19 +2946,25 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
sequenceDecs_decodeSync_safe_amd64_fill_2_end:
// Update literal length
- MOVQ DI, AX
- MOVQ BX, CX
- MOVQ DX, R14
- SHLQ CL, R14
- MOVB AH, CL
- ADDQ CX, BX
- NEGL CX
- SHRQ CL, R14
- SHRQ $0x20, AX
- TESTQ CX, CX
- CMOVQEQ CX, R14
- ADDQ R14, AX
- MOVQ AX, 24(SP)
+ MOVQ DI, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ JZ sequenceDecs_decodeSync_safe_amd64_ll_update_zero
+ ADDQ CX, BX
+ CMPQ BX, $0x40
+ JA sequenceDecs_decodeSync_safe_amd64_ll_update_zero
+ CMPQ CX, $0x40
+ JAE sequenceDecs_decodeSync_safe_amd64_ll_update_zero
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, AX
+
+sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
+ MOVQ AX, 24(SP)
// Fill bitreader for state updates
MOVQ R13, (SP)
@@ -2983,7 +3044,7 @@ sequenceDecs_decodeSync_safe_amd64_skip_update:
MOVUPS 144(CX), X0
MOVQ R13, 144(CX)
MOVUPS X0, 152(CX)
- JMP sequenceDecs_decodeSync_safe_amd64_adjust_end
+ JMP sequenceDecs_decodeSync_safe_amd64_after_adjust
sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0:
CMPQ 24(SP), $0x00000000
@@ -2995,7 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero:
TESTQ R13, R13
JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero
MOVQ 144(CX), R13
- JMP sequenceDecs_decodeSync_safe_amd64_adjust_end
+ JMP sequenceDecs_decodeSync_safe_amd64_after_adjust
sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero:
MOVQ R13, AX
@@ -3004,8 +3065,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero:
CMPQ R13, $0x03
CMOVQEQ R14, AX
CMOVQEQ R15, R14
- LEAQ 144(CX), R15
- ADDQ (R15)(AX*8), R14
+ ADDQ 144(CX)(AX*8), R14
JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid
MOVQ $0x00000001, R14
@@ -3021,7 +3081,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_skip:
MOVQ R14, 144(CX)
MOVQ R14, R13
-sequenceDecs_decodeSync_safe_amd64_adjust_end:
+sequenceDecs_decodeSync_safe_amd64_after_adjust:
MOVQ R13, 8(SP)
// Check values
@@ -3420,6 +3480,10 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
MOVQ 72(CX), SI
MOVQ 80(CX), DI
MOVQ 88(CX), R8
+ XORQ R9, R9
+ MOVQ R9, 8(SP)
+ MOVQ R9, 16(SP)
+ MOVQ R9, 24(SP)
MOVQ 112(CX), R9
MOVQ 128(CX), R10
MOVQ R10, 32(SP)
@@ -3592,7 +3656,7 @@ sequenceDecs_decodeSync_safe_bmi2_skip_update:
MOVUPS 144(CX), X0
MOVQ R13, 144(CX)
MOVUPS X0, 152(CX)
- JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end
+ JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust
sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0:
CMPQ 24(SP), $0x00000000
@@ -3604,7 +3668,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero:
TESTQ R13, R13
JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero
MOVQ 144(CX), R13
- JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end
+ JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust
sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero:
MOVQ R13, R12
@@ -3613,8 +3677,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero:
CMPQ R13, $0x03
CMOVQEQ R14, R12
CMOVQEQ R15, R14
- LEAQ 144(CX), R15
- ADDQ (R15)(R12*8), R14
+ ADDQ 144(CX)(R12*8), R14
JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid
MOVQ $0x00000001, R14
@@ -3630,7 +3693,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_skip:
MOVQ R14, 144(CX)
MOVQ R14, R13
-sequenceDecs_decodeSync_safe_bmi2_adjust_end:
+sequenceDecs_decodeSync_safe_bmi2_after_adjust:
MOVQ R13, 8(SP)
// Check values
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
index c3452bc3..ac2a80d2 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
@@ -111,7 +111,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
}
s.seqSize += ll + ml
if s.seqSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
litRemain -= ll
if litRemain < 0 {
@@ -149,7 +149,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
}
s.seqSize += litRemain
if s.seqSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
err := br.close()
if err != nil {
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
index 3eb3f1c8..5ffa82f5 100644
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -36,9 +36,6 @@ const forcePreDef = false
// zstdMinMatch is the minimum zstd match length.
const zstdMinMatch = 3
-// Reset the buffer offset when reaching this.
-const bufferReset = math.MaxInt32 - MaxWindowSize
-
// fcsUnknown is used for unknown frame content size.
const fcsUnknown = math.MaxUint64
@@ -75,7 +72,6 @@ var (
ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")
// ErrUnknownDictionary is returned if the dictionary ID is unknown.
- // For the time being dictionaries are not supported.
ErrUnknownDictionary = errors.New("unknown dictionary")
// ErrFrameSizeExceeded is returned if the stated frame size is exceeded.
@@ -110,26 +106,25 @@ func printf(format string, a ...interface{}) {
}
}
-// matchLen returns the maximum length.
+// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
-// The function also returns whether all bytes matched.
-func matchLen(a, b []byte) int {
- b = b[:len(a)]
- for i := 0; i < len(a)-7; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- return i + (bits.TrailingZeros64(diff) >> 3)
+func matchLen(a, b []byte) (n int) {
+ for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
+ diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
+ if diff != 0 {
+ return n + bits.TrailingZeros64(diff)>>3
}
+ n += 8
}
- checked := (len(a) >> 3) << 3
- a = a[checked:]
- b = b[checked:]
for i := range a {
if a[i] != b[i] {
- return i + checked
+ break
}
+ n++
}
- return len(a) + checked
+ return n
+
}
func load3232(b []byte, i int32) uint32 {
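To make the bit trick in matchLen concrete: XORing two little-endian 8-byte words is non-zero exactly in the bytes that differ, so TrailingZeros64(diff)>>3 is the index of the first mismatch. A standalone copy of the same logic (same precondition: a must be no longer than b):

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

func matchLen(a, b []byte) (n int) {
	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
		if diff != 0 {
			// First differing byte within this 8-byte word.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}
	for i := range a { // byte-wise tail, fewer than 8 bytes left
		if a[i] != b[i] {
			break
		}
		n++
	}
	return n
}

func main() {
	fmt.Println(matchLen([]byte("zstandard!"), []byte("zstandard?"))) // prints 9
}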
@@ -140,10 +135,6 @@ func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}
-func load64(b []byte, i int) uint64 {
- return binary.LittleEndian.Uint64(b[i:])
-}
-
type byter interface {
Bytes() []byte
Len() int