path: root/vendor/github.com/klauspost/compress/s2/dict.go
author    Wim <wim@42.be>    2023-03-09 22:48:00 +0100
committer GitHub <noreply@github.com>    2023-03-09 22:48:00 +0100
commit    08779c29099e8940493df56d28d8aa131ac8342e (patch)
tree      7ad8ce25cf371e582137e1706dd671a6bf4342d0 /vendor/github.com/klauspost/compress/s2/dict.go
parent    d5f9cdf912d43cd2a5cb243e086fbdab9a9073b0 (diff)
download  matterbridge-msglm-08779c29099e8940493df56d28d8aa131ac8342e.tar.gz
          matterbridge-msglm-08779c29099e8940493df56d28d8aa131ac8342e.tar.bz2
          matterbridge-msglm-08779c29099e8940493df56d28d8aa131ac8342e.zip
Update dependencies (#2007)
* Update dependencies
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/dict.go')
-rw-r--r--  vendor/github.com/klauspost/compress/s2/dict.go  331
1 file changed, 331 insertions(+), 0 deletions(-)
diff --git a/vendor/github.com/klauspost/compress/s2/dict.go b/vendor/github.com/klauspost/compress/s2/dict.go
new file mode 100644
index 00000000..24f7ce80
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/s2/dict.go
@@ -0,0 +1,331 @@
+// Copyright (c) 2022+ Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package s2
+
+import (
+ "bytes"
+ "encoding/binary"
+ "sync"
+)
+
+const (
+ // MinDictSize is the minimum dictionary size when repeat has been read.
+ MinDictSize = 16
+
+ // MaxDictSize is the maximum dictionary size when repeat has been read.
+ MaxDictSize = 65536
+
+ // MaxDictSrcOffset is the maximum offset where a dictionary entry can start.
+ MaxDictSrcOffset = 65535
+)
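+
+// Illustrative note: NewDict applies these bounds to the dictionary content
+// itself, i.e. after the leading repeat uvarint has been stripped. A rough
+// pre-check could look like this ("content" is a placeholder):
+//
+//	if len(content) < MinDictSize || len(content) > MaxDictSize {
+//		// NewDict would reject a dictionary with this content length.
+//	}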
+
+// Dict contains a dictionary that can be used for encoding and decoding s2 blocks.
+type Dict struct {
+ dict []byte
+ repeat int // Repeat as index of dict
+
+ fast, better, best sync.Once
+ fastTable *[1 << 14]uint16
+
+ betterTableShort *[1 << 14]uint16
+ betterTableLong *[1 << 17]uint16
+
+ bestTableShort *[1 << 16]uint32
+ bestTableLong *[1 << 19]uint32
+}
+
+// NewDict will read a dictionary.
+// It will return nil if the dictionary is invalid.
+func NewDict(dict []byte) *Dict {
+ if len(dict) == 0 {
+ return nil
+ }
+ var d Dict
+ // Repeat is the first value of the dict
+ r, n := binary.Uvarint(dict)
+ if n <= 0 {
+ return nil
+ }
+ dict = dict[n:]
+ d.dict = dict
+ if cap(d.dict) < len(d.dict)+16 {
+ d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
+ }
+ if len(dict) < MinDictSize || len(dict) > MaxDictSize {
+ return nil
+ }
+ d.repeat = int(r)
+ if d.repeat > len(dict) {
+ return nil
+ }
+ return &d
+}
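+
+// Usage sketch (illustrative only; "stored" is a placeholder for bytes
+// previously produced by Dict.Bytes):
+//
+//	d := s2.NewDict(stored)
+//	if d == nil {
+//		// Invalid input: empty data, a bad repeat uvarint, content outside
+//		// [MinDictSize, MaxDictSize], or a repeat beyond the content.
+//	}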
+
+// Bytes will return a serialized version of the dictionary.
+// The output can be sent to NewDict.
+func (d *Dict) Bytes() []byte {
+ dst := make([]byte, binary.MaxVarintLen16+len(d.dict))
+ return append(dst[:binary.PutUvarint(dst, uint64(d.repeat))], d.dict...)
+}
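+
+// Round-trip sketch (illustrative only): a dictionary can be persisted next to
+// the compressed data and restored later.
+//
+//	blob := d.Bytes()
+//	restored := s2.NewDict(blob)
+//	// NewDict returns a usable *Dict for any well-formed blob produced by Bytes.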
+
+// MakeDict will create a dictionary.
+// 'data' must be at least MinDictSize.
+// If data is longer than MaxDictSize, only the last MaxDictSize bytes will be used.
+// If searchStart is set, the initial repeat value is set to the last match of
+// this content, so searchStart should match the typical start of a block.
+// If no full match is found, progressively shorter matches are attempted.
+// If at least 4 bytes cannot be matched, repeat is set to the start of the block.
+func MakeDict(data []byte, searchStart []byte) *Dict {
+ if len(data) == 0 {
+ return nil
+ }
+ if len(data) > MaxDictSize {
+ data = data[len(data)-MaxDictSize:]
+ }
+ var d Dict
+ dict := data
+ d.dict = dict
+ if cap(d.dict) < len(d.dict)+16 {
+ d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
+ }
+ if len(dict) < MinDictSize {
+ return nil
+ }
+
+ // Find the longest match possible, last entry if multiple.
+ for s := len(searchStart); s > 4; s-- {
+ if idx := bytes.LastIndex(data, searchStart[:s]); idx >= 0 && idx <= len(data)-8 {
+ d.repeat = idx
+ break
+ }
+ }
+
+ return &d
+}
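+
+// Usage sketch (illustrative only; "corpus" and "typicalPrefix" are
+// placeholders for caller-provided samples):
+//
+//	// Build a dictionary from representative data. If blocks usually start
+//	// with a known prefix, pass it so the initial repeat points at its last
+//	// occurrence in the dictionary.
+//	d := s2.MakeDict(corpus, typicalPrefix)
+//	if d == nil {
+//		// corpus was empty or shorter than MinDictSize.
+//	}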
+
+// Encode returns the encoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire encoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+//
+// Blocks require the same amount of memory to decode as to encode,
+// and do not allow concurrent decoding.
+// Also note that blocks do not contain CRC information, so corruption may be undetected.
+//
+// If you need to encode larger amounts of data, consider using
+// the streaming interface which gives all of these features.
+func (d *Dict) Encode(dst, src []byte) []byte {
+ if n := MaxEncodedLen(len(src)); n < 0 {
+ panic(ErrTooLarge)
+ } else if cap(dst) < n {
+ dst = make([]byte, n)
+ } else {
+ dst = dst[:n]
+ }
+
+ // The block starts with the varint-encoded length of the decompressed bytes.
+ dstP := binary.PutUvarint(dst, uint64(len(src)))
+
+ if len(src) == 0 {
+ return dst[:dstP]
+ }
+ if len(src) < minNonLiteralBlockSize {
+ dstP += emitLiteral(dst[dstP:], src)
+ return dst[:dstP]
+ }
+ n := encodeBlockDictGo(dst[dstP:], src, d)
+ if n > 0 {
+ dstP += n
+ return dst[:dstP]
+ }
+ // Not compressible
+ dstP += emitLiteral(dst[dstP:], src)
+ return dst[:dstP]
+}
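+
+// Usage sketch (illustrative only; "payload" is a placeholder):
+//
+//	encoded := d.Encode(nil, payload) // nil dst lets Encode allocate
+//	// Decoding requires the same dictionary; see Dict.Decode below.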
+
+// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire encoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// EncodeBetter compresses better than Encode but typically with a
+// 10-40% speed decrease on both compression and decompression.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+//
+// Blocks require the same amount of memory to decode as to encode,
+// and do not allow concurrent decoding.
+// Also note that blocks do not contain CRC information, so corruption may be undetected.
+//
+// If you need to encode larger amounts of data, consider using
+// the streaming interface which gives all of these features.
+func (d *Dict) EncodeBetter(dst, src []byte) []byte {
+ if n := MaxEncodedLen(len(src)); n < 0 {
+ panic(ErrTooLarge)
+ } else if len(dst) < n {
+ dst = make([]byte, n)
+ }
+
+ // The block starts with the varint-encoded length of the decompressed bytes.
+ dstP := binary.PutUvarint(dst, uint64(len(src)))
+
+ if len(src) == 0 {
+ return dst[:dstP]
+ }
+ if len(src) < minNonLiteralBlockSize {
+ dstP += emitLiteral(dst[dstP:], src)
+ return dst[:dstP]
+ }
+ n := encodeBlockBetterDict(dst[dstP:], src, d)
+ if n > 0 {
+ dstP += n
+ return dst[:dstP]
+ }
+ // Not compressible
+ dstP += emitLiteral(dst[dstP:], src)
+ return dst[:dstP]
+}
+
+// EncodeBest returns the encoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire encoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// EncodeBest compresses as well as reasonably possible, but with a
+// large speed decrease.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+//
+// Blocks require the same amount of memory to decode as to encode,
+// and do not allow concurrent decoding.
+// Also note that blocks do not contain CRC information, so corruption may be undetected.
+//
+// If you need to encode larger amounts of data, consider using
+// the streaming interface which gives all of these features.
+func (d *Dict) EncodeBest(dst, src []byte) []byte {
+ if n := MaxEncodedLen(len(src)); n < 0 {
+ panic(ErrTooLarge)
+ } else if len(dst) < n {
+ dst = make([]byte, n)
+ }
+
+ // The block starts with the varint-encoded length of the decompressed bytes.
+ dstP := binary.PutUvarint(dst, uint64(len(src)))
+
+ if len(src) == 0 {
+ return dst[:dstP]
+ }
+ if len(src) < minNonLiteralBlockSize {
+ dstP += emitLiteral(dst[dstP:], src)
+ return dst[:dstP]
+ }
+ n := encodeBlockBest(dst[dstP:], src, d)
+ if n > 0 {
+ dstP += n
+ return dst[:dstP]
+ }
+ // Not compressible
+ dstP += emitLiteral(dst[dstP:], src)
+ return dst[:dstP]
+}
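+
+// Choosing a level (illustrative only; "payload" is a placeholder): Encode is
+// the fastest, EncodeBetter trades speed for ratio, and EncodeBest maximizes
+// ratio. All three produce blocks that must be decoded with the same dictionary.
+//
+//	fast := d.Encode(nil, payload)
+//	better := d.EncodeBetter(nil, payload)
+//	best := d.EncodeBest(nil, payload)
+//	// Typically len(best) <= len(better) <= len(fast).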
+
+// Decode returns the decoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire decoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+func (d *Dict) Decode(dst, src []byte) ([]byte, error) {
+ dLen, s, err := decodedLen(src)
+ if err != nil {
+ return nil, err
+ }
+ if dLen <= cap(dst) {
+ dst = dst[:dLen]
+ } else {
+ dst = make([]byte, dLen)
+ }
+ if s2DecodeDict(dst, src[s:], d) != 0 {
+ return nil, ErrCorrupt
+ }
+ return dst, nil
+}
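+
+// Round-trip sketch (illustrative only; "payload" is a placeholder):
+//
+//	encoded := d.Encode(nil, payload)
+//	decoded, err := d.Decode(nil, encoded)
+//	if err != nil {
+//		// ErrCorrupt: the block could not be decoded.
+//	}
+//	// On success, bytes.Equal(decoded, payload) is true.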
+
+func (d *Dict) initFast() {
+ d.fast.Do(func() {
+ const (
+ tableBits = 14
+ maxTableSize = 1 << tableBits
+ )
+
+ var table [maxTableSize]uint16
+ // We stop so any entry of length 8 can always be read.
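+ // Each iteration seeds three consecutive start positions, hashing the
+ // 6-byte window that begins at each of them (based on the load64/hash6
+ // usage below).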
+ for i := 0; i < len(d.dict)-8-2; i += 3 {
+ x0 := load64(d.dict, i)
+ h0 := hash6(x0, tableBits)
+ h1 := hash6(x0>>8, tableBits)
+ h2 := hash6(x0>>16, tableBits)
+ table[h0] = uint16(i)
+ table[h1] = uint16(i + 1)
+ table[h2] = uint16(i + 2)
+ }
+ d.fastTable = &table
+ })
+}
+
+func (d *Dict) initBetter() {
+ d.better.Do(func() {
+ const (
+ // Long hash matches.
+ lTableBits = 17
+ maxLTableSize = 1 << lTableBits
+
+ // Short hash matches.
+ sTableBits = 14
+ maxSTableSize = 1 << sTableBits
+ )
+
+ var lTable [maxLTableSize]uint16
+ var sTable [maxSTableSize]uint16
+
+ // We stop so any entry of length 8 can always be read.
+ for i := 0; i < len(d.dict)-8; i++ {
+ cv := load64(d.dict, i)
+ lTable[hash7(cv, lTableBits)] = uint16(i)
+ sTable[hash4(cv, sTableBits)] = uint16(i)
+ }
+ d.betterTableShort = &sTable
+ d.betterTableLong = &lTable
+ })
+}
+
+func (d *Dict) initBest() {
+ d.best.Do(func() {
+ const (
+ // Long hash matches.
+ lTableBits = 19
+ maxLTableSize = 1 << lTableBits
+
+ // Short hash matches.
+ sTableBits = 16
+ maxSTableSize = 1 << sTableBits
+ )
+
+ var lTable [maxLTableSize]uint32
+ var sTable [maxSTableSize]uint32
+
+ // We stop so any entry of length 8 can always be read.
+ for i := 0; i < len(d.dict)-8; i++ {
+ cv := load64(d.dict, i)
+ hashL := hash8(cv, lTableBits)
+ hashS := hash4(cv, sTableBits)
+ candidateL := lTable[hashL]
+ candidateS := sTable[hashS]
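+ // Keep two candidates per bucket: the new position goes in the lower
+ // 16 bits and the previous entry is packed into the upper 16 bits.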
+ lTable[hashL] = uint32(i) | candidateL<<16
+ sTable[hashS] = uint32(i) | candidateS<<16
+ }
+ d.bestTableShort = &sTable
+ d.bestTableLong = &lTable
+ })
+}