summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/klauspost/compress/huff0/huff0.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/compress/huff0/huff0.go')
-rw-r--r--vendor/github.com/klauspost/compress/huff0/huff0.go335
1 files changed, 335 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go
new file mode 100644
index 00000000..3ee00ecb
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@@ -0,0 +1,335 @@
+// Package huff0 provides fast huffman encoding as used in zstd.
+//
+// See README.md at https://github.com/klauspost/compress/tree/master/huff0 for details.
+package huff0
+
+import (
+ "errors"
+ "fmt"
+ "math"
+ "math/bits"
+
+ "github.com/klauspost/compress/fse"
+)
+
+const (
+ maxSymbolValue = 255
+
+ // zstandard limits tablelog to 11, see:
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#huffman-tree-description
+ tableLogMax = 11
+ tableLogDefault = 11
+ minTablelog = 5
+ huffNodesLen = 512
+
+ // BlockSizeMax is maximum input size for a single block uncompressed.
+ BlockSizeMax = 1<<18 - 1
+)
+
+var (
+ // ErrIncompressible is returned when input is judged to be too hard to compress.
+ ErrIncompressible = errors.New("input is not compressible")
+
+ // ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
+ ErrUseRLE = errors.New("input is single value repeated")
+
+ // ErrTooBig is return if input is too large for a single block.
+ ErrTooBig = errors.New("input too big")
+
+ // ErrMaxDecodedSizeExceeded is return if input is too large for a single block.
+ ErrMaxDecodedSizeExceeded = errors.New("maximum output size exceeded")
+)
+
+type ReusePolicy uint8
+
+const (
+ // ReusePolicyAllow will allow reuse if it produces smaller output.
+ ReusePolicyAllow ReusePolicy = iota
+
+ // ReusePolicyPrefer will re-use aggressively if possible.
+ // This will not check if a new table will produce smaller output,
+ // except if the current table is impossible to use or
+ // compressed output is bigger than input.
+ ReusePolicyPrefer
+
+ // ReusePolicyNone will disable re-use of tables.
+ // This is slightly faster than ReusePolicyAllow but may produce larger output.
+ ReusePolicyNone
+
+ // ReusePolicyMust must allow reuse and produce smaller output.
+ ReusePolicyMust
+)
+
+type Scratch struct {
+ count [maxSymbolValue + 1]uint32
+
+ // Per block parameters.
+ // These can be used to override compression parameters of the block.
+ // Do not touch, unless you know what you are doing.
+
+ // Out is output buffer.
+ // If the scratch is re-used before the caller is done processing the output,
+ // set this field to nil.
+ // Otherwise the output buffer will be re-used for next Compression/Decompression step
+ // and allocation will be avoided.
+ Out []byte
+
+ // OutTable will contain the table data only, if a new table has been generated.
+ // Slice of the returned data.
+ OutTable []byte
+
+ // OutData will contain the compressed data.
+ // Slice of the returned data.
+ OutData []byte
+
+ // MaxDecodedSize will set the maximum allowed output size.
+ // This value will automatically be set to BlockSizeMax if not set.
+ // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
+ MaxDecodedSize int
+
+ br byteReader
+
+ // MaxSymbolValue will override the maximum symbol value of the next block.
+ MaxSymbolValue uint8
+
+ // TableLog will attempt to override the tablelog for the next block.
+ // Must be <= 11 and >= 5.
+ TableLog uint8
+
+ // Reuse will specify the reuse policy
+ Reuse ReusePolicy
+
+ // WantLogLess allows to specify a log 2 reduction that should at least be achieved,
+ // otherwise the block will be returned as incompressible.
+ // The reduction should then at least be (input size >> WantLogLess)
+ // If WantLogLess == 0 any improvement will do.
+ WantLogLess uint8
+
+ symbolLen uint16 // Length of active part of the symbol table.
+ maxCount int // count of the most probable symbol
+ clearCount bool // clear count
+ actualTableLog uint8 // Selected tablelog.
+ prevTableLog uint8 // Tablelog for previous table
+ prevTable cTable // Table used for previous compression.
+ cTable cTable // compression table
+ dt dTable // decompression table
+ nodes []nodeElt
+ tmpOut [4][]byte
+ fse *fse.Scratch
+ huffWeight [maxSymbolValue + 1]byte
+}
+
+// TransferCTable will transfer the previously used compression table.
+func (s *Scratch) TransferCTable(src *Scratch) {
+ if cap(s.prevTable) < len(src.prevTable) {
+ s.prevTable = make(cTable, 0, maxSymbolValue+1)
+ }
+ s.prevTable = s.prevTable[:len(src.prevTable)]
+ copy(s.prevTable, src.prevTable)
+ s.prevTableLog = src.prevTableLog
+}
+
+func (s *Scratch) prepare(in []byte) (*Scratch, error) {
+ if len(in) > BlockSizeMax {
+ return nil, ErrTooBig
+ }
+ if s == nil {
+ s = &Scratch{}
+ }
+ if s.MaxSymbolValue == 0 {
+ s.MaxSymbolValue = maxSymbolValue
+ }
+ if s.TableLog == 0 {
+ s.TableLog = tableLogDefault
+ }
+ if s.TableLog > tableLogMax || s.TableLog < minTablelog {
+ return nil, fmt.Errorf(" invalid tableLog %d (%d -> %d)", s.TableLog, minTablelog, tableLogMax)
+ }
+ if s.MaxDecodedSize <= 0 || s.MaxDecodedSize > BlockSizeMax {
+ s.MaxDecodedSize = BlockSizeMax
+ }
+ if s.clearCount && s.maxCount == 0 {
+ for i := range s.count {
+ s.count[i] = 0
+ }
+ s.clearCount = false
+ }
+ if cap(s.Out) == 0 {
+ s.Out = make([]byte, 0, len(in))
+ }
+ s.Out = s.Out[:0]
+
+ s.OutTable = nil
+ s.OutData = nil
+ if cap(s.nodes) < huffNodesLen+1 {
+ s.nodes = make([]nodeElt, 0, huffNodesLen+1)
+ }
+ s.nodes = s.nodes[:0]
+ if s.fse == nil {
+ s.fse = &fse.Scratch{}
+ }
+ s.br.init(in)
+
+ return s, nil
+}
+
+type cTable []cTableEntry
+
+func (c cTable) write(s *Scratch) error {
+ var (
+ // precomputed conversion table
+ bitsToWeight [tableLogMax + 1]byte
+ huffLog = s.actualTableLog
+ // last weight is not saved.
+ maxSymbolValue = uint8(s.symbolLen - 1)
+ huffWeight = s.huffWeight[:256]
+ )
+ const (
+ maxFSETableLog = 6
+ )
+ // convert to weight
+ bitsToWeight[0] = 0
+ for n := uint8(1); n < huffLog+1; n++ {
+ bitsToWeight[n] = huffLog + 1 - n
+ }
+
+ // Acquire histogram for FSE.
+ hist := s.fse.Histogram()
+ hist = hist[:256]
+ for i := range hist[:16] {
+ hist[i] = 0
+ }
+ for n := uint8(0); n < maxSymbolValue; n++ {
+ v := bitsToWeight[c[n].nBits] & 15
+ huffWeight[n] = v
+ hist[v]++
+ }
+
+ // FSE compress if feasible.
+ if maxSymbolValue >= 2 {
+ huffMaxCnt := uint32(0)
+ huffMax := uint8(0)
+ for i, v := range hist[:16] {
+ if v == 0 {
+ continue
+ }
+ huffMax = byte(i)
+ if v > huffMaxCnt {
+ huffMaxCnt = v
+ }
+ }
+ s.fse.HistogramFinished(huffMax, int(huffMaxCnt))
+ s.fse.TableLog = maxFSETableLog
+ b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse)
+ if err == nil && len(b) < int(s.symbolLen>>1) {
+ s.Out = append(s.Out, uint8(len(b)))
+ s.Out = append(s.Out, b...)
+ return nil
+ }
+ // Unable to compress (RLE/uncompressible)
+ }
+ // write raw values as 4-bits (max : 15)
+ if maxSymbolValue > (256 - 128) {
+ // should not happen : likely means source cannot be compressed
+ return ErrIncompressible
+ }
+ op := s.Out
+ // special case, pack weights 4 bits/weight.
+ op = append(op, 128|(maxSymbolValue-1))
+ // be sure it doesn't cause msan issue in final combination
+ huffWeight[maxSymbolValue] = 0
+ for n := uint16(0); n < uint16(maxSymbolValue); n += 2 {
+ op = append(op, (huffWeight[n]<<4)|huffWeight[n+1])
+ }
+ s.Out = op
+ return nil
+}
+
+func (c cTable) estTableSize(s *Scratch) (sz int, err error) {
+ var (
+ // precomputed conversion table
+ bitsToWeight [tableLogMax + 1]byte
+ huffLog = s.actualTableLog
+ // last weight is not saved.
+ maxSymbolValue = uint8(s.symbolLen - 1)
+ huffWeight = s.huffWeight[:256]
+ )
+ const (
+ maxFSETableLog = 6
+ )
+ // convert to weight
+ bitsToWeight[0] = 0
+ for n := uint8(1); n < huffLog+1; n++ {
+ bitsToWeight[n] = huffLog + 1 - n
+ }
+
+ // Acquire histogram for FSE.
+ hist := s.fse.Histogram()
+ hist = hist[:256]
+ for i := range hist[:16] {
+ hist[i] = 0
+ }
+ for n := uint8(0); n < maxSymbolValue; n++ {
+ v := bitsToWeight[c[n].nBits] & 15
+ huffWeight[n] = v
+ hist[v]++
+ }
+
+ // FSE compress if feasible.
+ if maxSymbolValue >= 2 {
+ huffMaxCnt := uint32(0)
+ huffMax := uint8(0)
+ for i, v := range hist[:16] {
+ if v == 0 {
+ continue
+ }
+ huffMax = byte(i)
+ if v > huffMaxCnt {
+ huffMaxCnt = v
+ }
+ }
+ s.fse.HistogramFinished(huffMax, int(huffMaxCnt))
+ s.fse.TableLog = maxFSETableLog
+ b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse)
+ if err == nil && len(b) < int(s.symbolLen>>1) {
+ sz += 1 + len(b)
+ return sz, nil
+ }
+ // Unable to compress (RLE/uncompressible)
+ }
+ // write raw values as 4-bits (max : 15)
+ if maxSymbolValue > (256 - 128) {
+ // should not happen : likely means source cannot be compressed
+ return 0, ErrIncompressible
+ }
+ // special case, pack weights 4 bits/weight.
+ sz += 1 + int(maxSymbolValue/2)
+ return sz, nil
+}
+
+// estimateSize returns the estimated size in bytes of the input represented in the
+// histogram supplied.
+func (c cTable) estimateSize(hist []uint32) int {
+ nbBits := uint32(7)
+ for i, v := range c[:len(hist)] {
+ nbBits += uint32(v.nBits) * hist[i]
+ }
+ return int(nbBits >> 3)
+}
+
+// minSize returns the minimum possible size considering the shannon limit.
+func (s *Scratch) minSize(total int) int {
+ nbBits := float64(7)
+ fTotal := float64(total)
+ for _, v := range s.count[:s.symbolLen] {
+ n := float64(v)
+ if n > 0 {
+ nbBits += math.Log2(fTotal/n) * n
+ }
+ }
+ return int(nbBits) >> 3
+}
+
+func highBit32(val uint32) (n uint32) {
+ return uint32(bits.Len32(val) - 1)
+}