diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/zstd/encoder.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/zstd/encoder.go | 599 |
1 files changed, 599 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go new file mode 100644 index 00000000..e6e31596 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -0,0 +1,599 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "crypto/rand" + "fmt" + "io" + rdebug "runtime/debug" + "sync" + + "github.com/klauspost/compress/zstd/internal/xxhash" +) + +// Encoder provides encoding to Zstandard. +// An Encoder can be used for either compressing a stream via the +// io.WriteCloser interface supported by the Encoder or as multiple independent +// tasks via the EncodeAll function. +// Smaller encodes are encouraged to use the EncodeAll function. +// Use NewWriter to create a new instance. +type Encoder struct { + o encoderOptions + encoders chan encoder + state encoderState + init sync.Once +} + +type encoder interface { + Encode(blk *blockEnc, src []byte) + EncodeNoHist(blk *blockEnc, src []byte) + Block() *blockEnc + CRC() *xxhash.Digest + AppendCRC([]byte) []byte + WindowSize(size int64) int32 + UseBlock(*blockEnc) + Reset(d *dict, singleBlock bool) +} + +type encoderState struct { + w io.Writer + filling []byte + current []byte + previous []byte + encoder encoder + writing *blockEnc + err error + writeErr error + nWritten int64 + nInput int64 + frameContentSize int64 + headerWritten bool + eofWritten bool + fullFrameWritten bool + + // This waitgroup indicates an encode is running. + wg sync.WaitGroup + // This waitgroup indicates we have a block encoding/writing. + wWg sync.WaitGroup +} + +// NewWriter will create a new Zstandard encoder. +// If the encoder will be used for encoding blocks a nil writer can be used. +func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) { + initPredefined() + var e Encoder + e.o.setDefault() + for _, o := range opts { + err := o(&e.o) + if err != nil { + return nil, err + } + } + if w != nil { + e.Reset(w) + } + return &e, nil +} + +func (e *Encoder) initialize() { + if e.o.concurrent == 0 { + e.o.setDefault() + } + e.encoders = make(chan encoder, e.o.concurrent) + for i := 0; i < e.o.concurrent; i++ { + enc := e.o.encoder() + e.encoders <- enc + } +} + +// Reset will re-initialize the writer and new writes will encode to the supplied writer +// as a new, independent stream. +func (e *Encoder) Reset(w io.Writer) { + s := &e.state + s.wg.Wait() + s.wWg.Wait() + if cap(s.filling) == 0 { + s.filling = make([]byte, 0, e.o.blockSize) + } + if cap(s.current) == 0 { + s.current = make([]byte, 0, e.o.blockSize) + } + if cap(s.previous) == 0 { + s.previous = make([]byte, 0, e.o.blockSize) + } + if s.encoder == nil { + s.encoder = e.o.encoder() + } + if s.writing == nil { + s.writing = &blockEnc{lowMem: e.o.lowMem} + s.writing.init() + } + s.writing.initNewEncode() + s.filling = s.filling[:0] + s.current = s.current[:0] + s.previous = s.previous[:0] + s.encoder.Reset(e.o.dict, false) + s.headerWritten = false + s.eofWritten = false + s.fullFrameWritten = false + s.w = w + s.err = nil + s.nWritten = 0 + s.nInput = 0 + s.writeErr = nil + s.frameContentSize = 0 +} + +// ResetContentSize will reset and set a content size for the next stream. +// If the bytes written does not match the size given an error will be returned +// when calling Close(). +// This is removed when Reset is called. +// Sizes <= 0 results in no content size set. +func (e *Encoder) ResetContentSize(w io.Writer, size int64) { + e.Reset(w) + if size >= 0 { + e.state.frameContentSize = size + } +} + +// Write data to the encoder. +// Input data will be buffered and as the buffer fills up +// content will be compressed and written to the output. +// When done writing, use Close to flush the remaining output +// and write CRC if requested. +func (e *Encoder) Write(p []byte) (n int, err error) { + s := &e.state + for len(p) > 0 { + if len(p)+len(s.filling) < e.o.blockSize { + if e.o.crc { + _, _ = s.encoder.CRC().Write(p) + } + s.filling = append(s.filling, p...) + return n + len(p), nil + } + add := p + if len(p)+len(s.filling) > e.o.blockSize { + add = add[:e.o.blockSize-len(s.filling)] + } + if e.o.crc { + _, _ = s.encoder.CRC().Write(add) + } + s.filling = append(s.filling, add...) + p = p[len(add):] + n += len(add) + if len(s.filling) < e.o.blockSize { + return n, nil + } + err := e.nextBlock(false) + if err != nil { + return n, err + } + if debugAsserts && len(s.filling) > 0 { + panic(len(s.filling)) + } + } + return n, nil +} + +// nextBlock will synchronize and start compressing input in e.state.filling. +// If an error has occurred during encoding it will be returned. +func (e *Encoder) nextBlock(final bool) error { + s := &e.state + // Wait for current block. + s.wg.Wait() + if s.err != nil { + return s.err + } + if len(s.filling) > e.o.blockSize { + return fmt.Errorf("block > maxStoreBlockSize") + } + if !s.headerWritten { + // If we have a single block encode, do a sync compression. + if final && len(s.filling) == 0 && !e.o.fullZero { + s.headerWritten = true + s.fullFrameWritten = true + s.eofWritten = true + return nil + } + if final && len(s.filling) > 0 { + s.current = e.EncodeAll(s.filling, s.current[:0]) + var n2 int + n2, s.err = s.w.Write(s.current) + if s.err != nil { + return s.err + } + s.nWritten += int64(n2) + s.nInput += int64(len(s.filling)) + s.current = s.current[:0] + s.filling = s.filling[:0] + s.headerWritten = true + s.fullFrameWritten = true + s.eofWritten = true + return nil + } + + var tmp [maxHeaderSize]byte + fh := frameHeader{ + ContentSize: uint64(s.frameContentSize), + WindowSize: uint32(s.encoder.WindowSize(s.frameContentSize)), + SingleSegment: false, + Checksum: e.o.crc, + DictID: e.o.dict.ID(), + } + + dst, err := fh.appendTo(tmp[:0]) + if err != nil { + return err + } + s.headerWritten = true + s.wWg.Wait() + var n2 int + n2, s.err = s.w.Write(dst) + if s.err != nil { + return s.err + } + s.nWritten += int64(n2) + } + if s.eofWritten { + // Ensure we only write it once. + final = false + } + + if len(s.filling) == 0 { + // Final block, but no data. + if final { + enc := s.encoder + blk := enc.Block() + blk.reset(nil) + blk.last = true + blk.encodeRaw(nil) + s.wWg.Wait() + _, s.err = s.w.Write(blk.output) + s.nWritten += int64(len(blk.output)) + s.eofWritten = true + } + return s.err + } + + // Move blocks forward. + s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current + s.nInput += int64(len(s.current)) + s.wg.Add(1) + go func(src []byte) { + if debugEncoder { + println("Adding block,", len(src), "bytes, final:", final) + } + defer func() { + if r := recover(); r != nil { + s.err = fmt.Errorf("panic while encoding: %v", r) + rdebug.PrintStack() + } + s.wg.Done() + }() + enc := s.encoder + blk := enc.Block() + enc.Encode(blk, src) + blk.last = final + if final { + s.eofWritten = true + } + // Wait for pending writes. + s.wWg.Wait() + if s.writeErr != nil { + s.err = s.writeErr + return + } + // Transfer encoders from previous write block. + blk.swapEncoders(s.writing) + // Transfer recent offsets to next. + enc.UseBlock(s.writing) + s.writing = blk + s.wWg.Add(1) + go func() { + defer func() { + if r := recover(); r != nil { + s.writeErr = fmt.Errorf("panic while encoding/writing: %v", r) + rdebug.PrintStack() + } + s.wWg.Done() + }() + err := errIncompressible + // If we got the exact same number of literals as input, + // assume the literals cannot be compressed. + if len(src) != len(blk.literals) || len(src) != e.o.blockSize { + err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) + } + switch err { + case errIncompressible: + if debugEncoder { + println("Storing incompressible block as raw") + } + blk.encodeRaw(src) + // In fast mode, we do not transfer offsets, so we don't have to deal with changing the. + case nil: + default: + s.writeErr = err + return + } + _, s.writeErr = s.w.Write(blk.output) + s.nWritten += int64(len(blk.output)) + }() + }(s.current) + return nil +} + +// ReadFrom reads data from r until EOF or error. +// The return value n is the number of bytes read. +// Any error except io.EOF encountered during the read is also returned. +// +// The Copy function uses ReaderFrom if available. +func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { + if debugEncoder { + println("Using ReadFrom") + } + + // Flush any current writes. + if len(e.state.filling) > 0 { + if err := e.nextBlock(false); err != nil { + return 0, err + } + } + e.state.filling = e.state.filling[:e.o.blockSize] + src := e.state.filling + for { + n2, err := r.Read(src) + if e.o.crc { + _, _ = e.state.encoder.CRC().Write(src[:n2]) + } + // src is now the unfilled part... + src = src[n2:] + n += int64(n2) + switch err { + case io.EOF: + e.state.filling = e.state.filling[:len(e.state.filling)-len(src)] + if debugEncoder { + println("ReadFrom: got EOF final block:", len(e.state.filling)) + } + return n, nil + case nil: + default: + if debugEncoder { + println("ReadFrom: got error:", err) + } + e.state.err = err + return n, err + } + if len(src) > 0 { + if debugEncoder { + println("ReadFrom: got space left in source:", len(src)) + } + continue + } + err = e.nextBlock(false) + if err != nil { + return n, err + } + e.state.filling = e.state.filling[:e.o.blockSize] + src = e.state.filling + } +} + +// Flush will send the currently written data to output +// and block until everything has been written. +// This should only be used on rare occasions where pushing the currently queued data is critical. +func (e *Encoder) Flush() error { + s := &e.state + if len(s.filling) > 0 { + err := e.nextBlock(false) + if err != nil { + return err + } + } + s.wg.Wait() + s.wWg.Wait() + if s.err != nil { + return s.err + } + return s.writeErr +} + +// Close will flush the final output and close the stream. +// The function will block until everything has been written. +// The Encoder can still be re-used after calling this. +func (e *Encoder) Close() error { + s := &e.state + if s.encoder == nil { + return nil + } + err := e.nextBlock(true) + if err != nil { + return err + } + if s.frameContentSize > 0 { + if s.nInput != s.frameContentSize { + return fmt.Errorf("frame content size %d given, but %d bytes was written", s.frameContentSize, s.nInput) + } + } + if e.state.fullFrameWritten { + return s.err + } + s.wg.Wait() + s.wWg.Wait() + + if s.err != nil { + return s.err + } + if s.writeErr != nil { + return s.writeErr + } + + // Write CRC + if e.o.crc && s.err == nil { + // heap alloc. + var tmp [4]byte + _, s.err = s.w.Write(s.encoder.AppendCRC(tmp[:0])) + s.nWritten += 4 + } + + // Add padding with content from crypto/rand.Reader + if s.err == nil && e.o.pad > 0 { + add := calcSkippableFrame(s.nWritten, int64(e.o.pad)) + frame, err := skippableFrame(s.filling[:0], add, rand.Reader) + if err != nil { + return err + } + _, s.err = s.w.Write(frame) + } + return s.err +} + +// EncodeAll will encode all input in src and append it to dst. +// This function can be called concurrently, but each call will only run on a single goroutine. +// If empty input is given, nothing is returned, unless WithZeroFrames is specified. +// Encoded blocks can be concatenated and the result will be the combined input stream. +// Data compressed with EncodeAll can be decoded with the Decoder, +// using either a stream or DecodeAll. +func (e *Encoder) EncodeAll(src, dst []byte) []byte { + if len(src) == 0 { + if e.o.fullZero { + // Add frame header. + fh := frameHeader{ + ContentSize: 0, + WindowSize: MinWindowSize, + SingleSegment: true, + // Adding a checksum would be a waste of space. + Checksum: false, + DictID: 0, + } + dst, _ = fh.appendTo(dst) + + // Write raw block as last one only. + var blk blockHeader + blk.setSize(0) + blk.setType(blockTypeRaw) + blk.setLast(true) + dst = blk.appendTo(dst) + } + return dst + } + e.init.Do(e.initialize) + enc := <-e.encoders + defer func() { + // Release encoder reference to last block. + // If a non-single block is needed the encoder will reset again. + e.encoders <- enc + }() + // Use single segments when above minimum window and below 1MB. + single := len(src) < 1<<20 && len(src) > MinWindowSize + if e.o.single != nil { + single = *e.o.single + } + fh := frameHeader{ + ContentSize: uint64(len(src)), + WindowSize: uint32(enc.WindowSize(int64(len(src)))), + SingleSegment: single, + Checksum: e.o.crc, + DictID: e.o.dict.ID(), + } + + // If less than 1MB, allocate a buffer up front. + if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem { + dst = make([]byte, 0, len(src)) + } + dst, err := fh.appendTo(dst) + if err != nil { + panic(err) + } + + // If we can do everything in one block, prefer that. + if len(src) <= maxCompressedBlockSize { + enc.Reset(e.o.dict, true) + // Slightly faster with no history and everything in one block. + if e.o.crc { + _, _ = enc.CRC().Write(src) + } + blk := enc.Block() + blk.last = true + if e.o.dict == nil { + enc.EncodeNoHist(blk, src) + } else { + enc.Encode(blk, src) + } + + // If we got the exact same number of literals as input, + // assume the literals cannot be compressed. + err := errIncompressible + oldout := blk.output + if len(blk.literals) != len(src) || len(src) != e.o.blockSize { + // Output directly to dst + blk.output = dst + err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) + } + + switch err { + case errIncompressible: + if debugEncoder { + println("Storing incompressible block as raw") + } + dst = blk.encodeRawTo(dst, src) + case nil: + dst = blk.output + default: + panic(err) + } + blk.output = oldout + } else { + enc.Reset(e.o.dict, false) + blk := enc.Block() + for len(src) > 0 { + todo := src + if len(todo) > e.o.blockSize { + todo = todo[:e.o.blockSize] + } + src = src[len(todo):] + if e.o.crc { + _, _ = enc.CRC().Write(todo) + } + blk.pushOffsets() + enc.Encode(blk, todo) + if len(src) == 0 { + blk.last = true + } + err := errIncompressible + // If we got the exact same number of literals as input, + // assume the literals cannot be compressed. + if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize { + err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy) + } + + switch err { + case errIncompressible: + if debugEncoder { + println("Storing incompressible block as raw") + } + dst = blk.encodeRawTo(dst, todo) + blk.popOffsets() + case nil: + dst = append(dst, blk.output...) + default: + panic(err) + } + blk.reset(nil) + } + } + if e.o.crc { + dst = enc.AppendCRC(dst) + } + // Add padding with content from crypto/rand.Reader + if e.o.pad > 0 { + add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad)) + dst, err = skippableFrame(dst, add, rand.Reader) + if err != nil { + panic(err) + } + } + return dst +} |