summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/klauspost/compress/zstd/enc_best.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/compress/zstd/enc_best.go')
-rw-r--r--vendor/github.com/klauspost/compress/zstd/enc_best.go64
1 files changed, 36 insertions, 28 deletions
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go
index 96028ecd..830f5ba7 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@@ -32,6 +32,7 @@ type match struct {
length int32
rep int32
est int32
+ _ [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes
}
const highScore = 25000
@@ -84,14 +85,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
)
// Protect against e.cur wraparound.
- for e.cur >= bufferReset {
+ for e.cur >= e.bufferReset-int32(len(e.hist)) {
if len(e.hist) == 0 {
- for i := range e.table[:] {
- e.table[i] = prevEntry{}
- }
- for i := range e.longTable[:] {
- e.longTable[i] = prevEntry{}
- }
+ e.table = [bestShortTableSize]prevEntry{}
+ e.longTable = [bestLongTableSize]prevEntry{}
e.cur = e.maxMatchOff
break
}
@@ -192,8 +189,8 @@ encodeLoop:
panic("offset0 was 0")
}
- bestOf := func(a, b match) match {
- if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
+ bestOf := func(a, b *match) *match {
+ if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 {
return a
}
return b
@@ -219,22 +216,26 @@ encodeLoop:
return m
}
- best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+ m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
+ m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
+ m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
+ m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)
+ best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4))
if canRepeat && best.length < goodEnough {
cv32 := uint32(cv >> 8)
spp := s + 1
- best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
- best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
- best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+ m1 := matchAt(spp-offset1, spp, cv32, 1)
+ m2 := matchAt(spp-offset2, spp, cv32, 2)
+ m3 := matchAt(spp-offset3, spp, cv32, 3)
+ best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
if best.length > 0 {
cv32 = uint32(cv >> 24)
spp += 2
- best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
- best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
- best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+ m1 := matchAt(spp-offset1, spp, cv32, 1)
+ m2 := matchAt(spp-offset2, spp, cv32, 2)
+ m3 := matchAt(spp-offset3, spp, cv32, 3)
+ best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
}
}
// Load next and check...
@@ -261,26 +262,33 @@ encodeLoop:
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
// Short at s+1
- best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+ m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
// Long at s+1, s+2
- best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
- best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
- best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+ m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
+ m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
+ m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
+ m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
+ best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5))
if false {
// Short at s+3.
// Too often worse...
- best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
+ m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
+ best = bestOf(best, &m)
}
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
candidateEnd := e.longTable[nextHashL]
- if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
- bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
- if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
- bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+ // Start check at a fixed offset to allow for a few mismatches.
+ // For this compression level 2 yields the best results.
+ const skipBeginning = 2
+ if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
+ m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+ bestEnd := bestOf(best, &m)
+ if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
+ m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+ bestEnd = bestOf(bestEnd, &m)
}
best = bestEnd
}