summary refs log tree commit diff stats
path: root/vendor/github.com/klauspost/compress/huff0
diff options
context:
space:
mode:
author Wim <wim@42.be> 2023-03-09 22:48:00 +0100
committer GitHub <noreply@github.com> 2023-03-09 22:48:00 +0100
commit 08779c29099e8940493df56d28d8aa131ac8342e (patch)
tree 7ad8ce25cf371e582137e1706dd671a6bf4342d0 /vendor/github.com/klauspost/compress/huff0
parent d5f9cdf912d43cd2a5cb243e086fbdab9a9073b0 (diff)
download matterbridge-msglm-08779c29099e8940493df56d28d8aa131ac8342e.tar.gz
matterbridge-msglm-08779c29099e8940493df56d28d8aa131ac8342e.tar.bz2
matterbridge-msglm-08779c29099e8940493df56d28d8aa131ac8342e.zip
Update dependencies (#2007)
* Update dependencies
Diffstat (limited to 'vendor/github.com/klauspost/compress/huff0')
-rw-r--r-- vendor/github.com/klauspost/compress/huff0/bitreader.go 8
-rw-r--r-- vendor/github.com/klauspost/compress/huff0/compress.go 114
-rw-r--r-- vendor/github.com/klauspost/compress/huff0/decompress.go 38
-rw-r--r-- vendor/github.com/klauspost/compress/huff0/decompress_amd64.go 4
-rw-r--r-- vendor/github.com/klauspost/compress/huff0/decompress_amd64.s 585
-rw-r--r-- vendor/github.com/klauspost/compress/huff0/decompress_generic.go 18
6 files changed, 391 insertions, 376 deletions
diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go
index 504a7be9..e36d9742 100644
--- a/vendor/github.com/klauspost/compress/huff0/bitreader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go
@@ -67,7 +67,6 @@ func (b *bitReaderBytes) fillFast() {
// 2 bounds checks.
v := b.in[b.off-4 : b.off]
- v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value |= uint64(low) << (b.bitsRead - 32)
b.bitsRead -= 32
@@ -88,8 +87,7 @@ func (b *bitReaderBytes) fill() {
return
}
if b.off > 4 {
- v := b.in[b.off-4:]
- v = v[:4]
+ v := b.in[b.off-4 : b.off]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value |= uint64(low) << (b.bitsRead - 32)
b.bitsRead -= 32
@@ -179,7 +177,6 @@ func (b *bitReaderShifted) fillFast() {
// 2 bounds checks.
v := b.in[b.off-4 : b.off]
- v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
b.bitsRead -= 32
@@ -200,8 +197,7 @@ func (b *bitReaderShifted) fill() {
return
}
if b.off > 4 {
- v := b.in[b.off-4:]
- v = v[:4]
+ v := b.in[b.off-4 : b.off]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
b.bitsRead -= 32
diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go
index 4d14542f..cdc94856 100644
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@@ -365,29 +365,29 @@ func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
m := uint32(0)
if len(s.prevTable) > 0 {
for i, v := range s.count[:] {
+ if v == 0 {
+ continue
+ }
if v > m {
m = v
}
- if v > 0 {
- s.symbolLen = uint16(i) + 1
- if i >= len(s.prevTable) {
- reuse = false
- } else {
- if s.prevTable[i].nBits == 0 {
- reuse = false
- }
- }
+ s.symbolLen = uint16(i) + 1
+ if i >= len(s.prevTable) {
+ reuse = false
+ } else if s.prevTable[i].nBits == 0 {
+ reuse = false
}
}
return int(m), reuse
}
for i, v := range s.count[:] {
+ if v == 0 {
+ continue
+ }
if v > m {
m = v
}
- if v > 0 {
- s.symbolLen = uint16(i) + 1
- }
+ s.symbolLen = uint16(i) + 1
}
return int(m), false
}
@@ -484,34 +484,35 @@ func (s *Scratch) buildCTable() error {
// Different from reference implementation.
huffNode0 := s.nodes[0 : huffNodesLen+1]
- for huffNode[nonNullRank].count == 0 {
+ for huffNode[nonNullRank].count() == 0 {
nonNullRank--
}
lowS := int16(nonNullRank)
nodeRoot := nodeNb + lowS - 1
lowN := nodeNb
- huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count
- huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb)
+ huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count())
+ huffNode[lowS].setParent(nodeNb)
+ huffNode[lowS-1].setParent(nodeNb)
nodeNb++
lowS -= 2
for n := nodeNb; n <= nodeRoot; n++ {
- huffNode[n].count = 1 << 30
+ huffNode[n].setCount(1 << 30)
}
// fake entry, strong barrier
- huffNode0[0].count = 1 << 31
+ huffNode0[0].setCount(1 << 31)
// create parents
for nodeNb <= nodeRoot {
var n1, n2 int16
- if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+ if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
n1 = lowS
lowS--
} else {
n1 = lowN
lowN++
}
- if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+ if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
n2 = lowS
lowS--
} else {
@@ -519,18 +520,19 @@ func (s *Scratch) buildCTable() error {
lowN++
}
- huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count
- huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb)
+ huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count())
+ huffNode0[n1+1].setParent(nodeNb)
+ huffNode0[n2+1].setParent(nodeNb)
nodeNb++
}
// distribute weights (unlimited tree height)
- huffNode[nodeRoot].nbBits = 0
+ huffNode[nodeRoot].setNbBits(0)
for n := nodeRoot - 1; n >= startNode; n-- {
- huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+ huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
}
for n := uint16(0); n <= nonNullRank; n++ {
- huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+ huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
}
s.actualTableLog = s.setMaxHeight(int(nonNullRank))
maxNbBits := s.actualTableLog
@@ -542,7 +544,7 @@ func (s *Scratch) buildCTable() error {
var nbPerRank [tableLogMax + 1]uint16
var valPerRank [16]uint16
for _, v := range huffNode[:nonNullRank+1] {
- nbPerRank[v.nbBits]++
+ nbPerRank[v.nbBits()]++
}
// determine starting value per rank
{
@@ -557,7 +559,7 @@ func (s *Scratch) buildCTable() error {
// push nbBits per symbol, symbol order
for _, v := range huffNode[:nonNullRank+1] {
- s.cTable[v.symbol].nBits = v.nbBits
+ s.cTable[v.symbol()].nBits = v.nbBits()
}
// assign value within rank, symbol order
@@ -603,12 +605,12 @@ func (s *Scratch) huffSort() {
pos := rank[r].current
rank[r].current++
prev := nodes[(pos-1)&huffNodesMask]
- for pos > rank[r].base && c > prev.count {
+ for pos > rank[r].base && c > prev.count() {
nodes[pos&huffNodesMask] = prev
pos--
prev = nodes[(pos-1)&huffNodesMask]
}
- nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
+ nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n))
}
}
@@ -617,7 +619,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
huffNode := s.nodes[1 : huffNodesLen+1]
//huffNode = huffNode[: huffNodesLen]
- largestBits := huffNode[lastNonNull].nbBits
+ largestBits := huffNode[lastNonNull].nbBits()
// early exit : no elt > maxNbBits
if largestBits <= maxNbBits {
@@ -627,14 +629,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
baseCost := int(1) << (largestBits - maxNbBits)
n := uint32(lastNonNull)
- for huffNode[n].nbBits > maxNbBits {
- totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits))
- huffNode[n].nbBits = maxNbBits
+ for huffNode[n].nbBits() > maxNbBits {
+ totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits()))
+ huffNode[n].setNbBits(maxNbBits)
n--
}
// n stops at huffNode[n].nbBits <= maxNbBits
- for huffNode[n].nbBits == maxNbBits {
+ for huffNode[n].nbBits() == maxNbBits {
n--
}
// n end at index of smallest symbol using < maxNbBits
@@ -655,10 +657,10 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
{
currentNbBits := maxNbBits
for pos := int(n); pos >= 0; pos-- {
- if huffNode[pos].nbBits >= currentNbBits {
+ if huffNode[pos].nbBits() >= currentNbBits {
continue
}
- currentNbBits = huffNode[pos].nbBits // < maxNbBits
+ currentNbBits = huffNode[pos].nbBits() // < maxNbBits
rankLast[maxNbBits-currentNbBits] = uint32(pos)
}
}
@@ -675,8 +677,8 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
if lowPos == noSymbol {
break
}
- highTotal := huffNode[highPos].count
- lowTotal := 2 * huffNode[lowPos].count
+ highTotal := huffNode[highPos].count()
+ lowTotal := 2 * huffNode[lowPos].count()
if highTotal <= lowTotal {
break
}
@@ -692,13 +694,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
// this rank is no longer empty
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]
}
- huffNode[rankLast[nBitsToDecrease]].nbBits++
+ huffNode[rankLast[nBitsToDecrease]].setNbBits(1 +
+ huffNode[rankLast[nBitsToDecrease]].nbBits())
if rankLast[nBitsToDecrease] == 0 {
/* special case, reached largest symbol */
rankLast[nBitsToDecrease] = noSymbol
} else {
rankLast[nBitsToDecrease]--
- if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease {
+ if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease {
rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */
}
}
@@ -706,15 +709,15 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
for totalCost < 0 { /* Sometimes, cost correction overshoot */
if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
- for huffNode[n].nbBits == maxNbBits {
+ for huffNode[n].nbBits() == maxNbBits {
n--
}
- huffNode[n+1].nbBits--
+ huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1)
rankLast[1] = n + 1
totalCost++
continue
}
- huffNode[rankLast[1]+1].nbBits--
+ huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1)
rankLast[1]++
totalCost++
}
@@ -722,9 +725,26 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
return maxNbBits
}
-type nodeElt struct {
- count uint32
- parent uint16
- symbol byte
- nbBits uint8
+// A nodeElt is the fields
+//
+// count uint32 (bits 0-31)
+// parent uint16 (bits 32-47)
+// symbol byte (bits 48-55)
+// nbBits uint8 (bits 56-63)
+//
+// packed at those bit positions into one integer so that the compiler
+// always loads and stores entire nodeElts instead of separate fields.
+type nodeElt uint64
+
+func makeNodeElt(count uint32, symbol byte) nodeElt {
+ return nodeElt(count) | nodeElt(symbol)<<48 // parent and nbBits are left zero
}
+
+func (e *nodeElt) count() uint32 { return uint32(*e) } // low 32 bits
+func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) } // bits 32-47
+func (e *nodeElt) symbol() byte { return byte(*e >> 48) } // bits 48-55
+func (e *nodeElt) nbBits() uint8 { return uint8(*e >> 56) } // top 8 bits
+
+func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) } // clears bits 0-31, then stores c there
+func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 } // p is converted to uint16 before being stored in bits 32-47
+func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 } // clears the top byte, then stores n there
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index c0c48bd7..3c0b398c 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -61,7 +61,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
b, err := fse.Decompress(in[:iSize], s.fse)
s.fse.Out = nil
if err != nil {
- return s, nil, err
+ return s, nil, fmt.Errorf("fse decompress returned: %w", err)
}
if len(b) > 255 {
return s, nil, errors.New("corrupt input: output table too large")
@@ -763,17 +763,20 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[0][:])
- copy(out[dstEvery:], buf[1][:])
- copy(out[dstEvery*2:], buf[2][:])
- copy(out[dstEvery*3:], buf[3][:])
- out = out[bufoff:]
- decoded += bufoff * 4
// There must at least be 3 buffers left.
- if len(out) < dstEvery*3 {
+ if len(out)-bufoff < dstEvery*3 {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
+ //copy(out, buf[0][:])
+ //copy(out[dstEvery:], buf[1][:])
+ //copy(out[dstEvery*2:], buf[2][:])
+ *(*[bufoff]byte)(out) = buf[0]
+ *(*[bufoff]byte)(out[dstEvery:]) = buf[1]
+ *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
+ *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
+ out = out[bufoff:]
+ decoded += bufoff * 4
}
}
if off > 0 {
@@ -997,17 +1000,22 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[0][:])
- copy(out[dstEvery:], buf[1][:])
- copy(out[dstEvery*2:], buf[2][:])
- copy(out[dstEvery*3:], buf[3][:])
- out = out[bufoff:]
- decoded += bufoff * 4
// There must at least be 3 buffers left.
- if len(out) < dstEvery*3 {
+ if len(out)-bufoff < dstEvery*3 {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
+
+ //copy(out, buf[0][:])
+ //copy(out[dstEvery:], buf[1][:])
+ //copy(out[dstEvery*2:], buf[2][:])
+ // copy(out[dstEvery*3:], buf[3][:])
+ *(*[bufoff]byte)(out) = buf[0]
+ *(*[bufoff]byte)(out[dstEvery:]) = buf[1]
+ *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
+ *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
+ out = out[bufoff:]
+ decoded += bufoff * 4
}
}
if off > 0 {
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
index 9f3e9f79..ba7e8e6b 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
@@ -14,12 +14,14 @@ import (
// decompress4x_main_loop_amd64 is an x86 assembler implementation
// of Decompress4X when tablelog > 8.
+//
//go:noescape
func decompress4x_main_loop_amd64(ctx *decompress4xContext)
// decompress4x_8b_main_loop_amd64 is an x86 assembler implementation
// of Decompress4X when tablelog <= 8 which decodes 4 entries
// per loop.
+//
//go:noescape
func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
@@ -145,11 +147,13 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
// decompress1x_main_loop_amd64 is an x86 assembler implementation
// of Decompress1X when tablelog > 8.
+//
//go:noescape
func decompress1x_main_loop_amd64(ctx *decompress1xContext)
// decompress1x_main_loop_bmi2 is an x86 with BMI2 assembler implementation
// of Decompress1X when tablelog > 8.
+//
//go:noescape
func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
index dd1a5aec..c4c7ab2d 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
@@ -1,364 +1,352 @@
// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT.
//go:build amd64 && !appengine && !noasm && gc
-// +build amd64,!appengine,!noasm,gc
// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_main_loop_amd64(SB), $0-8
- XORQ DX, DX
-
// Preload values
MOVQ ctx+0(FP), AX
MOVBQZX 8(AX), DI
- MOVQ 16(AX), SI
- MOVQ 48(AX), BX
- MOVQ 24(AX), R9
- MOVQ 32(AX), R10
- MOVQ (AX), R11
+ MOVQ 16(AX), BX
+ MOVQ 48(AX), SI
+ MOVQ 24(AX), R8
+ MOVQ 32(AX), R9
+ MOVQ (AX), R10
// Main loop
main_loop:
- MOVQ SI, R8
- CMPQ R8, BX
+ XORL DX, DX
+ CMPQ BX, SI
SETGE DL
// br0.fillFast32()
- MOVQ 32(R11), R12
- MOVBQZX 40(R11), R13
- CMPQ R13, $0x20
+ MOVQ 32(R10), R11
+ MOVBQZX 40(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill0
- MOVQ 24(R11), AX
- SUBQ $0x20, R13
+ MOVQ 24(R10), AX
+ SUBQ $0x20, R12
SUBQ $0x04, AX
- MOVQ (R11), R14
+ MOVQ (R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R14*1), R14
- MOVQ R13, CX
- SHLQ CL, R14
- MOVQ AX, 24(R11)
- ORQ R14, R12
+ MOVL (AX)(R13*1), R13
+ MOVQ R12, CX
+ SHLQ CL, R13
+ MOVQ AX, 24(R10)
+ ORQ R13, R11
- // exhausted = exhausted || (br0.off < 4)
- CMPQ AX, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br0.off < 4)
+ CMPQ AX, $0x04
+ ADCB $+0, DL
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br0.peekTopBits(peekBits)
MOVQ DI, CX
- MOVQ R12, R14
- SHRQ CL, R14
+ MOVQ R11, R13
+ SHRQ CL, R13
// v1 := table[val1&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v1.entry))
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (R8)
+ MOVW AX, (BX)
// update the bitreader structure
- MOVQ R12, 32(R11)
- MOVB R13, 40(R11)
- ADDQ R9, R8
+ MOVQ R11, 32(R10)
+ MOVB R12, 40(R10)
// br1.fillFast32()
- MOVQ 80(R11), R12
- MOVBQZX 88(R11), R13
- CMPQ R13, $0x20
+ MOVQ 80(R10), R11
+ MOVBQZX 88(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill1
- MOVQ 72(R11), AX
- SUBQ $0x20, R13
+ MOVQ 72(R10), AX
+ SUBQ $0x20, R12
SUBQ $0x04, AX
- MOVQ 48(R11), R14
+ MOVQ 48(R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R14*1), R14
- MOVQ R13, CX
- SHLQ CL, R14
- MOVQ AX, 72(R11)
- ORQ R14, R12
+ MOVL (AX)(R13*1), R13
+ MOVQ R12, CX
+ SHLQ CL, R13
+ MOVQ AX, 72(R10)
+ ORQ R13, R11
- // exhausted = exhausted || (br1.off < 4)
- CMPQ AX, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br1.off < 4)
+ CMPQ AX, $0x04
+ ADCB $+0, DL
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br1.peekTopBits(peekBits)
MOVQ DI, CX
- MOVQ R12, R14
- SHRQ CL, R14
+ MOVQ R11, R13
+ SHRQ CL, R13
// v1 := table[val1&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v1.entry))
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (R8)
+ MOVW AX, (BX)(R8*1)
// update the bitreader structure
- MOVQ R12, 80(R11)
- MOVB R13, 88(R11)
- ADDQ R9, R8
+ MOVQ R11, 80(R10)
+ MOVB R12, 88(R10)
// br2.fillFast32()
- MOVQ 128(R11), R12
- MOVBQZX 136(R11), R13
- CMPQ R13, $0x20
+ MOVQ 128(R10), R11
+ MOVBQZX 136(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill2
- MOVQ 120(R11), AX
- SUBQ $0x20, R13
+ MOVQ 120(R10), AX
+ SUBQ $0x20, R12
SUBQ $0x04, AX
- MOVQ 96(R11), R14
+ MOVQ 96(R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R14*1), R14
- MOVQ R13, CX
- SHLQ CL, R14
- MOVQ AX, 120(R11)
- ORQ R14, R12
+ MOVL (AX)(R13*1), R13
+ MOVQ R12, CX
+ SHLQ CL, R13
+ MOVQ AX, 120(R10)
+ ORQ R13, R11
- // exhausted = exhausted || (br2.off < 4)
- CMPQ AX, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br2.off < 4)
+ CMPQ AX, $0x04
+ ADCB $+0, DL
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br2.peekTopBits(peekBits)
MOVQ DI, CX
- MOVQ R12, R14
- SHRQ CL, R14
+ MOVQ R11, R13
+ SHRQ CL, R13
// v1 := table[val1&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v1.entry))
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (R8)
+ MOVW AX, (BX)(R8*2)
// update the bitreader structure
- MOVQ R12, 128(R11)
- MOVB R13, 136(R11)
- ADDQ R9, R8
+ MOVQ R11, 128(R10)
+ MOVB R12, 136(R10)
// br3.fillFast32()
- MOVQ 176(R11), R12
- MOVBQZX 184(R11), R13
- CMPQ R13, $0x20
+ MOVQ 176(R10), R11
+ MOVBQZX 184(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill3
- MOVQ 168(R11), AX
- SUBQ $0x20, R13
+ MOVQ 168(R10), AX
+ SUBQ $0x20, R12
SUBQ $0x04, AX
- MOVQ 144(R11), R14
+ MOVQ 144(R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (AX)(R14*1), R14
- MOVQ R13, CX
- SHLQ CL, R14
- MOVQ AX, 168(R11)
- ORQ R14, R12
+ MOVL (AX)(R13*1), R13
+ MOVQ R12, CX
+ SHLQ CL, R13
+ MOVQ AX, 168(R10)
+ ORQ R13, R11
- // exhausted = exhausted || (br3.off < 4)
- CMPQ AX, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br3.off < 4)
+ CMPQ AX, $0x04
+ ADCB $+0, DL
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br3.peekTopBits(peekBits)
MOVQ DI, CX
- MOVQ R12, R14
- SHRQ CL, R14
+ MOVQ R11, R13
+ SHRQ CL, R13
// v1 := table[val1&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v1.entry))
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
- MOVW AX, (R8)
+ LEAQ (R8)(R8*2), CX
+ MOVW AX, (BX)(CX*1)
// update the bitreader structure
- MOVQ R12, 176(R11)
- MOVB R13, 184(R11)
- ADDQ $0x02, SI
+ MOVQ R11, 176(R10)
+ MOVB R12, 184(R10)
+ ADDQ $0x02, BX
TESTB DL, DL
JZ main_loop
MOVQ ctx+0(FP), AX
- SUBQ 16(AX), SI
- SHLQ $0x02, SI
- MOVQ SI, 40(AX)
+ SUBQ 16(AX), BX
+ SHLQ $0x02, BX
+ MOVQ BX, 40(AX)
RET
// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
- XORQ DX, DX
-
// Preload values
MOVQ ctx+0(FP), CX
MOVBQZX 8(CX), DI
MOVQ 16(CX), BX
MOVQ 48(CX), SI
- MOVQ 24(CX), R9
- MOVQ 32(CX), R10
- MOVQ (CX), R11
+ MOVQ 24(CX), R8
+ MOVQ 32(CX), R9
+ MOVQ (CX), R10
// Main loop
main_loop:
- MOVQ BX, R8
- CMPQ R8, SI
+ XORL DX, DX
+ CMPQ BX, SI
SETGE DL
// br0.fillFast32()
- MOVQ 32(R11), R12
- MOVBQZX 40(R11), R13
- CMPQ R13, $0x20
+ MOVQ 32(R10), R11
+ MOVBQZX 40(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill0
- MOVQ 24(R11), R14
- SUBQ $0x20, R13
- SUBQ $0x04, R14
- MOVQ (R11), R15
+ MOVQ 24(R10), R13
+ SUBQ $0x20, R12
+ SUBQ $0x04, R13
+ MOVQ (R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R14)(R15*1), R15
- MOVQ R13, CX
- SHLQ CL, R15
- MOVQ R14, 24(R11)
- ORQ R15, R12
+ MOVL (R13)(R14*1), R14
+ MOVQ R12, CX
+ SHLQ CL, R14
+ MOVQ R13, 24(R10)
+ ORQ R14, R11
- // exhausted = exhausted || (br0.off < 4)
- CMPQ R14, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br0.off < 4)
+ CMPQ R13, $0x04
+ ADCB $+0, DL
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v1 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v1.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// val2 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v2 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v2.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val3 := br0.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v3 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br0.advance(uint8(v3.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@@ -366,88 +354,86 @@ skip_fill0:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
- MOVL AX, (R8)
+ MOVL AX, (BX)
// update the bitreader structure
- MOVQ R12, 32(R11)
- MOVB R13, 40(R11)
- ADDQ R9, R8
+ MOVQ R11, 32(R10)
+ MOVB R12, 40(R10)
// br1.fillFast32()
- MOVQ 80(R11), R12
- MOVBQZX 88(R11), R13
- CMPQ R13, $0x20
+ MOVQ 80(R10), R11
+ MOVBQZX 88(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill1
- MOVQ 72(R11), R14
- SUBQ $0x20, R13
- SUBQ $0x04, R14
- MOVQ 48(R11), R15
+ MOVQ 72(R10), R13
+ SUBQ $0x20, R12
+ SUBQ $0x04, R13
+ MOVQ 48(R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R14)(R15*1), R15
- MOVQ R13, CX
- SHLQ CL, R15
- MOVQ R14, 72(R11)
- ORQ R15, R12
+ MOVL (R13)(R14*1), R14
+ MOVQ R12, CX
+ SHLQ CL, R14
+ MOVQ R13, 72(R10)
+ ORQ R14, R11
- // exhausted = exhausted || (br1.off < 4)
- CMPQ R14, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br1.off < 4)
+ CMPQ R13, $0x04
+ ADCB $+0, DL
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v1 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v1.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// val2 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v2 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v2.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val3 := br1.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v3 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br1.advance(uint8(v3.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@@ -455,88 +441,86 @@ skip_fill1:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
- MOVL AX, (R8)
+ MOVL AX, (BX)(R8*1)
// update the bitreader structure
- MOVQ R12, 80(R11)
- MOVB R13, 88(R11)
- ADDQ R9, R8
+ MOVQ R11, 80(R10)
+ MOVB R12, 88(R10)
// br2.fillFast32()
- MOVQ 128(R11), R12
- MOVBQZX 136(R11), R13
- CMPQ R13, $0x20
+ MOVQ 128(R10), R11
+ MOVBQZX 136(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill2
- MOVQ 120(R11), R14
- SUBQ $0x20, R13
- SUBQ $0x04, R14
- MOVQ 96(R11), R15
+ MOVQ 120(R10), R13
+ SUBQ $0x20, R12
+ SUBQ $0x04, R13
+ MOVQ 96(R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R14)(R15*1), R15
- MOVQ R13, CX
- SHLQ CL, R15
- MOVQ R14, 120(R11)
- ORQ R15, R12
+ MOVL (R13)(R14*1), R14
+ MOVQ R12, CX
+ SHLQ CL, R14
+ MOVQ R13, 120(R10)
+ ORQ R14, R11
- // exhausted = exhausted || (br2.off < 4)
- CMPQ R14, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br2.off < 4)
+ CMPQ R13, $0x04
+ ADCB $+0, DL
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v1 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v1.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// val2 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v2 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v2.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val3 := br2.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v3 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br2.advance(uint8(v3.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@@ -544,88 +528,86 @@ skip_fill2:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
- MOVL AX, (R8)
+ MOVL AX, (BX)(R8*2)
// update the bitreader structure
- MOVQ R12, 128(R11)
- MOVB R13, 136(R11)
- ADDQ R9, R8
+ MOVQ R11, 128(R10)
+ MOVB R12, 136(R10)
// br3.fillFast32()
- MOVQ 176(R11), R12
- MOVBQZX 184(R11), R13
- CMPQ R13, $0x20
+ MOVQ 176(R10), R11
+ MOVBQZX 184(R10), R12
+ CMPQ R12, $0x20
JBE skip_fill3
- MOVQ 168(R11), R14
- SUBQ $0x20, R13
- SUBQ $0x04, R14
- MOVQ 144(R11), R15
+ MOVQ 168(R10), R13
+ SUBQ $0x20, R12
+ SUBQ $0x04, R13
+ MOVQ 144(R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
- MOVL (R14)(R15*1), R15
- MOVQ R13, CX
- SHLQ CL, R15
- MOVQ R14, 168(R11)
- ORQ R15, R12
+ MOVL (R13)(R14*1), R14
+ MOVQ R12, CX
+ SHLQ CL, R14
+ MOVQ R13, 168(R10)
+ ORQ R14, R11
- // exhausted = exhausted || (br3.off < 4)
- CMPQ R14, $0x04
- SETLT AL
- ORB AL, DL
+ // exhausted += (br3.off < 4)
+ CMPQ R13, $0x04
+ ADCB $+0, DL
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v0 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val1 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v1 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v1.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// val2 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v2 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v2.entry)
MOVB CH, AH
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
// val3 := br3.peekTopBits(peekBits)
- MOVQ R12, R14
+ MOVQ R11, R13
MOVQ DI, CX
- SHRQ CL, R14
+ SHRQ CL, R13
// v3 := table[val0&mask]
- MOVW (R10)(R14*2), CX
+ MOVW (R9)(R13*2), CX
// br3.advance(uint8(v3.entry)
MOVB CH, AL
- SHLQ CL, R12
- ADDB CL, R13
+ SHLQ CL, R11
+ ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@@ -633,11 +615,12 @@ skip_fill3:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
- MOVL AX, (R8)
+ LEAQ (R8)(R8*2), CX
+ MOVL AX, (BX)(CX*1)
// update the bitreader structure
- MOVQ R12, 176(R11)
- MOVB R13, 184(R11)
+ MOVQ R11, 176(R10)
+ MOVB R12, 184(R10)
ADDQ $0x04, BX
TESTB DL, DL
JZ main_loop
@@ -653,7 +636,7 @@ TEXT ·decompress1x_main_loop_amd64(SB), $0-8
MOVQ 16(CX), DX
MOVQ 24(CX), BX
CMPQ BX, $0x04
- JB error_max_decoded_size_exeeded
+ JB error_max_decoded_size_exceeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
@@ -668,7 +651,7 @@ main_loop:
// Check if we have room for 4 bytes in the output buffer
LEAQ 4(DX), CX
CMPQ CX, BX
- JGE error_max_decoded_size_exeeded
+ JGE error_max_decoded_size_exceeded
// Decode 4 values
CMPQ R11, $0x20
@@ -745,7 +728,7 @@ loop_condition:
RET
// Report error
-error_max_decoded_size_exeeded:
+error_max_decoded_size_exceeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)
@@ -758,7 +741,7 @@ TEXT ·decompress1x_main_loop_bmi2(SB), $0-8
MOVQ 16(CX), DX
MOVQ 24(CX), BX
CMPQ BX, $0x04
- JB error_max_decoded_size_exeeded
+ JB error_max_decoded_size_exceeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
@@ -773,7 +756,7 @@ main_loop:
// Check if we have room for 4 bytes in the output buffer
LEAQ 4(DX), CX
CMPQ CX, BX
- JGE error_max_decoded_size_exeeded
+ JGE error_max_decoded_size_exceeded
// Decode 4 values
CMPQ R11, $0x20
@@ -840,7 +823,7 @@ loop_condition:
RET
// Report error
-error_max_decoded_size_exeeded:
+error_max_decoded_size_exceeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
index 4f6f37cb..908c17de 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
@@ -122,17 +122,21 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[0][:])
- copy(out[dstEvery:], buf[1][:])
- copy(out[dstEvery*2:], buf[2][:])
- copy(out[dstEvery*3:], buf[3][:])
- out = out[bufoff:]
- decoded += bufoff * 4
// There must at least be 3 buffers left.
- if len(out) < dstEvery*3 {
+ if len(out)-bufoff < dstEvery*3 {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
+ //copy(out, buf[0][:])
+ //copy(out[dstEvery:], buf[1][:])
+ //copy(out[dstEvery*2:], buf[2][:])
+ //copy(out[dstEvery*3:], buf[3][:])
+ *(*[bufoff]byte)(out) = buf[0]
+ *(*[bufoff]byte)(out[dstEvery:]) = buf[1]
+ *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
+ *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
+ out = out[bufoff:]
+ decoded += bufoff * 4
}
}
if off > 0 {