- update ip

master v1.0.77
李光春 1 year ago
parent 61e22b74ba
commit 851a91a9d1

@ -54,7 +54,7 @@ require (
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.15.14 // indirect
github.com/klauspost/compress v1.15.15 // indirect
github.com/leodido/go-urn v1.2.1 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/mattn/go-sqlite3 v2.0.3+incompatible // indirect

@ -261,8 +261,8 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:C
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.15.14 h1:i7WCKDToww0wA+9qrUZ1xOjp218vfFo3nTU6UHp+gOc=
github.com/klauspost/compress v1.15.14/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM=
github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw=
github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=

@ -1,5 +1,5 @@
package go_library
func Version() string {
return "1.0.76"
return "1.0.77"
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 MiB

After

Width:  |  Height:  |  Size: 67 MiB

Binary file not shown.

@ -16,6 +16,13 @@ This package provides various compression algorithms.
# changelog
* Jan 3rd, 2023 (v1.15.14)
* flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718
* zstd: Minor speed tweaks by @greatroar in https://github.com/klauspost/compress/pull/716 https://github.com/klauspost/compress/pull/720
* export NoGzipResponseWriter for custom ResponseWriter wrappers by @harshavardhana in https://github.com/klauspost/compress/pull/722
* s2: Add example for indexing and existing stream https://github.com/klauspost/compress/pull/723
* Dec 11, 2022 (v1.15.13)
* zstd: Add [MaxEncodedSize](https://pkg.go.dev/github.com/klauspost/compress@v1.15.13/zstd#Encoder.MaxEncodedSize) to encoder https://github.com/klauspost/compress/pull/691
* zstd: Various tweaks and improvements https://github.com/klauspost/compress/pull/693 https://github.com/klauspost/compress/pull/695 https://github.com/klauspost/compress/pull/696 https://github.com/klauspost/compress/pull/701 https://github.com/klauspost/compress/pull/702 https://github.com/klauspost/compress/pull/703 https://github.com/klauspost/compress/pull/704 https://github.com/klauspost/compress/pull/705 https://github.com/klauspost/compress/pull/706 https://github.com/klauspost/compress/pull/707 https://github.com/klauspost/compress/pull/708

@ -146,54 +146,51 @@ func (s *Scratch) compress(src []byte) error {
c1.encodeZero(tt[src[ip-2]])
ip -= 2
}
src = src[:ip]
// Main compression loop.
switch {
case !s.zeroBits && s.actualTableLog <= 8:
// We can encode 4 symbols without requiring a flush.
// We do not need to check if any output is 0 bits.
for ip >= 4 {
for ; len(src) >= 4; src = src[:len(src)-4] {
s.bw.flush32()
v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
c2.encode(tt[v0])
c1.encode(tt[v1])
c2.encode(tt[v2])
c1.encode(tt[v3])
ip -= 4
}
case !s.zeroBits:
// We do not need to check if any output is 0 bits.
for ip >= 4 {
for ; len(src) >= 4; src = src[:len(src)-4] {
s.bw.flush32()
v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
c2.encode(tt[v0])
c1.encode(tt[v1])
s.bw.flush32()
c2.encode(tt[v2])
c1.encode(tt[v3])
ip -= 4
}
case s.actualTableLog <= 8:
// We can encode 4 symbols without requiring a flush
for ip >= 4 {
for ; len(src) >= 4; src = src[:len(src)-4] {
s.bw.flush32()
v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
c2.encodeZero(tt[v0])
c1.encodeZero(tt[v1])
c2.encodeZero(tt[v2])
c1.encodeZero(tt[v3])
ip -= 4
}
default:
for ip >= 4 {
for ; len(src) >= 4; src = src[:len(src)-4] {
s.bw.flush32()
v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1]
c2.encodeZero(tt[v0])
c1.encodeZero(tt[v1])
s.bw.flush32()
c2.encodeZero(tt[v2])
c1.encodeZero(tt[v3])
ip -= 4
}
}
@ -459,15 +456,17 @@ func (s *Scratch) countSimple(in []byte) (max int) {
for _, v := range in {
s.count[v]++
}
m := uint32(0)
m, symlen := uint32(0), s.symbolLen
for i, v := range s.count[:] {
if v == 0 {
continue
}
if v > m {
m = v
}
if v > 0 {
s.symbolLen = uint16(i) + 1
}
symlen = uint16(i) + 1
}
s.symbolLen = symlen
return int(m)
}

@ -67,7 +67,6 @@ func (b *bitReaderBytes) fillFast() {
// 2 bounds checks.
v := b.in[b.off-4 : b.off]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value |= uint64(low) << (b.bitsRead - 32)
b.bitsRead -= 32
@ -88,8 +87,7 @@ func (b *bitReaderBytes) fill() {
return
}
if b.off > 4 {
v := b.in[b.off-4:]
v = v[:4]
v := b.in[b.off-4 : b.off]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value |= uint64(low) << (b.bitsRead - 32)
b.bitsRead -= 32
@ -179,7 +177,6 @@ func (b *bitReaderShifted) fillFast() {
// 2 bounds checks.
v := b.in[b.off-4 : b.off]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
b.bitsRead -= 32
@ -200,8 +197,7 @@ func (b *bitReaderShifted) fill() {
return
}
if b.off > 4 {
v := b.in[b.off-4:]
v = v[:4]
v := b.in[b.off-4 : b.off]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
b.bitsRead -= 32

@ -484,34 +484,35 @@ func (s *Scratch) buildCTable() error {
// Different from reference implementation.
huffNode0 := s.nodes[0 : huffNodesLen+1]
for huffNode[nonNullRank].count == 0 {
for huffNode[nonNullRank].count() == 0 {
nonNullRank--
}
lowS := int16(nonNullRank)
nodeRoot := nodeNb + lowS - 1
lowN := nodeNb
huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count
huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb)
huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count())
huffNode[lowS].setParent(nodeNb)
huffNode[lowS-1].setParent(nodeNb)
nodeNb++
lowS -= 2
for n := nodeNb; n <= nodeRoot; n++ {
huffNode[n].count = 1 << 30
huffNode[n].setCount(1 << 30)
}
// fake entry, strong barrier
huffNode0[0].count = 1 << 31
huffNode0[0].setCount(1 << 31)
// create parents
for nodeNb <= nodeRoot {
var n1, n2 int16
if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
n1 = lowS
lowS--
} else {
n1 = lowN
lowN++
}
if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() {
n2 = lowS
lowS--
} else {
@ -519,18 +520,19 @@ func (s *Scratch) buildCTable() error {
lowN++
}
huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count
huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb)
huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count())
huffNode0[n1+1].setParent(nodeNb)
huffNode0[n2+1].setParent(nodeNb)
nodeNb++
}
// distribute weights (unlimited tree height)
huffNode[nodeRoot].nbBits = 0
huffNode[nodeRoot].setNbBits(0)
for n := nodeRoot - 1; n >= startNode; n-- {
huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
}
for n := uint16(0); n <= nonNullRank; n++ {
huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1)
}
s.actualTableLog = s.setMaxHeight(int(nonNullRank))
maxNbBits := s.actualTableLog
@ -542,7 +544,7 @@ func (s *Scratch) buildCTable() error {
var nbPerRank [tableLogMax + 1]uint16
var valPerRank [16]uint16
for _, v := range huffNode[:nonNullRank+1] {
nbPerRank[v.nbBits]++
nbPerRank[v.nbBits()]++
}
// determine stating value per rank
{
@ -557,7 +559,7 @@ func (s *Scratch) buildCTable() error {
// push nbBits per symbol, symbol order
for _, v := range huffNode[:nonNullRank+1] {
s.cTable[v.symbol].nBits = v.nbBits
s.cTable[v.symbol()].nBits = v.nbBits()
}
// assign value within rank, symbol order
@ -603,12 +605,12 @@ func (s *Scratch) huffSort() {
pos := rank[r].current
rank[r].current++
prev := nodes[(pos-1)&huffNodesMask]
for pos > rank[r].base && c > prev.count {
for pos > rank[r].base && c > prev.count() {
nodes[pos&huffNodesMask] = prev
pos--
prev = nodes[(pos-1)&huffNodesMask]
}
nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n))
}
}
@ -617,7 +619,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
huffNode := s.nodes[1 : huffNodesLen+1]
//huffNode = huffNode[: huffNodesLen]
largestBits := huffNode[lastNonNull].nbBits
largestBits := huffNode[lastNonNull].nbBits()
// early exit : no elt > maxNbBits
if largestBits <= maxNbBits {
@ -627,14 +629,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
baseCost := int(1) << (largestBits - maxNbBits)
n := uint32(lastNonNull)
for huffNode[n].nbBits > maxNbBits {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits))
huffNode[n].nbBits = maxNbBits
for huffNode[n].nbBits() > maxNbBits {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits()))
huffNode[n].setNbBits(maxNbBits)
n--
}
// n stops at huffNode[n].nbBits <= maxNbBits
for huffNode[n].nbBits == maxNbBits {
for huffNode[n].nbBits() == maxNbBits {
n--
}
// n end at index of smallest symbol using < maxNbBits
@ -655,10 +657,10 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
{
currentNbBits := maxNbBits
for pos := int(n); pos >= 0; pos-- {
if huffNode[pos].nbBits >= currentNbBits {
if huffNode[pos].nbBits() >= currentNbBits {
continue
}
currentNbBits = huffNode[pos].nbBits // < maxNbBits
currentNbBits = huffNode[pos].nbBits() // < maxNbBits
rankLast[maxNbBits-currentNbBits] = uint32(pos)
}
}
@ -675,8 +677,8 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
if lowPos == noSymbol {
break
}
highTotal := huffNode[highPos].count
lowTotal := 2 * huffNode[lowPos].count
highTotal := huffNode[highPos].count()
lowTotal := 2 * huffNode[lowPos].count()
if highTotal <= lowTotal {
break
}
@ -692,13 +694,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
// this rank is no longer empty
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]
}
huffNode[rankLast[nBitsToDecrease]].nbBits++
huffNode[rankLast[nBitsToDecrease]].setNbBits(1 +
huffNode[rankLast[nBitsToDecrease]].nbBits())
if rankLast[nBitsToDecrease] == 0 {
/* special case, reached largest symbol */
rankLast[nBitsToDecrease] = noSymbol
} else {
rankLast[nBitsToDecrease]--
if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease {
if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease {
rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */
}
}
@ -706,15 +709,15 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
for totalCost < 0 { /* Sometimes, cost correction overshoot */
if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
for huffNode[n].nbBits == maxNbBits {
for huffNode[n].nbBits() == maxNbBits {
n--
}
huffNode[n+1].nbBits--
huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1)
rankLast[1] = n + 1
totalCost++
continue
}
huffNode[rankLast[1]+1].nbBits--
huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1)
rankLast[1]++
totalCost++
}
@ -722,9 +725,26 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
return maxNbBits
}
type nodeElt struct {
count uint32
parent uint16
symbol byte
nbBits uint8
// A nodeElt is the fields
//
// count uint32
// parent uint16
// symbol byte
// nbBits uint8
//
// in some order, all squashed into an integer so that the compiler
// always loads and stores entire nodeElts instead of separate fields.
type nodeElt uint64
func makeNodeElt(count uint32, symbol byte) nodeElt {
return nodeElt(count) | nodeElt(symbol)<<48
}
func (e *nodeElt) count() uint32 { return uint32(*e) }
func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) }
func (e *nodeElt) symbol() byte { return byte(*e >> 48) }
func (e *nodeElt) nbBits() uint8 { return uint8(*e >> 56) }
func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) }
func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 }
func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 }

@ -4,360 +4,349 @@
// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_main_loop_amd64(SB), $0-8
XORQ DX, DX
// Preload values
MOVQ ctx+0(FP), AX
MOVBQZX 8(AX), DI
MOVQ 16(AX), SI
MOVQ 48(AX), BX
MOVQ 24(AX), R9
MOVQ 32(AX), R10
MOVQ (AX), R11
MOVQ 16(AX), BX
MOVQ 48(AX), SI
MOVQ 24(AX), R8
MOVQ 32(AX), R9
MOVQ (AX), R10
// Main loop
main_loop:
MOVQ SI, R8
CMPQ R8, BX
XORL DX, DX
CMPQ BX, SI
SETGE DL
// br0.fillFast32()
MOVQ 32(R11), R12
MOVBQZX 40(R11), R13
CMPQ R13, $0x20
MOVQ 32(R10), R11
MOVBQZX 40(R10), R12
CMPQ R12, $0x20
JBE skip_fill0
MOVQ 24(R11), AX
SUBQ $0x20, R13
MOVQ 24(R10), AX
SUBQ $0x20, R12
SUBQ $0x04, AX
MOVQ (R11), R14
MOVQ (R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 24(R11)
ORQ R14, R12
MOVL (AX)(R13*1), R13
MOVQ R12, CX
SHLQ CL, R13
MOVQ AX, 24(R10)
ORQ R13, R11
// exhausted = exhausted || (br0.off < 4)
CMPQ AX, $0x04
SETLT AL
ORB AL, DL
// exhausted += (br0.off < 4)
CMPQ AX, $0x04
ADCB $+0, DL
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val1 := br0.peekTopBits(peekBits)
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
MOVQ R11, R13
SHRQ CL, R13
// v1 := table[val1&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br0.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (R8)
MOVW AX, (BX)
// update the bitreader structure
MOVQ R12, 32(R11)
MOVB R13, 40(R11)
ADDQ R9, R8
MOVQ R11, 32(R10)
MOVB R12, 40(R10)
// br1.fillFast32()
MOVQ 80(R11), R12
MOVBQZX 88(R11), R13
CMPQ R13, $0x20
MOVQ 80(R10), R11
MOVBQZX 88(R10), R12
CMPQ R12, $0x20
JBE skip_fill1
MOVQ 72(R11), AX
SUBQ $0x20, R13
MOVQ 72(R10), AX
SUBQ $0x20, R12
SUBQ $0x04, AX
MOVQ 48(R11), R14
MOVQ 48(R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 72(R11)
ORQ R14, R12
MOVL (AX)(R13*1), R13
MOVQ R12, CX
SHLQ CL, R13
MOVQ AX, 72(R10)
ORQ R13, R11
// exhausted = exhausted || (br1.off < 4)
CMPQ AX, $0x04
SETLT AL
ORB AL, DL
// exhausted += (br1.off < 4)
CMPQ AX, $0x04
ADCB $+0, DL
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val1 := br1.peekTopBits(peekBits)
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
MOVQ R11, R13
SHRQ CL, R13
// v1 := table[val1&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br1.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (R8)
MOVW AX, (BX)(R8*1)
// update the bitreader structure
MOVQ R12, 80(R11)
MOVB R13, 88(R11)
ADDQ R9, R8
MOVQ R11, 80(R10)
MOVB R12, 88(R10)
// br2.fillFast32()
MOVQ 128(R11), R12
MOVBQZX 136(R11), R13
CMPQ R13, $0x20
MOVQ 128(R10), R11
MOVBQZX 136(R10), R12
CMPQ R12, $0x20
JBE skip_fill2
MOVQ 120(R11), AX
SUBQ $0x20, R13
MOVQ 120(R10), AX
SUBQ $0x20, R12
SUBQ $0x04, AX
MOVQ 96(R11), R14
MOVQ 96(R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 120(R11)
ORQ R14, R12
MOVL (AX)(R13*1), R13
MOVQ R12, CX
SHLQ CL, R13
MOVQ AX, 120(R10)
ORQ R13, R11
// exhausted = exhausted || (br2.off < 4)
CMPQ AX, $0x04
SETLT AL
ORB AL, DL
// exhausted += (br2.off < 4)
CMPQ AX, $0x04
ADCB $+0, DL
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val1 := br2.peekTopBits(peekBits)
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
MOVQ R11, R13
SHRQ CL, R13
// v1 := table[val1&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br2.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (R8)
MOVW AX, (BX)(R8*2)
// update the bitreader structure
MOVQ R12, 128(R11)
MOVB R13, 136(R11)
ADDQ R9, R8
MOVQ R11, 128(R10)
MOVB R12, 136(R10)
// br3.fillFast32()
MOVQ 176(R11), R12
MOVBQZX 184(R11), R13
CMPQ R13, $0x20
MOVQ 176(R10), R11
MOVBQZX 184(R10), R12
CMPQ R12, $0x20
JBE skip_fill3
MOVQ 168(R11), AX
SUBQ $0x20, R13
MOVQ 168(R10), AX
SUBQ $0x20, R12
SUBQ $0x04, AX
MOVQ 144(R11), R14
MOVQ 144(R10), R13
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 168(R11)
ORQ R14, R12
MOVL (AX)(R13*1), R13
MOVQ R12, CX
SHLQ CL, R13
MOVQ AX, 168(R10)
ORQ R13, R11
// exhausted = exhausted || (br3.off < 4)
CMPQ AX, $0x04
SETLT AL
ORB AL, DL
// exhausted += (br3.off < 4)
CMPQ AX, $0x04
ADCB $+0, DL
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val1 := br3.peekTopBits(peekBits)
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
MOVQ R11, R13
SHRQ CL, R13
// v1 := table[val1&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br3.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (R8)
LEAQ (R8)(R8*2), CX
MOVW AX, (BX)(CX*1)
// update the bitreader structure
MOVQ R12, 176(R11)
MOVB R13, 184(R11)
ADDQ $0x02, SI
MOVQ R11, 176(R10)
MOVB R12, 184(R10)
ADDQ $0x02, BX
TESTB DL, DL
JZ main_loop
MOVQ ctx+0(FP), AX
SUBQ 16(AX), SI
SHLQ $0x02, SI
MOVQ SI, 40(AX)
SUBQ 16(AX), BX
SHLQ $0x02, BX
MOVQ BX, 40(AX)
RET
// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
XORQ DX, DX
// Preload values
MOVQ ctx+0(FP), CX
MOVBQZX 8(CX), DI
MOVQ 16(CX), BX
MOVQ 48(CX), SI
MOVQ 24(CX), R9
MOVQ 32(CX), R10
MOVQ (CX), R11
MOVQ 24(CX), R8
MOVQ 32(CX), R9
MOVQ (CX), R10
// Main loop
main_loop:
MOVQ BX, R8
CMPQ R8, SI
XORL DX, DX
CMPQ BX, SI
SETGE DL
// br0.fillFast32()
MOVQ 32(R11), R12
MOVBQZX 40(R11), R13
CMPQ R13, $0x20
MOVQ 32(R10), R11
MOVBQZX 40(R10), R12
CMPQ R12, $0x20
JBE skip_fill0
MOVQ 24(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ (R11), R15
MOVQ 24(R10), R13
SUBQ $0x20, R12
SUBQ $0x04, R13
MOVQ (R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 24(R11)
ORQ R15, R12
MOVL (R13)(R14*1), R14
MOVQ R12, CX
SHLQ CL, R14
MOVQ R13, 24(R10)
ORQ R14, R11
// exhausted = exhausted || (br0.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
// exhausted += (br0.off < 4)
CMPQ R13, $0x04
ADCB $+0, DL
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val1 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v1 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br0.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
BSWAPL AX
// val2 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v2 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br0.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val3 := br0.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v3 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br0.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@ -365,88 +354,86 @@ skip_fill0:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (R8)
MOVL AX, (BX)
// update the bitreader structure
MOVQ R12, 32(R11)
MOVB R13, 40(R11)
ADDQ R9, R8
MOVQ R11, 32(R10)
MOVB R12, 40(R10)
// br1.fillFast32()
MOVQ 80(R11), R12
MOVBQZX 88(R11), R13
CMPQ R13, $0x20
MOVQ 80(R10), R11
MOVBQZX 88(R10), R12
CMPQ R12, $0x20
JBE skip_fill1
MOVQ 72(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ 48(R11), R15
MOVQ 72(R10), R13
SUBQ $0x20, R12
SUBQ $0x04, R13
MOVQ 48(R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 72(R11)
ORQ R15, R12
MOVL (R13)(R14*1), R14
MOVQ R12, CX
SHLQ CL, R14
MOVQ R13, 72(R10)
ORQ R14, R11
// exhausted = exhausted || (br1.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
// exhausted += (br1.off < 4)
CMPQ R13, $0x04
ADCB $+0, DL
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val1 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v1 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br1.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
BSWAPL AX
// val2 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v2 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br1.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val3 := br1.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v3 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br1.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@ -454,88 +441,86 @@ skip_fill1:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (R8)
MOVL AX, (BX)(R8*1)
// update the bitreader structure
MOVQ R12, 80(R11)
MOVB R13, 88(R11)
ADDQ R9, R8
MOVQ R11, 80(R10)
MOVB R12, 88(R10)
// br2.fillFast32()
MOVQ 128(R11), R12
MOVBQZX 136(R11), R13
CMPQ R13, $0x20
MOVQ 128(R10), R11
MOVBQZX 136(R10), R12
CMPQ R12, $0x20
JBE skip_fill2
MOVQ 120(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ 96(R11), R15
MOVQ 120(R10), R13
SUBQ $0x20, R12
SUBQ $0x04, R13
MOVQ 96(R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 120(R11)
ORQ R15, R12
MOVL (R13)(R14*1), R14
MOVQ R12, CX
SHLQ CL, R14
MOVQ R13, 120(R10)
ORQ R14, R11
// exhausted = exhausted || (br2.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
// exhausted += (br2.off < 4)
CMPQ R13, $0x04
ADCB $+0, DL
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val1 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v1 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br2.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
BSWAPL AX
// val2 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v2 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br2.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val3 := br2.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v3 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br2.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@ -543,88 +528,86 @@ skip_fill2:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (R8)
MOVL AX, (BX)(R8*2)
// update the bitreader structure
MOVQ R12, 128(R11)
MOVB R13, 136(R11)
ADDQ R9, R8
MOVQ R11, 128(R10)
MOVB R12, 136(R10)
// br3.fillFast32()
MOVQ 176(R11), R12
MOVBQZX 184(R11), R13
CMPQ R13, $0x20
MOVQ 176(R10), R11
MOVBQZX 184(R10), R12
CMPQ R12, $0x20
JBE skip_fill3
MOVQ 168(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ 144(R11), R15
MOVQ 168(R10), R13
SUBQ $0x20, R12
SUBQ $0x04, R13
MOVQ 144(R10), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 168(R11)
ORQ R15, R12
MOVL (R13)(R14*1), R14
MOVQ R12, CX
SHLQ CL, R14
MOVQ R13, 168(R10)
ORQ R14, R11
// exhausted = exhausted || (br3.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
// exhausted += (br3.off < 4)
CMPQ R13, $0x04
ADCB $+0, DL
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v0 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val1 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v1 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br3.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
BSWAPL AX
// val2 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v2 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br3.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
// val3 := br3.peekTopBits(peekBits)
MOVQ R12, R14
MOVQ R11, R13
MOVQ DI, CX
SHRQ CL, R14
SHRQ CL, R13
// v3 := table[val0&mask]
MOVW (R10)(R14*2), CX
MOVW (R9)(R13*2), CX
// br3.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R12
ADDB CL, R13
SHLQ CL, R11
ADDB CL, R12
BSWAPL AX
// these four writes get coalesced
@ -632,11 +615,12 @@ skip_fill3:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (R8)
LEAQ (R8)(R8*2), CX
MOVL AX, (BX)(CX*1)
// update the bitreader structure
MOVQ R12, 176(R11)
MOVB R13, 184(R11)
MOVQ R11, 176(R10)
MOVB R12, 184(R10)
ADDQ $0x04, BX
TESTB DL, DL
JZ main_loop
@ -652,7 +636,7 @@ TEXT ·decompress1x_main_loop_amd64(SB), $0-8
MOVQ 16(CX), DX
MOVQ 24(CX), BX
CMPQ BX, $0x04
JB error_max_decoded_size_exeeded
JB error_max_decoded_size_exceeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
@ -667,7 +651,7 @@ main_loop:
// Check if we have room for 4 bytes in the output buffer
LEAQ 4(DX), CX
CMPQ CX, BX
JGE error_max_decoded_size_exeeded
JGE error_max_decoded_size_exceeded
// Decode 4 values
CMPQ R11, $0x20
@ -744,7 +728,7 @@ loop_condition:
RET
// Report error
error_max_decoded_size_exeeded:
error_max_decoded_size_exceeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)
@ -757,7 +741,7 @@ TEXT ·decompress1x_main_loop_bmi2(SB), $0-8
MOVQ 16(CX), DX
MOVQ 24(CX), BX
CMPQ BX, $0x04
JB error_max_decoded_size_exeeded
JB error_max_decoded_size_exceeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
@ -772,7 +756,7 @@ main_loop:
// Check if we have room for 4 bytes in the output buffer
LEAQ 4(DX), CX
CMPQ CX, BX
JGE error_max_decoded_size_exeeded
JGE error_max_decoded_size_exceeded
// Decode 4 values
CMPQ R11, $0x20
@ -839,7 +823,7 @@ loop_condition:
RET
// Report error
error_max_decoded_size_exeeded:
error_max_decoded_size_exceeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)

@ -192,16 +192,14 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
}
// Read block data.
if cap(b.dataStorage) < cSize {
if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize {
// byteBuf doesn't need a destination buffer.
if b.lowMem || cSize > maxCompressedBlockSize {
b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
} else {
b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
}
}
if cap(b.dst) <= maxSize {
b.dst = make([]byte, 0, maxSize+1)
}
b.data, err = br.readBig(cSize, b.dataStorage)
if err != nil {
if debugDecoder {
@ -210,6 +208,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
}
return err
}
if cap(b.dst) <= maxSize {
b.dst = make([]byte, 0, maxSize+1)
}
return nil
}

@ -40,8 +40,7 @@ type Decoder struct {
frame *frameDec
// Custom dictionaries.
// Always uses copies.
dicts map[uint32]dict
dicts map[uint32]*dict
// streamWg is the waitgroup for all streams
streamWg sync.WaitGroup
@ -103,7 +102,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
}
// Transfer option dicts.
d.dicts = make(map[uint32]dict, len(d.o.dicts))
d.dicts = make(map[uint32]*dict, len(d.o.dicts))
for _, dc := range d.o.dicts {
d.dicts[dc.id] = dc
}
@ -341,15 +340,8 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
return dst, err
}
if frame.DictionaryID != nil {
dict, ok := d.dicts[*frame.DictionaryID]
if !ok {
return nil, ErrUnknownDictionary
}
if debugDecoder {
println("setting dict", frame.DictionaryID)
}
frame.history.setDict(&dict)
if err = d.setDict(frame); err != nil {
return nil, err
}
if frame.WindowSize > d.o.maxWindowSize {
if debugDecoder {
@ -495,18 +487,12 @@ func (d *Decoder) nextBlockSync() (ok bool) {
if !d.syncStream.inFrame {
d.frame.history.reset()
d.current.err = d.frame.reset(&d.syncStream.br)
if d.current.err == nil {
d.current.err = d.setDict(d.frame)
}
if d.current.err != nil {
return false
}
if d.frame.DictionaryID != nil {
dict, ok := d.dicts[*d.frame.DictionaryID]
if !ok {
d.current.err = ErrUnknownDictionary
return false
} else {
d.frame.history.setDict(&dict)
}
}
if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize {
d.current.err = ErrDecoderSizeExceeded
return false
@ -865,13 +851,8 @@ decodeStream:
if debugDecoder && err != nil {
println("Frame decoder returned", err)
}
if err == nil && frame.DictionaryID != nil {
dict, ok := d.dicts[*frame.DictionaryID]
if !ok {
err = ErrUnknownDictionary
} else {
frame.history.setDict(&dict)
}
if err == nil {
err = d.setDict(frame)
}
if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
if debugDecoder {
@ -953,3 +934,20 @@ decodeStream:
hist.reset()
d.frame.history.b = frameHistCache
}
func (d *Decoder) setDict(frame *frameDec) (err error) {
dict, ok := d.dicts[frame.DictionaryID]
if ok {
if debugDecoder {
println("setting dict", frame.DictionaryID)
}
frame.history.setDict(dict)
} else if frame.DictionaryID != 0 {
// A zero or missing dictionary id is ambiguous:
// either dictionary zero, or no dictionary. In particular,
// zstd --patch-from uses this id for the source file,
// so only return an error if the dictionary id is not zero.
err = ErrUnknownDictionary
}
return err
}

@ -6,6 +6,8 @@ package zstd
import (
"errors"
"fmt"
"math/bits"
"runtime"
)
@ -18,7 +20,7 @@ type decoderOptions struct {
concurrent int
maxDecodedSize uint64
maxWindowSize uint64
dicts []dict
dicts []*dict
ignoreChecksum bool
limitToCap bool
decodeBufsBelow int
@ -85,7 +87,13 @@ func WithDecoderMaxMemory(n uint64) DOption {
}
// WithDecoderDicts allows to register one or more dictionaries for the decoder.
// If several dictionaries with the same ID is provided the last one will be used.
//
// Each slice in dict must be in the [dictionary format] produced by
// "zstd --train" from the Zstandard reference implementation.
//
// If several dictionaries with the same ID are provided, the last one will be used.
//
// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
func WithDecoderDicts(dicts ...[]byte) DOption {
return func(o *decoderOptions) error {
for _, b := range dicts {
@ -93,12 +101,24 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
if err != nil {
return err
}
o.dicts = append(o.dicts, *d)
o.dicts = append(o.dicts, d)
}
return nil
}
}
// WithEncoderDictRaw registers a dictionary that may be used by the decoder.
// The slice content can be arbitrary data.
func WithDecoderDictRaw(id uint32, content []byte) DOption {
return func(o *decoderOptions) error {
if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
}
o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}})
return nil
}
}
// WithDecoderMaxWindow allows to set a maximum window size for decodes.
// This allows rejecting packets that will cause big memory usage.
// The Decoder will likely allocate more memory based on the WithDecoderLowmem setting.

@ -21,6 +21,9 @@ type dict struct {
const dictMagic = "\x37\xa4\x30\xec"
// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB.
const dictMaxLength = 1 << 31
// ID returns the dictionary id or 0 if d is nil.
func (d *dict) ID() uint32 {
if d == nil {

@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"math"
"math/bits"
"runtime"
"strings"
)
@ -305,7 +306,13 @@ func WithLowerEncoderMem(b bool) EOption {
}
// WithEncoderDict allows to register a dictionary that will be used for the encode.
//
// The slice dict must be in the [dictionary format] produced by
// "zstd --train" from the Zstandard reference implementation.
//
// The encoder *may* choose to use no dictionary instead for certain payloads.
//
// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
func WithEncoderDict(dict []byte) EOption {
return func(o *encoderOptions) error {
d, err := loadDict(dict)
@ -316,3 +323,17 @@ func WithEncoderDict(dict []byte) EOption {
return nil
}
}
// WithEncoderDictRaw registers a dictionary that may be used by the encoder.
//
// The slice content may contain arbitrary data. It will be used as an initial
// history.
func WithEncoderDictRaw(id uint32, content []byte) EOption {
return func(o *encoderOptions) error {
if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
}
o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
return nil
}
}

@ -29,7 +29,7 @@ type frameDec struct {
FrameContentSize uint64
DictionaryID *uint32
DictionaryID uint32
HasCheckSum bool
SingleSegment bool
}
@ -155,7 +155,7 @@ func (d *frameDec) reset(br byteBuffer) error {
// Read Dictionary_ID
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
d.DictionaryID = nil
d.DictionaryID = 0
if size := fhd & 3; size != 0 {
if size == 3 {
size = 4
@ -178,11 +178,7 @@ func (d *frameDec) reset(br byteBuffer) error {
if debugDecoder {
println("Dict size", size, "ID:", id)
}
if id > 0 {
// ID 0 means "sorry, no dictionary anyway".
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
d.DictionaryID = &id
}
d.DictionaryID = id
}
// Read Frame_Content_Size

@ -72,7 +72,6 @@ var (
ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")
// ErrUnknownDictionary is returned if the dictionary ID is unknown.
// For the time being dictionaries are not supported.
ErrUnknownDictionary = errors.New("unknown dictionary")
// ErrFrameSizeExceeded is returned if the stated frame size is exceeded.

@ -119,7 +119,7 @@ github.com/jinzhu/now
# github.com/json-iterator/go v1.1.12
## explicit; go 1.12
github.com/json-iterator/go
# github.com/klauspost/compress v1.15.14
# github.com/klauspost/compress v1.15.15
## explicit; go 1.17
github.com/klauspost/compress
github.com/klauspost/compress/fse

Loading…
Cancel
Save