|
|
|
@ -4,360 +4,349 @@
|
|
|
|
|
|
|
|
|
|
// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
|
|
|
|
|
TEXT ·decompress4x_main_loop_amd64(SB), $0-8
|
|
|
|
|
XORQ DX, DX
|
|
|
|
|
|
|
|
|
|
// Preload values
|
|
|
|
|
MOVQ ctx+0(FP), AX
|
|
|
|
|
MOVBQZX 8(AX), DI
|
|
|
|
|
MOVQ 16(AX), SI
|
|
|
|
|
MOVQ 48(AX), BX
|
|
|
|
|
MOVQ 24(AX), R9
|
|
|
|
|
MOVQ 32(AX), R10
|
|
|
|
|
MOVQ (AX), R11
|
|
|
|
|
MOVQ 16(AX), BX
|
|
|
|
|
MOVQ 48(AX), SI
|
|
|
|
|
MOVQ 24(AX), R8
|
|
|
|
|
MOVQ 32(AX), R9
|
|
|
|
|
MOVQ (AX), R10
|
|
|
|
|
|
|
|
|
|
// Main loop
|
|
|
|
|
main_loop:
|
|
|
|
|
MOVQ SI, R8
|
|
|
|
|
CMPQ R8, BX
|
|
|
|
|
XORL DX, DX
|
|
|
|
|
CMPQ BX, SI
|
|
|
|
|
SETGE DL
|
|
|
|
|
|
|
|
|
|
// br0.fillFast32()
|
|
|
|
|
MOVQ 32(R11), R12
|
|
|
|
|
MOVBQZX 40(R11), R13
|
|
|
|
|
CMPQ R13, $0x20
|
|
|
|
|
MOVQ 32(R10), R11
|
|
|
|
|
MOVBQZX 40(R10), R12
|
|
|
|
|
CMPQ R12, $0x20
|
|
|
|
|
JBE skip_fill0
|
|
|
|
|
MOVQ 24(R11), AX
|
|
|
|
|
SUBQ $0x20, R13
|
|
|
|
|
MOVQ 24(R10), AX
|
|
|
|
|
SUBQ $0x20, R12
|
|
|
|
|
SUBQ $0x04, AX
|
|
|
|
|
MOVQ (R11), R14
|
|
|
|
|
MOVQ (R10), R13
|
|
|
|
|
|
|
|
|
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
|
|
|
MOVL (AX)(R14*1), R14
|
|
|
|
|
MOVQ R13, CX
|
|
|
|
|
SHLQ CL, R14
|
|
|
|
|
MOVQ AX, 24(R11)
|
|
|
|
|
ORQ R14, R12
|
|
|
|
|
MOVL (AX)(R13*1), R13
|
|
|
|
|
MOVQ R12, CX
|
|
|
|
|
SHLQ CL, R13
|
|
|
|
|
MOVQ AX, 24(R10)
|
|
|
|
|
ORQ R13, R11
|
|
|
|
|
|
|
|
|
|
// exhausted = exhausted || (br0.off < 4)
|
|
|
|
|
CMPQ AX, $0x04
|
|
|
|
|
SETLT AL
|
|
|
|
|
ORB AL, DL
|
|
|
|
|
// exhausted += (br0.off < 4)
|
|
|
|
|
CMPQ AX, $0x04
|
|
|
|
|
ADCB $+0, DL
|
|
|
|
|
|
|
|
|
|
skip_fill0:
|
|
|
|
|
// val0 := br0.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v0 := table[val0&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br0.advance(uint8(v0.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val1 := br0.peekTopBits(peekBits)
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v1 := table[val1&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br0.advance(uint8(v1.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// these two writes get coalesced
|
|
|
|
|
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
|
|
|
|
MOVW AX, (R8)
|
|
|
|
|
MOVW AX, (BX)
|
|
|
|
|
|
|
|
|
|
// update the bitreader structure
|
|
|
|
|
MOVQ R12, 32(R11)
|
|
|
|
|
MOVB R13, 40(R11)
|
|
|
|
|
ADDQ R9, R8
|
|
|
|
|
MOVQ R11, 32(R10)
|
|
|
|
|
MOVB R12, 40(R10)
|
|
|
|
|
|
|
|
|
|
// br1.fillFast32()
|
|
|
|
|
MOVQ 80(R11), R12
|
|
|
|
|
MOVBQZX 88(R11), R13
|
|
|
|
|
CMPQ R13, $0x20
|
|
|
|
|
MOVQ 80(R10), R11
|
|
|
|
|
MOVBQZX 88(R10), R12
|
|
|
|
|
CMPQ R12, $0x20
|
|
|
|
|
JBE skip_fill1
|
|
|
|
|
MOVQ 72(R11), AX
|
|
|
|
|
SUBQ $0x20, R13
|
|
|
|
|
MOVQ 72(R10), AX
|
|
|
|
|
SUBQ $0x20, R12
|
|
|
|
|
SUBQ $0x04, AX
|
|
|
|
|
MOVQ 48(R11), R14
|
|
|
|
|
MOVQ 48(R10), R13
|
|
|
|
|
|
|
|
|
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
|
|
|
MOVL (AX)(R14*1), R14
|
|
|
|
|
MOVQ R13, CX
|
|
|
|
|
SHLQ CL, R14
|
|
|
|
|
MOVQ AX, 72(R11)
|
|
|
|
|
ORQ R14, R12
|
|
|
|
|
MOVL (AX)(R13*1), R13
|
|
|
|
|
MOVQ R12, CX
|
|
|
|
|
SHLQ CL, R13
|
|
|
|
|
MOVQ AX, 72(R10)
|
|
|
|
|
ORQ R13, R11
|
|
|
|
|
|
|
|
|
|
// exhausted = exhausted || (br1.off < 4)
|
|
|
|
|
CMPQ AX, $0x04
|
|
|
|
|
SETLT AL
|
|
|
|
|
ORB AL, DL
|
|
|
|
|
// exhausted += (br1.off < 4)
|
|
|
|
|
CMPQ AX, $0x04
|
|
|
|
|
ADCB $+0, DL
|
|
|
|
|
|
|
|
|
|
skip_fill1:
|
|
|
|
|
// val0 := br1.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v0 := table[val0&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br1.advance(uint8(v0.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val1 := br1.peekTopBits(peekBits)
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v1 := table[val1&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br1.advance(uint8(v1.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// these two writes get coalesced
|
|
|
|
|
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
|
|
|
|
MOVW AX, (R8)
|
|
|
|
|
MOVW AX, (BX)(R8*1)
|
|
|
|
|
|
|
|
|
|
// update the bitreader structure
|
|
|
|
|
MOVQ R12, 80(R11)
|
|
|
|
|
MOVB R13, 88(R11)
|
|
|
|
|
ADDQ R9, R8
|
|
|
|
|
MOVQ R11, 80(R10)
|
|
|
|
|
MOVB R12, 88(R10)
|
|
|
|
|
|
|
|
|
|
// br2.fillFast32()
|
|
|
|
|
MOVQ 128(R11), R12
|
|
|
|
|
MOVBQZX 136(R11), R13
|
|
|
|
|
CMPQ R13, $0x20
|
|
|
|
|
MOVQ 128(R10), R11
|
|
|
|
|
MOVBQZX 136(R10), R12
|
|
|
|
|
CMPQ R12, $0x20
|
|
|
|
|
JBE skip_fill2
|
|
|
|
|
MOVQ 120(R11), AX
|
|
|
|
|
SUBQ $0x20, R13
|
|
|
|
|
MOVQ 120(R10), AX
|
|
|
|
|
SUBQ $0x20, R12
|
|
|
|
|
SUBQ $0x04, AX
|
|
|
|
|
MOVQ 96(R11), R14
|
|
|
|
|
MOVQ 96(R10), R13
|
|
|
|
|
|
|
|
|
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
|
|
|
MOVL (AX)(R14*1), R14
|
|
|
|
|
MOVQ R13, CX
|
|
|
|
|
SHLQ CL, R14
|
|
|
|
|
MOVQ AX, 120(R11)
|
|
|
|
|
ORQ R14, R12
|
|
|
|
|
MOVL (AX)(R13*1), R13
|
|
|
|
|
MOVQ R12, CX
|
|
|
|
|
SHLQ CL, R13
|
|
|
|
|
MOVQ AX, 120(R10)
|
|
|
|
|
ORQ R13, R11
|
|
|
|
|
|
|
|
|
|
// exhausted = exhausted || (br2.off < 4)
|
|
|
|
|
CMPQ AX, $0x04
|
|
|
|
|
SETLT AL
|
|
|
|
|
ORB AL, DL
|
|
|
|
|
// exhausted += (br2.off < 4)
|
|
|
|
|
CMPQ AX, $0x04
|
|
|
|
|
ADCB $+0, DL
|
|
|
|
|
|
|
|
|
|
skip_fill2:
|
|
|
|
|
// val0 := br2.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v0 := table[val0&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br2.advance(uint8(v0.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val1 := br2.peekTopBits(peekBits)
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v1 := table[val1&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br2.advance(uint8(v1.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// these two writes get coalesced
|
|
|
|
|
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
|
|
|
|
MOVW AX, (R8)
|
|
|
|
|
MOVW AX, (BX)(R8*2)
|
|
|
|
|
|
|
|
|
|
// update the bitreader structure
|
|
|
|
|
MOVQ R12, 128(R11)
|
|
|
|
|
MOVB R13, 136(R11)
|
|
|
|
|
ADDQ R9, R8
|
|
|
|
|
MOVQ R11, 128(R10)
|
|
|
|
|
MOVB R12, 136(R10)
|
|
|
|
|
|
|
|
|
|
// br3.fillFast32()
|
|
|
|
|
MOVQ 176(R11), R12
|
|
|
|
|
MOVBQZX 184(R11), R13
|
|
|
|
|
CMPQ R13, $0x20
|
|
|
|
|
MOVQ 176(R10), R11
|
|
|
|
|
MOVBQZX 184(R10), R12
|
|
|
|
|
CMPQ R12, $0x20
|
|
|
|
|
JBE skip_fill3
|
|
|
|
|
MOVQ 168(R11), AX
|
|
|
|
|
SUBQ $0x20, R13
|
|
|
|
|
MOVQ 168(R10), AX
|
|
|
|
|
SUBQ $0x20, R12
|
|
|
|
|
SUBQ $0x04, AX
|
|
|
|
|
MOVQ 144(R11), R14
|
|
|
|
|
MOVQ 144(R10), R13
|
|
|
|
|
|
|
|
|
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
|
|
|
MOVL (AX)(R14*1), R14
|
|
|
|
|
MOVQ R13, CX
|
|
|
|
|
SHLQ CL, R14
|
|
|
|
|
MOVQ AX, 168(R11)
|
|
|
|
|
ORQ R14, R12
|
|
|
|
|
MOVL (AX)(R13*1), R13
|
|
|
|
|
MOVQ R12, CX
|
|
|
|
|
SHLQ CL, R13
|
|
|
|
|
MOVQ AX, 168(R10)
|
|
|
|
|
ORQ R13, R11
|
|
|
|
|
|
|
|
|
|
// exhausted = exhausted || (br3.off < 4)
|
|
|
|
|
CMPQ AX, $0x04
|
|
|
|
|
SETLT AL
|
|
|
|
|
ORB AL, DL
|
|
|
|
|
// exhausted += (br3.off < 4)
|
|
|
|
|
CMPQ AX, $0x04
|
|
|
|
|
ADCB $+0, DL
|
|
|
|
|
|
|
|
|
|
skip_fill3:
|
|
|
|
|
// val0 := br3.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v0 := table[val0&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br3.advance(uint8(v0.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val1 := br3.peekTopBits(peekBits)
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v1 := table[val1&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br3.advance(uint8(v1.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// these two writes get coalesced
|
|
|
|
|
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
|
|
|
|
MOVW AX, (R8)
|
|
|
|
|
LEAQ (R8)(R8*2), CX
|
|
|
|
|
MOVW AX, (BX)(CX*1)
|
|
|
|
|
|
|
|
|
|
// update the bitreader structure
|
|
|
|
|
MOVQ R12, 176(R11)
|
|
|
|
|
MOVB R13, 184(R11)
|
|
|
|
|
ADDQ $0x02, SI
|
|
|
|
|
MOVQ R11, 176(R10)
|
|
|
|
|
MOVB R12, 184(R10)
|
|
|
|
|
ADDQ $0x02, BX
|
|
|
|
|
TESTB DL, DL
|
|
|
|
|
JZ main_loop
|
|
|
|
|
MOVQ ctx+0(FP), AX
|
|
|
|
|
SUBQ 16(AX), SI
|
|
|
|
|
SHLQ $0x02, SI
|
|
|
|
|
MOVQ SI, 40(AX)
|
|
|
|
|
SUBQ 16(AX), BX
|
|
|
|
|
SHLQ $0x02, BX
|
|
|
|
|
MOVQ BX, 40(AX)
|
|
|
|
|
RET
|
|
|
|
|
|
|
|
|
|
// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
|
|
|
|
|
TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
|
|
|
|
|
XORQ DX, DX
|
|
|
|
|
|
|
|
|
|
// Preload values
|
|
|
|
|
MOVQ ctx+0(FP), CX
|
|
|
|
|
MOVBQZX 8(CX), DI
|
|
|
|
|
MOVQ 16(CX), BX
|
|
|
|
|
MOVQ 48(CX), SI
|
|
|
|
|
MOVQ 24(CX), R9
|
|
|
|
|
MOVQ 32(CX), R10
|
|
|
|
|
MOVQ (CX), R11
|
|
|
|
|
MOVQ 24(CX), R8
|
|
|
|
|
MOVQ 32(CX), R9
|
|
|
|
|
MOVQ (CX), R10
|
|
|
|
|
|
|
|
|
|
// Main loop
|
|
|
|
|
main_loop:
|
|
|
|
|
MOVQ BX, R8
|
|
|
|
|
CMPQ R8, SI
|
|
|
|
|
XORL DX, DX
|
|
|
|
|
CMPQ BX, SI
|
|
|
|
|
SETGE DL
|
|
|
|
|
|
|
|
|
|
// br0.fillFast32()
|
|
|
|
|
MOVQ 32(R11), R12
|
|
|
|
|
MOVBQZX 40(R11), R13
|
|
|
|
|
CMPQ R13, $0x20
|
|
|
|
|
MOVQ 32(R10), R11
|
|
|
|
|
MOVBQZX 40(R10), R12
|
|
|
|
|
CMPQ R12, $0x20
|
|
|
|
|
JBE skip_fill0
|
|
|
|
|
MOVQ 24(R11), R14
|
|
|
|
|
SUBQ $0x20, R13
|
|
|
|
|
SUBQ $0x04, R14
|
|
|
|
|
MOVQ (R11), R15
|
|
|
|
|
MOVQ 24(R10), R13
|
|
|
|
|
SUBQ $0x20, R12
|
|
|
|
|
SUBQ $0x04, R13
|
|
|
|
|
MOVQ (R10), R14
|
|
|
|
|
|
|
|
|
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
|
|
|
MOVL (R14)(R15*1), R15
|
|
|
|
|
MOVQ R13, CX
|
|
|
|
|
SHLQ CL, R15
|
|
|
|
|
MOVQ R14, 24(R11)
|
|
|
|
|
ORQ R15, R12
|
|
|
|
|
MOVL (R13)(R14*1), R14
|
|
|
|
|
MOVQ R12, CX
|
|
|
|
|
SHLQ CL, R14
|
|
|
|
|
MOVQ R13, 24(R10)
|
|
|
|
|
ORQ R14, R11
|
|
|
|
|
|
|
|
|
|
// exhausted = exhausted || (br0.off < 4)
|
|
|
|
|
CMPQ R14, $0x04
|
|
|
|
|
SETLT AL
|
|
|
|
|
ORB AL, DL
|
|
|
|
|
// exhausted += (br0.off < 4)
|
|
|
|
|
CMPQ R13, $0x04
|
|
|
|
|
ADCB $+0, DL
|
|
|
|
|
|
|
|
|
|
skip_fill0:
|
|
|
|
|
// val0 := br0.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v0 := table[val0&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br0.advance(uint8(v0.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val1 := br0.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v1 := table[val1&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br0.advance(uint8(v1.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
BSWAPL AX
|
|
|
|
|
|
|
|
|
|
// val2 := br0.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v2 := table[val2&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br0.advance(uint8(v2.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val3 := br0.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v3 := table[val3&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br0.advance(uint8(v3.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
BSWAPL AX
|
|
|
|
|
|
|
|
|
|
// these four writes get coalesced
|
|
|
|
@ -365,88 +354,86 @@ skip_fill0:
|
|
|
|
|
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
|
|
|
|
|
MOVL AX, (R8)
|
|
|
|
|
MOVL AX, (BX)
|
|
|
|
|
|
|
|
|
|
// update the bitreader structure
|
|
|
|
|
MOVQ R12, 32(R11)
|
|
|
|
|
MOVB R13, 40(R11)
|
|
|
|
|
ADDQ R9, R8
|
|
|
|
|
MOVQ R11, 32(R10)
|
|
|
|
|
MOVB R12, 40(R10)
|
|
|
|
|
|
|
|
|
|
// br1.fillFast32()
|
|
|
|
|
MOVQ 80(R11), R12
|
|
|
|
|
MOVBQZX 88(R11), R13
|
|
|
|
|
CMPQ R13, $0x20
|
|
|
|
|
MOVQ 80(R10), R11
|
|
|
|
|
MOVBQZX 88(R10), R12
|
|
|
|
|
CMPQ R12, $0x20
|
|
|
|
|
JBE skip_fill1
|
|
|
|
|
MOVQ 72(R11), R14
|
|
|
|
|
SUBQ $0x20, R13
|
|
|
|
|
SUBQ $0x04, R14
|
|
|
|
|
MOVQ 48(R11), R15
|
|
|
|
|
MOVQ 72(R10), R13
|
|
|
|
|
SUBQ $0x20, R12
|
|
|
|
|
SUBQ $0x04, R13
|
|
|
|
|
MOVQ 48(R10), R14
|
|
|
|
|
|
|
|
|
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
|
|
|
MOVL (R14)(R15*1), R15
|
|
|
|
|
MOVQ R13, CX
|
|
|
|
|
SHLQ CL, R15
|
|
|
|
|
MOVQ R14, 72(R11)
|
|
|
|
|
ORQ R15, R12
|
|
|
|
|
MOVL (R13)(R14*1), R14
|
|
|
|
|
MOVQ R12, CX
|
|
|
|
|
SHLQ CL, R14
|
|
|
|
|
MOVQ R13, 72(R10)
|
|
|
|
|
ORQ R14, R11
|
|
|
|
|
|
|
|
|
|
// exhausted = exhausted || (br1.off < 4)
|
|
|
|
|
CMPQ R14, $0x04
|
|
|
|
|
SETLT AL
|
|
|
|
|
ORB AL, DL
|
|
|
|
|
// exhausted += (br1.off < 4)
|
|
|
|
|
CMPQ R13, $0x04
|
|
|
|
|
ADCB $+0, DL
|
|
|
|
|
|
|
|
|
|
skip_fill1:
|
|
|
|
|
// val0 := br1.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v0 := table[val0&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br1.advance(uint8(v0.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val1 := br1.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v1 := table[val1&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br1.advance(uint8(v1.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
BSWAPL AX
|
|
|
|
|
|
|
|
|
|
// val2 := br1.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v2 := table[val2&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br1.advance(uint8(v2.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val3 := br1.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v3 := table[val3&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br1.advance(uint8(v3.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
BSWAPL AX
|
|
|
|
|
|
|
|
|
|
// these four writes get coalesced
|
|
|
|
@ -454,88 +441,86 @@ skip_fill1:
|
|
|
|
|
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
|
|
|
|
|
MOVL AX, (R8)
|
|
|
|
|
MOVL AX, (BX)(R8*1)
|
|
|
|
|
|
|
|
|
|
// update the bitreader structure
|
|
|
|
|
MOVQ R12, 80(R11)
|
|
|
|
|
MOVB R13, 88(R11)
|
|
|
|
|
ADDQ R9, R8
|
|
|
|
|
MOVQ R11, 80(R10)
|
|
|
|
|
MOVB R12, 88(R10)
|
|
|
|
|
|
|
|
|
|
// br2.fillFast32()
|
|
|
|
|
MOVQ 128(R11), R12
|
|
|
|
|
MOVBQZX 136(R11), R13
|
|
|
|
|
CMPQ R13, $0x20
|
|
|
|
|
MOVQ 128(R10), R11
|
|
|
|
|
MOVBQZX 136(R10), R12
|
|
|
|
|
CMPQ R12, $0x20
|
|
|
|
|
JBE skip_fill2
|
|
|
|
|
MOVQ 120(R11), R14
|
|
|
|
|
SUBQ $0x20, R13
|
|
|
|
|
SUBQ $0x04, R14
|
|
|
|
|
MOVQ 96(R11), R15
|
|
|
|
|
MOVQ 120(R10), R13
|
|
|
|
|
SUBQ $0x20, R12
|
|
|
|
|
SUBQ $0x04, R13
|
|
|
|
|
MOVQ 96(R10), R14
|
|
|
|
|
|
|
|
|
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
|
|
|
MOVL (R14)(R15*1), R15
|
|
|
|
|
MOVQ R13, CX
|
|
|
|
|
SHLQ CL, R15
|
|
|
|
|
MOVQ R14, 120(R11)
|
|
|
|
|
ORQ R15, R12
|
|
|
|
|
MOVL (R13)(R14*1), R14
|
|
|
|
|
MOVQ R12, CX
|
|
|
|
|
SHLQ CL, R14
|
|
|
|
|
MOVQ R13, 120(R10)
|
|
|
|
|
ORQ R14, R11
|
|
|
|
|
|
|
|
|
|
// exhausted = exhausted || (br2.off < 4)
|
|
|
|
|
CMPQ R14, $0x04
|
|
|
|
|
SETLT AL
|
|
|
|
|
ORB AL, DL
|
|
|
|
|
// exhausted += (br2.off < 4)
|
|
|
|
|
CMPQ R13, $0x04
|
|
|
|
|
ADCB $+0, DL
|
|
|
|
|
|
|
|
|
|
skip_fill2:
|
|
|
|
|
// val0 := br2.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v0 := table[val0&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br2.advance(uint8(v0.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val1 := br2.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v1 := table[val1&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br2.advance(uint8(v1.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
BSWAPL AX
|
|
|
|
|
|
|
|
|
|
// val2 := br2.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v2 := table[val2&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br2.advance(uint8(v2.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val3 := br2.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v3 := table[val3&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br2.advance(uint8(v3.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
BSWAPL AX
|
|
|
|
|
|
|
|
|
|
// these four writes get coalesced
|
|
|
|
@ -543,88 +528,86 @@ skip_fill2:
|
|
|
|
|
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
|
|
|
|
|
MOVL AX, (R8)
|
|
|
|
|
MOVL AX, (BX)(R8*2)
|
|
|
|
|
|
|
|
|
|
// update the bitreader structure
|
|
|
|
|
MOVQ R12, 128(R11)
|
|
|
|
|
MOVB R13, 136(R11)
|
|
|
|
|
ADDQ R9, R8
|
|
|
|
|
MOVQ R11, 128(R10)
|
|
|
|
|
MOVB R12, 136(R10)
|
|
|
|
|
|
|
|
|
|
// br3.fillFast32()
|
|
|
|
|
MOVQ 176(R11), R12
|
|
|
|
|
MOVBQZX 184(R11), R13
|
|
|
|
|
CMPQ R13, $0x20
|
|
|
|
|
MOVQ 176(R10), R11
|
|
|
|
|
MOVBQZX 184(R10), R12
|
|
|
|
|
CMPQ R12, $0x20
|
|
|
|
|
JBE skip_fill3
|
|
|
|
|
MOVQ 168(R11), R14
|
|
|
|
|
SUBQ $0x20, R13
|
|
|
|
|
SUBQ $0x04, R14
|
|
|
|
|
MOVQ 144(R11), R15
|
|
|
|
|
MOVQ 168(R10), R13
|
|
|
|
|
SUBQ $0x20, R12
|
|
|
|
|
SUBQ $0x04, R13
|
|
|
|
|
MOVQ 144(R10), R14
|
|
|
|
|
|
|
|
|
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
|
|
|
MOVL (R14)(R15*1), R15
|
|
|
|
|
MOVQ R13, CX
|
|
|
|
|
SHLQ CL, R15
|
|
|
|
|
MOVQ R14, 168(R11)
|
|
|
|
|
ORQ R15, R12
|
|
|
|
|
MOVL (R13)(R14*1), R14
|
|
|
|
|
MOVQ R12, CX
|
|
|
|
|
SHLQ CL, R14
|
|
|
|
|
MOVQ R13, 168(R10)
|
|
|
|
|
ORQ R14, R11
|
|
|
|
|
|
|
|
|
|
// exhausted = exhausted || (br3.off < 4)
|
|
|
|
|
CMPQ R14, $0x04
|
|
|
|
|
SETLT AL
|
|
|
|
|
ORB AL, DL
|
|
|
|
|
// exhausted += (br3.off < 4)
|
|
|
|
|
CMPQ R13, $0x04
|
|
|
|
|
ADCB $+0, DL
|
|
|
|
|
|
|
|
|
|
skip_fill3:
|
|
|
|
|
// val0 := br3.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v0 := table[val0&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br3.advance(uint8(v0.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val1 := br3.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v1 := table[val1&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br3.advance(uint8(v1.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
BSWAPL AX
|
|
|
|
|
|
|
|
|
|
// val2 := br3.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v2 := table[val2&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br3.advance(uint8(v2.entry))
|
|
|
|
|
MOVB CH, AH
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
|
|
|
|
|
// val3 := br3.peekTopBits(peekBits)
|
|
|
|
|
MOVQ R12, R14
|
|
|
|
|
MOVQ R11, R13
|
|
|
|
|
MOVQ DI, CX
|
|
|
|
|
SHRQ CL, R14
|
|
|
|
|
SHRQ CL, R13
|
|
|
|
|
|
|
|
|
|
// v3 := table[val3&mask]
|
|
|
|
|
MOVW (R10)(R14*2), CX
|
|
|
|
|
MOVW (R9)(R13*2), CX
|
|
|
|
|
|
|
|
|
|
// br3.advance(uint8(v3.entry))
|
|
|
|
|
MOVB CH, AL
|
|
|
|
|
SHLQ CL, R12
|
|
|
|
|
ADDB CL, R13
|
|
|
|
|
SHLQ CL, R11
|
|
|
|
|
ADDB CL, R12
|
|
|
|
|
BSWAPL AX
|
|
|
|
|
|
|
|
|
|
// these four writes get coalesced
|
|
|
|
@ -632,11 +615,12 @@ skip_fill3:
|
|
|
|
|
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 2] = uint8(v2.entry >> 8)
|
|
|
|
|
// out[id * dstEvery + 3] = uint8(v3.entry >> 8)
|
|
|
|
|
MOVL AX, (R8)
|
|
|
|
|
LEAQ (R8)(R8*2), CX
|
|
|
|
|
MOVL AX, (BX)(CX*1)
|
|
|
|
|
|
|
|
|
|
// update the bitreader structure
|
|
|
|
|
MOVQ R12, 176(R11)
|
|
|
|
|
MOVB R13, 184(R11)
|
|
|
|
|
MOVQ R11, 176(R10)
|
|
|
|
|
MOVB R12, 184(R10)
|
|
|
|
|
ADDQ $0x04, BX
|
|
|
|
|
TESTB DL, DL
|
|
|
|
|
JZ main_loop
|
|
|
|
@ -652,7 +636,7 @@ TEXT ·decompress1x_main_loop_amd64(SB), $0-8
|
|
|
|
|
MOVQ 16(CX), DX
|
|
|
|
|
MOVQ 24(CX), BX
|
|
|
|
|
CMPQ BX, $0x04
|
|
|
|
|
JB error_max_decoded_size_exeeded
|
|
|
|
|
JB error_max_decoded_size_exceeded
|
|
|
|
|
LEAQ (DX)(BX*1), BX
|
|
|
|
|
MOVQ (CX), SI
|
|
|
|
|
MOVQ (SI), R8
|
|
|
|
@ -667,7 +651,7 @@ main_loop:
|
|
|
|
|
// Check if we have room for 4 bytes in the output buffer
|
|
|
|
|
LEAQ 4(DX), CX
|
|
|
|
|
CMPQ CX, BX
|
|
|
|
|
JGE error_max_decoded_size_exeeded
|
|
|
|
|
JGE error_max_decoded_size_exceeded
|
|
|
|
|
|
|
|
|
|
// Decode 4 values
|
|
|
|
|
CMPQ R11, $0x20
|
|
|
|
@ -744,7 +728,7 @@ loop_condition:
|
|
|
|
|
RET
|
|
|
|
|
|
|
|
|
|
// Report error
|
|
|
|
|
error_max_decoded_size_exeeded:
|
|
|
|
|
error_max_decoded_size_exceeded:
|
|
|
|
|
MOVQ ctx+0(FP), AX
|
|
|
|
|
MOVQ $-1, CX
|
|
|
|
|
MOVQ CX, 40(AX)
|
|
|
|
@ -757,7 +741,7 @@ TEXT ·decompress1x_main_loop_bmi2(SB), $0-8
|
|
|
|
|
MOVQ 16(CX), DX
|
|
|
|
|
MOVQ 24(CX), BX
|
|
|
|
|
CMPQ BX, $0x04
|
|
|
|
|
JB error_max_decoded_size_exeeded
|
|
|
|
|
JB error_max_decoded_size_exceeded
|
|
|
|
|
LEAQ (DX)(BX*1), BX
|
|
|
|
|
MOVQ (CX), SI
|
|
|
|
|
MOVQ (SI), R8
|
|
|
|
@ -772,7 +756,7 @@ main_loop:
|
|
|
|
|
// Check if we have room for 4 bytes in the output buffer
|
|
|
|
|
LEAQ 4(DX), CX
|
|
|
|
|
CMPQ CX, BX
|
|
|
|
|
JGE error_max_decoded_size_exeeded
|
|
|
|
|
JGE error_max_decoded_size_exceeded
|
|
|
|
|
|
|
|
|
|
// Decode 4 values
|
|
|
|
|
CMPQ R11, $0x20
|
|
|
|
@ -839,7 +823,7 @@ loop_condition:
|
|
|
|
|
RET
|
|
|
|
|
|
|
|
|
|
// Report error
|
|
|
|
|
error_max_decoded_size_exeeded:
|
|
|
|
|
error_max_decoded_size_exceeded:
|
|
|
|
|
MOVQ ctx+0(FP), AX
|
|
|
|
|
MOVQ $-1, CX
|
|
|
|
|
MOVQ CX, 40(AX)
|
|
|
|
|