// Copyright 2016 Tom Thorogood. All rights reserved. // Use of this source code is governed by a // Modified BSD License license that can be found in // the LICENSE file. // // Copyright 2005-2016, Wojciech Muła. All rights reserved. // Use of this source code is governed by a // Simplified BSD License license that can be found in // the LICENSE file. // // This file is auto-generated - do not modify // +build amd64,!gccgo,!appengine #include "textflag.h" DATA decodeBase<>+0x00(SB)/8, $0x3030303030303030 DATA decodeBase<>+0x08(SB)/8, $0x3030303030303030 DATA decodeBase<>+0x10(SB)/8, $0x2727272727272727 DATA decodeBase<>+0x18(SB)/8, $0x2727272727272727 GLOBL decodeBase<>(SB),RODATA,$32 DATA decodeToLower<>+0x00(SB)/8, $0x2020202020202020 DATA decodeToLower<>+0x08(SB)/8, $0x2020202020202020 GLOBL decodeToLower<>(SB),RODATA,$16 DATA decodeHigh<>+0x00(SB)/8, $0x0e0c0a0806040200 DATA decodeHigh<>+0x08(SB)/8, $0xffffffffffffffff GLOBL decodeHigh<>(SB),RODATA,$16 DATA decodeLow<>+0x00(SB)/8, $0x0f0d0b0907050301 DATA decodeLow<>+0x08(SB)/8, $0xffffffffffffffff GLOBL decodeLow<>(SB),RODATA,$16 DATA decodeValid<>+0x00(SB)/8, $0xb0b0b0b0b0b0b0b0 DATA decodeValid<>+0x08(SB)/8, $0xb0b0b0b0b0b0b0b0 DATA decodeValid<>+0x10(SB)/8, $0xb9b9b9b9b9b9b9b9 DATA decodeValid<>+0x18(SB)/8, $0xb9b9b9b9b9b9b9b9 DATA decodeValid<>+0x20(SB)/8, $0xe1e1e1e1e1e1e1e1 DATA decodeValid<>+0x28(SB)/8, $0xe1e1e1e1e1e1e1e1 DATA decodeValid<>+0x30(SB)/8, $0xe6e6e6e6e6e6e6e6 DATA decodeValid<>+0x38(SB)/8, $0xe6e6e6e6e6e6e6e6 GLOBL decodeValid<>(SB),RODATA,$64 DATA decodeToSigned<>+0x00(SB)/8, $0x8080808080808080 DATA decodeToSigned<>+0x08(SB)/8, $0x8080808080808080 GLOBL decodeToSigned<>(SB),RODATA,$16 TEXT ·decodeAVX(SB),NOSPLIT,$0 MOVQ dst+0(FP), DI MOVQ src+8(FP), SI MOVQ len+16(FP), BX MOVQ SI, R15 MOVOU decodeValid<>(SB), X14 MOVOU decodeValid<>+0x20(SB), X15 MOVW $65535, DX CMPQ BX, $16 JB tail bigloop: MOVOU (SI), X0 VPXOR decodeToSigned<>(SB), X0, X1 POR decodeToLower<>(SB), X0 VPXOR decodeToSigned<>(SB), X0, X2 VPCMPGTB X1, X14, X3 PCMPGTB decodeValid<>+0x10(SB), X1 VPCMPGTB X2, X15, X4 PCMPGTB decodeValid<>+0x30(SB), X2 PAND X4, X1 POR X2, X3 POR X1, X3 PMOVMSKB X3, AX TESTW AX, DX JNZ invalid PSUBB decodeBase<>(SB), X0 PANDN decodeBase<>+0x10(SB), X4 PSUBB X4, X0 VPSHUFB decodeLow<>(SB), X0, X3 PSHUFB decodeHigh<>(SB), X0 PSLLW $4, X0 POR X3, X0 MOVQ X0, (DI) SUBQ $16, BX JZ ret ADDQ $16, SI ADDQ $8, DI CMPQ BX, $16 JAE bigloop tail: MOVQ $16, CX SUBQ BX, CX SHRW CX, DX CMPQ BX, $4 JB tail_in_2 JE tail_in_4 CMPQ BX, $8 JB tail_in_6 JE tail_in_8 CMPQ BX, $12 JB tail_in_10 JE tail_in_12 tail_in_14: PINSRW $6, 12(SI), X0 tail_in_12: PINSRW $5, 10(SI), X0 tail_in_10: PINSRW $4, 8(SI), X0 tail_in_8: PINSRQ $0, (SI), X0 JMP tail_conv tail_in_6: PINSRW $2, 4(SI), X0 tail_in_4: PINSRW $1, 2(SI), X0 tail_in_2: PINSRW $0, (SI), X0 tail_conv: VPXOR decodeToSigned<>(SB), X0, X1 POR decodeToLower<>(SB), X0 VPXOR decodeToSigned<>(SB), X0, X2 VPCMPGTB X1, X14, X3 PCMPGTB decodeValid<>+0x10(SB), X1 VPCMPGTB X2, X15, X4 PCMPGTB decodeValid<>+0x30(SB), X2 PAND X4, X1 POR X2, X3 POR X1, X3 PMOVMSKB X3, AX TESTW AX, DX JNZ invalid PSUBB decodeBase<>(SB), X0 PANDN decodeBase<>+0x10(SB), X4 PSUBB X4, X0 VPSHUFB decodeLow<>(SB), X0, X3 PSHUFB decodeHigh<>(SB), X0 PSLLW $4, X0 POR X3, X0 CMPQ BX, $4 JB tail_out_2 JE tail_out_4 CMPQ BX, $8 JB tail_out_6 JE tail_out_8 CMPQ BX, $12 JB tail_out_10 JE tail_out_12 tail_out_14: PEXTRB $6, X0, 6(DI) tail_out_12: PEXTRB $5, X0, 5(DI) tail_out_10: PEXTRB $4, X0, 4(DI) tail_out_8: MOVL X0, (DI) JMP ret tail_out_6: PEXTRB $2, X0, 2(DI) tail_out_4: PEXTRB $1, X0, 1(DI) tail_out_2: PEXTRB $0, X0, (DI) ret: MOVB $1, ok+32(FP) RET invalid: BSFW AX, AX SUBQ R15, SI ADDQ SI, AX MOVQ AX, n+24(FP) MOVB $0, ok+32(FP) RET TEXT ·decodeSSE(SB),NOSPLIT,$0 MOVQ dst+0(FP), DI MOVQ src+8(FP), SI MOVQ len+16(FP), BX MOVQ SI, R15 MOVOU decodeValid<>(SB), X14 MOVOU decodeValid<>+0x20(SB), X15 MOVW $65535, DX CMPQ BX, $16 JB tail bigloop: MOVOU (SI), X0 MOVOU X0, X1 PXOR decodeToSigned<>(SB), X1 POR decodeToLower<>(SB), X0 MOVOU X0, X2 PXOR decodeToSigned<>(SB), X2 MOVOU X14, X3 PCMPGTB X1, X3 PCMPGTB decodeValid<>+0x10(SB), X1 MOVOU X15, X4 PCMPGTB X2, X4 PCMPGTB decodeValid<>+0x30(SB), X2 PAND X4, X1 POR X2, X3 POR X1, X3 PMOVMSKB X3, AX TESTW AX, DX JNZ invalid PSUBB decodeBase<>(SB), X0 PANDN decodeBase<>+0x10(SB), X4 PSUBB X4, X0 MOVOU X0, X3 PSHUFB decodeLow<>(SB), X3 PSHUFB decodeHigh<>(SB), X0 PSLLW $4, X0 POR X3, X0 MOVQ X0, (DI) SUBQ $16, BX JZ ret ADDQ $16, SI ADDQ $8, DI CMPQ BX, $16 JAE bigloop tail: MOVQ $16, CX SUBQ BX, CX SHRW CX, DX CMPQ BX, $4 JB tail_in_2 JE tail_in_4 CMPQ BX, $8 JB tail_in_6 JE tail_in_8 CMPQ BX, $12 JB tail_in_10 JE tail_in_12 tail_in_14: PINSRW $6, 12(SI), X0 tail_in_12: PINSRW $5, 10(SI), X0 tail_in_10: PINSRW $4, 8(SI), X0 tail_in_8: PINSRQ $0, (SI), X0 JMP tail_conv tail_in_6: PINSRW $2, 4(SI), X0 tail_in_4: PINSRW $1, 2(SI), X0 tail_in_2: PINSRW $0, (SI), X0 tail_conv: MOVOU X0, X1 PXOR decodeToSigned<>(SB), X1 POR decodeToLower<>(SB), X0 MOVOU X0, X2 PXOR decodeToSigned<>(SB), X2 MOVOU X14, X3 PCMPGTB X1, X3 PCMPGTB decodeValid<>+0x10(SB), X1 MOVOU X15, X4 PCMPGTB X2, X4 PCMPGTB decodeValid<>+0x30(SB), X2 PAND X4, X1 POR X2, X3 POR X1, X3 PMOVMSKB X3, AX TESTW AX, DX JNZ invalid PSUBB decodeBase<>(SB), X0 PANDN decodeBase<>+0x10(SB), X4 PSUBB X4, X0 MOVOU X0, X3 PSHUFB decodeLow<>(SB), X3 PSHUFB decodeHigh<>(SB), X0 PSLLW $4, X0 POR X3, X0 CMPQ BX, $4 JB tail_out_2 JE tail_out_4 CMPQ BX, $8 JB tail_out_6 JE tail_out_8 CMPQ BX, $12 JB tail_out_10 JE tail_out_12 tail_out_14: PEXTRB $6, X0, 6(DI) tail_out_12: PEXTRB $5, X0, 5(DI) tail_out_10: PEXTRB $4, X0, 4(DI) tail_out_8: MOVL X0, (DI) JMP ret tail_out_6: PEXTRB $2, X0, 2(DI) tail_out_4: PEXTRB $1, X0, 1(DI) tail_out_2: PEXTRB $0, X0, (DI) ret: MOVB $1, ok+32(FP) RET invalid: BSFW AX, AX SUBQ R15, SI ADDQ SI, AX MOVQ AX, n+24(FP) MOVB $0, ok+32(FP) RET