diff options
Diffstat (limited to 'vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s')
-rw-r--r-- | vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s | 266 |
1 files changed, 266 insertions, 0 deletions
diff --git a/vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s b/vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s new file mode 100644 index 00000000..909fc0ef --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s @@ -0,0 +1,266 @@ +//+build !noasm,!appengine + +// SHA intrinsic version of SHA256 + +// Kristofer Peterson, (C) 2018. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "textflag.h" + +DATA K<>+0x00(SB)/4, $0x428a2f98 +DATA K<>+0x04(SB)/4, $0x71374491 +DATA K<>+0x08(SB)/4, $0xb5c0fbcf +DATA K<>+0x0c(SB)/4, $0xe9b5dba5 +DATA K<>+0x10(SB)/4, $0x3956c25b +DATA K<>+0x14(SB)/4, $0x59f111f1 +DATA K<>+0x18(SB)/4, $0x923f82a4 +DATA K<>+0x1c(SB)/4, $0xab1c5ed5 +DATA K<>+0x20(SB)/4, $0xd807aa98 +DATA K<>+0x24(SB)/4, $0x12835b01 +DATA K<>+0x28(SB)/4, $0x243185be +DATA K<>+0x2c(SB)/4, $0x550c7dc3 +DATA K<>+0x30(SB)/4, $0x72be5d74 +DATA K<>+0x34(SB)/4, $0x80deb1fe +DATA K<>+0x38(SB)/4, $0x9bdc06a7 +DATA K<>+0x3c(SB)/4, $0xc19bf174 +DATA K<>+0x40(SB)/4, $0xe49b69c1 +DATA K<>+0x44(SB)/4, $0xefbe4786 +DATA K<>+0x48(SB)/4, $0x0fc19dc6 +DATA K<>+0x4c(SB)/4, $0x240ca1cc +DATA K<>+0x50(SB)/4, $0x2de92c6f +DATA K<>+0x54(SB)/4, $0x4a7484aa +DATA K<>+0x58(SB)/4, $0x5cb0a9dc +DATA K<>+0x5c(SB)/4, $0x76f988da +DATA K<>+0x60(SB)/4, $0x983e5152 +DATA K<>+0x64(SB)/4, $0xa831c66d +DATA K<>+0x68(SB)/4, $0xb00327c8 +DATA K<>+0x6c(SB)/4, $0xbf597fc7 +DATA K<>+0x70(SB)/4, $0xc6e00bf3 +DATA K<>+0x74(SB)/4, $0xd5a79147 +DATA K<>+0x78(SB)/4, $0x06ca6351 +DATA K<>+0x7c(SB)/4, $0x14292967 +DATA K<>+0x80(SB)/4, $0x27b70a85 +DATA K<>+0x84(SB)/4, $0x2e1b2138 +DATA K<>+0x88(SB)/4, $0x4d2c6dfc +DATA K<>+0x8c(SB)/4, $0x53380d13 +DATA K<>+0x90(SB)/4, $0x650a7354 +DATA K<>+0x94(SB)/4, $0x766a0abb +DATA K<>+0x98(SB)/4, $0x81c2c92e +DATA K<>+0x9c(SB)/4, $0x92722c85 +DATA K<>+0xa0(SB)/4, $0xa2bfe8a1 +DATA K<>+0xa4(SB)/4, $0xa81a664b +DATA K<>+0xa8(SB)/4, $0xc24b8b70 +DATA K<>+0xac(SB)/4, $0xc76c51a3 +DATA K<>+0xb0(SB)/4, $0xd192e819 +DATA K<>+0xb4(SB)/4, $0xd6990624 +DATA K<>+0xb8(SB)/4, $0xf40e3585 +DATA K<>+0xbc(SB)/4, $0x106aa070 +DATA K<>+0xc0(SB)/4, $0x19a4c116 +DATA K<>+0xc4(SB)/4, $0x1e376c08 +DATA K<>+0xc8(SB)/4, $0x2748774c +DATA K<>+0xcc(SB)/4, $0x34b0bcb5 +DATA K<>+0xd0(SB)/4, $0x391c0cb3 +DATA K<>+0xd4(SB)/4, $0x4ed8aa4a +DATA K<>+0xd8(SB)/4, $0x5b9cca4f +DATA K<>+0xdc(SB)/4, $0x682e6ff3 +DATA K<>+0xe0(SB)/4, $0x748f82ee +DATA K<>+0xe4(SB)/4, $0x78a5636f +DATA K<>+0xe8(SB)/4, $0x84c87814 +DATA K<>+0xec(SB)/4, $0x8cc70208 +DATA K<>+0xf0(SB)/4, $0x90befffa +DATA K<>+0xf4(SB)/4, $0xa4506ceb +DATA K<>+0xf8(SB)/4, $0xbef9a3f7 +DATA K<>+0xfc(SB)/4, $0xc67178f2 +GLOBL K<>(SB), RODATA|NOPTR, $256 + +DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203 +DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b +GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16 + +// Register Usage +// BX base address of constant table (constant) +// DX hash_state (constant) +// SI hash_data.data +// DI hash_data.data + hash_data.length - 64 (constant) +// X0 scratch +// X1 scratch +// X2 working hash state // ABEF +// X3 working hash state // CDGH +// X4 first 16 bytes of block +// X5 second 16 bytes of block +// X6 third 16 bytes of block +// X7 fourth 16 bytes of block +// X12 saved hash state // ABEF +// X13 saved hash state // CDGH +// X15 data shuffle mask (constant) + +TEXT ·blockSha(SB), NOSPLIT, $0-32 + MOVQ h+0(FP), DX + MOVQ message_base+8(FP), SI + MOVQ message_len+16(FP), DI + LEAQ -64(SI)(DI*1), DI + MOVOU (DX), X2 + MOVOU 16(DX), X1 + MOVO X2, X3 + PUNPCKLLQ X1, X2 + PUNPCKHLQ X1, X3 + PSHUFD $0x27, X2, X2 + PSHUFD $0x27, X3, X3 + MOVO SHUF_MASK<>(SB), X15 + LEAQ K<>(SB), BX + + JMP TEST + +LOOP: + MOVO X2, X12 + MOVO X3, X13 + + // load block and shuffle + MOVOU (SI), X4 + MOVOU 16(SI), X5 + MOVOU 32(SI), X6 + MOVOU 48(SI), X7 + PSHUFB X15, X4 + PSHUFB X15, X5 + PSHUFB X15, X6 + PSHUFB X15, X7 + +#define ROUND456 \ + PADDL X5, X0 \ + LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 + MOVO X5, X1 \ + LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4 + PADDL X1, X6 \ + LONG $0xf5cd380f \ // SHA256MSG2 XMM6, XMM5 + PSHUFD $0x4e, X0, X0 \ + LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 + LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 + +#define ROUND567 \ + PADDL X6, X0 \ + LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 + MOVO X6, X1 \ + LONG $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4 + PADDL X1, X7 \ + LONG $0xfecd380f \ // SHA256MSG2 XMM7, XMM6 + PSHUFD $0x4e, X0, X0 \ + LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 + LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 + +#define ROUND674 \ + PADDL X7, X0 \ + LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 + MOVO X7, X1 \ + LONG $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4 + PADDL X1, X4 \ + LONG $0xe7cd380f \ // SHA256MSG2 XMM4, XMM7 + PSHUFD $0x4e, X0, X0 \ + LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 + LONG $0xf7cc380f // SHA256MSG1 XMM6, XMM7 + +#define ROUND745 \ + PADDL X4, X0 \ + LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 + MOVO X4, X1 \ + LONG $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4 + PADDL X1, X5 \ + LONG $0xeccd380f \ // SHA256MSG2 XMM5, XMM4 + PSHUFD $0x4e, X0, X0 \ + LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 + LONG $0xfccc380f // SHA256MSG1 XMM7, XMM4 + + // rounds 0-3 + MOVO (BX), X0 + PADDL X4, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + + // rounds 4-7 + MOVO 1*16(BX), X0 + PADDL X5, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 + + // rounds 8-11 + MOVO 2*16(BX), X0 + PADDL X6, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 + + MOVO 3*16(BX), X0; ROUND674 // rounds 12-15 + MOVO 4*16(BX), X0; ROUND745 // rounds 16-19 + MOVO 5*16(BX), X0; ROUND456 // rounds 20-23 + MOVO 6*16(BX), X0; ROUND567 // rounds 24-27 + MOVO 7*16(BX), X0; ROUND674 // rounds 28-31 + MOVO 8*16(BX), X0; ROUND745 // rounds 32-35 + MOVO 9*16(BX), X0; ROUND456 // rounds 36-39 + MOVO 10*16(BX), X0; ROUND567 // rounds 40-43 + MOVO 11*16(BX), X0; ROUND674 // rounds 44-47 + MOVO 12*16(BX), X0; ROUND745 // rounds 48-51 + + // rounds 52-55 + MOVO 13*16(BX), X0 + PADDL X5, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + MOVO X5, X1 + LONG $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4 + PADDL X1, X6 + LONG $0xf5cd380f // SHA256MSG2 XMM6, XMM5 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + + // rounds 56-59 + MOVO 14*16(BX), X0 + PADDL X6, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + MOVO X6, X1 + LONG $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4 + PADDL X1, X7 + LONG $0xfecd380f // SHA256MSG2 XMM7, XMM6 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + + // rounds 60-63 + MOVO 15*16(BX), X0 + PADDL X7, X0 + LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 + PSHUFD $0x4e, X0, X0 + LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 + + PADDL X12, X2 + PADDL X13, X3 + + ADDQ $64, SI + +TEST: + CMPQ SI, DI + JBE LOOP + + PSHUFD $0x4e, X3, X0 + LONG $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0 + PSHUFD $0x4e, X2, X1 + LONG $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f + PSHUFD $0x1b, X0, X0 + PSHUFD $0x1b, X1, X1 + + MOVOU X0, (DX) + MOVOU X1, 16(DX) + + RET |