summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s')
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s266
1 files changed, 266 insertions, 0 deletions
diff --git a/vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s b/vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s
new file mode 100644
index 00000000..909fc0ef
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s
@@ -0,0 +1,266 @@
+//+build !noasm,!appengine
+
+// SHA intrinsic version of SHA256
+
+// Kristofer Peterson, (C) 2018.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "textflag.h"
+
+DATA K<>+0x00(SB)/4, $0x428a2f98
+DATA K<>+0x04(SB)/4, $0x71374491
+DATA K<>+0x08(SB)/4, $0xb5c0fbcf
+DATA K<>+0x0c(SB)/4, $0xe9b5dba5
+DATA K<>+0x10(SB)/4, $0x3956c25b
+DATA K<>+0x14(SB)/4, $0x59f111f1
+DATA K<>+0x18(SB)/4, $0x923f82a4
+DATA K<>+0x1c(SB)/4, $0xab1c5ed5
+DATA K<>+0x20(SB)/4, $0xd807aa98
+DATA K<>+0x24(SB)/4, $0x12835b01
+DATA K<>+0x28(SB)/4, $0x243185be
+DATA K<>+0x2c(SB)/4, $0x550c7dc3
+DATA K<>+0x30(SB)/4, $0x72be5d74
+DATA K<>+0x34(SB)/4, $0x80deb1fe
+DATA K<>+0x38(SB)/4, $0x9bdc06a7
+DATA K<>+0x3c(SB)/4, $0xc19bf174
+DATA K<>+0x40(SB)/4, $0xe49b69c1
+DATA K<>+0x44(SB)/4, $0xefbe4786
+DATA K<>+0x48(SB)/4, $0x0fc19dc6
+DATA K<>+0x4c(SB)/4, $0x240ca1cc
+DATA K<>+0x50(SB)/4, $0x2de92c6f
+DATA K<>+0x54(SB)/4, $0x4a7484aa
+DATA K<>+0x58(SB)/4, $0x5cb0a9dc
+DATA K<>+0x5c(SB)/4, $0x76f988da
+DATA K<>+0x60(SB)/4, $0x983e5152
+DATA K<>+0x64(SB)/4, $0xa831c66d
+DATA K<>+0x68(SB)/4, $0xb00327c8
+DATA K<>+0x6c(SB)/4, $0xbf597fc7
+DATA K<>+0x70(SB)/4, $0xc6e00bf3
+DATA K<>+0x74(SB)/4, $0xd5a79147
+DATA K<>+0x78(SB)/4, $0x06ca6351
+DATA K<>+0x7c(SB)/4, $0x14292967
+DATA K<>+0x80(SB)/4, $0x27b70a85
+DATA K<>+0x84(SB)/4, $0x2e1b2138
+DATA K<>+0x88(SB)/4, $0x4d2c6dfc
+DATA K<>+0x8c(SB)/4, $0x53380d13
+DATA K<>+0x90(SB)/4, $0x650a7354
+DATA K<>+0x94(SB)/4, $0x766a0abb
+DATA K<>+0x98(SB)/4, $0x81c2c92e
+DATA K<>+0x9c(SB)/4, $0x92722c85
+DATA K<>+0xa0(SB)/4, $0xa2bfe8a1
+DATA K<>+0xa4(SB)/4, $0xa81a664b
+DATA K<>+0xa8(SB)/4, $0xc24b8b70
+DATA K<>+0xac(SB)/4, $0xc76c51a3
+DATA K<>+0xb0(SB)/4, $0xd192e819
+DATA K<>+0xb4(SB)/4, $0xd6990624
+DATA K<>+0xb8(SB)/4, $0xf40e3585
+DATA K<>+0xbc(SB)/4, $0x106aa070
+DATA K<>+0xc0(SB)/4, $0x19a4c116
+DATA K<>+0xc4(SB)/4, $0x1e376c08
+DATA K<>+0xc8(SB)/4, $0x2748774c
+DATA K<>+0xcc(SB)/4, $0x34b0bcb5
+DATA K<>+0xd0(SB)/4, $0x391c0cb3
+DATA K<>+0xd4(SB)/4, $0x4ed8aa4a
+DATA K<>+0xd8(SB)/4, $0x5b9cca4f
+DATA K<>+0xdc(SB)/4, $0x682e6ff3
+DATA K<>+0xe0(SB)/4, $0x748f82ee
+DATA K<>+0xe4(SB)/4, $0x78a5636f
+DATA K<>+0xe8(SB)/4, $0x84c87814
+DATA K<>+0xec(SB)/4, $0x8cc70208
+DATA K<>+0xf0(SB)/4, $0x90befffa
+DATA K<>+0xf4(SB)/4, $0xa4506ceb
+DATA K<>+0xf8(SB)/4, $0xbef9a3f7
+DATA K<>+0xfc(SB)/4, $0xc67178f2
+GLOBL K<>(SB), RODATA|NOPTR, $256
+
+DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203
+DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b
+GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16
+
+// Register Usage
+// BX base address of constant table (constant)
+// DX hash_state (constant)
+// SI hash_data.data
+// DI hash_data.data + hash_data.length - 64 (constant)
+// X0 scratch
+// X1 scratch
+// X2 working hash state // ABEF
+// X3 working hash state // CDGH
+// X4 first 16 bytes of block
+// X5 second 16 bytes of block
+// X6 third 16 bytes of block
+// X7 fourth 16 bytes of block
+// X12 saved hash state // ABEF
+// X13 saved hash state // CDGH
+// X15 data shuffle mask (constant)
+
+TEXT ·blockSha(SB), NOSPLIT, $0-32
+ MOVQ h+0(FP), DX
+ MOVQ message_base+8(FP), SI
+ MOVQ message_len+16(FP), DI
+ LEAQ -64(SI)(DI*1), DI
+ MOVOU (DX), X2
+ MOVOU 16(DX), X1
+ MOVO X2, X3
+ PUNPCKLLQ X1, X2
+ PUNPCKHLQ X1, X3
+ PSHUFD $0x27, X2, X2
+ PSHUFD $0x27, X3, X3
+ MOVO SHUF_MASK<>(SB), X15
+ LEAQ K<>(SB), BX
+
+ JMP TEST
+
+LOOP:
+ MOVO X2, X12
+ MOVO X3, X13
+
+ // load block and shuffle
+ MOVOU (SI), X4
+ MOVOU 16(SI), X5
+ MOVOU 32(SI), X6
+ MOVOU 48(SI), X7
+ PSHUFB X15, X4
+ PSHUFB X15, X5
+ PSHUFB X15, X6
+ PSHUFB X15, X7
+
+#define ROUND456 \
+ PADDL X5, X0 \
+ LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
+ MOVO X5, X1 \
+ LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4
+ PADDL X1, X6 \
+ LONG $0xf5cd380f \ // SHA256MSG2 XMM6, XMM5
+ PSHUFD $0x4e, X0, X0 \
+ LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
+ LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5
+
+#define ROUND567 \
+ PADDL X6, X0 \
+ LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
+ MOVO X6, X1 \
+ LONG $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4
+ PADDL X1, X7 \
+ LONG $0xfecd380f \ // SHA256MSG2 XMM7, XMM6
+ PSHUFD $0x4e, X0, X0 \
+ LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
+ LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6
+
+#define ROUND674 \
+ PADDL X7, X0 \
+ LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
+ MOVO X7, X1 \
+ LONG $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4
+ PADDL X1, X4 \
+ LONG $0xe7cd380f \ // SHA256MSG2 XMM4, XMM7
+ PSHUFD $0x4e, X0, X0 \
+ LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
+ LONG $0xf7cc380f // SHA256MSG1 XMM6, XMM7
+
+#define ROUND745 \
+ PADDL X4, X0 \
+ LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
+ MOVO X4, X1 \
+ LONG $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4
+ PADDL X1, X5 \
+ LONG $0xeccd380f \ // SHA256MSG2 XMM5, XMM4
+ PSHUFD $0x4e, X0, X0 \
+ LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
+ LONG $0xfccc380f // SHA256MSG1 XMM7, XMM4
+
+ // rounds 0-3
+ MOVO (BX), X0
+ PADDL X4, X0
+ LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
+ PSHUFD $0x4e, X0, X0
+ LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
+
+ // rounds 4-7
+ MOVO 1*16(BX), X0
+ PADDL X5, X0
+ LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
+ PSHUFD $0x4e, X0, X0
+ LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
+ LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5
+
+ // rounds 8-11
+ MOVO 2*16(BX), X0
+ PADDL X6, X0
+ LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
+ PSHUFD $0x4e, X0, X0
+ LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
+ LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6
+
+ MOVO 3*16(BX), X0; ROUND674 // rounds 12-15
+ MOVO 4*16(BX), X0; ROUND745 // rounds 16-19
+ MOVO 5*16(BX), X0; ROUND456 // rounds 20-23
+ MOVO 6*16(BX), X0; ROUND567 // rounds 24-27
+ MOVO 7*16(BX), X0; ROUND674 // rounds 28-31
+ MOVO 8*16(BX), X0; ROUND745 // rounds 32-35
+ MOVO 9*16(BX), X0; ROUND456 // rounds 36-39
+ MOVO 10*16(BX), X0; ROUND567 // rounds 40-43
+ MOVO 11*16(BX), X0; ROUND674 // rounds 44-47
+ MOVO 12*16(BX), X0; ROUND745 // rounds 48-51
+
+ // rounds 52-55
+ MOVO 13*16(BX), X0
+ PADDL X5, X0
+ LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
+ MOVO X5, X1
+ LONG $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4
+ PADDL X1, X6
+ LONG $0xf5cd380f // SHA256MSG2 XMM6, XMM5
+ PSHUFD $0x4e, X0, X0
+ LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
+
+ // rounds 56-59
+ MOVO 14*16(BX), X0
+ PADDL X6, X0
+ LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
+ MOVO X6, X1
+ LONG $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4
+ PADDL X1, X7
+ LONG $0xfecd380f // SHA256MSG2 XMM7, XMM6
+ PSHUFD $0x4e, X0, X0
+ LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
+
+ // rounds 60-63
+ MOVO 15*16(BX), X0
+ PADDL X7, X0
+ LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
+ PSHUFD $0x4e, X0, X0
+ LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
+
+ PADDL X12, X2
+ PADDL X13, X3
+
+ ADDQ $64, SI
+
+TEST:
+ CMPQ SI, DI
+ JBE LOOP
+
+ PSHUFD $0x4e, X3, X0
+ LONG $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0
+ PSHUFD $0x4e, X2, X1
+ LONG $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f
+ PSHUFD $0x1b, X0, X0
+ PSHUFD $0x1b, X1, X1
+
+ MOVOU X0, (DX)
+ MOVOU X1, 16(DX)
+
+ RET