From 0f708daf2d14dcca261ef98cc698a1b1f2a6aa74 Mon Sep 17 00:00:00 2001
From: Wim
Date: Thu, 9 Jan 2020 21:02:56 +0100
Subject: Update dependencies (#975)

---
 .../golang.org/x/crypto/curve25519/const_amd64.h   |    8 -
 .../golang.org/x/crypto/curve25519/const_amd64.s   |   20 -
 .../golang.org/x/crypto/curve25519/cswap_amd64.s   |   65 -
 .../golang.org/x/crypto/curve25519/curve25519.go   |  881 +---------
 .../x/crypto/curve25519/curve25519_amd64.go        |  240 +++
 .../x/crypto/curve25519/curve25519_amd64.s         | 1793 ++++++++++++++++++++
 .../x/crypto/curve25519/curve25519_generic.go      |  828 +++++++++
 .../x/crypto/curve25519/curve25519_noasm.go        |   11 +
 vendor/golang.org/x/crypto/curve25519/doc.go       |   23 -
 .../golang.org/x/crypto/curve25519/freeze_amd64.s  |   73 -
 .../x/crypto/curve25519/ladderstep_amd64.s         | 1377 ---------------
 .../x/crypto/curve25519/mont25519_amd64.go         |  240 ---
 vendor/golang.org/x/crypto/curve25519/mul_amd64.s  |  169 --
 .../golang.org/x/crypto/curve25519/square_amd64.s  |  132 --
 14 files changed, 2943 insertions(+), 2917 deletions(-)
 delete mode 100644 vendor/golang.org/x/crypto/curve25519/const_amd64.h
 delete mode 100644 vendor/golang.org/x/crypto/curve25519/const_amd64.s
 delete mode 100644 vendor/golang.org/x/crypto/curve25519/cswap_amd64.s
 create mode 100644 vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go
 create mode 100644 vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s
 create mode 100644 vendor/golang.org/x/crypto/curve25519/curve25519_generic.go
 create mode 100644 vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go
 delete mode 100644 vendor/golang.org/x/crypto/curve25519/doc.go
 delete mode 100644 vendor/golang.org/x/crypto/curve25519/freeze_amd64.s
 delete mode 100644 vendor/golang.org/x/crypto/curve25519/ladderstep_amd64.s
 delete mode 100644 vendor/golang.org/x/crypto/curve25519/mont25519_amd64.go
 delete mode 100644 vendor/golang.org/x/crypto/curve25519/mul_amd64.s
 delete mode 100644 vendor/golang.org/x/crypto/curve25519/square_amd64.s

(limited to 'vendor/golang.org/x/crypto/curve25519')

diff --git a/vendor/golang.org/x/crypto/curve25519/const_amd64.h b/vendor/golang.org/x/crypto/curve25519/const_amd64.h
deleted file mode 100644
index b3f74162..00000000
--- a/vendor/golang.org/x/crypto/curve25519/const_amd64.h
+++ /dev/null
@@ -1,8 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-#define REDMASK51 0x0007FFFFFFFFFFFF
diff --git a/vendor/golang.org/x/crypto/curve25519/const_amd64.s b/vendor/golang.org/x/crypto/curve25519/const_amd64.s
deleted file mode 100644
index ee7b4bd5..00000000
--- a/vendor/golang.org/x/crypto/curve25519/const_amd64.s
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-// These constants cannot be encoded in non-MOVQ immediates.
-// We access them directly from memory instead.
- -DATA ·_121666_213(SB)/8, $996687872 -GLOBL ·_121666_213(SB), 8, $8 - -DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA -GLOBL ·_2P0(SB), 8, $8 - -DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE -GLOBL ·_2P1234(SB), 8, $8 diff --git a/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s b/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s deleted file mode 100644 index cd793a5b..00000000 --- a/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build amd64,!gccgo,!appengine - -// func cswap(inout *[4][5]uint64, v uint64) -TEXT ·cswap(SB),7,$0 - MOVQ inout+0(FP),DI - MOVQ v+8(FP),SI - - SUBQ $1, SI - NOTQ SI - MOVQ SI, X15 - PSHUFD $0x44, X15, X15 - - MOVOU 0(DI), X0 - MOVOU 16(DI), X2 - MOVOU 32(DI), X4 - MOVOU 48(DI), X6 - MOVOU 64(DI), X8 - MOVOU 80(DI), X1 - MOVOU 96(DI), X3 - MOVOU 112(DI), X5 - MOVOU 128(DI), X7 - MOVOU 144(DI), X9 - - MOVO X1, X10 - MOVO X3, X11 - MOVO X5, X12 - MOVO X7, X13 - MOVO X9, X14 - - PXOR X0, X10 - PXOR X2, X11 - PXOR X4, X12 - PXOR X6, X13 - PXOR X8, X14 - PAND X15, X10 - PAND X15, X11 - PAND X15, X12 - PAND X15, X13 - PAND X15, X14 - PXOR X10, X0 - PXOR X10, X1 - PXOR X11, X2 - PXOR X11, X3 - PXOR X12, X4 - PXOR X12, X5 - PXOR X13, X6 - PXOR X13, X7 - PXOR X14, X8 - PXOR X14, X9 - - MOVOU X0, 0(DI) - MOVOU X2, 16(DI) - MOVOU X4, 32(DI) - MOVOU X6, 48(DI) - MOVOU X8, 64(DI) - MOVOU X1, 80(DI) - MOVOU X3, 96(DI) - MOVOU X5, 112(DI) - MOVOU X7, 128(DI) - MOVOU X9, 144(DI) - RET diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519.go b/vendor/golang.org/x/crypto/curve25519/curve25519.go index 75f24bab..4b9a655d 100644 --- a/vendor/golang.org/x/crypto/curve25519/curve25519.go +++ b/vendor/golang.org/x/crypto/curve25519/curve25519.go @@ -1,834 +1,95 @@ -// Copyright 2013 The Go Authors. All rights reserved. +// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// We have an implementation in amd64 assembly so this code is only run on -// non-amd64 platforms. The amd64 assembly does not support gccgo. -// +build !amd64 gccgo appengine - -package curve25519 +// Package curve25519 provides an implementation of the X25519 function, which +// performs scalar multiplication on the elliptic curve known as Curve25519. +// See RFC 7748. +package curve25519 // import "golang.org/x/crypto/curve25519" import ( - "encoding/binary" + "crypto/subtle" + "fmt" ) -// This code is a port of the public domain, "ref10" implementation of -// curve25519 from SUPERCOP 20130419 by D. J. Bernstein. - -// fieldElement represents an element of the field GF(2^255 - 19). An element -// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 -// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on -// context. -type fieldElement [10]int32 - -func feZero(fe *fieldElement) { - for i := range fe { - fe[i] = 0 - } -} - -func feOne(fe *fieldElement) { - feZero(fe) - fe[0] = 1 -} - -func feAdd(dst, a, b *fieldElement) { - for i := range dst { - dst[i] = a[i] + b[i] - } -} - -func feSub(dst, a, b *fieldElement) { - for i := range dst { - dst[i] = a[i] - b[i] - } -} - -func feCopy(dst, src *fieldElement) { - for i := range dst { - dst[i] = src[i] - } -} - -// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0. -// -// Preconditions: b in {0,1}. 
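[Editor's note] The swap below is branch-free: negating b maps {0, 1} to the masks {0, all ones}, so the AND with f[i]^g[i] yields either the full XOR difference or zero. A minimal standalone sketch of that masking idea (condSwap is a hypothetical helper, not part of the package):

package main

import "fmt"

// condSwap swaps x and y when b == 1 and leaves them unchanged when
// b == 0, with no data-dependent branch (b must be in {0, 1}).
func condSwap(x, y *int32, b int32) {
	mask := -b            // 1 -> all ones, 0 -> all zeros
	t := mask & (*x ^ *y) // either x^y or 0
	*x ^= t
	*y ^= t
}

func main() {
	a, b := int32(7), int32(42)
	condSwap(&a, &b, 1)
	fmt.Println(a, b) // 42 7
	condSwap(&a, &b, 0)
	fmt.Println(a, b) // 42 7 (unchanged)
}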
-func feCSwap(f, g *fieldElement, b int32) { - b = -b - for i := range f { - t := b & (f[i] ^ g[i]) - f[i] ^= t - g[i] ^= t - } -} - -// load3 reads a 24-bit, little-endian value from in. -func load3(in []byte) int64 { - var r int64 - r = int64(in[0]) - r |= int64(in[1]) << 8 - r |= int64(in[2]) << 16 - return r -} - -// load4 reads a 32-bit, little-endian value from in. -func load4(in []byte) int64 { - return int64(binary.LittleEndian.Uint32(in)) -} - -func feFromBytes(dst *fieldElement, src *[32]byte) { - h0 := load4(src[:]) - h1 := load3(src[4:]) << 6 - h2 := load3(src[7:]) << 5 - h3 := load3(src[10:]) << 3 - h4 := load3(src[13:]) << 2 - h5 := load4(src[16:]) - h6 := load3(src[20:]) << 7 - h7 := load3(src[23:]) << 5 - h8 := load3(src[26:]) << 4 - h9 := (load3(src[29:]) & 0x7fffff) << 2 - - var carry [10]int64 - carry[9] = (h9 + 1<<24) >> 25 - h0 += carry[9] * 19 - h9 -= carry[9] << 25 - carry[1] = (h1 + 1<<24) >> 25 - h2 += carry[1] - h1 -= carry[1] << 25 - carry[3] = (h3 + 1<<24) >> 25 - h4 += carry[3] - h3 -= carry[3] << 25 - carry[5] = (h5 + 1<<24) >> 25 - h6 += carry[5] - h5 -= carry[5] << 25 - carry[7] = (h7 + 1<<24) >> 25 - h8 += carry[7] - h7 -= carry[7] << 25 - - carry[0] = (h0 + 1<<25) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - carry[2] = (h2 + 1<<25) >> 26 - h3 += carry[2] - h2 -= carry[2] << 26 - carry[4] = (h4 + 1<<25) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - carry[6] = (h6 + 1<<25) >> 26 - h7 += carry[6] - h6 -= carry[6] << 26 - carry[8] = (h8 + 1<<25) >> 26 - h9 += carry[8] - h8 -= carry[8] << 26 - - dst[0] = int32(h0) - dst[1] = int32(h1) - dst[2] = int32(h2) - dst[3] = int32(h3) - dst[4] = int32(h4) - dst[5] = int32(h5) - dst[6] = int32(h6) - dst[7] = int32(h7) - dst[8] = int32(h8) - dst[9] = int32(h9) -} - -// feToBytes marshals h to s. -// Preconditions: -// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. -// -// Write p=2^255-19; q=floor(h/p). -// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). -// -// Proof: -// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. -// Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. -// -// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). -// Then 0> 25 - q = (h[0] + q) >> 26 - q = (h[1] + q) >> 25 - q = (h[2] + q) >> 26 - q = (h[3] + q) >> 25 - q = (h[4] + q) >> 26 - q = (h[5] + q) >> 25 - q = (h[6] + q) >> 26 - q = (h[7] + q) >> 25 - q = (h[8] + q) >> 26 - q = (h[9] + q) >> 25 - - // Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. - h[0] += 19 * q - // Goal: Output h-2^255 q, which is between 0 and 2^255-20. - - carry[0] = h[0] >> 26 - h[1] += carry[0] - h[0] -= carry[0] << 26 - carry[1] = h[1] >> 25 - h[2] += carry[1] - h[1] -= carry[1] << 25 - carry[2] = h[2] >> 26 - h[3] += carry[2] - h[2] -= carry[2] << 26 - carry[3] = h[3] >> 25 - h[4] += carry[3] - h[3] -= carry[3] << 25 - carry[4] = h[4] >> 26 - h[5] += carry[4] - h[4] -= carry[4] << 26 - carry[5] = h[5] >> 25 - h[6] += carry[5] - h[5] -= carry[5] << 25 - carry[6] = h[6] >> 26 - h[7] += carry[6] - h[6] -= carry[6] << 26 - carry[7] = h[7] >> 25 - h[8] += carry[7] - h[7] -= carry[7] << 25 - carry[8] = h[8] >> 26 - h[9] += carry[8] - h[8] -= carry[8] << 26 - carry[9] = h[9] >> 25 - h[9] -= carry[9] << 25 - // h10 = carry9 - - // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. - // Have h[0]+...+2^230 h[9] between 0 and 2^255-1; - // evidently 2^255 h10-2^255 q = 0. - // Goal: Output h[0]+...+2^230 h[9]. 
- - s[0] = byte(h[0] >> 0) - s[1] = byte(h[0] >> 8) - s[2] = byte(h[0] >> 16) - s[3] = byte((h[0] >> 24) | (h[1] << 2)) - s[4] = byte(h[1] >> 6) - s[5] = byte(h[1] >> 14) - s[6] = byte((h[1] >> 22) | (h[2] << 3)) - s[7] = byte(h[2] >> 5) - s[8] = byte(h[2] >> 13) - s[9] = byte((h[2] >> 21) | (h[3] << 5)) - s[10] = byte(h[3] >> 3) - s[11] = byte(h[3] >> 11) - s[12] = byte((h[3] >> 19) | (h[4] << 6)) - s[13] = byte(h[4] >> 2) - s[14] = byte(h[4] >> 10) - s[15] = byte(h[4] >> 18) - s[16] = byte(h[5] >> 0) - s[17] = byte(h[5] >> 8) - s[18] = byte(h[5] >> 16) - s[19] = byte((h[5] >> 24) | (h[6] << 1)) - s[20] = byte(h[6] >> 7) - s[21] = byte(h[6] >> 15) - s[22] = byte((h[6] >> 23) | (h[7] << 3)) - s[23] = byte(h[7] >> 5) - s[24] = byte(h[7] >> 13) - s[25] = byte((h[7] >> 21) | (h[8] << 4)) - s[26] = byte(h[8] >> 4) - s[27] = byte(h[8] >> 12) - s[28] = byte((h[8] >> 20) | (h[9] << 6)) - s[29] = byte(h[9] >> 2) - s[30] = byte(h[9] >> 10) - s[31] = byte(h[9] >> 18) +// Deprecated: when provided a low-order point, ScalarMult will set dst to all +// zeroes, irrespective of the scalar. Instead, use the X25519 function, which +// will return an error. +func ScalarMult(dst, scalar, point *[32]byte) { + scalarMult(dst, scalar, point) } -// feMul calculates h = f * g -// Can overlap h with f or g. -// -// Preconditions: -// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. -// |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. -// -// Postconditions: -// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. -// -// Notes on implementation strategy: -// -// Using schoolbook multiplication. -// Karatsuba would save a little in some cost models. +// ScalarBaseMult sets dst to the product scalar * base where base is the +// standard generator. // -// Most multiplications by 2 and 19 are 32-bit precomputations; -// cheaper than 64-bit postcomputations. -// -// There is one remaining multiplication by 19 in the carry chain; -// one *19 precomputation can be merged into this, -// but the resulting data flow is considerably less clean. -// -// There are 12 carries below. -// 10 of them are 2-way parallelizable and vectorizable. -// Can get away with 11 carries, but then data flow is much deeper. -// -// With tighter constraints on inputs can squeeze carries into int32. 
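[Editor's note] All the *19 and *38 precomputations in the body below (g1_19, f1g9_38 and friends) come from the field's shape: with p = 2^255 - 19, anything that overflows past bit 255 re-enters at the bottom multiplied by 19, and the 38 = 2*19 variants absorb an extra doubling from the mixed 26/25-bit radix. A math/big sanity check of the underlying identity, illustrative only:

package main

import (
	"fmt"
	"math/big"
)

func main() {
	one := big.NewInt(1)
	p := new(big.Int).Sub(new(big.Int).Lsh(one, 255), big.NewInt(19))
	two255 := new(big.Int).Lsh(one, 255)
	fmt.Println(new(big.Int).Mod(two255, p)) // 19, so 2^255 ≡ 19 (mod p)
}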
-func feMul(h, f, g *fieldElement) { - f0 := f[0] - f1 := f[1] - f2 := f[2] - f3 := f[3] - f4 := f[4] - f5 := f[5] - f6 := f[6] - f7 := f[7] - f8 := f[8] - f9 := f[9] - g0 := g[0] - g1 := g[1] - g2 := g[2] - g3 := g[3] - g4 := g[4] - g5 := g[5] - g6 := g[6] - g7 := g[7] - g8 := g[8] - g9 := g[9] - g1_19 := 19 * g1 // 1.4*2^29 - g2_19 := 19 * g2 // 1.4*2^30; still ok - g3_19 := 19 * g3 - g4_19 := 19 * g4 - g5_19 := 19 * g5 - g6_19 := 19 * g6 - g7_19 := 19 * g7 - g8_19 := 19 * g8 - g9_19 := 19 * g9 - f1_2 := 2 * f1 - f3_2 := 2 * f3 - f5_2 := 2 * f5 - f7_2 := 2 * f7 - f9_2 := 2 * f9 - f0g0 := int64(f0) * int64(g0) - f0g1 := int64(f0) * int64(g1) - f0g2 := int64(f0) * int64(g2) - f0g3 := int64(f0) * int64(g3) - f0g4 := int64(f0) * int64(g4) - f0g5 := int64(f0) * int64(g5) - f0g6 := int64(f0) * int64(g6) - f0g7 := int64(f0) * int64(g7) - f0g8 := int64(f0) * int64(g8) - f0g9 := int64(f0) * int64(g9) - f1g0 := int64(f1) * int64(g0) - f1g1_2 := int64(f1_2) * int64(g1) - f1g2 := int64(f1) * int64(g2) - f1g3_2 := int64(f1_2) * int64(g3) - f1g4 := int64(f1) * int64(g4) - f1g5_2 := int64(f1_2) * int64(g5) - f1g6 := int64(f1) * int64(g6) - f1g7_2 := int64(f1_2) * int64(g7) - f1g8 := int64(f1) * int64(g8) - f1g9_38 := int64(f1_2) * int64(g9_19) - f2g0 := int64(f2) * int64(g0) - f2g1 := int64(f2) * int64(g1) - f2g2 := int64(f2) * int64(g2) - f2g3 := int64(f2) * int64(g3) - f2g4 := int64(f2) * int64(g4) - f2g5 := int64(f2) * int64(g5) - f2g6 := int64(f2) * int64(g6) - f2g7 := int64(f2) * int64(g7) - f2g8_19 := int64(f2) * int64(g8_19) - f2g9_19 := int64(f2) * int64(g9_19) - f3g0 := int64(f3) * int64(g0) - f3g1_2 := int64(f3_2) * int64(g1) - f3g2 := int64(f3) * int64(g2) - f3g3_2 := int64(f3_2) * int64(g3) - f3g4 := int64(f3) * int64(g4) - f3g5_2 := int64(f3_2) * int64(g5) - f3g6 := int64(f3) * int64(g6) - f3g7_38 := int64(f3_2) * int64(g7_19) - f3g8_19 := int64(f3) * int64(g8_19) - f3g9_38 := int64(f3_2) * int64(g9_19) - f4g0 := int64(f4) * int64(g0) - f4g1 := int64(f4) * int64(g1) - f4g2 := int64(f4) * int64(g2) - f4g3 := int64(f4) * int64(g3) - f4g4 := int64(f4) * int64(g4) - f4g5 := int64(f4) * int64(g5) - f4g6_19 := int64(f4) * int64(g6_19) - f4g7_19 := int64(f4) * int64(g7_19) - f4g8_19 := int64(f4) * int64(g8_19) - f4g9_19 := int64(f4) * int64(g9_19) - f5g0 := int64(f5) * int64(g0) - f5g1_2 := int64(f5_2) * int64(g1) - f5g2 := int64(f5) * int64(g2) - f5g3_2 := int64(f5_2) * int64(g3) - f5g4 := int64(f5) * int64(g4) - f5g5_38 := int64(f5_2) * int64(g5_19) - f5g6_19 := int64(f5) * int64(g6_19) - f5g7_38 := int64(f5_2) * int64(g7_19) - f5g8_19 := int64(f5) * int64(g8_19) - f5g9_38 := int64(f5_2) * int64(g9_19) - f6g0 := int64(f6) * int64(g0) - f6g1 := int64(f6) * int64(g1) - f6g2 := int64(f6) * int64(g2) - f6g3 := int64(f6) * int64(g3) - f6g4_19 := int64(f6) * int64(g4_19) - f6g5_19 := int64(f6) * int64(g5_19) - f6g6_19 := int64(f6) * int64(g6_19) - f6g7_19 := int64(f6) * int64(g7_19) - f6g8_19 := int64(f6) * int64(g8_19) - f6g9_19 := int64(f6) * int64(g9_19) - f7g0 := int64(f7) * int64(g0) - f7g1_2 := int64(f7_2) * int64(g1) - f7g2 := int64(f7) * int64(g2) - f7g3_38 := int64(f7_2) * int64(g3_19) - f7g4_19 := int64(f7) * int64(g4_19) - f7g5_38 := int64(f7_2) * int64(g5_19) - f7g6_19 := int64(f7) * int64(g6_19) - f7g7_38 := int64(f7_2) * int64(g7_19) - f7g8_19 := int64(f7) * int64(g8_19) - f7g9_38 := int64(f7_2) * int64(g9_19) - f8g0 := int64(f8) * int64(g0) - f8g1 := int64(f8) * int64(g1) - f8g2_19 := int64(f8) * int64(g2_19) - f8g3_19 := int64(f8) * int64(g3_19) - f8g4_19 := int64(f8) * int64(g4_19) - 
f8g5_19 := int64(f8) * int64(g5_19) - f8g6_19 := int64(f8) * int64(g6_19) - f8g7_19 := int64(f8) * int64(g7_19) - f8g8_19 := int64(f8) * int64(g8_19) - f8g9_19 := int64(f8) * int64(g9_19) - f9g0 := int64(f9) * int64(g0) - f9g1_38 := int64(f9_2) * int64(g1_19) - f9g2_19 := int64(f9) * int64(g2_19) - f9g3_38 := int64(f9_2) * int64(g3_19) - f9g4_19 := int64(f9) * int64(g4_19) - f9g5_38 := int64(f9_2) * int64(g5_19) - f9g6_19 := int64(f9) * int64(g6_19) - f9g7_38 := int64(f9_2) * int64(g7_19) - f9g8_19 := int64(f9) * int64(g8_19) - f9g9_38 := int64(f9_2) * int64(g9_19) - h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38 - h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19 - h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38 - h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19 - h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38 - h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19 - h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38 - h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19 - h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38 - h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 - var carry [10]int64 - - // |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) - // i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 - // |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) - // i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9 - - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - // |h0| <= 2^25 - // |h4| <= 2^25 - // |h1| <= 1.51*2^58 - // |h5| <= 1.51*2^58 - - carry[1] = (h1 + (1 << 24)) >> 25 - h2 += carry[1] - h1 -= carry[1] << 25 - carry[5] = (h5 + (1 << 24)) >> 25 - h6 += carry[5] - h5 -= carry[5] << 25 - // |h1| <= 2^24; from now on fits into int32 - // |h5| <= 2^24; from now on fits into int32 - // |h2| <= 1.21*2^59 - // |h6| <= 1.21*2^59 - - carry[2] = (h2 + (1 << 25)) >> 26 - h3 += carry[2] - h2 -= carry[2] << 26 - carry[6] = (h6 + (1 << 25)) >> 26 - h7 += carry[6] - h6 -= carry[6] << 26 - // |h2| <= 2^25; from now on fits into int32 unchanged - // |h6| <= 2^25; from now on fits into int32 unchanged - // |h3| <= 1.51*2^58 - // |h7| <= 1.51*2^58 - - carry[3] = (h3 + (1 << 24)) >> 25 - h4 += carry[3] - h3 -= carry[3] << 25 - carry[7] = (h7 + (1 << 24)) >> 25 - h8 += carry[7] - h7 -= carry[7] << 25 - // |h3| <= 2^24; from now on fits into int32 unchanged - // |h7| <= 2^24; from now on fits into int32 unchanged - // |h4| <= 1.52*2^33 - // |h8| <= 1.52*2^33 - - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - carry[8] = (h8 + (1 << 25)) >> 26 - h9 += carry[8] - h8 -= carry[8] << 26 - // |h4| <= 2^25; from now on fits into int32 unchanged - // |h8| <= 2^25; from now on fits into int32 unchanged - // |h5| <= 1.01*2^24 - // |h9| <= 1.51*2^58 - - carry[9] = (h9 + (1 << 24)) >> 25 - h0 += carry[9] * 19 - h9 -= carry[9] << 25 - // |h9| <= 2^24; from now on fits into int32 unchanged - // |h0| <= 1.8*2^37 - - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - // |h0| <= 2^25; from now on fits into int32 unchanged - // |h1| <= 
1.01*2^24 - - h[0] = int32(h0) - h[1] = int32(h1) - h[2] = int32(h2) - h[3] = int32(h3) - h[4] = int32(h4) - h[5] = int32(h5) - h[6] = int32(h6) - h[7] = int32(h7) - h[8] = int32(h8) - h[9] = int32(h9) +// It is recommended to use the X25519 function with Basepoint instead, as +// copying into fixed size arrays can lead to unexpected bugs. +func ScalarBaseMult(dst, scalar *[32]byte) { + ScalarMult(dst, scalar, &basePoint) } -// feSquare calculates h = f*f. Can overlap h with f. -// -// Preconditions: -// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. -// -// Postconditions: -// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. -func feSquare(h, f *fieldElement) { - f0 := f[0] - f1 := f[1] - f2 := f[2] - f3 := f[3] - f4 := f[4] - f5 := f[5] - f6 := f[6] - f7 := f[7] - f8 := f[8] - f9 := f[9] - f0_2 := 2 * f0 - f1_2 := 2 * f1 - f2_2 := 2 * f2 - f3_2 := 2 * f3 - f4_2 := 2 * f4 - f5_2 := 2 * f5 - f6_2 := 2 * f6 - f7_2 := 2 * f7 - f5_38 := 38 * f5 // 1.31*2^30 - f6_19 := 19 * f6 // 1.31*2^30 - f7_38 := 38 * f7 // 1.31*2^30 - f8_19 := 19 * f8 // 1.31*2^30 - f9_38 := 38 * f9 // 1.31*2^30 - f0f0 := int64(f0) * int64(f0) - f0f1_2 := int64(f0_2) * int64(f1) - f0f2_2 := int64(f0_2) * int64(f2) - f0f3_2 := int64(f0_2) * int64(f3) - f0f4_2 := int64(f0_2) * int64(f4) - f0f5_2 := int64(f0_2) * int64(f5) - f0f6_2 := int64(f0_2) * int64(f6) - f0f7_2 := int64(f0_2) * int64(f7) - f0f8_2 := int64(f0_2) * int64(f8) - f0f9_2 := int64(f0_2) * int64(f9) - f1f1_2 := int64(f1_2) * int64(f1) - f1f2_2 := int64(f1_2) * int64(f2) - f1f3_4 := int64(f1_2) * int64(f3_2) - f1f4_2 := int64(f1_2) * int64(f4) - f1f5_4 := int64(f1_2) * int64(f5_2) - f1f6_2 := int64(f1_2) * int64(f6) - f1f7_4 := int64(f1_2) * int64(f7_2) - f1f8_2 := int64(f1_2) * int64(f8) - f1f9_76 := int64(f1_2) * int64(f9_38) - f2f2 := int64(f2) * int64(f2) - f2f3_2 := int64(f2_2) * int64(f3) - f2f4_2 := int64(f2_2) * int64(f4) - f2f5_2 := int64(f2_2) * int64(f5) - f2f6_2 := int64(f2_2) * int64(f6) - f2f7_2 := int64(f2_2) * int64(f7) - f2f8_38 := int64(f2_2) * int64(f8_19) - f2f9_38 := int64(f2) * int64(f9_38) - f3f3_2 := int64(f3_2) * int64(f3) - f3f4_2 := int64(f3_2) * int64(f4) - f3f5_4 := int64(f3_2) * int64(f5_2) - f3f6_2 := int64(f3_2) * int64(f6) - f3f7_76 := int64(f3_2) * int64(f7_38) - f3f8_38 := int64(f3_2) * int64(f8_19) - f3f9_76 := int64(f3_2) * int64(f9_38) - f4f4 := int64(f4) * int64(f4) - f4f5_2 := int64(f4_2) * int64(f5) - f4f6_38 := int64(f4_2) * int64(f6_19) - f4f7_38 := int64(f4) * int64(f7_38) - f4f8_38 := int64(f4_2) * int64(f8_19) - f4f9_38 := int64(f4) * int64(f9_38) - f5f5_38 := int64(f5) * int64(f5_38) - f5f6_38 := int64(f5_2) * int64(f6_19) - f5f7_76 := int64(f5_2) * int64(f7_38) - f5f8_38 := int64(f5_2) * int64(f8_19) - f5f9_76 := int64(f5_2) * int64(f9_38) - f6f6_19 := int64(f6) * int64(f6_19) - f6f7_38 := int64(f6) * int64(f7_38) - f6f8_38 := int64(f6_2) * int64(f8_19) - f6f9_38 := int64(f6) * int64(f9_38) - f7f7_38 := int64(f7) * int64(f7_38) - f7f8_38 := int64(f7_2) * int64(f8_19) - f7f9_76 := int64(f7_2) * int64(f9_38) - f8f8_19 := int64(f8) * int64(f8_19) - f8f9_38 := int64(f8) * int64(f9_38) - f9f9_38 := int64(f9) * int64(f9_38) - h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38 - h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38 - h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19 - h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38 - h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38 - h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38 - h6 := f0f6_2 + f1f5_4 + f2f4_2 + 
f3f3_2 + f7f9_76 + f8f8_19 - h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38 - h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38 - h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2 - var carry [10]int64 - - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - - carry[1] = (h1 + (1 << 24)) >> 25 - h2 += carry[1] - h1 -= carry[1] << 25 - carry[5] = (h5 + (1 << 24)) >> 25 - h6 += carry[5] - h5 -= carry[5] << 25 - - carry[2] = (h2 + (1 << 25)) >> 26 - h3 += carry[2] - h2 -= carry[2] << 26 - carry[6] = (h6 + (1 << 25)) >> 26 - h7 += carry[6] - h6 -= carry[6] << 26 - - carry[3] = (h3 + (1 << 24)) >> 25 - h4 += carry[3] - h3 -= carry[3] << 25 - carry[7] = (h7 + (1 << 24)) >> 25 - h8 += carry[7] - h7 -= carry[7] << 25 +const ( + // ScalarSize is the size of the scalar input to X25519. + ScalarSize = 32 + // PointSize is the size of the point input to X25519. + PointSize = 32 +) - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - carry[8] = (h8 + (1 << 25)) >> 26 - h9 += carry[8] - h8 -= carry[8] << 26 +// Basepoint is the canonical Curve25519 generator. +var Basepoint []byte - carry[9] = (h9 + (1 << 24)) >> 25 - h0 += carry[9] * 19 - h9 -= carry[9] << 25 +var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 +func init() { Basepoint = basePoint[:] } - h[0] = int32(h0) - h[1] = int32(h1) - h[2] = int32(h2) - h[3] = int32(h3) - h[4] = int32(h4) - h[5] = int32(h5) - h[6] = int32(h6) - h[7] = int32(h7) - h[8] = int32(h8) - h[9] = int32(h9) +func checkBasepoint() { + if subtle.ConstantTimeCompare(Basepoint, []byte{ + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }) != 1 { + panic("curve25519: global Basepoint value was modified") + } } -// feMul121666 calculates h = f * 121666. Can overlap h with f. +// X25519 returns the result of the scalar multiplication (scalar * point), +// according to RFC 7748, Section 5. scalar, point and the return value are +// slices of 32 bytes. // -// Preconditions: -// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// scalar can be generated at random, for example with crypto/rand. point should +// be either Basepoint or the output of another X25519 call. // -// Postconditions: -// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 
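[Editor's note] A note on the constant: 121666 = (A + 2)/4 for Curve25519's Montgomery coefficient A = 486662, the multiplier in the ladder's doubling step (formulations that arrange the doubling differently use (A - 2)/4 = 121665 instead). A one-line check:

package main

import "fmt"

func main() {
	const A = 486662 // Curve25519 is y^2 = x^3 + A*x^2 + x
	fmt.Println((A+2)/4, (A-2)/4) // 121666 121665
}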
-func feMul121666(h, f *fieldElement) { - h0 := int64(f[0]) * 121666 - h1 := int64(f[1]) * 121666 - h2 := int64(f[2]) * 121666 - h3 := int64(f[3]) * 121666 - h4 := int64(f[4]) * 121666 - h5 := int64(f[5]) * 121666 - h6 := int64(f[6]) * 121666 - h7 := int64(f[7]) * 121666 - h8 := int64(f[8]) * 121666 - h9 := int64(f[9]) * 121666 - var carry [10]int64 - - carry[9] = (h9 + (1 << 24)) >> 25 - h0 += carry[9] * 19 - h9 -= carry[9] << 25 - carry[1] = (h1 + (1 << 24)) >> 25 - h2 += carry[1] - h1 -= carry[1] << 25 - carry[3] = (h3 + (1 << 24)) >> 25 - h4 += carry[3] - h3 -= carry[3] << 25 - carry[5] = (h5 + (1 << 24)) >> 25 - h6 += carry[5] - h5 -= carry[5] << 25 - carry[7] = (h7 + (1 << 24)) >> 25 - h8 += carry[7] - h7 -= carry[7] << 25 - - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - carry[2] = (h2 + (1 << 25)) >> 26 - h3 += carry[2] - h2 -= carry[2] << 26 - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - carry[6] = (h6 + (1 << 25)) >> 26 - h7 += carry[6] - h6 -= carry[6] << 26 - carry[8] = (h8 + (1 << 25)) >> 26 - h9 += carry[8] - h8 -= carry[8] << 26 - - h[0] = int32(h0) - h[1] = int32(h1) - h[2] = int32(h2) - h[3] = int32(h3) - h[4] = int32(h4) - h[5] = int32(h5) - h[6] = int32(h6) - h[7] = int32(h7) - h[8] = int32(h8) - h[9] = int32(h9) -} - -// feInvert sets out = z^-1. -func feInvert(out, z *fieldElement) { - var t0, t1, t2, t3 fieldElement - var i int - - feSquare(&t0, z) - for i = 1; i < 1; i++ { - feSquare(&t0, &t0) - } - feSquare(&t1, &t0) - for i = 1; i < 2; i++ { - feSquare(&t1, &t1) - } - feMul(&t1, z, &t1) - feMul(&t0, &t0, &t1) - feSquare(&t2, &t0) - for i = 1; i < 1; i++ { - feSquare(&t2, &t2) - } - feMul(&t1, &t1, &t2) - feSquare(&t2, &t1) - for i = 1; i < 5; i++ { - feSquare(&t2, &t2) - } - feMul(&t1, &t2, &t1) - feSquare(&t2, &t1) - for i = 1; i < 10; i++ { - feSquare(&t2, &t2) - } - feMul(&t2, &t2, &t1) - feSquare(&t3, &t2) - for i = 1; i < 20; i++ { - feSquare(&t3, &t3) - } - feMul(&t2, &t3, &t2) - feSquare(&t2, &t2) - for i = 1; i < 10; i++ { - feSquare(&t2, &t2) - } - feMul(&t1, &t2, &t1) - feSquare(&t2, &t1) - for i = 1; i < 50; i++ { - feSquare(&t2, &t2) - } - feMul(&t2, &t2, &t1) - feSquare(&t3, &t2) - for i = 1; i < 100; i++ { - feSquare(&t3, &t3) - } - feMul(&t2, &t3, &t2) - feSquare(&t2, &t2) - for i = 1; i < 50; i++ { - feSquare(&t2, &t2) - } - feMul(&t1, &t2, &t1) - feSquare(&t1, &t1) - for i = 1; i < 5; i++ { - feSquare(&t1, &t1) - } - feMul(out, &t1, &t0) +// If point is Basepoint (but not if it's a different slice with the same +// contents) a precomputed implementation might be used for performance. +func X25519(scalar, point []byte) ([]byte, error) { + // Outline the body of function, to let the allocation be inlined in the + // caller, and possibly avoid escaping to the heap. 
+ var dst [32]byte + return x25519(&dst, scalar, point) } -func scalarMult(out, in, base *[32]byte) { - var e [32]byte - - copy(e[:], in[:]) - e[0] &= 248 - e[31] &= 127 - e[31] |= 64 - - var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement - feFromBytes(&x1, base) - feOne(&x2) - feCopy(&x3, &x1) - feOne(&z3) - - swap := int32(0) - for pos := 254; pos >= 0; pos-- { - b := e[pos/8] >> uint(pos&7) - b &= 1 - swap ^= int32(b) - feCSwap(&x2, &x3, swap) - feCSwap(&z2, &z3, swap) - swap = int32(b) - - feSub(&tmp0, &x3, &z3) - feSub(&tmp1, &x2, &z2) - feAdd(&x2, &x2, &z2) - feAdd(&z2, &x3, &z3) - feMul(&z3, &tmp0, &x2) - feMul(&z2, &z2, &tmp1) - feSquare(&tmp0, &tmp1) - feSquare(&tmp1, &x2) - feAdd(&x3, &z3, &z2) - feSub(&z2, &z3, &z2) - feMul(&x2, &tmp1, &tmp0) - feSub(&tmp1, &tmp1, &tmp0) - feSquare(&z2, &z2) - feMul121666(&z3, &tmp1) - feSquare(&x3, &x3) - feAdd(&tmp0, &tmp0, &z3) - feMul(&z3, &x1, &z2) - feMul(&z2, &tmp1, &tmp0) - } - - feCSwap(&x2, &x3, swap) - feCSwap(&z2, &z3, swap) - - feInvert(&z2, &z2) - feMul(&x2, &x2, &z2) - feToBytes(out, &x2) +func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) { + var in [32]byte + if l := len(scalar); l != 32 { + return nil, fmt.Errorf("bad scalar length: %d, expected %d", l, 32) + } + if l := len(point); l != 32 { + return nil, fmt.Errorf("bad point length: %d, expected %d", l, 32) + } + copy(in[:], scalar) + if &point[0] == &Basepoint[0] { + checkBasepoint() + ScalarBaseMult(dst, &in) + } else { + var base, zero [32]byte + copy(base[:], point) + ScalarMult(dst, &in, &base) + if subtle.ConstantTimeCompare(dst[:], zero[:]) == 1 { + return nil, fmt.Errorf("bad input point: low order point") + } + } + return dst[:], nil } diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go new file mode 100644 index 00000000..5120b779 --- /dev/null +++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go @@ -0,0 +1,240 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine,!purego + +package curve25519 + +// These functions are implemented in the .s files. The names of the functions +// in the rest of the file are also taken from the SUPERCOP sources to help +// people following along. + +//go:noescape + +func cswap(inout *[5]uint64, v uint64) + +//go:noescape + +func ladderstep(inout *[5][5]uint64) + +//go:noescape + +func freeze(inout *[5]uint64) + +//go:noescape + +func mul(dest, a, b *[5]uint64) + +//go:noescape + +func square(out, in *[5]uint64) + +// mladder uses a Montgomery ladder to calculate (xr/zr) *= s. 
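[Editor's note] The mladder body that follows issues one cswap per scalar bit with v = bit ^ prevbit instead of swapping the working points into place and back each iteration; because the relative swaps telescope and the pair starts unswapped, after every step the pair is swapped exactly when the current bit is 1. A toy check of that invariant, illustrative only:

package main

import "fmt"

func main() {
	bits := []byte{1, 0, 1, 1, 0, 0, 1} // any bit string
	state := byte(0)                    // 1 means the pair is currently swapped
	var prevbit byte
	for _, bit := range bits {
		state ^= bit ^ prevbit // effect of cswap(..., uint64(bit^prevbit))
		prevbit = bit
		if state != bit {
			panic("relative swaps diverged from absolute placement")
		}
	}
	fmt.Println("ok: after each step, swapped state == current bit")
}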
+func mladder(xr, zr *[5]uint64, s *[32]byte) { + var work [5][5]uint64 + + work[0] = *xr + setint(&work[1], 1) + setint(&work[2], 0) + work[3] = *xr + setint(&work[4], 1) + + j := uint(6) + var prevbit byte + + for i := 31; i >= 0; i-- { + for j < 8 { + bit := ((*s)[i] >> j) & 1 + swap := bit ^ prevbit + prevbit = bit + cswap(&work[1], uint64(swap)) + ladderstep(&work) + j-- + } + j = 7 + } + + *xr = work[1] + *zr = work[2] +} + +func scalarMult(out, in, base *[32]byte) { + var e [32]byte + copy(e[:], (*in)[:]) + e[0] &= 248 + e[31] &= 127 + e[31] |= 64 + + var t, z [5]uint64 + unpack(&t, base) + mladder(&t, &z, &e) + invert(&z, &z) + mul(&t, &t, &z) + pack(out, &t) +} + +func setint(r *[5]uint64, v uint64) { + r[0] = v + r[1] = 0 + r[2] = 0 + r[3] = 0 + r[4] = 0 +} + +// unpack sets r = x where r consists of 5, 51-bit limbs in little-endian +// order. +func unpack(r *[5]uint64, x *[32]byte) { + r[0] = uint64(x[0]) | + uint64(x[1])<<8 | + uint64(x[2])<<16 | + uint64(x[3])<<24 | + uint64(x[4])<<32 | + uint64(x[5])<<40 | + uint64(x[6]&7)<<48 + + r[1] = uint64(x[6])>>3 | + uint64(x[7])<<5 | + uint64(x[8])<<13 | + uint64(x[9])<<21 | + uint64(x[10])<<29 | + uint64(x[11])<<37 | + uint64(x[12]&63)<<45 + + r[2] = uint64(x[12])>>6 | + uint64(x[13])<<2 | + uint64(x[14])<<10 | + uint64(x[15])<<18 | + uint64(x[16])<<26 | + uint64(x[17])<<34 | + uint64(x[18])<<42 | + uint64(x[19]&1)<<50 + + r[3] = uint64(x[19])>>1 | + uint64(x[20])<<7 | + uint64(x[21])<<15 | + uint64(x[22])<<23 | + uint64(x[23])<<31 | + uint64(x[24])<<39 | + uint64(x[25]&15)<<47 + + r[4] = uint64(x[25])>>4 | + uint64(x[26])<<4 | + uint64(x[27])<<12 | + uint64(x[28])<<20 | + uint64(x[29])<<28 | + uint64(x[30])<<36 | + uint64(x[31]&127)<<44 +} + +// pack sets out = x where out is the usual, little-endian form of the 5, +// 51-bit limbs in x. +func pack(out *[32]byte, x *[5]uint64) { + t := *x + freeze(&t) + + out[0] = byte(t[0]) + out[1] = byte(t[0] >> 8) + out[2] = byte(t[0] >> 16) + out[3] = byte(t[0] >> 24) + out[4] = byte(t[0] >> 32) + out[5] = byte(t[0] >> 40) + out[6] = byte(t[0] >> 48) + + out[6] ^= byte(t[1]<<3) & 0xf8 + out[7] = byte(t[1] >> 5) + out[8] = byte(t[1] >> 13) + out[9] = byte(t[1] >> 21) + out[10] = byte(t[1] >> 29) + out[11] = byte(t[1] >> 37) + out[12] = byte(t[1] >> 45) + + out[12] ^= byte(t[2]<<6) & 0xc0 + out[13] = byte(t[2] >> 2) + out[14] = byte(t[2] >> 10) + out[15] = byte(t[2] >> 18) + out[16] = byte(t[2] >> 26) + out[17] = byte(t[2] >> 34) + out[18] = byte(t[2] >> 42) + out[19] = byte(t[2] >> 50) + + out[19] ^= byte(t[3]<<1) & 0xfe + out[20] = byte(t[3] >> 7) + out[21] = byte(t[3] >> 15) + out[22] = byte(t[3] >> 23) + out[23] = byte(t[3] >> 31) + out[24] = byte(t[3] >> 39) + out[25] = byte(t[3] >> 47) + + out[25] ^= byte(t[4]<<4) & 0xf0 + out[26] = byte(t[4] >> 4) + out[27] = byte(t[4] >> 12) + out[28] = byte(t[4] >> 20) + out[29] = byte(t[4] >> 28) + out[30] = byte(t[4] >> 36) + out[31] = byte(t[4] >> 44) +} + +// invert calculates r = x^-1 mod p using Fermat's little theorem. 
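[Editor's note] invert raises x to p - 2 = 2^255 - 21 through a fixed chain of squarings and multiplications, so the result is x^-1 by Fermat's little theorem and the operation sequence never depends on the value being inverted. A math/big sanity check of the exponent identity (illustrative, not package code):

package main

import (
	"fmt"
	"math/big"
)

func main() {
	one := big.NewInt(1)
	p := new(big.Int).Sub(new(big.Int).Lsh(one, 255), big.NewInt(19))
	e := new(big.Int).Sub(p, big.NewInt(2)) // 2^255 - 21, the exponent below
	x := big.NewInt(123456789)
	inv := new(big.Int).Exp(x, e, p) // x^(p-2) mod p
	inv.Mul(inv, x)
	fmt.Println(inv.Mod(inv, p)) // 1, so Exp produced the inverse
}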
+func invert(r *[5]uint64, x *[5]uint64) { + var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64 + + square(&z2, x) /* 2 */ + square(&t, &z2) /* 4 */ + square(&t, &t) /* 8 */ + mul(&z9, &t, x) /* 9 */ + mul(&z11, &z9, &z2) /* 11 */ + square(&t, &z11) /* 22 */ + mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */ + + square(&t, &z2_5_0) /* 2^6 - 2^1 */ + for i := 1; i < 5; i++ { /* 2^20 - 2^10 */ + square(&t, &t) + } + mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */ + + square(&t, &z2_10_0) /* 2^11 - 2^1 */ + for i := 1; i < 10; i++ { /* 2^20 - 2^10 */ + square(&t, &t) + } + mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */ + + square(&t, &z2_20_0) /* 2^21 - 2^1 */ + for i := 1; i < 20; i++ { /* 2^40 - 2^20 */ + square(&t, &t) + } + mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */ + + square(&t, &t) /* 2^41 - 2^1 */ + for i := 1; i < 10; i++ { /* 2^50 - 2^10 */ + square(&t, &t) + } + mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */ + + square(&t, &z2_50_0) /* 2^51 - 2^1 */ + for i := 1; i < 50; i++ { /* 2^100 - 2^50 */ + square(&t, &t) + } + mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */ + + square(&t, &z2_100_0) /* 2^101 - 2^1 */ + for i := 1; i < 100; i++ { /* 2^200 - 2^100 */ + square(&t, &t) + } + mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */ + + square(&t, &t) /* 2^201 - 2^1 */ + for i := 1; i < 50; i++ { /* 2^250 - 2^50 */ + square(&t, &t) + } + mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */ + + square(&t, &t) /* 2^251 - 2^1 */ + square(&t, &t) /* 2^252 - 2^2 */ + square(&t, &t) /* 2^253 - 2^3 */ + + square(&t, &t) /* 2^254 - 2^4 */ + + square(&t, &t) /* 2^255 - 2^5 */ + mul(r, &t, &z11) /* 2^255 - 21 */ +} diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s new file mode 100644 index 00000000..0250c888 --- /dev/null +++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s @@ -0,0 +1,1793 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine,!purego + +#define REDMASK51 0x0007FFFFFFFFFFFF + +// These constants cannot be encoded in non-MOVQ immediates. +// We access them directly from memory instead. 
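[Editor's note] On the literals that follow: ·_121666_213 is 121666 pre-shifted left by 13 (a MULQ by it followed by SHRQ $13 splits the 128-bit product exactly at bit 51), and ·_2P0/·_2P1234 are the radix-2^51 limbs of 2p, added before the SUBQs in ladderstep so that no limb underflows. The values can be checked with a few lines of Go:

package main

import "fmt"

func main() {
	fmt.Println(996687872 == 121666<<13)  // true: the pre-shifted ladder constant
	fmt.Printf("%#x\n", uint64(1)<<52-38) // 0xfffffffffffda, limb 0 of 2p
	fmt.Printf("%#x\n", uint64(1)<<52-2)  // 0xffffffffffffe, limbs 1-4 of 2p
	fmt.Printf("%#x\n", uint64(1)<<51-1)  // 0x7ffffffffffff, i.e. REDMASK51
}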
+ +DATA ·_121666_213(SB)/8, $996687872 +GLOBL ·_121666_213(SB), 8, $8 + +DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA +GLOBL ·_2P0(SB), 8, $8 + +DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE +GLOBL ·_2P1234(SB), 8, $8 + +// func freeze(inout *[5]uint64) +TEXT ·freeze(SB),7,$0-8 + MOVQ inout+0(FP), DI + + MOVQ 0(DI),SI + MOVQ 8(DI),DX + MOVQ 16(DI),CX + MOVQ 24(DI),R8 + MOVQ 32(DI),R9 + MOVQ $REDMASK51,AX + MOVQ AX,R10 + SUBQ $18,R10 + MOVQ $3,R11 +REDUCELOOP: + MOVQ SI,R12 + SHRQ $51,R12 + ANDQ AX,SI + ADDQ R12,DX + MOVQ DX,R12 + SHRQ $51,R12 + ANDQ AX,DX + ADDQ R12,CX + MOVQ CX,R12 + SHRQ $51,R12 + ANDQ AX,CX + ADDQ R12,R8 + MOVQ R8,R12 + SHRQ $51,R12 + ANDQ AX,R8 + ADDQ R12,R9 + MOVQ R9,R12 + SHRQ $51,R12 + ANDQ AX,R9 + IMUL3Q $19,R12,R12 + ADDQ R12,SI + SUBQ $1,R11 + JA REDUCELOOP + MOVQ $1,R12 + CMPQ R10,SI + CMOVQLT R11,R12 + CMPQ AX,DX + CMOVQNE R11,R12 + CMPQ AX,CX + CMOVQNE R11,R12 + CMPQ AX,R8 + CMOVQNE R11,R12 + CMPQ AX,R9 + CMOVQNE R11,R12 + NEGQ R12 + ANDQ R12,AX + ANDQ R12,R10 + SUBQ R10,SI + SUBQ AX,DX + SUBQ AX,CX + SUBQ AX,R8 + SUBQ AX,R9 + MOVQ SI,0(DI) + MOVQ DX,8(DI) + MOVQ CX,16(DI) + MOVQ R8,24(DI) + MOVQ R9,32(DI) + RET + +// func ladderstep(inout *[5][5]uint64) +TEXT ·ladderstep(SB),0,$296-8 + MOVQ inout+0(FP),DI + + MOVQ 40(DI),SI + MOVQ 48(DI),DX + MOVQ 56(DI),CX + MOVQ 64(DI),R8 + MOVQ 72(DI),R9 + MOVQ SI,AX + MOVQ DX,R10 + MOVQ CX,R11 + MOVQ R8,R12 + MOVQ R9,R13 + ADDQ ·_2P0(SB),AX + ADDQ ·_2P1234(SB),R10 + ADDQ ·_2P1234(SB),R11 + ADDQ ·_2P1234(SB),R12 + ADDQ ·_2P1234(SB),R13 + ADDQ 80(DI),SI + ADDQ 88(DI),DX + ADDQ 96(DI),CX + ADDQ 104(DI),R8 + ADDQ 112(DI),R9 + SUBQ 80(DI),AX + SUBQ 88(DI),R10 + SUBQ 96(DI),R11 + SUBQ 104(DI),R12 + SUBQ 112(DI),R13 + MOVQ SI,0(SP) + MOVQ DX,8(SP) + MOVQ CX,16(SP) + MOVQ R8,24(SP) + MOVQ R9,32(SP) + MOVQ AX,40(SP) + MOVQ R10,48(SP) + MOVQ R11,56(SP) + MOVQ R12,64(SP) + MOVQ R13,72(SP) + MOVQ 40(SP),AX + MULQ 40(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 40(SP),AX + SHLQ $1,AX + MULQ 48(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 40(SP),AX + SHLQ $1,AX + MULQ 56(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 40(SP),AX + SHLQ $1,AX + MULQ 64(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 40(SP),AX + SHLQ $1,AX + MULQ 72(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 48(SP),AX + MULQ 48(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 48(SP),AX + SHLQ $1,AX + MULQ 56(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 48(SP),AX + SHLQ $1,AX + MULQ 64(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 48(SP),DX + IMUL3Q $38,DX,AX + MULQ 72(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 56(SP),AX + MULQ 56(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 56(SP),DX + IMUL3Q $38,DX,AX + MULQ 64(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 56(SP),DX + IMUL3Q $38,DX,AX + MULQ 72(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 64(SP),DX + IMUL3Q $19,DX,AX + MULQ 64(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 64(SP),DX + IMUL3Q $38,DX,AX + MULQ 72(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 72(SP),DX + IMUL3Q $19,DX,AX + MULQ 72(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ $REDMASK51,DX + SHLQ $13,SI,CX + ANDQ DX,SI + SHLQ $13,R8,R9 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R10,R11 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + ANDQ DX,SI + MOVQ CX,R8 + SHRQ $51,CX + ADDQ R10,CX + ANDQ DX,R8 + MOVQ CX,R9 + SHRQ $51,CX + ADDQ R12,CX + ANDQ DX,R9 + MOVQ CX,AX + SHRQ $51,CX + ADDQ R14,CX + ANDQ DX,AX + MOVQ CX,R10 + SHRQ $51,CX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,80(SP) + MOVQ R8,88(SP) + MOVQ R9,96(SP) + MOVQ AX,104(SP) + 
MOVQ R10,112(SP) + MOVQ 0(SP),AX + MULQ 0(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 0(SP),AX + SHLQ $1,AX + MULQ 8(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 0(SP),AX + SHLQ $1,AX + MULQ 16(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 0(SP),AX + SHLQ $1,AX + MULQ 24(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 0(SP),AX + SHLQ $1,AX + MULQ 32(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 8(SP),AX + MULQ 8(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 8(SP),AX + SHLQ $1,AX + MULQ 16(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 8(SP),AX + SHLQ $1,AX + MULQ 24(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 8(SP),DX + IMUL3Q $38,DX,AX + MULQ 32(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 16(SP),AX + MULQ 16(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 16(SP),DX + IMUL3Q $38,DX,AX + MULQ 24(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 16(SP),DX + IMUL3Q $38,DX,AX + MULQ 32(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 24(SP),DX + IMUL3Q $19,DX,AX + MULQ 24(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 24(SP),DX + IMUL3Q $38,DX,AX + MULQ 32(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 32(SP),DX + IMUL3Q $19,DX,AX + MULQ 32(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ $REDMASK51,DX + SHLQ $13,SI,CX + ANDQ DX,SI + SHLQ $13,R8,R9 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R10,R11 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + ANDQ DX,SI + MOVQ CX,R8 + SHRQ $51,CX + ADDQ R10,CX + ANDQ DX,R8 + MOVQ CX,R9 + SHRQ $51,CX + ADDQ R12,CX + ANDQ DX,R9 + MOVQ CX,AX + SHRQ $51,CX + ADDQ R14,CX + ANDQ DX,AX + MOVQ CX,R10 + SHRQ $51,CX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,120(SP) + MOVQ R8,128(SP) + MOVQ R9,136(SP) + MOVQ AX,144(SP) + MOVQ R10,152(SP) + MOVQ SI,SI + MOVQ R8,DX + MOVQ R9,CX + MOVQ AX,R8 + MOVQ R10,R9 + ADDQ ·_2P0(SB),SI + ADDQ ·_2P1234(SB),DX + ADDQ ·_2P1234(SB),CX + ADDQ ·_2P1234(SB),R8 + ADDQ ·_2P1234(SB),R9 + SUBQ 80(SP),SI + SUBQ 88(SP),DX + SUBQ 96(SP),CX + SUBQ 104(SP),R8 + SUBQ 112(SP),R9 + MOVQ SI,160(SP) + MOVQ DX,168(SP) + MOVQ CX,176(SP) + MOVQ R8,184(SP) + MOVQ R9,192(SP) + MOVQ 120(DI),SI + MOVQ 128(DI),DX + MOVQ 136(DI),CX + MOVQ 144(DI),R8 + MOVQ 152(DI),R9 + MOVQ SI,AX + MOVQ DX,R10 + MOVQ CX,R11 + MOVQ R8,R12 + MOVQ R9,R13 + ADDQ ·_2P0(SB),AX + ADDQ ·_2P1234(SB),R10 + ADDQ ·_2P1234(SB),R11 + ADDQ ·_2P1234(SB),R12 + ADDQ ·_2P1234(SB),R13 + ADDQ 160(DI),SI + ADDQ 168(DI),DX + ADDQ 176(DI),CX + ADDQ 184(DI),R8 + ADDQ 192(DI),R9 + SUBQ 160(DI),AX + SUBQ 168(DI),R10 + SUBQ 176(DI),R11 + SUBQ 184(DI),R12 + SUBQ 192(DI),R13 + MOVQ SI,200(SP) + MOVQ DX,208(SP) + MOVQ CX,216(SP) + MOVQ R8,224(SP) + MOVQ R9,232(SP) + MOVQ AX,240(SP) + MOVQ R10,248(SP) + MOVQ R11,256(SP) + MOVQ R12,264(SP) + MOVQ R13,272(SP) + MOVQ 224(SP),SI + IMUL3Q $19,SI,AX + MOVQ AX,280(SP) + MULQ 56(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 232(SP),DX + IMUL3Q $19,DX,AX + MOVQ AX,288(SP) + MULQ 48(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 200(SP),AX + MULQ 40(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 200(SP),AX + MULQ 48(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 200(SP),AX + MULQ 56(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 200(SP),AX + MULQ 64(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 200(SP),AX + MULQ 72(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 208(SP),AX + MULQ 40(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 208(SP),AX + MULQ 48(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 208(SP),AX + MULQ 56(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 208(SP),AX + MULQ 64(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 208(SP),DX + IMUL3Q $19,DX,AX + MULQ 72(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 216(SP),AX + MULQ 40(SP) + 
ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 216(SP),AX + MULQ 48(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 216(SP),AX + MULQ 56(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 216(SP),DX + IMUL3Q $19,DX,AX + MULQ 64(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 216(SP),DX + IMUL3Q $19,DX,AX + MULQ 72(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 224(SP),AX + MULQ 40(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 224(SP),AX + MULQ 48(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 280(SP),AX + MULQ 64(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 280(SP),AX + MULQ 72(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 232(SP),AX + MULQ 40(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 288(SP),AX + MULQ 56(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 288(SP),AX + MULQ 64(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 288(SP),AX + MULQ 72(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ $REDMASK51,DX + SHLQ $13,SI,CX + ANDQ DX,SI + SHLQ $13,R8,R9 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R10,R11 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,40(SP) + MOVQ R8,48(SP) + MOVQ R9,56(SP) + MOVQ AX,64(SP) + MOVQ R10,72(SP) + MOVQ 264(SP),SI + IMUL3Q $19,SI,AX + MOVQ AX,200(SP) + MULQ 16(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 272(SP),DX + IMUL3Q $19,DX,AX + MOVQ AX,208(SP) + MULQ 8(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 240(SP),AX + MULQ 0(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 240(SP),AX + MULQ 8(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 240(SP),AX + MULQ 16(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 240(SP),AX + MULQ 24(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 240(SP),AX + MULQ 32(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 248(SP),AX + MULQ 0(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 248(SP),AX + MULQ 8(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 248(SP),AX + MULQ 16(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 248(SP),AX + MULQ 24(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 248(SP),DX + IMUL3Q $19,DX,AX + MULQ 32(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 256(SP),AX + MULQ 0(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 256(SP),AX + MULQ 8(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 256(SP),AX + MULQ 16(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 256(SP),DX + IMUL3Q $19,DX,AX + MULQ 24(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 256(SP),DX + IMUL3Q $19,DX,AX + MULQ 32(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 264(SP),AX + MULQ 0(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 264(SP),AX + MULQ 8(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 200(SP),AX + MULQ 24(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 200(SP),AX + MULQ 32(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 272(SP),AX + MULQ 0(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 208(SP),AX + MULQ 16(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 208(SP),AX + MULQ 24(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 208(SP),AX + MULQ 32(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ $REDMASK51,DX + SHLQ $13,SI,CX + ANDQ DX,SI + SHLQ $13,R8,R9 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R10,R11 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX 
+ ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,DX + MOVQ R8,CX + MOVQ R9,R11 + MOVQ AX,R12 + MOVQ R10,R13 + ADDQ ·_2P0(SB),DX + ADDQ ·_2P1234(SB),CX + ADDQ ·_2P1234(SB),R11 + ADDQ ·_2P1234(SB),R12 + ADDQ ·_2P1234(SB),R13 + ADDQ 40(SP),SI + ADDQ 48(SP),R8 + ADDQ 56(SP),R9 + ADDQ 64(SP),AX + ADDQ 72(SP),R10 + SUBQ 40(SP),DX + SUBQ 48(SP),CX + SUBQ 56(SP),R11 + SUBQ 64(SP),R12 + SUBQ 72(SP),R13 + MOVQ SI,120(DI) + MOVQ R8,128(DI) + MOVQ R9,136(DI) + MOVQ AX,144(DI) + MOVQ R10,152(DI) + MOVQ DX,160(DI) + MOVQ CX,168(DI) + MOVQ R11,176(DI) + MOVQ R12,184(DI) + MOVQ R13,192(DI) + MOVQ 120(DI),AX + MULQ 120(DI) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 120(DI),AX + SHLQ $1,AX + MULQ 128(DI) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 120(DI),AX + SHLQ $1,AX + MULQ 136(DI) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 120(DI),AX + SHLQ $1,AX + MULQ 144(DI) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 120(DI),AX + SHLQ $1,AX + MULQ 152(DI) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 128(DI),AX + MULQ 128(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 128(DI),AX + SHLQ $1,AX + MULQ 136(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 128(DI),AX + SHLQ $1,AX + MULQ 144(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 128(DI),DX + IMUL3Q $38,DX,AX + MULQ 152(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 136(DI),AX + MULQ 136(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 136(DI),DX + IMUL3Q $38,DX,AX + MULQ 144(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 136(DI),DX + IMUL3Q $38,DX,AX + MULQ 152(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 144(DI),DX + IMUL3Q $19,DX,AX + MULQ 144(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 144(DI),DX + IMUL3Q $38,DX,AX + MULQ 152(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 152(DI),DX + IMUL3Q $19,DX,AX + MULQ 152(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ $REDMASK51,DX + SHLQ $13,SI,CX + ANDQ DX,SI + SHLQ $13,R8,R9 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R10,R11 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + ANDQ DX,SI + MOVQ CX,R8 + SHRQ $51,CX + ADDQ R10,CX + ANDQ DX,R8 + MOVQ CX,R9 + SHRQ $51,CX + ADDQ R12,CX + ANDQ DX,R9 + MOVQ CX,AX + SHRQ $51,CX + ADDQ R14,CX + ANDQ DX,AX + MOVQ CX,R10 + SHRQ $51,CX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,120(DI) + MOVQ R8,128(DI) + MOVQ R9,136(DI) + MOVQ AX,144(DI) + MOVQ R10,152(DI) + MOVQ 160(DI),AX + MULQ 160(DI) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 160(DI),AX + SHLQ $1,AX + MULQ 168(DI) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 160(DI),AX + SHLQ $1,AX + MULQ 176(DI) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 160(DI),AX + SHLQ $1,AX + MULQ 184(DI) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 160(DI),AX + SHLQ $1,AX + MULQ 192(DI) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 168(DI),AX + MULQ 168(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 168(DI),AX + SHLQ $1,AX + MULQ 176(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 168(DI),AX + SHLQ $1,AX + MULQ 184(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 168(DI),DX + IMUL3Q $38,DX,AX + MULQ 192(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(DI),AX + MULQ 176(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 176(DI),DX + IMUL3Q $38,DX,AX + MULQ 184(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(DI),DX + IMUL3Q $38,DX,AX + MULQ 192(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 184(DI),DX + IMUL3Q $19,DX,AX + MULQ 184(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 184(DI),DX + IMUL3Q $38,DX,AX + MULQ 192(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 192(DI),DX + IMUL3Q $19,DX,AX + MULQ 192(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ $REDMASK51,DX + SHLQ $13,SI,CX + ANDQ DX,SI + SHLQ $13,R8,R9 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R10,R11 + ANDQ DX,R10 + ADDQ R9,R10 + 
SHLQ $13,R12,R13 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + ANDQ DX,SI + MOVQ CX,R8 + SHRQ $51,CX + ADDQ R10,CX + ANDQ DX,R8 + MOVQ CX,R9 + SHRQ $51,CX + ADDQ R12,CX + ANDQ DX,R9 + MOVQ CX,AX + SHRQ $51,CX + ADDQ R14,CX + ANDQ DX,AX + MOVQ CX,R10 + SHRQ $51,CX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,160(DI) + MOVQ R8,168(DI) + MOVQ R9,176(DI) + MOVQ AX,184(DI) + MOVQ R10,192(DI) + MOVQ 184(DI),SI + IMUL3Q $19,SI,AX + MOVQ AX,0(SP) + MULQ 16(DI) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 192(DI),DX + IMUL3Q $19,DX,AX + MOVQ AX,8(SP) + MULQ 8(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 160(DI),AX + MULQ 0(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 160(DI),AX + MULQ 8(DI) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 160(DI),AX + MULQ 16(DI) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 160(DI),AX + MULQ 24(DI) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 160(DI),AX + MULQ 32(DI) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 168(DI),AX + MULQ 0(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 168(DI),AX + MULQ 8(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 168(DI),AX + MULQ 16(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 168(DI),AX + MULQ 24(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 168(DI),DX + IMUL3Q $19,DX,AX + MULQ 32(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(DI),AX + MULQ 0(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 176(DI),AX + MULQ 8(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 176(DI),AX + MULQ 16(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 176(DI),DX + IMUL3Q $19,DX,AX + MULQ 24(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(DI),DX + IMUL3Q $19,DX,AX + MULQ 32(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 184(DI),AX + MULQ 0(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 184(DI),AX + MULQ 8(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 0(SP),AX + MULQ 24(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 0(SP),AX + MULQ 32(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 192(DI),AX + MULQ 0(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 8(SP),AX + MULQ 16(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 8(SP),AX + MULQ 24(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 8(SP),AX + MULQ 32(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ $REDMASK51,DX + SHLQ $13,SI,CX + ANDQ DX,SI + SHLQ $13,R8,R9 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R10,R11 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,160(DI) + MOVQ R8,168(DI) + MOVQ R9,176(DI) + MOVQ AX,184(DI) + MOVQ R10,192(DI) + MOVQ 144(SP),SI + IMUL3Q $19,SI,AX + MOVQ AX,0(SP) + MULQ 96(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 152(SP),DX + IMUL3Q $19,DX,AX + MOVQ AX,8(SP) + MULQ 88(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 120(SP),AX + MULQ 80(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 120(SP),AX + MULQ 88(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 120(SP),AX + MULQ 96(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 120(SP),AX + MULQ 104(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 120(SP),AX + MULQ 112(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 128(SP),AX + MULQ 80(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 128(SP),AX + MULQ 88(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 128(SP),AX + MULQ 96(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 128(SP),AX + MULQ 104(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 128(SP),DX + IMUL3Q $19,DX,AX + MULQ 112(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 
136(SP),AX + MULQ 80(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 136(SP),AX + MULQ 88(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 136(SP),AX + MULQ 96(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 136(SP),DX + IMUL3Q $19,DX,AX + MULQ 104(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 136(SP),DX + IMUL3Q $19,DX,AX + MULQ 112(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 144(SP),AX + MULQ 80(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 144(SP),AX + MULQ 88(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 0(SP),AX + MULQ 104(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 0(SP),AX + MULQ 112(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 152(SP),AX + MULQ 80(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 8(SP),AX + MULQ 96(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 8(SP),AX + MULQ 104(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 8(SP),AX + MULQ 112(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ $REDMASK51,DX + SHLQ $13,SI,CX + ANDQ DX,SI + SHLQ $13,R8,R9 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R10,R11 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,40(DI) + MOVQ R8,48(DI) + MOVQ R9,56(DI) + MOVQ AX,64(DI) + MOVQ R10,72(DI) + MOVQ 160(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + MOVQ AX,SI + MOVQ DX,CX + MOVQ 168(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + ADDQ AX,CX + MOVQ DX,R8 + MOVQ 176(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + ADDQ AX,R8 + MOVQ DX,R9 + MOVQ 184(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + ADDQ AX,R9 + MOVQ DX,R10 + MOVQ 192(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + ADDQ AX,R10 + IMUL3Q $19,DX,DX + ADDQ DX,SI + ADDQ 80(SP),SI + ADDQ 88(SP),CX + ADDQ 96(SP),R8 + ADDQ 104(SP),R9 + ADDQ 112(SP),R10 + MOVQ SI,80(DI) + MOVQ CX,88(DI) + MOVQ R8,96(DI) + MOVQ R9,104(DI) + MOVQ R10,112(DI) + MOVQ 104(DI),SI + IMUL3Q $19,SI,AX + MOVQ AX,0(SP) + MULQ 176(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 112(DI),DX + IMUL3Q $19,DX,AX + MOVQ AX,8(SP) + MULQ 168(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 80(DI),AX + MULQ 160(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 80(DI),AX + MULQ 168(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 80(DI),AX + MULQ 176(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 80(DI),AX + MULQ 184(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 80(DI),AX + MULQ 192(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 88(DI),AX + MULQ 160(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 88(DI),AX + MULQ 168(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 88(DI),AX + MULQ 176(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 88(DI),AX + MULQ 184(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 88(DI),DX + IMUL3Q $19,DX,AX + MULQ 192(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 96(DI),AX + MULQ 160(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 96(DI),AX + MULQ 168(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 96(DI),AX + MULQ 176(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 96(DI),DX + IMUL3Q $19,DX,AX + MULQ 184(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 96(DI),DX + IMUL3Q $19,DX,AX + MULQ 192(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 104(DI),AX + MULQ 160(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 104(DI),AX + MULQ 168(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 0(SP),AX + MULQ 184(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 0(SP),AX + MULQ 192(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 112(DI),AX + MULQ 160(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 8(SP),AX + MULQ 176(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 
8(SP),AX + MULQ 184(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 8(SP),AX + MULQ 192(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ $REDMASK51,DX + SHLQ $13,SI,CX + ANDQ DX,SI + SHLQ $13,R8,R9 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R10,R11 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,80(DI) + MOVQ R8,88(DI) + MOVQ R9,96(DI) + MOVQ AX,104(DI) + MOVQ R10,112(DI) + RET + +// func cswap(inout *[4][5]uint64, v uint64) +TEXT ·cswap(SB),7,$0 + MOVQ inout+0(FP),DI + MOVQ v+8(FP),SI + + SUBQ $1, SI + NOTQ SI + MOVQ SI, X15 + PSHUFD $0x44, X15, X15 + + MOVOU 0(DI), X0 + MOVOU 16(DI), X2 + MOVOU 32(DI), X4 + MOVOU 48(DI), X6 + MOVOU 64(DI), X8 + MOVOU 80(DI), X1 + MOVOU 96(DI), X3 + MOVOU 112(DI), X5 + MOVOU 128(DI), X7 + MOVOU 144(DI), X9 + + MOVO X1, X10 + MOVO X3, X11 + MOVO X5, X12 + MOVO X7, X13 + MOVO X9, X14 + + PXOR X0, X10 + PXOR X2, X11 + PXOR X4, X12 + PXOR X6, X13 + PXOR X8, X14 + PAND X15, X10 + PAND X15, X11 + PAND X15, X12 + PAND X15, X13 + PAND X15, X14 + PXOR X10, X0 + PXOR X10, X1 + PXOR X11, X2 + PXOR X11, X3 + PXOR X12, X4 + PXOR X12, X5 + PXOR X13, X6 + PXOR X13, X7 + PXOR X14, X8 + PXOR X14, X9 + + MOVOU X0, 0(DI) + MOVOU X2, 16(DI) + MOVOU X4, 32(DI) + MOVOU X6, 48(DI) + MOVOU X8, 64(DI) + MOVOU X1, 80(DI) + MOVOU X3, 96(DI) + MOVOU X5, 112(DI) + MOVOU X7, 128(DI) + MOVOU X9, 144(DI) + RET + +// func mul(dest, a, b *[5]uint64) +TEXT ·mul(SB),0,$16-24 + MOVQ dest+0(FP), DI + MOVQ a+8(FP), SI + MOVQ b+16(FP), DX + + MOVQ DX,CX + MOVQ 24(SI),DX + IMUL3Q $19,DX,AX + MOVQ AX,0(SP) + MULQ 16(CX) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 32(SI),DX + IMUL3Q $19,DX,AX + MOVQ AX,8(SP) + MULQ 8(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 0(SI),AX + MULQ 0(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 0(SI),AX + MULQ 8(CX) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 0(SI),AX + MULQ 16(CX) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 0(SI),AX + MULQ 24(CX) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 0(SI),AX + MULQ 32(CX) + MOVQ AX,BX + MOVQ DX,BP + MOVQ 8(SI),AX + MULQ 0(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 8(SI),AX + MULQ 8(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 8(SI),AX + MULQ 16(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 8(SI),AX + MULQ 24(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 8(SI),DX + IMUL3Q $19,DX,AX + MULQ 32(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 16(SI),AX + MULQ 0(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 16(SI),AX + MULQ 8(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 16(SI),AX + MULQ 16(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 16(SI),DX + IMUL3Q $19,DX,AX + MULQ 24(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 16(SI),DX + IMUL3Q $19,DX,AX + MULQ 32(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 24(SI),AX + MULQ 0(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 24(SI),AX + MULQ 8(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 0(SP),AX + MULQ 24(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 0(SP),AX + MULQ 32(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 32(SI),AX + MULQ 0(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 8(SP),AX + MULQ 16(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 8(SP),AX + MULQ 24(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 8(SP),AX + MULQ 32(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ $REDMASK51,SI + SHLQ $13,R8,R9 + ANDQ SI,R8 + SHLQ $13,R10,R11 + ANDQ SI,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ SI,R12 + ADDQ 
R11,R12 + SHLQ $13,R14,R15 + ANDQ SI,R14 + ADDQ R13,R14 + SHLQ $13,BX,BP + ANDQ SI,BX + ADDQ R15,BX + IMUL3Q $19,BP,DX + ADDQ DX,R8 + MOVQ R8,DX + SHRQ $51,DX + ADDQ R10,DX + MOVQ DX,CX + SHRQ $51,DX + ANDQ SI,R8 + ADDQ R12,DX + MOVQ DX,R9 + SHRQ $51,DX + ANDQ SI,CX + ADDQ R14,DX + MOVQ DX,AX + SHRQ $51,DX + ANDQ SI,R9 + ADDQ BX,DX + MOVQ DX,R10 + SHRQ $51,DX + ANDQ SI,AX + IMUL3Q $19,DX,DX + ADDQ DX,R8 + ANDQ SI,R10 + MOVQ R8,0(DI) + MOVQ CX,8(DI) + MOVQ R9,16(DI) + MOVQ AX,24(DI) + MOVQ R10,32(DI) + RET + +// func square(out, in *[5]uint64) +TEXT ·square(SB),7,$0-16 + MOVQ out+0(FP), DI + MOVQ in+8(FP), SI + + MOVQ 0(SI),AX + MULQ 0(SI) + MOVQ AX,CX + MOVQ DX,R8 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 8(SI) + MOVQ AX,R9 + MOVQ DX,R10 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 16(SI) + MOVQ AX,R11 + MOVQ DX,R12 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 24(SI) + MOVQ AX,R13 + MOVQ DX,R14 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 32(SI) + MOVQ AX,R15 + MOVQ DX,BX + MOVQ 8(SI),AX + MULQ 8(SI) + ADDQ AX,R11 + ADCQ DX,R12 + MOVQ 8(SI),AX + SHLQ $1,AX + MULQ 16(SI) + ADDQ AX,R13 + ADCQ DX,R14 + MOVQ 8(SI),AX + SHLQ $1,AX + MULQ 24(SI) + ADDQ AX,R15 + ADCQ DX,BX + MOVQ 8(SI),DX + IMUL3Q $38,DX,AX + MULQ 32(SI) + ADDQ AX,CX + ADCQ DX,R8 + MOVQ 16(SI),AX + MULQ 16(SI) + ADDQ AX,R15 + ADCQ DX,BX + MOVQ 16(SI),DX + IMUL3Q $38,DX,AX + MULQ 24(SI) + ADDQ AX,CX + ADCQ DX,R8 + MOVQ 16(SI),DX + IMUL3Q $38,DX,AX + MULQ 32(SI) + ADDQ AX,R9 + ADCQ DX,R10 + MOVQ 24(SI),DX + IMUL3Q $19,DX,AX + MULQ 24(SI) + ADDQ AX,R9 + ADCQ DX,R10 + MOVQ 24(SI),DX + IMUL3Q $38,DX,AX + MULQ 32(SI) + ADDQ AX,R11 + ADCQ DX,R12 + MOVQ 32(SI),DX + IMUL3Q $19,DX,AX + MULQ 32(SI) + ADDQ AX,R13 + ADCQ DX,R14 + MOVQ $REDMASK51,SI + SHLQ $13,CX,R8 + ANDQ SI,CX + SHLQ $13,R9,R10 + ANDQ SI,R9 + ADDQ R8,R9 + SHLQ $13,R11,R12 + ANDQ SI,R11 + ADDQ R10,R11 + SHLQ $13,R13,R14 + ANDQ SI,R13 + ADDQ R12,R13 + SHLQ $13,R15,BX + ANDQ SI,R15 + ADDQ R14,R15 + IMUL3Q $19,BX,DX + ADDQ DX,CX + MOVQ CX,DX + SHRQ $51,DX + ADDQ R9,DX + ANDQ SI,CX + MOVQ DX,R8 + SHRQ $51,DX + ADDQ R11,DX + ANDQ SI,R8 + MOVQ DX,R9 + SHRQ $51,DX + ADDQ R13,DX + ANDQ SI,R9 + MOVQ DX,AX + SHRQ $51,DX + ADDQ R15,DX + ANDQ SI,AX + MOVQ DX,R10 + SHRQ $51,DX + IMUL3Q $19,DX,DX + ADDQ DX,CX + ANDQ SI,R10 + MOVQ CX,0(DI) + MOVQ R8,8(DI) + MOVQ R9,16(DI) + MOVQ AX,24(DI) + MOVQ R10,32(DI) + RET diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go b/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go new file mode 100644 index 00000000..c43b13fc --- /dev/null +++ b/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go @@ -0,0 +1,828 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package curve25519 + +import "encoding/binary" + +// This code is a port of the public domain, "ref10" implementation of +// curve25519 from SUPERCOP 20130419 by D. J. Bernstein. + +// fieldElement represents an element of the field GF(2^255 - 19). An element +// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 +// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on +// context. 
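The representation described above is unsaturated: limb i of a fieldElement carries weight 2^ceil(25.5*i), so the exponents run 0, 26, 51, 77, 102, 128, 153, 179, 204, 230, alternating 26- and 25-bit limbs. That slack is what lets the product of any two limbs fit in an int64 and lets carries be deferred across several additions. A minimal standalone sketch of the integer a fieldElement denotes, using math/big (limbsToInt is an illustrative helper, not a function from this package):

    package main

    import (
        "fmt"
        "math/big"
    )

    // limbExp[i] is the bit position of limb i: ceil(25.5 * i).
    var limbExp = [10]uint{0, 26, 51, 77, 102, 128, 153, 179, 204, 230}

    // limbsToInt evaluates t[0] + 2^26*t[1] + ... + 2^230*t[9] modulo 2^255 - 19.
    func limbsToInt(t [10]int32) *big.Int {
        p := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 255), big.NewInt(19))
        sum := new(big.Int)
        for i, e := range limbExp {
            sum.Add(sum, new(big.Int).Lsh(big.NewInt(int64(t[i])), e))
        }
        return sum.Mod(sum, p)
    }

    func main() {
        fmt.Println(limbsToInt([10]int32{1})) // the element feOne builds: 1
    }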
+type fieldElement [10]int32
+
+func feZero(fe *fieldElement) {
+	for i := range fe {
+		fe[i] = 0
+	}
+}
+
+func feOne(fe *fieldElement) {
+	feZero(fe)
+	fe[0] = 1
+}
+
+func feAdd(dst, a, b *fieldElement) {
+	for i := range dst {
+		dst[i] = a[i] + b[i]
+	}
+}
+
+func feSub(dst, a, b *fieldElement) {
+	for i := range dst {
+		dst[i] = a[i] - b[i]
+	}
+}
+
+func feCopy(dst, src *fieldElement) {
+	for i := range dst {
+		dst[i] = src[i]
+	}
+}
+
+// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
+//
+// Preconditions: b in {0,1}.
+func feCSwap(f, g *fieldElement, b int32) {
+	b = -b
+	for i := range f {
+		t := b & (f[i] ^ g[i])
+		f[i] ^= t
+		g[i] ^= t
+	}
+}
+
+// load3 reads a 24-bit, little-endian value from in.
+func load3(in []byte) int64 {
+	var r int64
+	r = int64(in[0])
+	r |= int64(in[1]) << 8
+	r |= int64(in[2]) << 16
+	return r
+}
+
+// load4 reads a 32-bit, little-endian value from in.
+func load4(in []byte) int64 {
+	return int64(binary.LittleEndian.Uint32(in))
+}
+
+func feFromBytes(dst *fieldElement, src *[32]byte) {
+	h0 := load4(src[:])
+	h1 := load3(src[4:]) << 6
+	h2 := load3(src[7:]) << 5
+	h3 := load3(src[10:]) << 3
+	h4 := load3(src[13:]) << 2
+	h5 := load4(src[16:])
+	h6 := load3(src[20:]) << 7
+	h7 := load3(src[23:]) << 5
+	h8 := load3(src[26:]) << 4
+	h9 := (load3(src[29:]) & 0x7fffff) << 2
+
+	var carry [10]int64
+	carry[9] = (h9 + 1<<24) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	carry[1] = (h1 + 1<<24) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[3] = (h3 + 1<<24) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[5] = (h5 + 1<<24) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	carry[7] = (h7 + 1<<24) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[0] = (h0 + 1<<25) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[2] = (h2 + 1<<25) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[4] = (h4 + 1<<25) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[6] = (h6 + 1<<25) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	carry[8] = (h8 + 1<<25) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	dst[0] = int32(h0)
+	dst[1] = int32(h1)
+	dst[2] = int32(h2)
+	dst[3] = int32(h3)
+	dst[4] = int32(h4)
+	dst[5] = int32(h5)
+	dst[6] = int32(h6)
+	dst[7] = int32(h7)
+	dst[8] = int32(h8)
+	dst[9] = int32(h9)
+}
+
+// feToBytes marshals h to s.
+// Preconditions:
+//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+//
+// Write p=2^255-19; q=floor(h/p).
+// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
+//
+// Proof:
+//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
+//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
+//
+//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
+//   Then 0<y<1.
+//
+//   Write r=h-pq.
+//   Have 0<=r<=p-1=2^255-20.
+//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
+//
+//   Write x=r+19(2^-255)r+y.
+//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
+//
+//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
+//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
+func feToBytes(s *[32]byte, h *fieldElement) {
+	var carry [10]int32
+
+	q := (19*h[9] + (1 << 24)) >> 25
+	q = (h[0] + q) >> 26
+	q = (h[1] + q) >> 25
+	q = (h[2] + q) >> 26
+	q = (h[3] + q) >> 25
+	q = (h[4] + q) >> 26
+	q = (h[5] + q) >> 25
+	q = (h[6] + q) >> 26
+	q = (h[7] + q) >> 25
+	q = (h[8] + q) >> 26
+	q = (h[9] + q) >> 25
+
+	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
+	h[0] += 19 * q
+	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
+ + carry[0] = h[0] >> 26 + h[1] += carry[0] + h[0] -= carry[0] << 26 + carry[1] = h[1] >> 25 + h[2] += carry[1] + h[1] -= carry[1] << 25 + carry[2] = h[2] >> 26 + h[3] += carry[2] + h[2] -= carry[2] << 26 + carry[3] = h[3] >> 25 + h[4] += carry[3] + h[3] -= carry[3] << 25 + carry[4] = h[4] >> 26 + h[5] += carry[4] + h[4] -= carry[4] << 26 + carry[5] = h[5] >> 25 + h[6] += carry[5] + h[5] -= carry[5] << 25 + carry[6] = h[6] >> 26 + h[7] += carry[6] + h[6] -= carry[6] << 26 + carry[7] = h[7] >> 25 + h[8] += carry[7] + h[7] -= carry[7] << 25 + carry[8] = h[8] >> 26 + h[9] += carry[8] + h[8] -= carry[8] << 26 + carry[9] = h[9] >> 25 + h[9] -= carry[9] << 25 + // h10 = carry9 + + // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. + // Have h[0]+...+2^230 h[9] between 0 and 2^255-1; + // evidently 2^255 h10-2^255 q = 0. + // Goal: Output h[0]+...+2^230 h[9]. + + s[0] = byte(h[0] >> 0) + s[1] = byte(h[0] >> 8) + s[2] = byte(h[0] >> 16) + s[3] = byte((h[0] >> 24) | (h[1] << 2)) + s[4] = byte(h[1] >> 6) + s[5] = byte(h[1] >> 14) + s[6] = byte((h[1] >> 22) | (h[2] << 3)) + s[7] = byte(h[2] >> 5) + s[8] = byte(h[2] >> 13) + s[9] = byte((h[2] >> 21) | (h[3] << 5)) + s[10] = byte(h[3] >> 3) + s[11] = byte(h[3] >> 11) + s[12] = byte((h[3] >> 19) | (h[4] << 6)) + s[13] = byte(h[4] >> 2) + s[14] = byte(h[4] >> 10) + s[15] = byte(h[4] >> 18) + s[16] = byte(h[5] >> 0) + s[17] = byte(h[5] >> 8) + s[18] = byte(h[5] >> 16) + s[19] = byte((h[5] >> 24) | (h[6] << 1)) + s[20] = byte(h[6] >> 7) + s[21] = byte(h[6] >> 15) + s[22] = byte((h[6] >> 23) | (h[7] << 3)) + s[23] = byte(h[7] >> 5) + s[24] = byte(h[7] >> 13) + s[25] = byte((h[7] >> 21) | (h[8] << 4)) + s[26] = byte(h[8] >> 4) + s[27] = byte(h[8] >> 12) + s[28] = byte((h[8] >> 20) | (h[9] << 6)) + s[29] = byte(h[9] >> 2) + s[30] = byte(h[9] >> 10) + s[31] = byte(h[9] >> 18) +} + +// feMul calculates h = f * g +// Can overlap h with f or g. +// +// Preconditions: +// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// +// Postconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +// +// Notes on implementation strategy: +// +// Using schoolbook multiplication. +// Karatsuba would save a little in some cost models. +// +// Most multiplications by 2 and 19 are 32-bit precomputations; +// cheaper than 64-bit postcomputations. +// +// There is one remaining multiplication by 19 in the carry chain; +// one *19 precomputation can be merged into this, +// but the resulting data flow is considerably less clean. +// +// There are 12 carries below. +// 10 of them are 2-way parallelizable and vectorizable. +// Can get away with 11 carries, but then data flow is much deeper. +// +// With tighter constraints on inputs can squeeze carries into int32. 
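The strategy notes above hinge on one identity: 2^255 ≡ 19 (mod 2^255 - 19). A cross term fi*gj carries weight 2^(ei+ej); once ei+ej reaches 255 the term folds back onto the low limbs multiplied by 19, and when i and j are both odd the exponent overshoots by exactly one extra bit, which is where the *38 (= 2*19) combinations such as f1_2 with g9_19 come from. A standalone check of both fold constants (illustrative only):

    package main

    import (
        "fmt"
        "math/big"
    )

    func main() {
        p := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 255), big.NewInt(19))
        for _, n := range []uint{255, 256} {
            r := new(big.Int).Mod(new(big.Int).Lsh(big.NewInt(1), n), p)
            fmt.Printf("2^%d mod (2^255-19) = %v\n", n, r) // prints 19, then 38
        }
    }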
+func feMul(h, f, g *fieldElement) { + f0 := f[0] + f1 := f[1] + f2 := f[2] + f3 := f[3] + f4 := f[4] + f5 := f[5] + f6 := f[6] + f7 := f[7] + f8 := f[8] + f9 := f[9] + g0 := g[0] + g1 := g[1] + g2 := g[2] + g3 := g[3] + g4 := g[4] + g5 := g[5] + g6 := g[6] + g7 := g[7] + g8 := g[8] + g9 := g[9] + g1_19 := 19 * g1 // 1.4*2^29 + g2_19 := 19 * g2 // 1.4*2^30; still ok + g3_19 := 19 * g3 + g4_19 := 19 * g4 + g5_19 := 19 * g5 + g6_19 := 19 * g6 + g7_19 := 19 * g7 + g8_19 := 19 * g8 + g9_19 := 19 * g9 + f1_2 := 2 * f1 + f3_2 := 2 * f3 + f5_2 := 2 * f5 + f7_2 := 2 * f7 + f9_2 := 2 * f9 + f0g0 := int64(f0) * int64(g0) + f0g1 := int64(f0) * int64(g1) + f0g2 := int64(f0) * int64(g2) + f0g3 := int64(f0) * int64(g3) + f0g4 := int64(f0) * int64(g4) + f0g5 := int64(f0) * int64(g5) + f0g6 := int64(f0) * int64(g6) + f0g7 := int64(f0) * int64(g7) + f0g8 := int64(f0) * int64(g8) + f0g9 := int64(f0) * int64(g9) + f1g0 := int64(f1) * int64(g0) + f1g1_2 := int64(f1_2) * int64(g1) + f1g2 := int64(f1) * int64(g2) + f1g3_2 := int64(f1_2) * int64(g3) + f1g4 := int64(f1) * int64(g4) + f1g5_2 := int64(f1_2) * int64(g5) + f1g6 := int64(f1) * int64(g6) + f1g7_2 := int64(f1_2) * int64(g7) + f1g8 := int64(f1) * int64(g8) + f1g9_38 := int64(f1_2) * int64(g9_19) + f2g0 := int64(f2) * int64(g0) + f2g1 := int64(f2) * int64(g1) + f2g2 := int64(f2) * int64(g2) + f2g3 := int64(f2) * int64(g3) + f2g4 := int64(f2) * int64(g4) + f2g5 := int64(f2) * int64(g5) + f2g6 := int64(f2) * int64(g6) + f2g7 := int64(f2) * int64(g7) + f2g8_19 := int64(f2) * int64(g8_19) + f2g9_19 := int64(f2) * int64(g9_19) + f3g0 := int64(f3) * int64(g0) + f3g1_2 := int64(f3_2) * int64(g1) + f3g2 := int64(f3) * int64(g2) + f3g3_2 := int64(f3_2) * int64(g3) + f3g4 := int64(f3) * int64(g4) + f3g5_2 := int64(f3_2) * int64(g5) + f3g6 := int64(f3) * int64(g6) + f3g7_38 := int64(f3_2) * int64(g7_19) + f3g8_19 := int64(f3) * int64(g8_19) + f3g9_38 := int64(f3_2) * int64(g9_19) + f4g0 := int64(f4) * int64(g0) + f4g1 := int64(f4) * int64(g1) + f4g2 := int64(f4) * int64(g2) + f4g3 := int64(f4) * int64(g3) + f4g4 := int64(f4) * int64(g4) + f4g5 := int64(f4) * int64(g5) + f4g6_19 := int64(f4) * int64(g6_19) + f4g7_19 := int64(f4) * int64(g7_19) + f4g8_19 := int64(f4) * int64(g8_19) + f4g9_19 := int64(f4) * int64(g9_19) + f5g0 := int64(f5) * int64(g0) + f5g1_2 := int64(f5_2) * int64(g1) + f5g2 := int64(f5) * int64(g2) + f5g3_2 := int64(f5_2) * int64(g3) + f5g4 := int64(f5) * int64(g4) + f5g5_38 := int64(f5_2) * int64(g5_19) + f5g6_19 := int64(f5) * int64(g6_19) + f5g7_38 := int64(f5_2) * int64(g7_19) + f5g8_19 := int64(f5) * int64(g8_19) + f5g9_38 := int64(f5_2) * int64(g9_19) + f6g0 := int64(f6) * int64(g0) + f6g1 := int64(f6) * int64(g1) + f6g2 := int64(f6) * int64(g2) + f6g3 := int64(f6) * int64(g3) + f6g4_19 := int64(f6) * int64(g4_19) + f6g5_19 := int64(f6) * int64(g5_19) + f6g6_19 := int64(f6) * int64(g6_19) + f6g7_19 := int64(f6) * int64(g7_19) + f6g8_19 := int64(f6) * int64(g8_19) + f6g9_19 := int64(f6) * int64(g9_19) + f7g0 := int64(f7) * int64(g0) + f7g1_2 := int64(f7_2) * int64(g1) + f7g2 := int64(f7) * int64(g2) + f7g3_38 := int64(f7_2) * int64(g3_19) + f7g4_19 := int64(f7) * int64(g4_19) + f7g5_38 := int64(f7_2) * int64(g5_19) + f7g6_19 := int64(f7) * int64(g6_19) + f7g7_38 := int64(f7_2) * int64(g7_19) + f7g8_19 := int64(f7) * int64(g8_19) + f7g9_38 := int64(f7_2) * int64(g9_19) + f8g0 := int64(f8) * int64(g0) + f8g1 := int64(f8) * int64(g1) + f8g2_19 := int64(f8) * int64(g2_19) + f8g3_19 := int64(f8) * int64(g3_19) + f8g4_19 := int64(f8) * int64(g4_19) + 
f8g5_19 := int64(f8) * int64(g5_19) + f8g6_19 := int64(f8) * int64(g6_19) + f8g7_19 := int64(f8) * int64(g7_19) + f8g8_19 := int64(f8) * int64(g8_19) + f8g9_19 := int64(f8) * int64(g9_19) + f9g0 := int64(f9) * int64(g0) + f9g1_38 := int64(f9_2) * int64(g1_19) + f9g2_19 := int64(f9) * int64(g2_19) + f9g3_38 := int64(f9_2) * int64(g3_19) + f9g4_19 := int64(f9) * int64(g4_19) + f9g5_38 := int64(f9_2) * int64(g5_19) + f9g6_19 := int64(f9) * int64(g6_19) + f9g7_38 := int64(f9_2) * int64(g7_19) + f9g8_19 := int64(f9) * int64(g8_19) + f9g9_38 := int64(f9_2) * int64(g9_19) + h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38 + h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19 + h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38 + h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19 + h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38 + h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19 + h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38 + h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19 + h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38 + h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 + var carry [10]int64 + + // |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) + // i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 + // |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) + // i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + // |h0| <= 2^25 + // |h4| <= 2^25 + // |h1| <= 1.51*2^58 + // |h5| <= 1.51*2^58 + + carry[1] = (h1 + (1 << 24)) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[5] = (h5 + (1 << 24)) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + // |h1| <= 2^24; from now on fits into int32 + // |h5| <= 2^24; from now on fits into int32 + // |h2| <= 1.21*2^59 + // |h6| <= 1.21*2^59 + + carry[2] = (h2 + (1 << 25)) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[6] = (h6 + (1 << 25)) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + // |h2| <= 2^25; from now on fits into int32 unchanged + // |h6| <= 2^25; from now on fits into int32 unchanged + // |h3| <= 1.51*2^58 + // |h7| <= 1.51*2^58 + + carry[3] = (h3 + (1 << 24)) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[7] = (h7 + (1 << 24)) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + // |h3| <= 2^24; from now on fits into int32 unchanged + // |h7| <= 2^24; from now on fits into int32 unchanged + // |h4| <= 1.52*2^33 + // |h8| <= 1.52*2^33 + + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[8] = (h8 + (1 << 25)) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + // |h4| <= 2^25; from now on fits into int32 unchanged + // |h8| <= 2^25; from now on fits into int32 unchanged + // |h5| <= 1.01*2^24 + // |h9| <= 1.51*2^58 + + carry[9] = (h9 + (1 << 24)) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + // |h9| <= 2^24; from now on fits into int32 unchanged + // |h0| <= 1.8*2^37 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + // |h0| <= 2^25; from now on fits into int32 unchanged + // |h1| <= 
1.01*2^24 + + h[0] = int32(h0) + h[1] = int32(h1) + h[2] = int32(h2) + h[3] = int32(h3) + h[4] = int32(h4) + h[5] = int32(h5) + h[6] = int32(h6) + h[7] = int32(h7) + h[8] = int32(h8) + h[9] = int32(h9) +} + +// feSquare calculates h = f*f. Can overlap h with f. +// +// Preconditions: +// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// +// Postconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +func feSquare(h, f *fieldElement) { + f0 := f[0] + f1 := f[1] + f2 := f[2] + f3 := f[3] + f4 := f[4] + f5 := f[5] + f6 := f[6] + f7 := f[7] + f8 := f[8] + f9 := f[9] + f0_2 := 2 * f0 + f1_2 := 2 * f1 + f2_2 := 2 * f2 + f3_2 := 2 * f3 + f4_2 := 2 * f4 + f5_2 := 2 * f5 + f6_2 := 2 * f6 + f7_2 := 2 * f7 + f5_38 := 38 * f5 // 1.31*2^30 + f6_19 := 19 * f6 // 1.31*2^30 + f7_38 := 38 * f7 // 1.31*2^30 + f8_19 := 19 * f8 // 1.31*2^30 + f9_38 := 38 * f9 // 1.31*2^30 + f0f0 := int64(f0) * int64(f0) + f0f1_2 := int64(f0_2) * int64(f1) + f0f2_2 := int64(f0_2) * int64(f2) + f0f3_2 := int64(f0_2) * int64(f3) + f0f4_2 := int64(f0_2) * int64(f4) + f0f5_2 := int64(f0_2) * int64(f5) + f0f6_2 := int64(f0_2) * int64(f6) + f0f7_2 := int64(f0_2) * int64(f7) + f0f8_2 := int64(f0_2) * int64(f8) + f0f9_2 := int64(f0_2) * int64(f9) + f1f1_2 := int64(f1_2) * int64(f1) + f1f2_2 := int64(f1_2) * int64(f2) + f1f3_4 := int64(f1_2) * int64(f3_2) + f1f4_2 := int64(f1_2) * int64(f4) + f1f5_4 := int64(f1_2) * int64(f5_2) + f1f6_2 := int64(f1_2) * int64(f6) + f1f7_4 := int64(f1_2) * int64(f7_2) + f1f8_2 := int64(f1_2) * int64(f8) + f1f9_76 := int64(f1_2) * int64(f9_38) + f2f2 := int64(f2) * int64(f2) + f2f3_2 := int64(f2_2) * int64(f3) + f2f4_2 := int64(f2_2) * int64(f4) + f2f5_2 := int64(f2_2) * int64(f5) + f2f6_2 := int64(f2_2) * int64(f6) + f2f7_2 := int64(f2_2) * int64(f7) + f2f8_38 := int64(f2_2) * int64(f8_19) + f2f9_38 := int64(f2) * int64(f9_38) + f3f3_2 := int64(f3_2) * int64(f3) + f3f4_2 := int64(f3_2) * int64(f4) + f3f5_4 := int64(f3_2) * int64(f5_2) + f3f6_2 := int64(f3_2) * int64(f6) + f3f7_76 := int64(f3_2) * int64(f7_38) + f3f8_38 := int64(f3_2) * int64(f8_19) + f3f9_76 := int64(f3_2) * int64(f9_38) + f4f4 := int64(f4) * int64(f4) + f4f5_2 := int64(f4_2) * int64(f5) + f4f6_38 := int64(f4_2) * int64(f6_19) + f4f7_38 := int64(f4) * int64(f7_38) + f4f8_38 := int64(f4_2) * int64(f8_19) + f4f9_38 := int64(f4) * int64(f9_38) + f5f5_38 := int64(f5) * int64(f5_38) + f5f6_38 := int64(f5_2) * int64(f6_19) + f5f7_76 := int64(f5_2) * int64(f7_38) + f5f8_38 := int64(f5_2) * int64(f8_19) + f5f9_76 := int64(f5_2) * int64(f9_38) + f6f6_19 := int64(f6) * int64(f6_19) + f6f7_38 := int64(f6) * int64(f7_38) + f6f8_38 := int64(f6_2) * int64(f8_19) + f6f9_38 := int64(f6) * int64(f9_38) + f7f7_38 := int64(f7) * int64(f7_38) + f7f8_38 := int64(f7_2) * int64(f8_19) + f7f9_76 := int64(f7_2) * int64(f9_38) + f8f8_19 := int64(f8) * int64(f8_19) + f8f9_38 := int64(f8) * int64(f9_38) + f9f9_38 := int64(f9) * int64(f9_38) + h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38 + h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38 + h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19 + h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38 + h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38 + h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38 + h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19 + h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38 + h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38 + h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2 + var carry [10]int64 + + carry[0] 
= (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + + carry[1] = (h1 + (1 << 24)) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[5] = (h5 + (1 << 24)) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + + carry[2] = (h2 + (1 << 25)) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[6] = (h6 + (1 << 25)) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + + carry[3] = (h3 + (1 << 24)) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[7] = (h7 + (1 << 24)) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[8] = (h8 + (1 << 25)) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + + carry[9] = (h9 + (1 << 24)) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + + h[0] = int32(h0) + h[1] = int32(h1) + h[2] = int32(h2) + h[3] = int32(h3) + h[4] = int32(h4) + h[5] = int32(h5) + h[6] = int32(h6) + h[7] = int32(h7) + h[8] = int32(h8) + h[9] = int32(h9) +} + +// feMul121666 calculates h = f * 121666. Can overlap h with f. +// +// Preconditions: +// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// +// Postconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +func feMul121666(h, f *fieldElement) { + h0 := int64(f[0]) * 121666 + h1 := int64(f[1]) * 121666 + h2 := int64(f[2]) * 121666 + h3 := int64(f[3]) * 121666 + h4 := int64(f[4]) * 121666 + h5 := int64(f[5]) * 121666 + h6 := int64(f[6]) * 121666 + h7 := int64(f[7]) * 121666 + h8 := int64(f[8]) * 121666 + h9 := int64(f[9]) * 121666 + var carry [10]int64 + + carry[9] = (h9 + (1 << 24)) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + carry[1] = (h1 + (1 << 24)) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[3] = (h3 + (1 << 24)) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[5] = (h5 + (1 << 24)) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + carry[7] = (h7 + (1 << 24)) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[2] = (h2 + (1 << 25)) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[6] = (h6 + (1 << 25)) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + carry[8] = (h8 + (1 << 25)) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + + h[0] = int32(h0) + h[1] = int32(h1) + h[2] = int32(h2) + h[3] = int32(h3) + h[4] = int32(h4) + h[5] = int32(h5) + h[6] = int32(h6) + h[7] = int32(h7) + h[8] = int32(h8) + h[9] = int32(h9) +} + +// feInvert sets out = z^-1. 
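The inversion that follows is Fermat's little theorem unrolled: for the prime p = 2^255 - 19, z^(p-2) ≡ z^-1 (mod p), and the fixed ladder of squarings (runs of 1, 2, 5, 10, 20, 50 and 100) assembles the exponent 2^255 - 21 without ever branching on secret data, unlike a variable-time extended-Euclid inverse. The identity restated as a standalone check (illustrative only):

    package main

    import (
        "fmt"
        "math/big"
    )

    func main() {
        p := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 255), big.NewInt(19))
        z := big.NewInt(123456789)

        // z^(p-2) mod p: the inverse that feInvert's square-and-multiply chain computes.
        inv := new(big.Int).Exp(z, new(big.Int).Sub(p, big.NewInt(2)), p)

        check := new(big.Int).Mul(z, inv)
        fmt.Println(check.Mod(check, p)) // 1
    }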
+func feInvert(out, z *fieldElement) { + var t0, t1, t2, t3 fieldElement + var i int + + feSquare(&t0, z) + for i = 1; i < 1; i++ { + feSquare(&t0, &t0) + } + feSquare(&t1, &t0) + for i = 1; i < 2; i++ { + feSquare(&t1, &t1) + } + feMul(&t1, z, &t1) + feMul(&t0, &t0, &t1) + feSquare(&t2, &t0) + for i = 1; i < 1; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t1, &t2) + feSquare(&t2, &t1) + for i = 1; i < 5; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t2, &t1) + feSquare(&t2, &t1) + for i = 1; i < 10; i++ { + feSquare(&t2, &t2) + } + feMul(&t2, &t2, &t1) + feSquare(&t3, &t2) + for i = 1; i < 20; i++ { + feSquare(&t3, &t3) + } + feMul(&t2, &t3, &t2) + feSquare(&t2, &t2) + for i = 1; i < 10; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t2, &t1) + feSquare(&t2, &t1) + for i = 1; i < 50; i++ { + feSquare(&t2, &t2) + } + feMul(&t2, &t2, &t1) + feSquare(&t3, &t2) + for i = 1; i < 100; i++ { + feSquare(&t3, &t3) + } + feMul(&t2, &t3, &t2) + feSquare(&t2, &t2) + for i = 1; i < 50; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t2, &t1) + feSquare(&t1, &t1) + for i = 1; i < 5; i++ { + feSquare(&t1, &t1) + } + feMul(out, &t1, &t0) +} + +func scalarMultGeneric(out, in, base *[32]byte) { + var e [32]byte + + copy(e[:], in[:]) + e[0] &= 248 + e[31] &= 127 + e[31] |= 64 + + var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement + feFromBytes(&x1, base) + feOne(&x2) + feCopy(&x3, &x1) + feOne(&z3) + + swap := int32(0) + for pos := 254; pos >= 0; pos-- { + b := e[pos/8] >> uint(pos&7) + b &= 1 + swap ^= int32(b) + feCSwap(&x2, &x3, swap) + feCSwap(&z2, &z3, swap) + swap = int32(b) + + feSub(&tmp0, &x3, &z3) + feSub(&tmp1, &x2, &z2) + feAdd(&x2, &x2, &z2) + feAdd(&z2, &x3, &z3) + feMul(&z3, &tmp0, &x2) + feMul(&z2, &z2, &tmp1) + feSquare(&tmp0, &tmp1) + feSquare(&tmp1, &x2) + feAdd(&x3, &z3, &z2) + feSub(&z2, &z3, &z2) + feMul(&x2, &tmp1, &tmp0) + feSub(&tmp1, &tmp1, &tmp0) + feSquare(&z2, &z2) + feMul121666(&z3, &tmp1) + feSquare(&x3, &x3) + feAdd(&tmp0, &tmp0, &z3) + feMul(&z3, &x1, &z2) + feMul(&z2, &tmp1, &tmp0) + } + + feCSwap(&x2, &x3, swap) + feCSwap(&z2, &z3, swap) + + feInvert(&z2, &z2) + feMul(&x2, &x2, &z2) + feToBytes(out, &x2) +} diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go b/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go new file mode 100644 index 00000000..047d49af --- /dev/null +++ b/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go @@ -0,0 +1,11 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 gccgo appengine purego + +package curve25519 + +func scalarMult(out, in, base *[32]byte) { + scalarMultGeneric(out, in, base) +} diff --git a/vendor/golang.org/x/crypto/curve25519/doc.go b/vendor/golang.org/x/crypto/curve25519/doc.go deleted file mode 100644 index da9b10d9..00000000 --- a/vendor/golang.org/x/crypto/curve25519/doc.go +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package curve25519 provides an implementation of scalar multiplication on -// the elliptic curve known as curve25519. See https://cr.yp.to/ecdh.html -package curve25519 // import "golang.org/x/crypto/curve25519" - -// basePoint is the x coordinate of the generator of the curve. 
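Two X25519 conventions meet at this point in the deleted doc.go: the generator is the curve point with x-coordinate 9 (hence the encoding below), and every scalar is clamped before the ladder runs, exactly as scalarMultGeneric does above: clear the low three bits so the scalar is a multiple of the cofactor 8, clear bit 255, and set bit 254. A standalone sketch of the clamping (clamp is an illustrative name, not part of this package):

    package main

    import "fmt"

    // clamp applies the X25519 scalar tweaks used by scalarMultGeneric.
    func clamp(s [32]byte) [32]byte {
        s[0] &= 248  // clear bits 0-2: force a multiple of the cofactor 8
        s[31] &= 127 // clear bit 255
        s[31] |= 64  // set bit 254, fixing the scalar's bit length
        return s
    }

    func main() {
        var k [32]byte
        for i := range k {
            k[i] = 0xff
        }
        fmt.Printf("%x\n", clamp(k)) // f8, thirty ff bytes, then 7f
    }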
-var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} - -// ScalarMult sets dst to the product in*base where dst and base are the x -// coordinates of group points and all values are in little-endian form. -func ScalarMult(dst, in, base *[32]byte) { - scalarMult(dst, in, base) -} - -// ScalarBaseMult sets dst to the product in*base where dst and base are the x -// coordinates of group points, base is the standard generator and all values -// are in little-endian form. -func ScalarBaseMult(dst, in *[32]byte) { - ScalarMult(dst, in, &basePoint) -} diff --git a/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s b/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s deleted file mode 100644 index 39081610..00000000 --- a/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html - -// +build amd64,!gccgo,!appengine - -#include "const_amd64.h" - -// func freeze(inout *[5]uint64) -TEXT ·freeze(SB),7,$0-8 - MOVQ inout+0(FP), DI - - MOVQ 0(DI),SI - MOVQ 8(DI),DX - MOVQ 16(DI),CX - MOVQ 24(DI),R8 - MOVQ 32(DI),R9 - MOVQ $REDMASK51,AX - MOVQ AX,R10 - SUBQ $18,R10 - MOVQ $3,R11 -REDUCELOOP: - MOVQ SI,R12 - SHRQ $51,R12 - ANDQ AX,SI - ADDQ R12,DX - MOVQ DX,R12 - SHRQ $51,R12 - ANDQ AX,DX - ADDQ R12,CX - MOVQ CX,R12 - SHRQ $51,R12 - ANDQ AX,CX - ADDQ R12,R8 - MOVQ R8,R12 - SHRQ $51,R12 - ANDQ AX,R8 - ADDQ R12,R9 - MOVQ R9,R12 - SHRQ $51,R12 - ANDQ AX,R9 - IMUL3Q $19,R12,R12 - ADDQ R12,SI - SUBQ $1,R11 - JA REDUCELOOP - MOVQ $1,R12 - CMPQ R10,SI - CMOVQLT R11,R12 - CMPQ AX,DX - CMOVQNE R11,R12 - CMPQ AX,CX - CMOVQNE R11,R12 - CMPQ AX,R8 - CMOVQNE R11,R12 - CMPQ AX,R9 - CMOVQNE R11,R12 - NEGQ R12 - ANDQ R12,AX - ANDQ R12,R10 - SUBQ R10,SI - SUBQ AX,DX - SUBQ AX,CX - SUBQ AX,R8 - SUBQ AX,R9 - MOVQ SI,0(DI) - MOVQ DX,8(DI) - MOVQ CX,16(DI) - MOVQ R8,24(DI) - MOVQ R9,32(DI) - RET diff --git a/vendor/golang.org/x/crypto/curve25519/ladderstep_amd64.s b/vendor/golang.org/x/crypto/curve25519/ladderstep_amd64.s deleted file mode 100644 index e0ac30c7..00000000 --- a/vendor/golang.org/x/crypto/curve25519/ladderstep_amd64.s +++ /dev/null @@ -1,1377 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html - -// +build amd64,!gccgo,!appengine - -#include "const_amd64.h" - -// func ladderstep(inout *[5][5]uint64) -TEXT ·ladderstep(SB),0,$296-8 - MOVQ inout+0(FP),DI - - MOVQ 40(DI),SI - MOVQ 48(DI),DX - MOVQ 56(DI),CX - MOVQ 64(DI),R8 - MOVQ 72(DI),R9 - MOVQ SI,AX - MOVQ DX,R10 - MOVQ CX,R11 - MOVQ R8,R12 - MOVQ R9,R13 - ADDQ ·_2P0(SB),AX - ADDQ ·_2P1234(SB),R10 - ADDQ ·_2P1234(SB),R11 - ADDQ ·_2P1234(SB),R12 - ADDQ ·_2P1234(SB),R13 - ADDQ 80(DI),SI - ADDQ 88(DI),DX - ADDQ 96(DI),CX - ADDQ 104(DI),R8 - ADDQ 112(DI),R9 - SUBQ 80(DI),AX - SUBQ 88(DI),R10 - SUBQ 96(DI),R11 - SUBQ 104(DI),R12 - SUBQ 112(DI),R13 - MOVQ SI,0(SP) - MOVQ DX,8(SP) - MOVQ CX,16(SP) - MOVQ R8,24(SP) - MOVQ R9,32(SP) - MOVQ AX,40(SP) - MOVQ R10,48(SP) - MOVQ R11,56(SP) - MOVQ R12,64(SP) - MOVQ R13,72(SP) - MOVQ 40(SP),AX - MULQ 40(SP) - MOVQ AX,SI - MOVQ DX,CX - MOVQ 40(SP),AX - SHLQ $1,AX - MULQ 48(SP) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 40(SP),AX - SHLQ $1,AX - MULQ 56(SP) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 40(SP),AX - SHLQ $1,AX - MULQ 64(SP) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 40(SP),AX - SHLQ $1,AX - MULQ 72(SP) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 48(SP),AX - MULQ 48(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 48(SP),AX - SHLQ $1,AX - MULQ 56(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 48(SP),AX - SHLQ $1,AX - MULQ 64(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 48(SP),DX - IMUL3Q $38,DX,AX - MULQ 72(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 56(SP),AX - MULQ 56(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 56(SP),DX - IMUL3Q $38,DX,AX - MULQ 64(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 56(SP),DX - IMUL3Q $38,DX,AX - MULQ 72(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 64(SP),DX - IMUL3Q $19,DX,AX - MULQ 64(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 64(SP),DX - IMUL3Q $38,DX,AX - MULQ 72(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 72(SP),DX - IMUL3Q $19,DX,AX - MULQ 72(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ $REDMASK51,DX - SHLQ $13,SI,CX - ANDQ DX,SI - SHLQ $13,R8,R9 - ANDQ DX,R8 - ADDQ CX,R8 - SHLQ $13,R10,R11 - ANDQ DX,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ DX,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ DX,R14 - ADDQ R13,R14 - IMUL3Q $19,R15,CX - ADDQ CX,SI - MOVQ SI,CX - SHRQ $51,CX - ADDQ R8,CX - ANDQ DX,SI - MOVQ CX,R8 - SHRQ $51,CX - ADDQ R10,CX - ANDQ DX,R8 - MOVQ CX,R9 - SHRQ $51,CX - ADDQ R12,CX - ANDQ DX,R9 - MOVQ CX,AX - SHRQ $51,CX - ADDQ R14,CX - ANDQ DX,AX - MOVQ CX,R10 - SHRQ $51,CX - IMUL3Q $19,CX,CX - ADDQ CX,SI - ANDQ DX,R10 - MOVQ SI,80(SP) - MOVQ R8,88(SP) - MOVQ R9,96(SP) - MOVQ AX,104(SP) - MOVQ R10,112(SP) - MOVQ 0(SP),AX - MULQ 0(SP) - MOVQ AX,SI - MOVQ DX,CX - MOVQ 0(SP),AX - SHLQ $1,AX - MULQ 8(SP) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 0(SP),AX - SHLQ $1,AX - MULQ 16(SP) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 0(SP),AX - SHLQ $1,AX - MULQ 24(SP) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 0(SP),AX - SHLQ $1,AX - MULQ 32(SP) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 8(SP),AX - MULQ 8(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 8(SP),AX - SHLQ $1,AX - MULQ 16(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 8(SP),AX - SHLQ $1,AX - MULQ 24(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 8(SP),DX - IMUL3Q $38,DX,AX - MULQ 32(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 16(SP),AX - MULQ 16(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 16(SP),DX - IMUL3Q $38,DX,AX - MULQ 24(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 16(SP),DX - IMUL3Q $38,DX,AX - MULQ 32(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 24(SP),DX - IMUL3Q $19,DX,AX - MULQ 24(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 
24(SP),DX - IMUL3Q $38,DX,AX - MULQ 32(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 32(SP),DX - IMUL3Q $19,DX,AX - MULQ 32(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ $REDMASK51,DX - SHLQ $13,SI,CX - ANDQ DX,SI - SHLQ $13,R8,R9 - ANDQ DX,R8 - ADDQ CX,R8 - SHLQ $13,R10,R11 - ANDQ DX,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ DX,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ DX,R14 - ADDQ R13,R14 - IMUL3Q $19,R15,CX - ADDQ CX,SI - MOVQ SI,CX - SHRQ $51,CX - ADDQ R8,CX - ANDQ DX,SI - MOVQ CX,R8 - SHRQ $51,CX - ADDQ R10,CX - ANDQ DX,R8 - MOVQ CX,R9 - SHRQ $51,CX - ADDQ R12,CX - ANDQ DX,R9 - MOVQ CX,AX - SHRQ $51,CX - ADDQ R14,CX - ANDQ DX,AX - MOVQ CX,R10 - SHRQ $51,CX - IMUL3Q $19,CX,CX - ADDQ CX,SI - ANDQ DX,R10 - MOVQ SI,120(SP) - MOVQ R8,128(SP) - MOVQ R9,136(SP) - MOVQ AX,144(SP) - MOVQ R10,152(SP) - MOVQ SI,SI - MOVQ R8,DX - MOVQ R9,CX - MOVQ AX,R8 - MOVQ R10,R9 - ADDQ ·_2P0(SB),SI - ADDQ ·_2P1234(SB),DX - ADDQ ·_2P1234(SB),CX - ADDQ ·_2P1234(SB),R8 - ADDQ ·_2P1234(SB),R9 - SUBQ 80(SP),SI - SUBQ 88(SP),DX - SUBQ 96(SP),CX - SUBQ 104(SP),R8 - SUBQ 112(SP),R9 - MOVQ SI,160(SP) - MOVQ DX,168(SP) - MOVQ CX,176(SP) - MOVQ R8,184(SP) - MOVQ R9,192(SP) - MOVQ 120(DI),SI - MOVQ 128(DI),DX - MOVQ 136(DI),CX - MOVQ 144(DI),R8 - MOVQ 152(DI),R9 - MOVQ SI,AX - MOVQ DX,R10 - MOVQ CX,R11 - MOVQ R8,R12 - MOVQ R9,R13 - ADDQ ·_2P0(SB),AX - ADDQ ·_2P1234(SB),R10 - ADDQ ·_2P1234(SB),R11 - ADDQ ·_2P1234(SB),R12 - ADDQ ·_2P1234(SB),R13 - ADDQ 160(DI),SI - ADDQ 168(DI),DX - ADDQ 176(DI),CX - ADDQ 184(DI),R8 - ADDQ 192(DI),R9 - SUBQ 160(DI),AX - SUBQ 168(DI),R10 - SUBQ 176(DI),R11 - SUBQ 184(DI),R12 - SUBQ 192(DI),R13 - MOVQ SI,200(SP) - MOVQ DX,208(SP) - MOVQ CX,216(SP) - MOVQ R8,224(SP) - MOVQ R9,232(SP) - MOVQ AX,240(SP) - MOVQ R10,248(SP) - MOVQ R11,256(SP) - MOVQ R12,264(SP) - MOVQ R13,272(SP) - MOVQ 224(SP),SI - IMUL3Q $19,SI,AX - MOVQ AX,280(SP) - MULQ 56(SP) - MOVQ AX,SI - MOVQ DX,CX - MOVQ 232(SP),DX - IMUL3Q $19,DX,AX - MOVQ AX,288(SP) - MULQ 48(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 200(SP),AX - MULQ 40(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 200(SP),AX - MULQ 48(SP) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 200(SP),AX - MULQ 56(SP) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 200(SP),AX - MULQ 64(SP) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 200(SP),AX - MULQ 72(SP) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 208(SP),AX - MULQ 40(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 208(SP),AX - MULQ 48(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 208(SP),AX - MULQ 56(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 208(SP),AX - MULQ 64(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 208(SP),DX - IMUL3Q $19,DX,AX - MULQ 72(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 216(SP),AX - MULQ 40(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 216(SP),AX - MULQ 48(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 216(SP),AX - MULQ 56(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 216(SP),DX - IMUL3Q $19,DX,AX - MULQ 64(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 216(SP),DX - IMUL3Q $19,DX,AX - MULQ 72(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 224(SP),AX - MULQ 40(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 224(SP),AX - MULQ 48(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 280(SP),AX - MULQ 64(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 280(SP),AX - MULQ 72(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 232(SP),AX - MULQ 40(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 288(SP),AX - MULQ 56(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 288(SP),AX - MULQ 64(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 288(SP),AX - MULQ 72(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ $REDMASK51,DX - SHLQ $13,SI,CX - ANDQ DX,SI - SHLQ $13,R8,R9 - ANDQ DX,R8 - ADDQ CX,R8 - SHLQ $13,R10,R11 - ANDQ DX,R10 - ADDQ R9,R10 - 
SHLQ $13,R12,R13 - ANDQ DX,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ DX,R14 - ADDQ R13,R14 - IMUL3Q $19,R15,CX - ADDQ CX,SI - MOVQ SI,CX - SHRQ $51,CX - ADDQ R8,CX - MOVQ CX,R8 - SHRQ $51,CX - ANDQ DX,SI - ADDQ R10,CX - MOVQ CX,R9 - SHRQ $51,CX - ANDQ DX,R8 - ADDQ R12,CX - MOVQ CX,AX - SHRQ $51,CX - ANDQ DX,R9 - ADDQ R14,CX - MOVQ CX,R10 - SHRQ $51,CX - ANDQ DX,AX - IMUL3Q $19,CX,CX - ADDQ CX,SI - ANDQ DX,R10 - MOVQ SI,40(SP) - MOVQ R8,48(SP) - MOVQ R9,56(SP) - MOVQ AX,64(SP) - MOVQ R10,72(SP) - MOVQ 264(SP),SI - IMUL3Q $19,SI,AX - MOVQ AX,200(SP) - MULQ 16(SP) - MOVQ AX,SI - MOVQ DX,CX - MOVQ 272(SP),DX - IMUL3Q $19,DX,AX - MOVQ AX,208(SP) - MULQ 8(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 240(SP),AX - MULQ 0(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 240(SP),AX - MULQ 8(SP) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 240(SP),AX - MULQ 16(SP) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 240(SP),AX - MULQ 24(SP) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 240(SP),AX - MULQ 32(SP) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 248(SP),AX - MULQ 0(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 248(SP),AX - MULQ 8(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 248(SP),AX - MULQ 16(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 248(SP),AX - MULQ 24(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 248(SP),DX - IMUL3Q $19,DX,AX - MULQ 32(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 256(SP),AX - MULQ 0(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 256(SP),AX - MULQ 8(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 256(SP),AX - MULQ 16(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 256(SP),DX - IMUL3Q $19,DX,AX - MULQ 24(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 256(SP),DX - IMUL3Q $19,DX,AX - MULQ 32(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 264(SP),AX - MULQ 0(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 264(SP),AX - MULQ 8(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 200(SP),AX - MULQ 24(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 200(SP),AX - MULQ 32(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 272(SP),AX - MULQ 0(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 208(SP),AX - MULQ 16(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 208(SP),AX - MULQ 24(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 208(SP),AX - MULQ 32(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ $REDMASK51,DX - SHLQ $13,SI,CX - ANDQ DX,SI - SHLQ $13,R8,R9 - ANDQ DX,R8 - ADDQ CX,R8 - SHLQ $13,R10,R11 - ANDQ DX,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ DX,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ DX,R14 - ADDQ R13,R14 - IMUL3Q $19,R15,CX - ADDQ CX,SI - MOVQ SI,CX - SHRQ $51,CX - ADDQ R8,CX - MOVQ CX,R8 - SHRQ $51,CX - ANDQ DX,SI - ADDQ R10,CX - MOVQ CX,R9 - SHRQ $51,CX - ANDQ DX,R8 - ADDQ R12,CX - MOVQ CX,AX - SHRQ $51,CX - ANDQ DX,R9 - ADDQ R14,CX - MOVQ CX,R10 - SHRQ $51,CX - ANDQ DX,AX - IMUL3Q $19,CX,CX - ADDQ CX,SI - ANDQ DX,R10 - MOVQ SI,DX - MOVQ R8,CX - MOVQ R9,R11 - MOVQ AX,R12 - MOVQ R10,R13 - ADDQ ·_2P0(SB),DX - ADDQ ·_2P1234(SB),CX - ADDQ ·_2P1234(SB),R11 - ADDQ ·_2P1234(SB),R12 - ADDQ ·_2P1234(SB),R13 - ADDQ 40(SP),SI - ADDQ 48(SP),R8 - ADDQ 56(SP),R9 - ADDQ 64(SP),AX - ADDQ 72(SP),R10 - SUBQ 40(SP),DX - SUBQ 48(SP),CX - SUBQ 56(SP),R11 - SUBQ 64(SP),R12 - SUBQ 72(SP),R13 - MOVQ SI,120(DI) - MOVQ R8,128(DI) - MOVQ R9,136(DI) - MOVQ AX,144(DI) - MOVQ R10,152(DI) - MOVQ DX,160(DI) - MOVQ CX,168(DI) - MOVQ R11,176(DI) - MOVQ R12,184(DI) - MOVQ R13,192(DI) - MOVQ 120(DI),AX - MULQ 120(DI) - MOVQ AX,SI - MOVQ DX,CX - MOVQ 120(DI),AX - SHLQ $1,AX - MULQ 128(DI) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 120(DI),AX - SHLQ $1,AX - MULQ 136(DI) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 120(DI),AX - SHLQ $1,AX - MULQ 144(DI) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 120(DI),AX - SHLQ $1,AX - MULQ 152(DI) - MOVQ AX,R14 - MOVQ 
DX,R15 - MOVQ 128(DI),AX - MULQ 128(DI) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 128(DI),AX - SHLQ $1,AX - MULQ 136(DI) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 128(DI),AX - SHLQ $1,AX - MULQ 144(DI) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 128(DI),DX - IMUL3Q $38,DX,AX - MULQ 152(DI) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 136(DI),AX - MULQ 136(DI) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 136(DI),DX - IMUL3Q $38,DX,AX - MULQ 144(DI) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 136(DI),DX - IMUL3Q $38,DX,AX - MULQ 152(DI) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 144(DI),DX - IMUL3Q $19,DX,AX - MULQ 144(DI) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 144(DI),DX - IMUL3Q $38,DX,AX - MULQ 152(DI) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 152(DI),DX - IMUL3Q $19,DX,AX - MULQ 152(DI) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ $REDMASK51,DX - SHLQ $13,SI,CX - ANDQ DX,SI - SHLQ $13,R8,R9 - ANDQ DX,R8 - ADDQ CX,R8 - SHLQ $13,R10,R11 - ANDQ DX,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ DX,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ DX,R14 - ADDQ R13,R14 - IMUL3Q $19,R15,CX - ADDQ CX,SI - MOVQ SI,CX - SHRQ $51,CX - ADDQ R8,CX - ANDQ DX,SI - MOVQ CX,R8 - SHRQ $51,CX - ADDQ R10,CX - ANDQ DX,R8 - MOVQ CX,R9 - SHRQ $51,CX - ADDQ R12,CX - ANDQ DX,R9 - MOVQ CX,AX - SHRQ $51,CX - ADDQ R14,CX - ANDQ DX,AX - MOVQ CX,R10 - SHRQ $51,CX - IMUL3Q $19,CX,CX - ADDQ CX,SI - ANDQ DX,R10 - MOVQ SI,120(DI) - MOVQ R8,128(DI) - MOVQ R9,136(DI) - MOVQ AX,144(DI) - MOVQ R10,152(DI) - MOVQ 160(DI),AX - MULQ 160(DI) - MOVQ AX,SI - MOVQ DX,CX - MOVQ 160(DI),AX - SHLQ $1,AX - MULQ 168(DI) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 160(DI),AX - SHLQ $1,AX - MULQ 176(DI) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 160(DI),AX - SHLQ $1,AX - MULQ 184(DI) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 160(DI),AX - SHLQ $1,AX - MULQ 192(DI) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 168(DI),AX - MULQ 168(DI) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 168(DI),AX - SHLQ $1,AX - MULQ 176(DI) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 168(DI),AX - SHLQ $1,AX - MULQ 184(DI) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 168(DI),DX - IMUL3Q $38,DX,AX - MULQ 192(DI) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 176(DI),AX - MULQ 176(DI) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 176(DI),DX - IMUL3Q $38,DX,AX - MULQ 184(DI) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 176(DI),DX - IMUL3Q $38,DX,AX - MULQ 192(DI) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 184(DI),DX - IMUL3Q $19,DX,AX - MULQ 184(DI) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 184(DI),DX - IMUL3Q $38,DX,AX - MULQ 192(DI) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 192(DI),DX - IMUL3Q $19,DX,AX - MULQ 192(DI) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ $REDMASK51,DX - SHLQ $13,SI,CX - ANDQ DX,SI - SHLQ $13,R8,R9 - ANDQ DX,R8 - ADDQ CX,R8 - SHLQ $13,R10,R11 - ANDQ DX,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ DX,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ DX,R14 - ADDQ R13,R14 - IMUL3Q $19,R15,CX - ADDQ CX,SI - MOVQ SI,CX - SHRQ $51,CX - ADDQ R8,CX - ANDQ DX,SI - MOVQ CX,R8 - SHRQ $51,CX - ADDQ R10,CX - ANDQ DX,R8 - MOVQ CX,R9 - SHRQ $51,CX - ADDQ R12,CX - ANDQ DX,R9 - MOVQ CX,AX - SHRQ $51,CX - ADDQ R14,CX - ANDQ DX,AX - MOVQ CX,R10 - SHRQ $51,CX - IMUL3Q $19,CX,CX - ADDQ CX,SI - ANDQ DX,R10 - MOVQ SI,160(DI) - MOVQ R8,168(DI) - MOVQ R9,176(DI) - MOVQ AX,184(DI) - MOVQ R10,192(DI) - MOVQ 184(DI),SI - IMUL3Q $19,SI,AX - MOVQ AX,0(SP) - MULQ 16(DI) - MOVQ AX,SI - MOVQ DX,CX - MOVQ 192(DI),DX - IMUL3Q $19,DX,AX - MOVQ AX,8(SP) - MULQ 8(DI) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 160(DI),AX - MULQ 0(DI) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 160(DI),AX - MULQ 8(DI) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 160(DI),AX - MULQ 16(DI) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 160(DI),AX - MULQ 24(DI) - MOVQ AX,R12 - 
MOVQ DX,R13 - MOVQ 160(DI),AX - MULQ 32(DI) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 168(DI),AX - MULQ 0(DI) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 168(DI),AX - MULQ 8(DI) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 168(DI),AX - MULQ 16(DI) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 168(DI),AX - MULQ 24(DI) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 168(DI),DX - IMUL3Q $19,DX,AX - MULQ 32(DI) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 176(DI),AX - MULQ 0(DI) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 176(DI),AX - MULQ 8(DI) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 176(DI),AX - MULQ 16(DI) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 176(DI),DX - IMUL3Q $19,DX,AX - MULQ 24(DI) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 176(DI),DX - IMUL3Q $19,DX,AX - MULQ 32(DI) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 184(DI),AX - MULQ 0(DI) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 184(DI),AX - MULQ 8(DI) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 0(SP),AX - MULQ 24(DI) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 0(SP),AX - MULQ 32(DI) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 192(DI),AX - MULQ 0(DI) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 8(SP),AX - MULQ 16(DI) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 8(SP),AX - MULQ 24(DI) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 8(SP),AX - MULQ 32(DI) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ $REDMASK51,DX - SHLQ $13,SI,CX - ANDQ DX,SI - SHLQ $13,R8,R9 - ANDQ DX,R8 - ADDQ CX,R8 - SHLQ $13,R10,R11 - ANDQ DX,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ DX,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ DX,R14 - ADDQ R13,R14 - IMUL3Q $19,R15,CX - ADDQ CX,SI - MOVQ SI,CX - SHRQ $51,CX - ADDQ R8,CX - MOVQ CX,R8 - SHRQ $51,CX - ANDQ DX,SI - ADDQ R10,CX - MOVQ CX,R9 - SHRQ $51,CX - ANDQ DX,R8 - ADDQ R12,CX - MOVQ CX,AX - SHRQ $51,CX - ANDQ DX,R9 - ADDQ R14,CX - MOVQ CX,R10 - SHRQ $51,CX - ANDQ DX,AX - IMUL3Q $19,CX,CX - ADDQ CX,SI - ANDQ DX,R10 - MOVQ SI,160(DI) - MOVQ R8,168(DI) - MOVQ R9,176(DI) - MOVQ AX,184(DI) - MOVQ R10,192(DI) - MOVQ 144(SP),SI - IMUL3Q $19,SI,AX - MOVQ AX,0(SP) - MULQ 96(SP) - MOVQ AX,SI - MOVQ DX,CX - MOVQ 152(SP),DX - IMUL3Q $19,DX,AX - MOVQ AX,8(SP) - MULQ 88(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 120(SP),AX - MULQ 80(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 120(SP),AX - MULQ 88(SP) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 120(SP),AX - MULQ 96(SP) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 120(SP),AX - MULQ 104(SP) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 120(SP),AX - MULQ 112(SP) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 128(SP),AX - MULQ 80(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 128(SP),AX - MULQ 88(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 128(SP),AX - MULQ 96(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 128(SP),AX - MULQ 104(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 128(SP),DX - IMUL3Q $19,DX,AX - MULQ 112(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 136(SP),AX - MULQ 80(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 136(SP),AX - MULQ 88(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 136(SP),AX - MULQ 96(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 136(SP),DX - IMUL3Q $19,DX,AX - MULQ 104(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 136(SP),DX - IMUL3Q $19,DX,AX - MULQ 112(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 144(SP),AX - MULQ 80(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 144(SP),AX - MULQ 88(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 0(SP),AX - MULQ 104(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 0(SP),AX - MULQ 112(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 152(SP),AX - MULQ 80(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 8(SP),AX - MULQ 96(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 8(SP),AX - MULQ 104(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 8(SP),AX - MULQ 112(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ $REDMASK51,DX - SHLQ $13,SI,CX - ANDQ DX,SI - SHLQ $13,R8,R9 - ANDQ DX,R8 - ADDQ CX,R8 - SHLQ $13,R10,R11 - 
ANDQ DX,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ DX,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ DX,R14 - ADDQ R13,R14 - IMUL3Q $19,R15,CX - ADDQ CX,SI - MOVQ SI,CX - SHRQ $51,CX - ADDQ R8,CX - MOVQ CX,R8 - SHRQ $51,CX - ANDQ DX,SI - ADDQ R10,CX - MOVQ CX,R9 - SHRQ $51,CX - ANDQ DX,R8 - ADDQ R12,CX - MOVQ CX,AX - SHRQ $51,CX - ANDQ DX,R9 - ADDQ R14,CX - MOVQ CX,R10 - SHRQ $51,CX - ANDQ DX,AX - IMUL3Q $19,CX,CX - ADDQ CX,SI - ANDQ DX,R10 - MOVQ SI,40(DI) - MOVQ R8,48(DI) - MOVQ R9,56(DI) - MOVQ AX,64(DI) - MOVQ R10,72(DI) - MOVQ 160(SP),AX - MULQ ·_121666_213(SB) - SHRQ $13,AX - MOVQ AX,SI - MOVQ DX,CX - MOVQ 168(SP),AX - MULQ ·_121666_213(SB) - SHRQ $13,AX - ADDQ AX,CX - MOVQ DX,R8 - MOVQ 176(SP),AX - MULQ ·_121666_213(SB) - SHRQ $13,AX - ADDQ AX,R8 - MOVQ DX,R9 - MOVQ 184(SP),AX - MULQ ·_121666_213(SB) - SHRQ $13,AX - ADDQ AX,R9 - MOVQ DX,R10 - MOVQ 192(SP),AX - MULQ ·_121666_213(SB) - SHRQ $13,AX - ADDQ AX,R10 - IMUL3Q $19,DX,DX - ADDQ DX,SI - ADDQ 80(SP),SI - ADDQ 88(SP),CX - ADDQ 96(SP),R8 - ADDQ 104(SP),R9 - ADDQ 112(SP),R10 - MOVQ SI,80(DI) - MOVQ CX,88(DI) - MOVQ R8,96(DI) - MOVQ R9,104(DI) - MOVQ R10,112(DI) - MOVQ 104(DI),SI - IMUL3Q $19,SI,AX - MOVQ AX,0(SP) - MULQ 176(SP) - MOVQ AX,SI - MOVQ DX,CX - MOVQ 112(DI),DX - IMUL3Q $19,DX,AX - MOVQ AX,8(SP) - MULQ 168(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 80(DI),AX - MULQ 160(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 80(DI),AX - MULQ 168(SP) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 80(DI),AX - MULQ 176(SP) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 80(DI),AX - MULQ 184(SP) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 80(DI),AX - MULQ 192(SP) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 88(DI),AX - MULQ 160(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 88(DI),AX - MULQ 168(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 88(DI),AX - MULQ 176(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 88(DI),AX - MULQ 184(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 88(DI),DX - IMUL3Q $19,DX,AX - MULQ 192(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 96(DI),AX - MULQ 160(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 96(DI),AX - MULQ 168(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 96(DI),AX - MULQ 176(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 96(DI),DX - IMUL3Q $19,DX,AX - MULQ 184(SP) - ADDQ AX,SI - ADCQ DX,CX - MOVQ 96(DI),DX - IMUL3Q $19,DX,AX - MULQ 192(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 104(DI),AX - MULQ 160(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 104(DI),AX - MULQ 168(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 0(SP),AX - MULQ 184(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 0(SP),AX - MULQ 192(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 112(DI),AX - MULQ 160(SP) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 8(SP),AX - MULQ 176(SP) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 8(SP),AX - MULQ 184(SP) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 8(SP),AX - MULQ 192(SP) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ $REDMASK51,DX - SHLQ $13,SI,CX - ANDQ DX,SI - SHLQ $13,R8,R9 - ANDQ DX,R8 - ADDQ CX,R8 - SHLQ $13,R10,R11 - ANDQ DX,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ DX,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ DX,R14 - ADDQ R13,R14 - IMUL3Q $19,R15,CX - ADDQ CX,SI - MOVQ SI,CX - SHRQ $51,CX - ADDQ R8,CX - MOVQ CX,R8 - SHRQ $51,CX - ANDQ DX,SI - ADDQ R10,CX - MOVQ CX,R9 - SHRQ $51,CX - ANDQ DX,R8 - ADDQ R12,CX - MOVQ CX,AX - SHRQ $51,CX - ANDQ DX,R9 - ADDQ R14,CX - MOVQ CX,R10 - SHRQ $51,CX - ANDQ DX,AX - IMUL3Q $19,CX,CX - ADDQ CX,SI - ANDQ DX,R10 - MOVQ SI,80(DI) - MOVQ R8,88(DI) - MOVQ R9,96(DI) - MOVQ AX,104(DI) - MOVQ R10,112(DI) - RET diff --git a/vendor/golang.org/x/crypto/curve25519/mont25519_amd64.go b/vendor/golang.org/x/crypto/curve25519/mont25519_amd64.go deleted file mode 100644 
index 5822bd53..00000000 --- a/vendor/golang.org/x/crypto/curve25519/mont25519_amd64.go +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build amd64,!gccgo,!appengine - -package curve25519 - -// These functions are implemented in the .s files. The names of the functions -// in the rest of the file are also taken from the SUPERCOP sources to help -// people following along. - -//go:noescape - -func cswap(inout *[5]uint64, v uint64) - -//go:noescape - -func ladderstep(inout *[5][5]uint64) - -//go:noescape - -func freeze(inout *[5]uint64) - -//go:noescape - -func mul(dest, a, b *[5]uint64) - -//go:noescape - -func square(out, in *[5]uint64) - -// mladder uses a Montgomery ladder to calculate (xr/zr) *= s. -func mladder(xr, zr *[5]uint64, s *[32]byte) { - var work [5][5]uint64 - - work[0] = *xr - setint(&work[1], 1) - setint(&work[2], 0) - work[3] = *xr - setint(&work[4], 1) - - j := uint(6) - var prevbit byte - - for i := 31; i >= 0; i-- { - for j < 8 { - bit := ((*s)[i] >> j) & 1 - swap := bit ^ prevbit - prevbit = bit - cswap(&work[1], uint64(swap)) - ladderstep(&work) - j-- - } - j = 7 - } - - *xr = work[1] - *zr = work[2] -} - -func scalarMult(out, in, base *[32]byte) { - var e [32]byte - copy(e[:], (*in)[:]) - e[0] &= 248 - e[31] &= 127 - e[31] |= 64 - - var t, z [5]uint64 - unpack(&t, base) - mladder(&t, &z, &e) - invert(&z, &z) - mul(&t, &t, &z) - pack(out, &t) -} - -func setint(r *[5]uint64, v uint64) { - r[0] = v - r[1] = 0 - r[2] = 0 - r[3] = 0 - r[4] = 0 -} - -// unpack sets r = x where r consists of 5, 51-bit limbs in little-endian -// order. -func unpack(r *[5]uint64, x *[32]byte) { - r[0] = uint64(x[0]) | - uint64(x[1])<<8 | - uint64(x[2])<<16 | - uint64(x[3])<<24 | - uint64(x[4])<<32 | - uint64(x[5])<<40 | - uint64(x[6]&7)<<48 - - r[1] = uint64(x[6])>>3 | - uint64(x[7])<<5 | - uint64(x[8])<<13 | - uint64(x[9])<<21 | - uint64(x[10])<<29 | - uint64(x[11])<<37 | - uint64(x[12]&63)<<45 - - r[2] = uint64(x[12])>>6 | - uint64(x[13])<<2 | - uint64(x[14])<<10 | - uint64(x[15])<<18 | - uint64(x[16])<<26 | - uint64(x[17])<<34 | - uint64(x[18])<<42 | - uint64(x[19]&1)<<50 - - r[3] = uint64(x[19])>>1 | - uint64(x[20])<<7 | - uint64(x[21])<<15 | - uint64(x[22])<<23 | - uint64(x[23])<<31 | - uint64(x[24])<<39 | - uint64(x[25]&15)<<47 - - r[4] = uint64(x[25])>>4 | - uint64(x[26])<<4 | - uint64(x[27])<<12 | - uint64(x[28])<<20 | - uint64(x[29])<<28 | - uint64(x[30])<<36 | - uint64(x[31]&127)<<44 -} - -// pack sets out = x where out is the usual, little-endian form of the 5, -// 51-bit limbs in x. 
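The file being removed here drove the old amd64 assembly, which works in radix 2^51: five 64-bit limbs of 51 bits each, split at bits 51, 102, 153 and 204, with REDMASK51 = 2^51 - 1 masking each limb after a carry. unpack above performs that split, and pack below reverses it once freeze has produced the canonical representative. A standalone sketch of the same split (illustrative, math/big):

    package main

    import (
        "fmt"
        "math/big"
    )

    const redMask51 = (1 << 51) - 1 // the value of the REDMASK51 constant

    // split breaks a value already reduced mod 2^255-19 into five 51-bit limbs.
    func split(x *big.Int) [5]uint64 {
        var r [5]uint64
        t := new(big.Int).Set(x)
        mask := big.NewInt(redMask51)
        for i := range r {
            r[i] = new(big.Int).And(t, mask).Uint64()
            t.Rsh(t, 51)
        }
        return r
    }

    func main() {
        fmt.Println(split(big.NewInt(9))) // the base point x-coordinate: [9 0 0 0 0]
    }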
-func pack(out *[32]byte, x *[5]uint64) { - t := *x - freeze(&t) - - out[0] = byte(t[0]) - out[1] = byte(t[0] >> 8) - out[2] = byte(t[0] >> 16) - out[3] = byte(t[0] >> 24) - out[4] = byte(t[0] >> 32) - out[5] = byte(t[0] >> 40) - out[6] = byte(t[0] >> 48) - - out[6] ^= byte(t[1]<<3) & 0xf8 - out[7] = byte(t[1] >> 5) - out[8] = byte(t[1] >> 13) - out[9] = byte(t[1] >> 21) - out[10] = byte(t[1] >> 29) - out[11] = byte(t[1] >> 37) - out[12] = byte(t[1] >> 45) - - out[12] ^= byte(t[2]<<6) & 0xc0 - out[13] = byte(t[2] >> 2) - out[14] = byte(t[2] >> 10) - out[15] = byte(t[2] >> 18) - out[16] = byte(t[2] >> 26) - out[17] = byte(t[2] >> 34) - out[18] = byte(t[2] >> 42) - out[19] = byte(t[2] >> 50) - - out[19] ^= byte(t[3]<<1) & 0xfe - out[20] = byte(t[3] >> 7) - out[21] = byte(t[3] >> 15) - out[22] = byte(t[3] >> 23) - out[23] = byte(t[3] >> 31) - out[24] = byte(t[3] >> 39) - out[25] = byte(t[3] >> 47) - - out[25] ^= byte(t[4]<<4) & 0xf0 - out[26] = byte(t[4] >> 4) - out[27] = byte(t[4] >> 12) - out[28] = byte(t[4] >> 20) - out[29] = byte(t[4] >> 28) - out[30] = byte(t[4] >> 36) - out[31] = byte(t[4] >> 44) -} - -// invert calculates r = x^-1 mod p using Fermat's little theorem. -func invert(r *[5]uint64, x *[5]uint64) { - var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64 - - square(&z2, x) /* 2 */ - square(&t, &z2) /* 4 */ - square(&t, &t) /* 8 */ - mul(&z9, &t, x) /* 9 */ - mul(&z11, &z9, &z2) /* 11 */ - square(&t, &z11) /* 22 */ - mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */ - - square(&t, &z2_5_0) /* 2^6 - 2^1 */ - for i := 1; i < 5; i++ { /* 2^20 - 2^10 */ - square(&t, &t) - } - mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */ - - square(&t, &z2_10_0) /* 2^11 - 2^1 */ - for i := 1; i < 10; i++ { /* 2^20 - 2^10 */ - square(&t, &t) - } - mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */ - - square(&t, &z2_20_0) /* 2^21 - 2^1 */ - for i := 1; i < 20; i++ { /* 2^40 - 2^20 */ - square(&t, &t) - } - mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */ - - square(&t, &t) /* 2^41 - 2^1 */ - for i := 1; i < 10; i++ { /* 2^50 - 2^10 */ - square(&t, &t) - } - mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */ - - square(&t, &z2_50_0) /* 2^51 - 2^1 */ - for i := 1; i < 50; i++ { /* 2^100 - 2^50 */ - square(&t, &t) - } - mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */ - - square(&t, &z2_100_0) /* 2^101 - 2^1 */ - for i := 1; i < 100; i++ { /* 2^200 - 2^100 */ - square(&t, &t) - } - mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */ - - square(&t, &t) /* 2^201 - 2^1 */ - for i := 1; i < 50; i++ { /* 2^250 - 2^50 */ - square(&t, &t) - } - mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */ - - square(&t, &t) /* 2^251 - 2^1 */ - square(&t, &t) /* 2^252 - 2^2 */ - square(&t, &t) /* 2^253 - 2^3 */ - - square(&t, &t) /* 2^254 - 2^4 */ - - square(&t, &t) /* 2^255 - 2^5 */ - mul(r, &t, &z11) /* 2^255 - 21 */ -} diff --git a/vendor/golang.org/x/crypto/curve25519/mul_amd64.s b/vendor/golang.org/x/crypto/curve25519/mul_amd64.s deleted file mode 100644 index 1f76d1a3..00000000 --- a/vendor/golang.org/x/crypto/curve25519/mul_amd64.s +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html - -// +build amd64,!gccgo,!appengine - -#include "const_amd64.h" - -// func mul(dest, a, b *[5]uint64) -TEXT ·mul(SB),0,$16-24 - MOVQ dest+0(FP), DI - MOVQ a+8(FP), SI - MOVQ b+16(FP), DX - - MOVQ DX,CX - MOVQ 24(SI),DX - IMUL3Q $19,DX,AX - MOVQ AX,0(SP) - MULQ 16(CX) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 32(SI),DX - IMUL3Q $19,DX,AX - MOVQ AX,8(SP) - MULQ 8(CX) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 0(SI),AX - MULQ 0(CX) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 0(SI),AX - MULQ 8(CX) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 0(SI),AX - MULQ 16(CX) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 0(SI),AX - MULQ 24(CX) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 0(SI),AX - MULQ 32(CX) - MOVQ AX,BX - MOVQ DX,BP - MOVQ 8(SI),AX - MULQ 0(CX) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 8(SI),AX - MULQ 8(CX) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 8(SI),AX - MULQ 16(CX) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 8(SI),AX - MULQ 24(CX) - ADDQ AX,BX - ADCQ DX,BP - MOVQ 8(SI),DX - IMUL3Q $19,DX,AX - MULQ 32(CX) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 16(SI),AX - MULQ 0(CX) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 16(SI),AX - MULQ 8(CX) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 16(SI),AX - MULQ 16(CX) - ADDQ AX,BX - ADCQ DX,BP - MOVQ 16(SI),DX - IMUL3Q $19,DX,AX - MULQ 24(CX) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 16(SI),DX - IMUL3Q $19,DX,AX - MULQ 32(CX) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 24(SI),AX - MULQ 0(CX) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 24(SI),AX - MULQ 8(CX) - ADDQ AX,BX - ADCQ DX,BP - MOVQ 0(SP),AX - MULQ 24(CX) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 0(SP),AX - MULQ 32(CX) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 32(SI),AX - MULQ 0(CX) - ADDQ AX,BX - ADCQ DX,BP - MOVQ 8(SP),AX - MULQ 16(CX) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 8(SP),AX - MULQ 24(CX) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 8(SP),AX - MULQ 32(CX) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ $REDMASK51,SI - SHLQ $13,R8,R9 - ANDQ SI,R8 - SHLQ $13,R10,R11 - ANDQ SI,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ SI,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ SI,R14 - ADDQ R13,R14 - SHLQ $13,BX,BP - ANDQ SI,BX - ADDQ R15,BX - IMUL3Q $19,BP,DX - ADDQ DX,R8 - MOVQ R8,DX - SHRQ $51,DX - ADDQ R10,DX - MOVQ DX,CX - SHRQ $51,DX - ANDQ SI,R8 - ADDQ R12,DX - MOVQ DX,R9 - SHRQ $51,DX - ANDQ SI,CX - ADDQ R14,DX - MOVQ DX,AX - SHRQ $51,DX - ANDQ SI,R9 - ADDQ BX,DX - MOVQ DX,R10 - SHRQ $51,DX - ANDQ SI,AX - IMUL3Q $19,DX,DX - ADDQ DX,R8 - ANDQ SI,R10 - MOVQ R8,0(DI) - MOVQ CX,8(DI) - MOVQ R9,16(DI) - MOVQ AX,24(DI) - MOVQ R10,32(DI) - RET diff --git a/vendor/golang.org/x/crypto/curve25519/square_amd64.s b/vendor/golang.org/x/crypto/curve25519/square_amd64.s deleted file mode 100644 index 07511a45..00000000 --- a/vendor/golang.org/x/crypto/curve25519/square_amd64.s +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html - -// +build amd64,!gccgo,!appengine - -#include "const_amd64.h" - -// func square(out, in *[5]uint64) -TEXT ·square(SB),7,$0-16 - MOVQ out+0(FP), DI - MOVQ in+8(FP), SI - - MOVQ 0(SI),AX - MULQ 0(SI) - MOVQ AX,CX - MOVQ DX,R8 - MOVQ 0(SI),AX - SHLQ $1,AX - MULQ 8(SI) - MOVQ AX,R9 - MOVQ DX,R10 - MOVQ 0(SI),AX - SHLQ $1,AX - MULQ 16(SI) - MOVQ AX,R11 - MOVQ DX,R12 - MOVQ 0(SI),AX - SHLQ $1,AX - MULQ 24(SI) - MOVQ AX,R13 - MOVQ DX,R14 - MOVQ 0(SI),AX - SHLQ $1,AX - MULQ 32(SI) - MOVQ AX,R15 - MOVQ DX,BX - MOVQ 8(SI),AX - MULQ 8(SI) - ADDQ AX,R11 - ADCQ DX,R12 - MOVQ 8(SI),AX - SHLQ $1,AX - MULQ 16(SI) - ADDQ AX,R13 - ADCQ DX,R14 - MOVQ 8(SI),AX - SHLQ $1,AX - MULQ 24(SI) - ADDQ AX,R15 - ADCQ DX,BX - MOVQ 8(SI),DX - IMUL3Q $38,DX,AX - MULQ 32(SI) - ADDQ AX,CX - ADCQ DX,R8 - MOVQ 16(SI),AX - MULQ 16(SI) - ADDQ AX,R15 - ADCQ DX,BX - MOVQ 16(SI),DX - IMUL3Q $38,DX,AX - MULQ 24(SI) - ADDQ AX,CX - ADCQ DX,R8 - MOVQ 16(SI),DX - IMUL3Q $38,DX,AX - MULQ 32(SI) - ADDQ AX,R9 - ADCQ DX,R10 - MOVQ 24(SI),DX - IMUL3Q $19,DX,AX - MULQ 24(SI) - ADDQ AX,R9 - ADCQ DX,R10 - MOVQ 24(SI),DX - IMUL3Q $38,DX,AX - MULQ 32(SI) - ADDQ AX,R11 - ADCQ DX,R12 - MOVQ 32(SI),DX - IMUL3Q $19,DX,AX - MULQ 32(SI) - ADDQ AX,R13 - ADCQ DX,R14 - MOVQ $REDMASK51,SI - SHLQ $13,CX,R8 - ANDQ SI,CX - SHLQ $13,R9,R10 - ANDQ SI,R9 - ADDQ R8,R9 - SHLQ $13,R11,R12 - ANDQ SI,R11 - ADDQ R10,R11 - SHLQ $13,R13,R14 - ANDQ SI,R13 - ADDQ R12,R13 - SHLQ $13,R15,BX - ANDQ SI,R15 - ADDQ R14,R15 - IMUL3Q $19,BX,DX - ADDQ DX,CX - MOVQ CX,DX - SHRQ $51,DX - ADDQ R9,DX - ANDQ SI,CX - MOVQ DX,R8 - SHRQ $51,DX - ADDQ R11,DX - ANDQ SI,R8 - MOVQ DX,R9 - SHRQ $51,DX - ADDQ R13,DX - ANDQ SI,R9 - MOVQ DX,AX - SHRQ $51,DX - ADDQ R15,DX - ANDQ SI,AX - MOVQ DX,R10 - SHRQ $51,DX - IMUL3Q $19,DX,DX - ADDQ DX,CX - ANDQ SI,R10 - MOVQ CX,0(DI) - MOVQ R8,8(DI) - MOVQ R9,16(DI) - MOVQ AX,24(DI) - MOVQ R10,32(DI) - RET -- cgit v1.2.3
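
Editorial notes on the deleted implementation follow, for readers working through the patch.

In the deleted mont25519_amd64.go, scalarMult clamps the scalar and mladder then walks it bit by bit through a Montgomery ladder, with cswap keeping the selection branch-free and ladderstep doing a combined double-and-add in assembly. Below is a sketch of the same algorithm in pure Go with math/big, written against the RFC 7748 formulation (equivalent to, but arranged differently from, the ladderstep assembly). The name x25519Ladder and the use of math/big are ours, not the package's; the sketch is neither constant-time nor fast.

package main

import (
	"fmt"
	"math/big"
)

// p = 2^255 - 19, the field prime every deleted routine works in.
var p = new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 255), big.NewInt(19))

// x25519Ladder computes the u-coordinate of scalar*point per RFC 7748.
func x25519Ladder(scalar [32]byte, u *big.Int) *big.Int {
	// Clamp the scalar exactly as the deleted scalarMult does.
	scalar[0] &= 248
	scalar[31] &= 127
	scalar[31] |= 64

	x1 := new(big.Int).Set(u)
	x2, z2 := big.NewInt(1), big.NewInt(0) // the point at infinity
	x3, z3 := new(big.Int).Set(u), big.NewInt(1)
	a24 := big.NewInt(121665) // (486662 - 2) / 4, for v^2 = u^3 + 486662u^2 + u

	swap := byte(0)
	for t := 254; t >= 0; t-- {
		bit := (scalar[t/8] >> (uint(t) % 8)) & 1
		swap ^= bit
		if swap == 1 { // the cswap assembly does this without branching
			x2, x3 = x3, x2
			z2, z3 = z3, z2
		}
		swap = bit

		// One ladder step: double (x2:z2) and differential-add (x3:z3).
		a := new(big.Int).Add(x2, z2)
		b := new(big.Int).Sub(x2, z2)
		aa := new(big.Int).Mul(a, a)
		bb := new(big.Int).Mul(b, b)
		e := new(big.Int).Sub(aa, bb)
		da := new(big.Int).Mul(new(big.Int).Sub(x3, z3), a)
		cb := new(big.Int).Mul(new(big.Int).Add(x3, z3), b)

		s := new(big.Int).Add(da, cb)
		d := new(big.Int).Sub(da, cb)
		x3 = s.Mul(s, s)
		z3 = new(big.Int).Mul(x1, d.Mul(d, d))
		x2 = new(big.Int).Mul(aa, bb)
		z2 = new(big.Int).Mul(e, new(big.Int).Add(aa, new(big.Int).Mul(a24, e)))
		for _, v := range []*big.Int{x2, z2, x3, z3} {
			v.Mod(v, p)
		}
	}
	if swap == 1 {
		x2, x3 = x3, x2
		z2, z3 = z3, z2
	}
	// Fermat inversion, as the deleted invert does; z2 = 0 maps to 0.
	zInv := new(big.Int).Exp(z2, new(big.Int).Sub(p, big.NewInt(2)), p)
	return x2.Mul(x2, zInv).Mod(x2, p)
}

func main() {
	var k [32]byte
	k[0] = 9 // scalar bytes are little-endian; the basepoint has u = 9
	fmt.Printf("%x\n", x25519Ladder(k, big.NewInt(9)))
}

The output prints as big-endian hex; the package's pack would emit the same value as 32 little-endian bytes.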
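The /* ... */ annotations in the deleted invert record the exponent of x accumulated at each step; the chain finishes at x^(2^255 - 21) = x^(p-2), which is x^-1 by Fermat's little theorem. The sketch below, with our own helpers sq, mul and invertChain (not part of the package), replays the chain on math/big, keeping the original variable names so the exponent bookkeeping can be checked against a direct modular exponentiation.

package main

import (
	"fmt"
	"math/big"
)

var p = new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 255), big.NewInt(19))

func sq(x *big.Int) *big.Int     { return new(big.Int).Mod(new(big.Int).Mul(x, x), p) }
func mul(a, b *big.Int) *big.Int { return new(big.Int).Mod(new(big.Int).Mul(a, b), p) }

// invertChain mirrors the deleted invert's addition chain step for step.
func invertChain(x *big.Int) *big.Int {
	z2 := sq(x)          // 2
	t := sq(z2)          // 4
	t = sq(t)            // 8
	z9 := mul(t, x)      // 9
	z11 := mul(z9, z2)   // 11
	t = sq(z11)          // 22
	z2_5_0 := mul(t, z9) // 2^5 - 2^0 = 31

	t = sq(z2_5_0)           // 2^6 - 2^1
	for i := 1; i < 5; i++ { // 2^10 - 2^5
		t = sq(t)
	}
	z2_10_0 := mul(t, z2_5_0) // 2^10 - 2^0

	t = sq(z2_10_0)           // 2^11 - 2^1
	for i := 1; i < 10; i++ { // 2^20 - 2^10
		t = sq(t)
	}
	z2_20_0 := mul(t, z2_10_0) // 2^20 - 2^0

	t = sq(z2_20_0)           // 2^21 - 2^1
	for i := 1; i < 20; i++ { // 2^40 - 2^20
		t = sq(t)
	}
	t = mul(t, z2_20_0) // 2^40 - 2^0

	t = sq(t)                 // 2^41 - 2^1
	for i := 1; i < 10; i++ { // 2^50 - 2^10
		t = sq(t)
	}
	z2_50_0 := mul(t, z2_10_0) // 2^50 - 2^0

	t = sq(z2_50_0)           // 2^51 - 2^1
	for i := 1; i < 50; i++ { // 2^100 - 2^50
		t = sq(t)
	}
	z2_100_0 := mul(t, z2_50_0) // 2^100 - 2^0

	t = sq(z2_100_0)           // 2^101 - 2^1
	for i := 1; i < 100; i++ { // 2^200 - 2^100
		t = sq(t)
	}
	t = mul(t, z2_100_0) // 2^200 - 2^0

	t = sq(t)                 // 2^201 - 2^1
	for i := 1; i < 50; i++ { // 2^250 - 2^50
		t = sq(t)
	}
	t = mul(t, z2_50_0) // 2^250 - 2^0

	for i := 0; i < 5; i++ { // 2^255 - 2^5
		t = sq(t)
	}
	return mul(t, z11) // 2^255 - 21 = p - 2
}

func main() {
	x := big.NewInt(0xC0FFEE)
	want := new(big.Int).Exp(x, new(big.Int).Sub(p, big.NewInt(2)), p)
	fmt.Println(invertChain(x).Cmp(want) == 0) // true: the chain is x^(p-2)
}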
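The deleted mul_amd64.s is a 5x5 schoolbook multiply over 51-bit limbs. Two ideas carry the whole file: partial products that would land in limbs 5..8 are folded back into limbs 0..3 multiplied by 19 (the IMUL3Q $19 instructions), since 2^255 is congruent to 19 mod 2^255 - 19; and each 128-bit accumulator column is then collapsed into a 51-bit limb plus a carry into the next column (the SHLQ $13 / REDMASK51 sequence). A pure-Go rendering of that strategy follows; the names mul51 and eval and the math/bits plumbing are ours, and the carry bounds assume both inputs have limbs below 2^51.

package main

import (
	"fmt"
	"math/big"
	"math/bits"
)

const mask51 = (1 << 51) - 1

var p = new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 255), big.NewInt(19))

// mul51 multiplies two values held as five 51-bit limbs, reducing mod p.
func mul51(a, b *[5]uint64) (r [5]uint64) {
	var lo, hi [5]uint64 // one 128-bit accumulator column per result limb
	madd := func(k int, x, y uint64) {
		h, l := bits.Mul64(x, y)
		var c uint64
		lo[k], c = bits.Add64(lo[k], l, 0)
		hi[k] += h + c
	}
	for i := 0; i < 5; i++ {
		for j := 0; j < 5; j++ {
			if i+j < 5 {
				madd(i+j, a[i], b[j])
			} else {
				madd(i+j-5, 19*a[i], b[j]) // wraparound limb, times 19
			}
		}
	}
	// Collapse each column into limb-plus-carry; the carry out of the top
	// column re-enters limb 0 multiplied by 19.
	var carry uint64
	for k := 0; k < 5; k++ {
		l, c := bits.Add64(lo[k], carry, 0)
		r[k] = l & mask51
		carry = (hi[k]+c)<<13 | l>>51
	}
	r[0] += 19 * carry
	for k := 0; k < 4; k++ { // final short carry chain
		r[k+1] += r[k] >> 51
		r[k] &= mask51
	}
	return r
}

// eval interprets five 51-bit limbs as an integer mod p, for checking.
func eval(r [5]uint64) *big.Int {
	v := new(big.Int)
	for k := 4; k >= 0; k-- {
		v.Lsh(v, 51)
		v.Add(v, new(big.Int).SetUint64(r[k]))
	}
	return v.Mod(v, p)
}

func main() {
	a := [5]uint64{0x7FFFFFFFFFFDA, 1, 2, 3, 4} // arbitrary reduced limbs
	b := [5]uint64{5, 6, 7, 8, 0x7FFFFFFFFFFFE}
	got := eval(mul51(&a, &b))
	want := new(big.Int).Mod(new(big.Int).Mul(eval(a), eval(b)), p)
	fmt.Println(got.Cmp(want) == 0) // true
}

Note the result limbs are bounded below 2^52 but not canonical; in the deleted code, freeze performs the final full reduction before pack emits bytes.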
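square_amd64.s is the same column scheme specialised to a square: each cross product a[i]*a[j] with i < j occurs twice, so it is computed once with one operand pre-doubled (the SHLQ $1 instructions), and doubled wrapped terms fold with the constant 38 = 2*19 (IMUL3Q $38). A companion sketch under the same assumptions as mul51 above; square51 and the repeated helpers are ours, duplicated so the program stands alone.

package main

import (
	"fmt"
	"math/big"
	"math/bits"
)

const mask51 = (1 << 51) - 1

var p = new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 255), big.NewInt(19))

// square51 squares a value held as five 51-bit limbs, reducing mod p.
func square51(a *[5]uint64) (r [5]uint64) {
	var lo, hi [5]uint64
	madd := func(k int, x, y uint64) {
		h, l := bits.Mul64(x, y)
		var c uint64
		lo[k], c = bits.Add64(lo[k], l, 0)
		hi[k] += h + c
	}
	for i := 0; i < 5; i++ {
		for j := i; j < 5; j++ {
			x := a[i]
			if i != j {
				x *= 2 // cross terms occur twice in a square
			}
			if i+j < 5 {
				madd(i+j, x, a[j])
			} else {
				madd(i+j-5, 19*x, a[j]) // 19, or 38 for doubled terms
			}
		}
	}
	var carry uint64
	for k := 0; k < 5; k++ {
		l, c := bits.Add64(lo[k], carry, 0)
		r[k] = l & mask51
		carry = (hi[k]+c)<<13 | l>>51
	}
	r[0] += 19 * carry
	for k := 0; k < 4; k++ {
		r[k+1] += r[k] >> 51
		r[k] &= mask51
	}
	return r
}

// eval interprets five 51-bit limbs as an integer mod p, for checking.
func eval(r [5]uint64) *big.Int {
	v := new(big.Int)
	for k := 4; k >= 0; k-- {
		v.Lsh(v, 51)
		v.Add(v, new(big.Int).SetUint64(r[k]))
	}
	return v.Mod(v, p)
}

func main() {
	a := [5]uint64{0x123456789ABCD, 0x7FFFFFFFFFFFE, 42, 7, 0x100000000000}
	got := eval(square51(&a))
	want := new(big.Int).Mod(new(big.Int).Mul(eval(a), eval(a)), p)
	fmt.Println(got.Cmp(want) == 0) // true
}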