diff options
Diffstat (limited to 'vendor/golang.org/x/crypto/curve25519')
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/const_amd64.h | 8 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/const_amd64.s | 20 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/cswap_amd64.s | 65 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/curve25519.go | 881 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go (renamed from vendor/golang.org/x/crypto/curve25519/mont25519_amd64.go) | 2 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s (renamed from vendor/golang.org/x/crypto/curve25519/ladderstep_amd64.s) | 420 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/curve25519_generic.go | 828 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go | 11 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/doc.go | 23 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/freeze_amd64.s | 73 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/mul_amd64.s | 169 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/curve25519/square_amd64.s | 132 |
12 files changed, 1329 insertions, 1303 deletions
diff --git a/vendor/golang.org/x/crypto/curve25519/const_amd64.h b/vendor/golang.org/x/crypto/curve25519/const_amd64.h deleted file mode 100644 index b3f74162..00000000 --- a/vendor/golang.org/x/crypto/curve25519/const_amd64.h +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html - -#define REDMASK51 0x0007FFFFFFFFFFFF diff --git a/vendor/golang.org/x/crypto/curve25519/const_amd64.s b/vendor/golang.org/x/crypto/curve25519/const_amd64.s deleted file mode 100644 index ee7b4bd5..00000000 --- a/vendor/golang.org/x/crypto/curve25519/const_amd64.s +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html - -// +build amd64,!gccgo,!appengine - -// These constants cannot be encoded in non-MOVQ immediates. -// We access them directly from memory instead. - -DATA ·_121666_213(SB)/8, $996687872 -GLOBL ·_121666_213(SB), 8, $8 - -DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA -GLOBL ·_2P0(SB), 8, $8 - -DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE -GLOBL ·_2P1234(SB), 8, $8 diff --git a/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s b/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s deleted file mode 100644 index cd793a5b..00000000 --- a/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build amd64,!gccgo,!appengine - -// func cswap(inout *[4][5]uint64, v uint64) -TEXT ·cswap(SB),7,$0 - MOVQ inout+0(FP),DI - MOVQ v+8(FP),SI - - SUBQ $1, SI - NOTQ SI - MOVQ SI, X15 - PSHUFD $0x44, X15, X15 - - MOVOU 0(DI), X0 - MOVOU 16(DI), X2 - MOVOU 32(DI), X4 - MOVOU 48(DI), X6 - MOVOU 64(DI), X8 - MOVOU 80(DI), X1 - MOVOU 96(DI), X3 - MOVOU 112(DI), X5 - MOVOU 128(DI), X7 - MOVOU 144(DI), X9 - - MOVO X1, X10 - MOVO X3, X11 - MOVO X5, X12 - MOVO X7, X13 - MOVO X9, X14 - - PXOR X0, X10 - PXOR X2, X11 - PXOR X4, X12 - PXOR X6, X13 - PXOR X8, X14 - PAND X15, X10 - PAND X15, X11 - PAND X15, X12 - PAND X15, X13 - PAND X15, X14 - PXOR X10, X0 - PXOR X10, X1 - PXOR X11, X2 - PXOR X11, X3 - PXOR X12, X4 - PXOR X12, X5 - PXOR X13, X6 - PXOR X13, X7 - PXOR X14, X8 - PXOR X14, X9 - - MOVOU X0, 0(DI) - MOVOU X2, 16(DI) - MOVOU X4, 32(DI) - MOVOU X6, 48(DI) - MOVOU X8, 64(DI) - MOVOU X1, 80(DI) - MOVOU X3, 96(DI) - MOVOU X5, 112(DI) - MOVOU X7, 128(DI) - MOVOU X9, 144(DI) - RET diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519.go b/vendor/golang.org/x/crypto/curve25519/curve25519.go index 75f24bab..4b9a655d 100644 --- a/vendor/golang.org/x/crypto/curve25519/curve25519.go +++ b/vendor/golang.org/x/crypto/curve25519/curve25519.go @@ -1,834 +1,95 @@ -// Copyright 2013 The Go Authors. All rights reserved. +// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// We have an implementation in amd64 assembly so this code is only run on -// non-amd64 platforms. The amd64 assembly does not support gccgo. -// +build !amd64 gccgo appengine - -package curve25519 +// Package curve25519 provides an implementation of the X25519 function, which +// performs scalar multiplication on the elliptic curve known as Curve25519. +// See RFC 7748. +package curve25519 // import "golang.org/x/crypto/curve25519" import ( - "encoding/binary" + "crypto/subtle" + "fmt" ) -// This code is a port of the public domain, "ref10" implementation of -// curve25519 from SUPERCOP 20130419 by D. J. Bernstein. - -// fieldElement represents an element of the field GF(2^255 - 19). An element -// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 -// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on -// context. -type fieldElement [10]int32 - -func feZero(fe *fieldElement) { - for i := range fe { - fe[i] = 0 - } -} - -func feOne(fe *fieldElement) { - feZero(fe) - fe[0] = 1 -} - -func feAdd(dst, a, b *fieldElement) { - for i := range dst { - dst[i] = a[i] + b[i] - } -} - -func feSub(dst, a, b *fieldElement) { - for i := range dst { - dst[i] = a[i] - b[i] - } -} - -func feCopy(dst, src *fieldElement) { - for i := range dst { - dst[i] = src[i] - } -} - -// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0. -// -// Preconditions: b in {0,1}. -func feCSwap(f, g *fieldElement, b int32) { - b = -b - for i := range f { - t := b & (f[i] ^ g[i]) - f[i] ^= t - g[i] ^= t - } -} - -// load3 reads a 24-bit, little-endian value from in. -func load3(in []byte) int64 { - var r int64 - r = int64(in[0]) - r |= int64(in[1]) << 8 - r |= int64(in[2]) << 16 - return r -} - -// load4 reads a 32-bit, little-endian value from in. -func load4(in []byte) int64 { - return int64(binary.LittleEndian.Uint32(in)) -} - -func feFromBytes(dst *fieldElement, src *[32]byte) { - h0 := load4(src[:]) - h1 := load3(src[4:]) << 6 - h2 := load3(src[7:]) << 5 - h3 := load3(src[10:]) << 3 - h4 := load3(src[13:]) << 2 - h5 := load4(src[16:]) - h6 := load3(src[20:]) << 7 - h7 := load3(src[23:]) << 5 - h8 := load3(src[26:]) << 4 - h9 := (load3(src[29:]) & 0x7fffff) << 2 - - var carry [10]int64 - carry[9] = (h9 + 1<<24) >> 25 - h0 += carry[9] * 19 - h9 -= carry[9] << 25 - carry[1] = (h1 + 1<<24) >> 25 - h2 += carry[1] - h1 -= carry[1] << 25 - carry[3] = (h3 + 1<<24) >> 25 - h4 += carry[3] - h3 -= carry[3] << 25 - carry[5] = (h5 + 1<<24) >> 25 - h6 += carry[5] - h5 -= carry[5] << 25 - carry[7] = (h7 + 1<<24) >> 25 - h8 += carry[7] - h7 -= carry[7] << 25 - - carry[0] = (h0 + 1<<25) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - carry[2] = (h2 + 1<<25) >> 26 - h3 += carry[2] - h2 -= carry[2] << 26 - carry[4] = (h4 + 1<<25) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - carry[6] = (h6 + 1<<25) >> 26 - h7 += carry[6] - h6 -= carry[6] << 26 - carry[8] = (h8 + 1<<25) >> 26 - h9 += carry[8] - h8 -= carry[8] << 26 - - dst[0] = int32(h0) - dst[1] = int32(h1) - dst[2] = int32(h2) - dst[3] = int32(h3) - dst[4] = int32(h4) - dst[5] = int32(h5) - dst[6] = int32(h6) - dst[7] = int32(h7) - dst[8] = int32(h8) - dst[9] = int32(h9) -} - -// feToBytes marshals h to s. -// Preconditions: -// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. -// -// Write p=2^255-19; q=floor(h/p). -// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). -// -// Proof: -// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. -// Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. -// -// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). -// Then 0<y<1. +// ScalarMult sets dst to the product scalar * point. // -// Write r=h-pq. -// Have 0<=r<=p-1=2^255-20. -// Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. -// -// Write x=r+19(2^-255)r+y. -// Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. -// -// Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) -// so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. -func feToBytes(s *[32]byte, h *fieldElement) { - var carry [10]int32 - - q := (19*h[9] + (1 << 24)) >> 25 - q = (h[0] + q) >> 26 - q = (h[1] + q) >> 25 - q = (h[2] + q) >> 26 - q = (h[3] + q) >> 25 - q = (h[4] + q) >> 26 - q = (h[5] + q) >> 25 - q = (h[6] + q) >> 26 - q = (h[7] + q) >> 25 - q = (h[8] + q) >> 26 - q = (h[9] + q) >> 25 - - // Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. - h[0] += 19 * q - // Goal: Output h-2^255 q, which is between 0 and 2^255-20. - - carry[0] = h[0] >> 26 - h[1] += carry[0] - h[0] -= carry[0] << 26 - carry[1] = h[1] >> 25 - h[2] += carry[1] - h[1] -= carry[1] << 25 - carry[2] = h[2] >> 26 - h[3] += carry[2] - h[2] -= carry[2] << 26 - carry[3] = h[3] >> 25 - h[4] += carry[3] - h[3] -= carry[3] << 25 - carry[4] = h[4] >> 26 - h[5] += carry[4] - h[4] -= carry[4] << 26 - carry[5] = h[5] >> 25 - h[6] += carry[5] - h[5] -= carry[5] << 25 - carry[6] = h[6] >> 26 - h[7] += carry[6] - h[6] -= carry[6] << 26 - carry[7] = h[7] >> 25 - h[8] += carry[7] - h[7] -= carry[7] << 25 - carry[8] = h[8] >> 26 - h[9] += carry[8] - h[8] -= carry[8] << 26 - carry[9] = h[9] >> 25 - h[9] -= carry[9] << 25 - // h10 = carry9 - - // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. - // Have h[0]+...+2^230 h[9] between 0 and 2^255-1; - // evidently 2^255 h10-2^255 q = 0. - // Goal: Output h[0]+...+2^230 h[9]. - - s[0] = byte(h[0] >> 0) - s[1] = byte(h[0] >> 8) - s[2] = byte(h[0] >> 16) - s[3] = byte((h[0] >> 24) | (h[1] << 2)) - s[4] = byte(h[1] >> 6) - s[5] = byte(h[1] >> 14) - s[6] = byte((h[1] >> 22) | (h[2] << 3)) - s[7] = byte(h[2] >> 5) - s[8] = byte(h[2] >> 13) - s[9] = byte((h[2] >> 21) | (h[3] << 5)) - s[10] = byte(h[3] >> 3) - s[11] = byte(h[3] >> 11) - s[12] = byte((h[3] >> 19) | (h[4] << 6)) - s[13] = byte(h[4] >> 2) - s[14] = byte(h[4] >> 10) - s[15] = byte(h[4] >> 18) - s[16] = byte(h[5] >> 0) - s[17] = byte(h[5] >> 8) - s[18] = byte(h[5] >> 16) - s[19] = byte((h[5] >> 24) | (h[6] << 1)) - s[20] = byte(h[6] >> 7) - s[21] = byte(h[6] >> 15) - s[22] = byte((h[6] >> 23) | (h[7] << 3)) - s[23] = byte(h[7] >> 5) - s[24] = byte(h[7] >> 13) - s[25] = byte((h[7] >> 21) | (h[8] << 4)) - s[26] = byte(h[8] >> 4) - s[27] = byte(h[8] >> 12) - s[28] = byte((h[8] >> 20) | (h[9] << 6)) - s[29] = byte(h[9] >> 2) - s[30] = byte(h[9] >> 10) - s[31] = byte(h[9] >> 18) +// Deprecated: when provided a low-order point, ScalarMult will set dst to all +// zeroes, irrespective of the scalar. Instead, use the X25519 function, which +// will return an error. +func ScalarMult(dst, scalar, point *[32]byte) { + scalarMult(dst, scalar, point) } -// feMul calculates h = f * g -// Can overlap h with f or g. -// -// Preconditions: -// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. -// |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. -// -// Postconditions: -// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. -// -// Notes on implementation strategy: -// -// Using schoolbook multiplication. -// Karatsuba would save a little in some cost models. +// ScalarBaseMult sets dst to the product scalar * base where base is the +// standard generator. // -// Most multiplications by 2 and 19 are 32-bit precomputations; -// cheaper than 64-bit postcomputations. -// -// There is one remaining multiplication by 19 in the carry chain; -// one *19 precomputation can be merged into this, -// but the resulting data flow is considerably less clean. -// -// There are 12 carries below. -// 10 of them are 2-way parallelizable and vectorizable. -// Can get away with 11 carries, but then data flow is much deeper. -// -// With tighter constraints on inputs can squeeze carries into int32. -func feMul(h, f, g *fieldElement) { - f0 := f[0] - f1 := f[1] - f2 := f[2] - f3 := f[3] - f4 := f[4] - f5 := f[5] - f6 := f[6] - f7 := f[7] - f8 := f[8] - f9 := f[9] - g0 := g[0] - g1 := g[1] - g2 := g[2] - g3 := g[3] - g4 := g[4] - g5 := g[5] - g6 := g[6] - g7 := g[7] - g8 := g[8] - g9 := g[9] - g1_19 := 19 * g1 // 1.4*2^29 - g2_19 := 19 * g2 // 1.4*2^30; still ok - g3_19 := 19 * g3 - g4_19 := 19 * g4 - g5_19 := 19 * g5 - g6_19 := 19 * g6 - g7_19 := 19 * g7 - g8_19 := 19 * g8 - g9_19 := 19 * g9 - f1_2 := 2 * f1 - f3_2 := 2 * f3 - f5_2 := 2 * f5 - f7_2 := 2 * f7 - f9_2 := 2 * f9 - f0g0 := int64(f0) * int64(g0) - f0g1 := int64(f0) * int64(g1) - f0g2 := int64(f0) * int64(g2) - f0g3 := int64(f0) * int64(g3) - f0g4 := int64(f0) * int64(g4) - f0g5 := int64(f0) * int64(g5) - f0g6 := int64(f0) * int64(g6) - f0g7 := int64(f0) * int64(g7) - f0g8 := int64(f0) * int64(g8) - f0g9 := int64(f0) * int64(g9) - f1g0 := int64(f1) * int64(g0) - f1g1_2 := int64(f1_2) * int64(g1) - f1g2 := int64(f1) * int64(g2) - f1g3_2 := int64(f1_2) * int64(g3) - f1g4 := int64(f1) * int64(g4) - f1g5_2 := int64(f1_2) * int64(g5) - f1g6 := int64(f1) * int64(g6) - f1g7_2 := int64(f1_2) * int64(g7) - f1g8 := int64(f1) * int64(g8) - f1g9_38 := int64(f1_2) * int64(g9_19) - f2g0 := int64(f2) * int64(g0) - f2g1 := int64(f2) * int64(g1) - f2g2 := int64(f2) * int64(g2) - f2g3 := int64(f2) * int64(g3) - f2g4 := int64(f2) * int64(g4) - f2g5 := int64(f2) * int64(g5) - f2g6 := int64(f2) * int64(g6) - f2g7 := int64(f2) * int64(g7) - f2g8_19 := int64(f2) * int64(g8_19) - f2g9_19 := int64(f2) * int64(g9_19) - f3g0 := int64(f3) * int64(g0) - f3g1_2 := int64(f3_2) * int64(g1) - f3g2 := int64(f3) * int64(g2) - f3g3_2 := int64(f3_2) * int64(g3) - f3g4 := int64(f3) * int64(g4) - f3g5_2 := int64(f3_2) * int64(g5) - f3g6 := int64(f3) * int64(g6) - f3g7_38 := int64(f3_2) * int64(g7_19) - f3g8_19 := int64(f3) * int64(g8_19) - f3g9_38 := int64(f3_2) * int64(g9_19) - f4g0 := int64(f4) * int64(g0) - f4g1 := int64(f4) * int64(g1) - f4g2 := int64(f4) * int64(g2) - f4g3 := int64(f4) * int64(g3) - f4g4 := int64(f4) * int64(g4) - f4g5 := int64(f4) * int64(g5) - f4g6_19 := int64(f4) * int64(g6_19) - f4g7_19 := int64(f4) * int64(g7_19) - f4g8_19 := int64(f4) * int64(g8_19) - f4g9_19 := int64(f4) * int64(g9_19) - f5g0 := int64(f5) * int64(g0) - f5g1_2 := int64(f5_2) * int64(g1) - f5g2 := int64(f5) * int64(g2) - f5g3_2 := int64(f5_2) * int64(g3) - f5g4 := int64(f5) * int64(g4) - f5g5_38 := int64(f5_2) * int64(g5_19) - f5g6_19 := int64(f5) * int64(g6_19) - f5g7_38 := int64(f5_2) * int64(g7_19) - f5g8_19 := int64(f5) * int64(g8_19) - f5g9_38 := int64(f5_2) * int64(g9_19) - f6g0 := int64(f6) * int64(g0) - f6g1 := int64(f6) * int64(g1) - f6g2 := int64(f6) * int64(g2) - f6g3 := int64(f6) * int64(g3) - f6g4_19 := int64(f6) * int64(g4_19) - f6g5_19 := int64(f6) * int64(g5_19) - f6g6_19 := int64(f6) * int64(g6_19) - f6g7_19 := int64(f6) * int64(g7_19) - f6g8_19 := int64(f6) * int64(g8_19) - f6g9_19 := int64(f6) * int64(g9_19) - f7g0 := int64(f7) * int64(g0) - f7g1_2 := int64(f7_2) * int64(g1) - f7g2 := int64(f7) * int64(g2) - f7g3_38 := int64(f7_2) * int64(g3_19) - f7g4_19 := int64(f7) * int64(g4_19) - f7g5_38 := int64(f7_2) * int64(g5_19) - f7g6_19 := int64(f7) * int64(g6_19) - f7g7_38 := int64(f7_2) * int64(g7_19) - f7g8_19 := int64(f7) * int64(g8_19) - f7g9_38 := int64(f7_2) * int64(g9_19) - f8g0 := int64(f8) * int64(g0) - f8g1 := int64(f8) * int64(g1) - f8g2_19 := int64(f8) * int64(g2_19) - f8g3_19 := int64(f8) * int64(g3_19) - f8g4_19 := int64(f8) * int64(g4_19) - f8g5_19 := int64(f8) * int64(g5_19) - f8g6_19 := int64(f8) * int64(g6_19) - f8g7_19 := int64(f8) * int64(g7_19) - f8g8_19 := int64(f8) * int64(g8_19) - f8g9_19 := int64(f8) * int64(g9_19) - f9g0 := int64(f9) * int64(g0) - f9g1_38 := int64(f9_2) * int64(g1_19) - f9g2_19 := int64(f9) * int64(g2_19) - f9g3_38 := int64(f9_2) * int64(g3_19) - f9g4_19 := int64(f9) * int64(g4_19) - f9g5_38 := int64(f9_2) * int64(g5_19) - f9g6_19 := int64(f9) * int64(g6_19) - f9g7_38 := int64(f9_2) * int64(g7_19) - f9g8_19 := int64(f9) * int64(g8_19) - f9g9_38 := int64(f9_2) * int64(g9_19) - h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38 - h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19 - h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38 - h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19 - h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38 - h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19 - h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38 - h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19 - h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38 - h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 - var carry [10]int64 - - // |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) - // i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 - // |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) - // i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9 - - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - // |h0| <= 2^25 - // |h4| <= 2^25 - // |h1| <= 1.51*2^58 - // |h5| <= 1.51*2^58 - - carry[1] = (h1 + (1 << 24)) >> 25 - h2 += carry[1] - h1 -= carry[1] << 25 - carry[5] = (h5 + (1 << 24)) >> 25 - h6 += carry[5] - h5 -= carry[5] << 25 - // |h1| <= 2^24; from now on fits into int32 - // |h5| <= 2^24; from now on fits into int32 - // |h2| <= 1.21*2^59 - // |h6| <= 1.21*2^59 - - carry[2] = (h2 + (1 << 25)) >> 26 - h3 += carry[2] - h2 -= carry[2] << 26 - carry[6] = (h6 + (1 << 25)) >> 26 - h7 += carry[6] - h6 -= carry[6] << 26 - // |h2| <= 2^25; from now on fits into int32 unchanged - // |h6| <= 2^25; from now on fits into int32 unchanged - // |h3| <= 1.51*2^58 - // |h7| <= 1.51*2^58 - - carry[3] = (h3 + (1 << 24)) >> 25 - h4 += carry[3] - h3 -= carry[3] << 25 - carry[7] = (h7 + (1 << 24)) >> 25 - h8 += carry[7] - h7 -= carry[7] << 25 - // |h3| <= 2^24; from now on fits into int32 unchanged - // |h7| <= 2^24; from now on fits into int32 unchanged - // |h4| <= 1.52*2^33 - // |h8| <= 1.52*2^33 - - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - carry[8] = (h8 + (1 << 25)) >> 26 - h9 += carry[8] - h8 -= carry[8] << 26 - // |h4| <= 2^25; from now on fits into int32 unchanged - // |h8| <= 2^25; from now on fits into int32 unchanged - // |h5| <= 1.01*2^24 - // |h9| <= 1.51*2^58 - - carry[9] = (h9 + (1 << 24)) >> 25 - h0 += carry[9] * 19 - h9 -= carry[9] << 25 - // |h9| <= 2^24; from now on fits into int32 unchanged - // |h0| <= 1.8*2^37 - - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - // |h0| <= 2^25; from now on fits into int32 unchanged - // |h1| <= 1.01*2^24 - - h[0] = int32(h0) - h[1] = int32(h1) - h[2] = int32(h2) - h[3] = int32(h3) - h[4] = int32(h4) - h[5] = int32(h5) - h[6] = int32(h6) - h[7] = int32(h7) - h[8] = int32(h8) - h[9] = int32(h9) +// It is recommended to use the X25519 function with Basepoint instead, as +// copying into fixed size arrays can lead to unexpected bugs. +func ScalarBaseMult(dst, scalar *[32]byte) { + ScalarMult(dst, scalar, &basePoint) } -// feSquare calculates h = f*f. Can overlap h with f. -// -// Preconditions: -// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. -// -// Postconditions: -// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. -func feSquare(h, f *fieldElement) { - f0 := f[0] - f1 := f[1] - f2 := f[2] - f3 := f[3] - f4 := f[4] - f5 := f[5] - f6 := f[6] - f7 := f[7] - f8 := f[8] - f9 := f[9] - f0_2 := 2 * f0 - f1_2 := 2 * f1 - f2_2 := 2 * f2 - f3_2 := 2 * f3 - f4_2 := 2 * f4 - f5_2 := 2 * f5 - f6_2 := 2 * f6 - f7_2 := 2 * f7 - f5_38 := 38 * f5 // 1.31*2^30 - f6_19 := 19 * f6 // 1.31*2^30 - f7_38 := 38 * f7 // 1.31*2^30 - f8_19 := 19 * f8 // 1.31*2^30 - f9_38 := 38 * f9 // 1.31*2^30 - f0f0 := int64(f0) * int64(f0) - f0f1_2 := int64(f0_2) * int64(f1) - f0f2_2 := int64(f0_2) * int64(f2) - f0f3_2 := int64(f0_2) * int64(f3) - f0f4_2 := int64(f0_2) * int64(f4) - f0f5_2 := int64(f0_2) * int64(f5) - f0f6_2 := int64(f0_2) * int64(f6) - f0f7_2 := int64(f0_2) * int64(f7) - f0f8_2 := int64(f0_2) * int64(f8) - f0f9_2 := int64(f0_2) * int64(f9) - f1f1_2 := int64(f1_2) * int64(f1) - f1f2_2 := int64(f1_2) * int64(f2) - f1f3_4 := int64(f1_2) * int64(f3_2) - f1f4_2 := int64(f1_2) * int64(f4) - f1f5_4 := int64(f1_2) * int64(f5_2) - f1f6_2 := int64(f1_2) * int64(f6) - f1f7_4 := int64(f1_2) * int64(f7_2) - f1f8_2 := int64(f1_2) * int64(f8) - f1f9_76 := int64(f1_2) * int64(f9_38) - f2f2 := int64(f2) * int64(f2) - f2f3_2 := int64(f2_2) * int64(f3) - f2f4_2 := int64(f2_2) * int64(f4) - f2f5_2 := int64(f2_2) * int64(f5) - f2f6_2 := int64(f2_2) * int64(f6) - f2f7_2 := int64(f2_2) * int64(f7) - f2f8_38 := int64(f2_2) * int64(f8_19) - f2f9_38 := int64(f2) * int64(f9_38) - f3f3_2 := int64(f3_2) * int64(f3) - f3f4_2 := int64(f3_2) * int64(f4) - f3f5_4 := int64(f3_2) * int64(f5_2) - f3f6_2 := int64(f3_2) * int64(f6) - f3f7_76 := int64(f3_2) * int64(f7_38) - f3f8_38 := int64(f3_2) * int64(f8_19) - f3f9_76 := int64(f3_2) * int64(f9_38) - f4f4 := int64(f4) * int64(f4) - f4f5_2 := int64(f4_2) * int64(f5) - f4f6_38 := int64(f4_2) * int64(f6_19) - f4f7_38 := int64(f4) * int64(f7_38) - f4f8_38 := int64(f4_2) * int64(f8_19) - f4f9_38 := int64(f4) * int64(f9_38) - f5f5_38 := int64(f5) * int64(f5_38) - f5f6_38 := int64(f5_2) * int64(f6_19) - f5f7_76 := int64(f5_2) * int64(f7_38) - f5f8_38 := int64(f5_2) * int64(f8_19) - f5f9_76 := int64(f5_2) * int64(f9_38) - f6f6_19 := int64(f6) * int64(f6_19) - f6f7_38 := int64(f6) * int64(f7_38) - f6f8_38 := int64(f6_2) * int64(f8_19) - f6f9_38 := int64(f6) * int64(f9_38) - f7f7_38 := int64(f7) * int64(f7_38) - f7f8_38 := int64(f7_2) * int64(f8_19) - f7f9_76 := int64(f7_2) * int64(f9_38) - f8f8_19 := int64(f8) * int64(f8_19) - f8f9_38 := int64(f8) * int64(f9_38) - f9f9_38 := int64(f9) * int64(f9_38) - h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38 - h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38 - h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19 - h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38 - h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38 - h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38 - h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19 - h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38 - h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38 - h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2 - var carry [10]int64 - - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - - carry[1] = (h1 + (1 << 24)) >> 25 - h2 += carry[1] - h1 -= carry[1] << 25 - carry[5] = (h5 + (1 << 24)) >> 25 - h6 += carry[5] - h5 -= carry[5] << 25 - - carry[2] = (h2 + (1 << 25)) >> 26 - h3 += carry[2] - h2 -= carry[2] << 26 - carry[6] = (h6 + (1 << 25)) >> 26 - h7 += carry[6] - h6 -= carry[6] << 26 - - carry[3] = (h3 + (1 << 24)) >> 25 - h4 += carry[3] - h3 -= carry[3] << 25 - carry[7] = (h7 + (1 << 24)) >> 25 - h8 += carry[7] - h7 -= carry[7] << 25 +const ( + // ScalarSize is the size of the scalar input to X25519. + ScalarSize = 32 + // PointSize is the size of the point input to X25519. + PointSize = 32 +) - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - carry[8] = (h8 + (1 << 25)) >> 26 - h9 += carry[8] - h8 -= carry[8] << 26 +// Basepoint is the canonical Curve25519 generator. +var Basepoint []byte - carry[9] = (h9 + (1 << 24)) >> 25 - h0 += carry[9] * 19 - h9 -= carry[9] << 25 +var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 +func init() { Basepoint = basePoint[:] } - h[0] = int32(h0) - h[1] = int32(h1) - h[2] = int32(h2) - h[3] = int32(h3) - h[4] = int32(h4) - h[5] = int32(h5) - h[6] = int32(h6) - h[7] = int32(h7) - h[8] = int32(h8) - h[9] = int32(h9) +func checkBasepoint() { + if subtle.ConstantTimeCompare(Basepoint, []byte{ + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }) != 1 { + panic("curve25519: global Basepoint value was modified") + } } -// feMul121666 calculates h = f * 121666. Can overlap h with f. +// X25519 returns the result of the scalar multiplication (scalar * point), +// according to RFC 7748, Section 5. scalar, point and the return value are +// slices of 32 bytes. // -// Preconditions: -// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// scalar can be generated at random, for example with crypto/rand. point should +// be either Basepoint or the output of another X25519 call. // -// Postconditions: -// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. -func feMul121666(h, f *fieldElement) { - h0 := int64(f[0]) * 121666 - h1 := int64(f[1]) * 121666 - h2 := int64(f[2]) * 121666 - h3 := int64(f[3]) * 121666 - h4 := int64(f[4]) * 121666 - h5 := int64(f[5]) * 121666 - h6 := int64(f[6]) * 121666 - h7 := int64(f[7]) * 121666 - h8 := int64(f[8]) * 121666 - h9 := int64(f[9]) * 121666 - var carry [10]int64 - - carry[9] = (h9 + (1 << 24)) >> 25 - h0 += carry[9] * 19 - h9 -= carry[9] << 25 - carry[1] = (h1 + (1 << 24)) >> 25 - h2 += carry[1] - h1 -= carry[1] << 25 - carry[3] = (h3 + (1 << 24)) >> 25 - h4 += carry[3] - h3 -= carry[3] << 25 - carry[5] = (h5 + (1 << 24)) >> 25 - h6 += carry[5] - h5 -= carry[5] << 25 - carry[7] = (h7 + (1 << 24)) >> 25 - h8 += carry[7] - h7 -= carry[7] << 25 - - carry[0] = (h0 + (1 << 25)) >> 26 - h1 += carry[0] - h0 -= carry[0] << 26 - carry[2] = (h2 + (1 << 25)) >> 26 - h3 += carry[2] - h2 -= carry[2] << 26 - carry[4] = (h4 + (1 << 25)) >> 26 - h5 += carry[4] - h4 -= carry[4] << 26 - carry[6] = (h6 + (1 << 25)) >> 26 - h7 += carry[6] - h6 -= carry[6] << 26 - carry[8] = (h8 + (1 << 25)) >> 26 - h9 += carry[8] - h8 -= carry[8] << 26 - - h[0] = int32(h0) - h[1] = int32(h1) - h[2] = int32(h2) - h[3] = int32(h3) - h[4] = int32(h4) - h[5] = int32(h5) - h[6] = int32(h6) - h[7] = int32(h7) - h[8] = int32(h8) - h[9] = int32(h9) -} - -// feInvert sets out = z^-1. -func feInvert(out, z *fieldElement) { - var t0, t1, t2, t3 fieldElement - var i int - - feSquare(&t0, z) - for i = 1; i < 1; i++ { - feSquare(&t0, &t0) - } - feSquare(&t1, &t0) - for i = 1; i < 2; i++ { - feSquare(&t1, &t1) - } - feMul(&t1, z, &t1) - feMul(&t0, &t0, &t1) - feSquare(&t2, &t0) - for i = 1; i < 1; i++ { - feSquare(&t2, &t2) - } - feMul(&t1, &t1, &t2) - feSquare(&t2, &t1) - for i = 1; i < 5; i++ { - feSquare(&t2, &t2) - } - feMul(&t1, &t2, &t1) - feSquare(&t2, &t1) - for i = 1; i < 10; i++ { - feSquare(&t2, &t2) - } - feMul(&t2, &t2, &t1) - feSquare(&t3, &t2) - for i = 1; i < 20; i++ { - feSquare(&t3, &t3) - } - feMul(&t2, &t3, &t2) - feSquare(&t2, &t2) - for i = 1; i < 10; i++ { - feSquare(&t2, &t2) - } - feMul(&t1, &t2, &t1) - feSquare(&t2, &t1) - for i = 1; i < 50; i++ { - feSquare(&t2, &t2) - } - feMul(&t2, &t2, &t1) - feSquare(&t3, &t2) - for i = 1; i < 100; i++ { - feSquare(&t3, &t3) - } - feMul(&t2, &t3, &t2) - feSquare(&t2, &t2) - for i = 1; i < 50; i++ { - feSquare(&t2, &t2) - } - feMul(&t1, &t2, &t1) - feSquare(&t1, &t1) - for i = 1; i < 5; i++ { - feSquare(&t1, &t1) - } - feMul(out, &t1, &t0) +// If point is Basepoint (but not if it's a different slice with the same +// contents) a precomputed implementation might be used for performance. +func X25519(scalar, point []byte) ([]byte, error) { + // Outline the body of function, to let the allocation be inlined in the + // caller, and possibly avoid escaping to the heap. + var dst [32]byte + return x25519(&dst, scalar, point) } -func scalarMult(out, in, base *[32]byte) { - var e [32]byte - - copy(e[:], in[:]) - e[0] &= 248 - e[31] &= 127 - e[31] |= 64 - - var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement - feFromBytes(&x1, base) - feOne(&x2) - feCopy(&x3, &x1) - feOne(&z3) - - swap := int32(0) - for pos := 254; pos >= 0; pos-- { - b := e[pos/8] >> uint(pos&7) - b &= 1 - swap ^= int32(b) - feCSwap(&x2, &x3, swap) - feCSwap(&z2, &z3, swap) - swap = int32(b) - - feSub(&tmp0, &x3, &z3) - feSub(&tmp1, &x2, &z2) - feAdd(&x2, &x2, &z2) - feAdd(&z2, &x3, &z3) - feMul(&z3, &tmp0, &x2) - feMul(&z2, &z2, &tmp1) - feSquare(&tmp0, &tmp1) - feSquare(&tmp1, &x2) - feAdd(&x3, &z3, &z2) - feSub(&z2, &z3, &z2) - feMul(&x2, &tmp1, &tmp0) - feSub(&tmp1, &tmp1, &tmp0) - feSquare(&z2, &z2) - feMul121666(&z3, &tmp1) - feSquare(&x3, &x3) - feAdd(&tmp0, &tmp0, &z3) - feMul(&z3, &x1, &z2) - feMul(&z2, &tmp1, &tmp0) - } - - feCSwap(&x2, &x3, swap) - feCSwap(&z2, &z3, swap) - - feInvert(&z2, &z2) - feMul(&x2, &x2, &z2) - feToBytes(out, &x2) +func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) { + var in [32]byte + if l := len(scalar); l != 32 { + return nil, fmt.Errorf("bad scalar length: %d, expected %d", l, 32) + } + if l := len(point); l != 32 { + return nil, fmt.Errorf("bad point length: %d, expected %d", l, 32) + } + copy(in[:], scalar) + if &point[0] == &Basepoint[0] { + checkBasepoint() + ScalarBaseMult(dst, &in) + } else { + var base, zero [32]byte + copy(base[:], point) + ScalarMult(dst, &in, &base) + if subtle.ConstantTimeCompare(dst[:], zero[:]) == 1 { + return nil, fmt.Errorf("bad input point: low order point") + } + } + return dst[:], nil } diff --git a/vendor/golang.org/x/crypto/curve25519/mont25519_amd64.go b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go index 5822bd53..5120b779 100644 --- a/vendor/golang.org/x/crypto/curve25519/mont25519_amd64.go +++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64,!gccgo,!appengine +// +build amd64,!gccgo,!appengine,!purego package curve25519 diff --git a/vendor/golang.org/x/crypto/curve25519/ladderstep_amd64.s b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s index e0ac30c7..0250c888 100644 --- a/vendor/golang.org/x/crypto/curve25519/ladderstep_amd64.s +++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s @@ -5,9 +5,84 @@ // This code was translated into a form compatible with 6a from the public // domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html -// +build amd64,!gccgo,!appengine +// +build amd64,!gccgo,!appengine,!purego -#include "const_amd64.h" +#define REDMASK51 0x0007FFFFFFFFFFFF + +// These constants cannot be encoded in non-MOVQ immediates. +// We access them directly from memory instead. + +DATA ·_121666_213(SB)/8, $996687872 +GLOBL ·_121666_213(SB), 8, $8 + +DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA +GLOBL ·_2P0(SB), 8, $8 + +DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE +GLOBL ·_2P1234(SB), 8, $8 + +// func freeze(inout *[5]uint64) +TEXT ·freeze(SB),7,$0-8 + MOVQ inout+0(FP), DI + + MOVQ 0(DI),SI + MOVQ 8(DI),DX + MOVQ 16(DI),CX + MOVQ 24(DI),R8 + MOVQ 32(DI),R9 + MOVQ $REDMASK51,AX + MOVQ AX,R10 + SUBQ $18,R10 + MOVQ $3,R11 +REDUCELOOP: + MOVQ SI,R12 + SHRQ $51,R12 + ANDQ AX,SI + ADDQ R12,DX + MOVQ DX,R12 + SHRQ $51,R12 + ANDQ AX,DX + ADDQ R12,CX + MOVQ CX,R12 + SHRQ $51,R12 + ANDQ AX,CX + ADDQ R12,R8 + MOVQ R8,R12 + SHRQ $51,R12 + ANDQ AX,R8 + ADDQ R12,R9 + MOVQ R9,R12 + SHRQ $51,R12 + ANDQ AX,R9 + IMUL3Q $19,R12,R12 + ADDQ R12,SI + SUBQ $1,R11 + JA REDUCELOOP + MOVQ $1,R12 + CMPQ R10,SI + CMOVQLT R11,R12 + CMPQ AX,DX + CMOVQNE R11,R12 + CMPQ AX,CX + CMOVQNE R11,R12 + CMPQ AX,R8 + CMOVQNE R11,R12 + CMPQ AX,R9 + CMOVQNE R11,R12 + NEGQ R12 + ANDQ R12,AX + ANDQ R12,R10 + SUBQ R10,SI + SUBQ AX,DX + SUBQ AX,CX + SUBQ AX,R8 + SUBQ AX,R9 + MOVQ SI,0(DI) + MOVQ DX,8(DI) + MOVQ CX,16(DI) + MOVQ R8,24(DI) + MOVQ R9,32(DI) + RET // func ladderstep(inout *[5][5]uint64) TEXT ·ladderstep(SB),0,$296-8 @@ -1375,3 +1450,344 @@ TEXT ·ladderstep(SB),0,$296-8 MOVQ AX,104(DI) MOVQ R10,112(DI) RET + +// func cswap(inout *[4][5]uint64, v uint64) +TEXT ·cswap(SB),7,$0 + MOVQ inout+0(FP),DI + MOVQ v+8(FP),SI + + SUBQ $1, SI + NOTQ SI + MOVQ SI, X15 + PSHUFD $0x44, X15, X15 + + MOVOU 0(DI), X0 + MOVOU 16(DI), X2 + MOVOU 32(DI), X4 + MOVOU 48(DI), X6 + MOVOU 64(DI), X8 + MOVOU 80(DI), X1 + MOVOU 96(DI), X3 + MOVOU 112(DI), X5 + MOVOU 128(DI), X7 + MOVOU 144(DI), X9 + + MOVO X1, X10 + MOVO X3, X11 + MOVO X5, X12 + MOVO X7, X13 + MOVO X9, X14 + + PXOR X0, X10 + PXOR X2, X11 + PXOR X4, X12 + PXOR X6, X13 + PXOR X8, X14 + PAND X15, X10 + PAND X15, X11 + PAND X15, X12 + PAND X15, X13 + PAND X15, X14 + PXOR X10, X0 + PXOR X10, X1 + PXOR X11, X2 + PXOR X11, X3 + PXOR X12, X4 + PXOR X12, X5 + PXOR X13, X6 + PXOR X13, X7 + PXOR X14, X8 + PXOR X14, X9 + + MOVOU X0, 0(DI) + MOVOU X2, 16(DI) + MOVOU X4, 32(DI) + MOVOU X6, 48(DI) + MOVOU X8, 64(DI) + MOVOU X1, 80(DI) + MOVOU X3, 96(DI) + MOVOU X5, 112(DI) + MOVOU X7, 128(DI) + MOVOU X9, 144(DI) + RET + +// func mul(dest, a, b *[5]uint64) +TEXT ·mul(SB),0,$16-24 + MOVQ dest+0(FP), DI + MOVQ a+8(FP), SI + MOVQ b+16(FP), DX + + MOVQ DX,CX + MOVQ 24(SI),DX + IMUL3Q $19,DX,AX + MOVQ AX,0(SP) + MULQ 16(CX) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 32(SI),DX + IMUL3Q $19,DX,AX + MOVQ AX,8(SP) + MULQ 8(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 0(SI),AX + MULQ 0(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 0(SI),AX + MULQ 8(CX) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 0(SI),AX + MULQ 16(CX) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 0(SI),AX + MULQ 24(CX) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 0(SI),AX + MULQ 32(CX) + MOVQ AX,BX + MOVQ DX,BP + MOVQ 8(SI),AX + MULQ 0(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 8(SI),AX + MULQ 8(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 8(SI),AX + MULQ 16(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 8(SI),AX + MULQ 24(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 8(SI),DX + IMUL3Q $19,DX,AX + MULQ 32(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 16(SI),AX + MULQ 0(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 16(SI),AX + MULQ 8(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 16(SI),AX + MULQ 16(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 16(SI),DX + IMUL3Q $19,DX,AX + MULQ 24(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 16(SI),DX + IMUL3Q $19,DX,AX + MULQ 32(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 24(SI),AX + MULQ 0(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 24(SI),AX + MULQ 8(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 0(SP),AX + MULQ 24(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 0(SP),AX + MULQ 32(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 32(SI),AX + MULQ 0(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 8(SP),AX + MULQ 16(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 8(SP),AX + MULQ 24(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 8(SP),AX + MULQ 32(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ $REDMASK51,SI + SHLQ $13,R8,R9 + ANDQ SI,R8 + SHLQ $13,R10,R11 + ANDQ SI,R10 + ADDQ R9,R10 + SHLQ $13,R12,R13 + ANDQ SI,R12 + ADDQ R11,R12 + SHLQ $13,R14,R15 + ANDQ SI,R14 + ADDQ R13,R14 + SHLQ $13,BX,BP + ANDQ SI,BX + ADDQ R15,BX + IMUL3Q $19,BP,DX + ADDQ DX,R8 + MOVQ R8,DX + SHRQ $51,DX + ADDQ R10,DX + MOVQ DX,CX + SHRQ $51,DX + ANDQ SI,R8 + ADDQ R12,DX + MOVQ DX,R9 + SHRQ $51,DX + ANDQ SI,CX + ADDQ R14,DX + MOVQ DX,AX + SHRQ $51,DX + ANDQ SI,R9 + ADDQ BX,DX + MOVQ DX,R10 + SHRQ $51,DX + ANDQ SI,AX + IMUL3Q $19,DX,DX + ADDQ DX,R8 + ANDQ SI,R10 + MOVQ R8,0(DI) + MOVQ CX,8(DI) + MOVQ R9,16(DI) + MOVQ AX,24(DI) + MOVQ R10,32(DI) + RET + +// func square(out, in *[5]uint64) +TEXT ·square(SB),7,$0-16 + MOVQ out+0(FP), DI + MOVQ in+8(FP), SI + + MOVQ 0(SI),AX + MULQ 0(SI) + MOVQ AX,CX + MOVQ DX,R8 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 8(SI) + MOVQ AX,R9 + MOVQ DX,R10 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 16(SI) + MOVQ AX,R11 + MOVQ DX,R12 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 24(SI) + MOVQ AX,R13 + MOVQ DX,R14 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 32(SI) + MOVQ AX,R15 + MOVQ DX,BX + MOVQ 8(SI),AX + MULQ 8(SI) + ADDQ AX,R11 + ADCQ DX,R12 + MOVQ 8(SI),AX + SHLQ $1,AX + MULQ 16(SI) + ADDQ AX,R13 + ADCQ DX,R14 + MOVQ 8(SI),AX + SHLQ $1,AX + MULQ 24(SI) + ADDQ AX,R15 + ADCQ DX,BX + MOVQ 8(SI),DX + IMUL3Q $38,DX,AX + MULQ 32(SI) + ADDQ AX,CX + ADCQ DX,R8 + MOVQ 16(SI),AX + MULQ 16(SI) + ADDQ AX,R15 + ADCQ DX,BX + MOVQ 16(SI),DX + IMUL3Q $38,DX,AX + MULQ 24(SI) + ADDQ AX,CX + ADCQ DX,R8 + MOVQ 16(SI),DX + IMUL3Q $38,DX,AX + MULQ 32(SI) + ADDQ AX,R9 + ADCQ DX,R10 + MOVQ 24(SI),DX + IMUL3Q $19,DX,AX + MULQ 24(SI) + ADDQ AX,R9 + ADCQ DX,R10 + MOVQ 24(SI),DX + IMUL3Q $38,DX,AX + MULQ 32(SI) + ADDQ AX,R11 + ADCQ DX,R12 + MOVQ 32(SI),DX + IMUL3Q $19,DX,AX + MULQ 32(SI) + ADDQ AX,R13 + ADCQ DX,R14 + MOVQ $REDMASK51,SI + SHLQ $13,CX,R8 + ANDQ SI,CX + SHLQ $13,R9,R10 + ANDQ SI,R9 + ADDQ R8,R9 + SHLQ $13,R11,R12 + ANDQ SI,R11 + ADDQ R10,R11 + SHLQ $13,R13,R14 + ANDQ SI,R13 + ADDQ R12,R13 + SHLQ $13,R15,BX + ANDQ SI,R15 + ADDQ R14,R15 + IMUL3Q $19,BX,DX + ADDQ DX,CX + MOVQ CX,DX + SHRQ $51,DX + ADDQ R9,DX + ANDQ SI,CX + MOVQ DX,R8 + SHRQ $51,DX + ADDQ R11,DX + ANDQ SI,R8 + MOVQ DX,R9 + SHRQ $51,DX + ADDQ R13,DX + ANDQ SI,R9 + MOVQ DX,AX + SHRQ $51,DX + ADDQ R15,DX + ANDQ SI,AX + MOVQ DX,R10 + SHRQ $51,DX + IMUL3Q $19,DX,DX + ADDQ DX,CX + ANDQ SI,R10 + MOVQ CX,0(DI) + MOVQ R8,8(DI) + MOVQ R9,16(DI) + MOVQ AX,24(DI) + MOVQ R10,32(DI) + RET diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go b/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go new file mode 100644 index 00000000..c43b13fc --- /dev/null +++ b/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go @@ -0,0 +1,828 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package curve25519 + +import "encoding/binary" + +// This code is a port of the public domain, "ref10" implementation of +// curve25519 from SUPERCOP 20130419 by D. J. Bernstein. + +// fieldElement represents an element of the field GF(2^255 - 19). An element +// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 +// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on +// context. +type fieldElement [10]int32 + +func feZero(fe *fieldElement) { + for i := range fe { + fe[i] = 0 + } +} + +func feOne(fe *fieldElement) { + feZero(fe) + fe[0] = 1 +} + +func feAdd(dst, a, b *fieldElement) { + for i := range dst { + dst[i] = a[i] + b[i] + } +} + +func feSub(dst, a, b *fieldElement) { + for i := range dst { + dst[i] = a[i] - b[i] + } +} + +func feCopy(dst, src *fieldElement) { + for i := range dst { + dst[i] = src[i] + } +} + +// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0. +// +// Preconditions: b in {0,1}. +func feCSwap(f, g *fieldElement, b int32) { + b = -b + for i := range f { + t := b & (f[i] ^ g[i]) + f[i] ^= t + g[i] ^= t + } +} + +// load3 reads a 24-bit, little-endian value from in. +func load3(in []byte) int64 { + var r int64 + r = int64(in[0]) + r |= int64(in[1]) << 8 + r |= int64(in[2]) << 16 + return r +} + +// load4 reads a 32-bit, little-endian value from in. +func load4(in []byte) int64 { + return int64(binary.LittleEndian.Uint32(in)) +} + +func feFromBytes(dst *fieldElement, src *[32]byte) { + h0 := load4(src[:]) + h1 := load3(src[4:]) << 6 + h2 := load3(src[7:]) << 5 + h3 := load3(src[10:]) << 3 + h4 := load3(src[13:]) << 2 + h5 := load4(src[16:]) + h6 := load3(src[20:]) << 7 + h7 := load3(src[23:]) << 5 + h8 := load3(src[26:]) << 4 + h9 := (load3(src[29:]) & 0x7fffff) << 2 + + var carry [10]int64 + carry[9] = (h9 + 1<<24) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + carry[1] = (h1 + 1<<24) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[3] = (h3 + 1<<24) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[5] = (h5 + 1<<24) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + carry[7] = (h7 + 1<<24) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + + carry[0] = (h0 + 1<<25) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[2] = (h2 + 1<<25) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[4] = (h4 + 1<<25) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[6] = (h6 + 1<<25) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + carry[8] = (h8 + 1<<25) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + + dst[0] = int32(h0) + dst[1] = int32(h1) + dst[2] = int32(h2) + dst[3] = int32(h3) + dst[4] = int32(h4) + dst[5] = int32(h5) + dst[6] = int32(h6) + dst[7] = int32(h7) + dst[8] = int32(h8) + dst[9] = int32(h9) +} + +// feToBytes marshals h to s. +// Preconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +// +// Write p=2^255-19; q=floor(h/p). +// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). +// +// Proof: +// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. +// Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. +// +// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). +// Then 0<y<1. +// +// Write r=h-pq. +// Have 0<=r<=p-1=2^255-20. +// Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. +// +// Write x=r+19(2^-255)r+y. +// Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. +// +// Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) +// so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. +func feToBytes(s *[32]byte, h *fieldElement) { + var carry [10]int32 + + q := (19*h[9] + (1 << 24)) >> 25 + q = (h[0] + q) >> 26 + q = (h[1] + q) >> 25 + q = (h[2] + q) >> 26 + q = (h[3] + q) >> 25 + q = (h[4] + q) >> 26 + q = (h[5] + q) >> 25 + q = (h[6] + q) >> 26 + q = (h[7] + q) >> 25 + q = (h[8] + q) >> 26 + q = (h[9] + q) >> 25 + + // Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. + h[0] += 19 * q + // Goal: Output h-2^255 q, which is between 0 and 2^255-20. + + carry[0] = h[0] >> 26 + h[1] += carry[0] + h[0] -= carry[0] << 26 + carry[1] = h[1] >> 25 + h[2] += carry[1] + h[1] -= carry[1] << 25 + carry[2] = h[2] >> 26 + h[3] += carry[2] + h[2] -= carry[2] << 26 + carry[3] = h[3] >> 25 + h[4] += carry[3] + h[3] -= carry[3] << 25 + carry[4] = h[4] >> 26 + h[5] += carry[4] + h[4] -= carry[4] << 26 + carry[5] = h[5] >> 25 + h[6] += carry[5] + h[5] -= carry[5] << 25 + carry[6] = h[6] >> 26 + h[7] += carry[6] + h[6] -= carry[6] << 26 + carry[7] = h[7] >> 25 + h[8] += carry[7] + h[7] -= carry[7] << 25 + carry[8] = h[8] >> 26 + h[9] += carry[8] + h[8] -= carry[8] << 26 + carry[9] = h[9] >> 25 + h[9] -= carry[9] << 25 + // h10 = carry9 + + // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. + // Have h[0]+...+2^230 h[9] between 0 and 2^255-1; + // evidently 2^255 h10-2^255 q = 0. + // Goal: Output h[0]+...+2^230 h[9]. + + s[0] = byte(h[0] >> 0) + s[1] = byte(h[0] >> 8) + s[2] = byte(h[0] >> 16) + s[3] = byte((h[0] >> 24) | (h[1] << 2)) + s[4] = byte(h[1] >> 6) + s[5] = byte(h[1] >> 14) + s[6] = byte((h[1] >> 22) | (h[2] << 3)) + s[7] = byte(h[2] >> 5) + s[8] = byte(h[2] >> 13) + s[9] = byte((h[2] >> 21) | (h[3] << 5)) + s[10] = byte(h[3] >> 3) + s[11] = byte(h[3] >> 11) + s[12] = byte((h[3] >> 19) | (h[4] << 6)) + s[13] = byte(h[4] >> 2) + s[14] = byte(h[4] >> 10) + s[15] = byte(h[4] >> 18) + s[16] = byte(h[5] >> 0) + s[17] = byte(h[5] >> 8) + s[18] = byte(h[5] >> 16) + s[19] = byte((h[5] >> 24) | (h[6] << 1)) + s[20] = byte(h[6] >> 7) + s[21] = byte(h[6] >> 15) + s[22] = byte((h[6] >> 23) | (h[7] << 3)) + s[23] = byte(h[7] >> 5) + s[24] = byte(h[7] >> 13) + s[25] = byte((h[7] >> 21) | (h[8] << 4)) + s[26] = byte(h[8] >> 4) + s[27] = byte(h[8] >> 12) + s[28] = byte((h[8] >> 20) | (h[9] << 6)) + s[29] = byte(h[9] >> 2) + s[30] = byte(h[9] >> 10) + s[31] = byte(h[9] >> 18) +} + +// feMul calculates h = f * g +// Can overlap h with f or g. +// +// Preconditions: +// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// +// Postconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +// +// Notes on implementation strategy: +// +// Using schoolbook multiplication. +// Karatsuba would save a little in some cost models. +// +// Most multiplications by 2 and 19 are 32-bit precomputations; +// cheaper than 64-bit postcomputations. +// +// There is one remaining multiplication by 19 in the carry chain; +// one *19 precomputation can be merged into this, +// but the resulting data flow is considerably less clean. +// +// There are 12 carries below. +// 10 of them are 2-way parallelizable and vectorizable. +// Can get away with 11 carries, but then data flow is much deeper. +// +// With tighter constraints on inputs can squeeze carries into int32. +func feMul(h, f, g *fieldElement) { + f0 := f[0] + f1 := f[1] + f2 := f[2] + f3 := f[3] + f4 := f[4] + f5 := f[5] + f6 := f[6] + f7 := f[7] + f8 := f[8] + f9 := f[9] + g0 := g[0] + g1 := g[1] + g2 := g[2] + g3 := g[3] + g4 := g[4] + g5 := g[5] + g6 := g[6] + g7 := g[7] + g8 := g[8] + g9 := g[9] + g1_19 := 19 * g1 // 1.4*2^29 + g2_19 := 19 * g2 // 1.4*2^30; still ok + g3_19 := 19 * g3 + g4_19 := 19 * g4 + g5_19 := 19 * g5 + g6_19 := 19 * g6 + g7_19 := 19 * g7 + g8_19 := 19 * g8 + g9_19 := 19 * g9 + f1_2 := 2 * f1 + f3_2 := 2 * f3 + f5_2 := 2 * f5 + f7_2 := 2 * f7 + f9_2 := 2 * f9 + f0g0 := int64(f0) * int64(g0) + f0g1 := int64(f0) * int64(g1) + f0g2 := int64(f0) * int64(g2) + f0g3 := int64(f0) * int64(g3) + f0g4 := int64(f0) * int64(g4) + f0g5 := int64(f0) * int64(g5) + f0g6 := int64(f0) * int64(g6) + f0g7 := int64(f0) * int64(g7) + f0g8 := int64(f0) * int64(g8) + f0g9 := int64(f0) * int64(g9) + f1g0 := int64(f1) * int64(g0) + f1g1_2 := int64(f1_2) * int64(g1) + f1g2 := int64(f1) * int64(g2) + f1g3_2 := int64(f1_2) * int64(g3) + f1g4 := int64(f1) * int64(g4) + f1g5_2 := int64(f1_2) * int64(g5) + f1g6 := int64(f1) * int64(g6) + f1g7_2 := int64(f1_2) * int64(g7) + f1g8 := int64(f1) * int64(g8) + f1g9_38 := int64(f1_2) * int64(g9_19) + f2g0 := int64(f2) * int64(g0) + f2g1 := int64(f2) * int64(g1) + f2g2 := int64(f2) * int64(g2) + f2g3 := int64(f2) * int64(g3) + f2g4 := int64(f2) * int64(g4) + f2g5 := int64(f2) * int64(g5) + f2g6 := int64(f2) * int64(g6) + f2g7 := int64(f2) * int64(g7) + f2g8_19 := int64(f2) * int64(g8_19) + f2g9_19 := int64(f2) * int64(g9_19) + f3g0 := int64(f3) * int64(g0) + f3g1_2 := int64(f3_2) * int64(g1) + f3g2 := int64(f3) * int64(g2) + f3g3_2 := int64(f3_2) * int64(g3) + f3g4 := int64(f3) * int64(g4) + f3g5_2 := int64(f3_2) * int64(g5) + f3g6 := int64(f3) * int64(g6) + f3g7_38 := int64(f3_2) * int64(g7_19) + f3g8_19 := int64(f3) * int64(g8_19) + f3g9_38 := int64(f3_2) * int64(g9_19) + f4g0 := int64(f4) * int64(g0) + f4g1 := int64(f4) * int64(g1) + f4g2 := int64(f4) * int64(g2) + f4g3 := int64(f4) * int64(g3) + f4g4 := int64(f4) * int64(g4) + f4g5 := int64(f4) * int64(g5) + f4g6_19 := int64(f4) * int64(g6_19) + f4g7_19 := int64(f4) * int64(g7_19) + f4g8_19 := int64(f4) * int64(g8_19) + f4g9_19 := int64(f4) * int64(g9_19) + f5g0 := int64(f5) * int64(g0) + f5g1_2 := int64(f5_2) * int64(g1) + f5g2 := int64(f5) * int64(g2) + f5g3_2 := int64(f5_2) * int64(g3) + f5g4 := int64(f5) * int64(g4) + f5g5_38 := int64(f5_2) * int64(g5_19) + f5g6_19 := int64(f5) * int64(g6_19) + f5g7_38 := int64(f5_2) * int64(g7_19) + f5g8_19 := int64(f5) * int64(g8_19) + f5g9_38 := int64(f5_2) * int64(g9_19) + f6g0 := int64(f6) * int64(g0) + f6g1 := int64(f6) * int64(g1) + f6g2 := int64(f6) * int64(g2) + f6g3 := int64(f6) * int64(g3) + f6g4_19 := int64(f6) * int64(g4_19) + f6g5_19 := int64(f6) * int64(g5_19) + f6g6_19 := int64(f6) * int64(g6_19) + f6g7_19 := int64(f6) * int64(g7_19) + f6g8_19 := int64(f6) * int64(g8_19) + f6g9_19 := int64(f6) * int64(g9_19) + f7g0 := int64(f7) * int64(g0) + f7g1_2 := int64(f7_2) * int64(g1) + f7g2 := int64(f7) * int64(g2) + f7g3_38 := int64(f7_2) * int64(g3_19) + f7g4_19 := int64(f7) * int64(g4_19) + f7g5_38 := int64(f7_2) * int64(g5_19) + f7g6_19 := int64(f7) * int64(g6_19) + f7g7_38 := int64(f7_2) * int64(g7_19) + f7g8_19 := int64(f7) * int64(g8_19) + f7g9_38 := int64(f7_2) * int64(g9_19) + f8g0 := int64(f8) * int64(g0) + f8g1 := int64(f8) * int64(g1) + f8g2_19 := int64(f8) * int64(g2_19) + f8g3_19 := int64(f8) * int64(g3_19) + f8g4_19 := int64(f8) * int64(g4_19) + f8g5_19 := int64(f8) * int64(g5_19) + f8g6_19 := int64(f8) * int64(g6_19) + f8g7_19 := int64(f8) * int64(g7_19) + f8g8_19 := int64(f8) * int64(g8_19) + f8g9_19 := int64(f8) * int64(g9_19) + f9g0 := int64(f9) * int64(g0) + f9g1_38 := int64(f9_2) * int64(g1_19) + f9g2_19 := int64(f9) * int64(g2_19) + f9g3_38 := int64(f9_2) * int64(g3_19) + f9g4_19 := int64(f9) * int64(g4_19) + f9g5_38 := int64(f9_2) * int64(g5_19) + f9g6_19 := int64(f9) * int64(g6_19) + f9g7_38 := int64(f9_2) * int64(g7_19) + f9g8_19 := int64(f9) * int64(g8_19) + f9g9_38 := int64(f9_2) * int64(g9_19) + h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38 + h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19 + h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38 + h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19 + h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38 + h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19 + h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38 + h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19 + h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38 + h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 + var carry [10]int64 + + // |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) + // i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 + // |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) + // i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + // |h0| <= 2^25 + // |h4| <= 2^25 + // |h1| <= 1.51*2^58 + // |h5| <= 1.51*2^58 + + carry[1] = (h1 + (1 << 24)) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[5] = (h5 + (1 << 24)) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + // |h1| <= 2^24; from now on fits into int32 + // |h5| <= 2^24; from now on fits into int32 + // |h2| <= 1.21*2^59 + // |h6| <= 1.21*2^59 + + carry[2] = (h2 + (1 << 25)) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[6] = (h6 + (1 << 25)) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + // |h2| <= 2^25; from now on fits into int32 unchanged + // |h6| <= 2^25; from now on fits into int32 unchanged + // |h3| <= 1.51*2^58 + // |h7| <= 1.51*2^58 + + carry[3] = (h3 + (1 << 24)) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[7] = (h7 + (1 << 24)) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + // |h3| <= 2^24; from now on fits into int32 unchanged + // |h7| <= 2^24; from now on fits into int32 unchanged + // |h4| <= 1.52*2^33 + // |h8| <= 1.52*2^33 + + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[8] = (h8 + (1 << 25)) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + // |h4| <= 2^25; from now on fits into int32 unchanged + // |h8| <= 2^25; from now on fits into int32 unchanged + // |h5| <= 1.01*2^24 + // |h9| <= 1.51*2^58 + + carry[9] = (h9 + (1 << 24)) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + // |h9| <= 2^24; from now on fits into int32 unchanged + // |h0| <= 1.8*2^37 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + // |h0| <= 2^25; from now on fits into int32 unchanged + // |h1| <= 1.01*2^24 + + h[0] = int32(h0) + h[1] = int32(h1) + h[2] = int32(h2) + h[3] = int32(h3) + h[4] = int32(h4) + h[5] = int32(h5) + h[6] = int32(h6) + h[7] = int32(h7) + h[8] = int32(h8) + h[9] = int32(h9) +} + +// feSquare calculates h = f*f. Can overlap h with f. +// +// Preconditions: +// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// +// Postconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +func feSquare(h, f *fieldElement) { + f0 := f[0] + f1 := f[1] + f2 := f[2] + f3 := f[3] + f4 := f[4] + f5 := f[5] + f6 := f[6] + f7 := f[7] + f8 := f[8] + f9 := f[9] + f0_2 := 2 * f0 + f1_2 := 2 * f1 + f2_2 := 2 * f2 + f3_2 := 2 * f3 + f4_2 := 2 * f4 + f5_2 := 2 * f5 + f6_2 := 2 * f6 + f7_2 := 2 * f7 + f5_38 := 38 * f5 // 1.31*2^30 + f6_19 := 19 * f6 // 1.31*2^30 + f7_38 := 38 * f7 // 1.31*2^30 + f8_19 := 19 * f8 // 1.31*2^30 + f9_38 := 38 * f9 // 1.31*2^30 + f0f0 := int64(f0) * int64(f0) + f0f1_2 := int64(f0_2) * int64(f1) + f0f2_2 := int64(f0_2) * int64(f2) + f0f3_2 := int64(f0_2) * int64(f3) + f0f4_2 := int64(f0_2) * int64(f4) + f0f5_2 := int64(f0_2) * int64(f5) + f0f6_2 := int64(f0_2) * int64(f6) + f0f7_2 := int64(f0_2) * int64(f7) + f0f8_2 := int64(f0_2) * int64(f8) + f0f9_2 := int64(f0_2) * int64(f9) + f1f1_2 := int64(f1_2) * int64(f1) + f1f2_2 := int64(f1_2) * int64(f2) + f1f3_4 := int64(f1_2) * int64(f3_2) + f1f4_2 := int64(f1_2) * int64(f4) + f1f5_4 := int64(f1_2) * int64(f5_2) + f1f6_2 := int64(f1_2) * int64(f6) + f1f7_4 := int64(f1_2) * int64(f7_2) + f1f8_2 := int64(f1_2) * int64(f8) + f1f9_76 := int64(f1_2) * int64(f9_38) + f2f2 := int64(f2) * int64(f2) + f2f3_2 := int64(f2_2) * int64(f3) + f2f4_2 := int64(f2_2) * int64(f4) + f2f5_2 := int64(f2_2) * int64(f5) + f2f6_2 := int64(f2_2) * int64(f6) + f2f7_2 := int64(f2_2) * int64(f7) + f2f8_38 := int64(f2_2) * int64(f8_19) + f2f9_38 := int64(f2) * int64(f9_38) + f3f3_2 := int64(f3_2) * int64(f3) + f3f4_2 := int64(f3_2) * int64(f4) + f3f5_4 := int64(f3_2) * int64(f5_2) + f3f6_2 := int64(f3_2) * int64(f6) + f3f7_76 := int64(f3_2) * int64(f7_38) + f3f8_38 := int64(f3_2) * int64(f8_19) + f3f9_76 := int64(f3_2) * int64(f9_38) + f4f4 := int64(f4) * int64(f4) + f4f5_2 := int64(f4_2) * int64(f5) + f4f6_38 := int64(f4_2) * int64(f6_19) + f4f7_38 := int64(f4) * int64(f7_38) + f4f8_38 := int64(f4_2) * int64(f8_19) + f4f9_38 := int64(f4) * int64(f9_38) + f5f5_38 := int64(f5) * int64(f5_38) + f5f6_38 := int64(f5_2) * int64(f6_19) + f5f7_76 := int64(f5_2) * int64(f7_38) + f5f8_38 := int64(f5_2) * int64(f8_19) + f5f9_76 := int64(f5_2) * int64(f9_38) + f6f6_19 := int64(f6) * int64(f6_19) + f6f7_38 := int64(f6) * int64(f7_38) + f6f8_38 := int64(f6_2) * int64(f8_19) + f6f9_38 := int64(f6) * int64(f9_38) + f7f7_38 := int64(f7) * int64(f7_38) + f7f8_38 := int64(f7_2) * int64(f8_19) + f7f9_76 := int64(f7_2) * int64(f9_38) + f8f8_19 := int64(f8) * int64(f8_19) + f8f9_38 := int64(f8) * int64(f9_38) + f9f9_38 := int64(f9) * int64(f9_38) + h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38 + h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38 + h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19 + h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38 + h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38 + h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38 + h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19 + h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38 + h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38 + h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2 + var carry [10]int64 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + + carry[1] = (h1 + (1 << 24)) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[5] = (h5 + (1 << 24)) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + + carry[2] = (h2 + (1 << 25)) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[6] = (h6 + (1 << 25)) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + + carry[3] = (h3 + (1 << 24)) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[7] = (h7 + (1 << 24)) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[8] = (h8 + (1 << 25)) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + + carry[9] = (h9 + (1 << 24)) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + + h[0] = int32(h0) + h[1] = int32(h1) + h[2] = int32(h2) + h[3] = int32(h3) + h[4] = int32(h4) + h[5] = int32(h5) + h[6] = int32(h6) + h[7] = int32(h7) + h[8] = int32(h8) + h[9] = int32(h9) +} + +// feMul121666 calculates h = f * 121666. Can overlap h with f. +// +// Preconditions: +// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// +// Postconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +func feMul121666(h, f *fieldElement) { + h0 := int64(f[0]) * 121666 + h1 := int64(f[1]) * 121666 + h2 := int64(f[2]) * 121666 + h3 := int64(f[3]) * 121666 + h4 := int64(f[4]) * 121666 + h5 := int64(f[5]) * 121666 + h6 := int64(f[6]) * 121666 + h7 := int64(f[7]) * 121666 + h8 := int64(f[8]) * 121666 + h9 := int64(f[9]) * 121666 + var carry [10]int64 + + carry[9] = (h9 + (1 << 24)) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + carry[1] = (h1 + (1 << 24)) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[3] = (h3 + (1 << 24)) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[5] = (h5 + (1 << 24)) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + carry[7] = (h7 + (1 << 24)) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[2] = (h2 + (1 << 25)) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[6] = (h6 + (1 << 25)) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + carry[8] = (h8 + (1 << 25)) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + + h[0] = int32(h0) + h[1] = int32(h1) + h[2] = int32(h2) + h[3] = int32(h3) + h[4] = int32(h4) + h[5] = int32(h5) + h[6] = int32(h6) + h[7] = int32(h7) + h[8] = int32(h8) + h[9] = int32(h9) +} + +// feInvert sets out = z^-1. +func feInvert(out, z *fieldElement) { + var t0, t1, t2, t3 fieldElement + var i int + + feSquare(&t0, z) + for i = 1; i < 1; i++ { + feSquare(&t0, &t0) + } + feSquare(&t1, &t0) + for i = 1; i < 2; i++ { + feSquare(&t1, &t1) + } + feMul(&t1, z, &t1) + feMul(&t0, &t0, &t1) + feSquare(&t2, &t0) + for i = 1; i < 1; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t1, &t2) + feSquare(&t2, &t1) + for i = 1; i < 5; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t2, &t1) + feSquare(&t2, &t1) + for i = 1; i < 10; i++ { + feSquare(&t2, &t2) + } + feMul(&t2, &t2, &t1) + feSquare(&t3, &t2) + for i = 1; i < 20; i++ { + feSquare(&t3, &t3) + } + feMul(&t2, &t3, &t2) + feSquare(&t2, &t2) + for i = 1; i < 10; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t2, &t1) + feSquare(&t2, &t1) + for i = 1; i < 50; i++ { + feSquare(&t2, &t2) + } + feMul(&t2, &t2, &t1) + feSquare(&t3, &t2) + for i = 1; i < 100; i++ { + feSquare(&t3, &t3) + } + feMul(&t2, &t3, &t2) + feSquare(&t2, &t2) + for i = 1; i < 50; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t2, &t1) + feSquare(&t1, &t1) + for i = 1; i < 5; i++ { + feSquare(&t1, &t1) + } + feMul(out, &t1, &t0) +} + +func scalarMultGeneric(out, in, base *[32]byte) { + var e [32]byte + + copy(e[:], in[:]) + e[0] &= 248 + e[31] &= 127 + e[31] |= 64 + + var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement + feFromBytes(&x1, base) + feOne(&x2) + feCopy(&x3, &x1) + feOne(&z3) + + swap := int32(0) + for pos := 254; pos >= 0; pos-- { + b := e[pos/8] >> uint(pos&7) + b &= 1 + swap ^= int32(b) + feCSwap(&x2, &x3, swap) + feCSwap(&z2, &z3, swap) + swap = int32(b) + + feSub(&tmp0, &x3, &z3) + feSub(&tmp1, &x2, &z2) + feAdd(&x2, &x2, &z2) + feAdd(&z2, &x3, &z3) + feMul(&z3, &tmp0, &x2) + feMul(&z2, &z2, &tmp1) + feSquare(&tmp0, &tmp1) + feSquare(&tmp1, &x2) + feAdd(&x3, &z3, &z2) + feSub(&z2, &z3, &z2) + feMul(&x2, &tmp1, &tmp0) + feSub(&tmp1, &tmp1, &tmp0) + feSquare(&z2, &z2) + feMul121666(&z3, &tmp1) + feSquare(&x3, &x3) + feAdd(&tmp0, &tmp0, &z3) + feMul(&z3, &x1, &z2) + feMul(&z2, &tmp1, &tmp0) + } + + feCSwap(&x2, &x3, swap) + feCSwap(&z2, &z3, swap) + + feInvert(&z2, &z2) + feMul(&x2, &x2, &z2) + feToBytes(out, &x2) +} diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go b/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go new file mode 100644 index 00000000..047d49af --- /dev/null +++ b/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go @@ -0,0 +1,11 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 gccgo appengine purego + +package curve25519 + +func scalarMult(out, in, base *[32]byte) { + scalarMultGeneric(out, in, base) +} diff --git a/vendor/golang.org/x/crypto/curve25519/doc.go b/vendor/golang.org/x/crypto/curve25519/doc.go deleted file mode 100644 index da9b10d9..00000000 --- a/vendor/golang.org/x/crypto/curve25519/doc.go +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package curve25519 provides an implementation of scalar multiplication on -// the elliptic curve known as curve25519. See https://cr.yp.to/ecdh.html -package curve25519 // import "golang.org/x/crypto/curve25519" - -// basePoint is the x coordinate of the generator of the curve. -var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} - -// ScalarMult sets dst to the product in*base where dst and base are the x -// coordinates of group points and all values are in little-endian form. -func ScalarMult(dst, in, base *[32]byte) { - scalarMult(dst, in, base) -} - -// ScalarBaseMult sets dst to the product in*base where dst and base are the x -// coordinates of group points, base is the standard generator and all values -// are in little-endian form. -func ScalarBaseMult(dst, in *[32]byte) { - ScalarMult(dst, in, &basePoint) -} diff --git a/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s b/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s deleted file mode 100644 index 39081610..00000000 --- a/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html - -// +build amd64,!gccgo,!appengine - -#include "const_amd64.h" - -// func freeze(inout *[5]uint64) -TEXT ·freeze(SB),7,$0-8 - MOVQ inout+0(FP), DI - - MOVQ 0(DI),SI - MOVQ 8(DI),DX - MOVQ 16(DI),CX - MOVQ 24(DI),R8 - MOVQ 32(DI),R9 - MOVQ $REDMASK51,AX - MOVQ AX,R10 - SUBQ $18,R10 - MOVQ $3,R11 -REDUCELOOP: - MOVQ SI,R12 - SHRQ $51,R12 - ANDQ AX,SI - ADDQ R12,DX - MOVQ DX,R12 - SHRQ $51,R12 - ANDQ AX,DX - ADDQ R12,CX - MOVQ CX,R12 - SHRQ $51,R12 - ANDQ AX,CX - ADDQ R12,R8 - MOVQ R8,R12 - SHRQ $51,R12 - ANDQ AX,R8 - ADDQ R12,R9 - MOVQ R9,R12 - SHRQ $51,R12 - ANDQ AX,R9 - IMUL3Q $19,R12,R12 - ADDQ R12,SI - SUBQ $1,R11 - JA REDUCELOOP - MOVQ $1,R12 - CMPQ R10,SI - CMOVQLT R11,R12 - CMPQ AX,DX - CMOVQNE R11,R12 - CMPQ AX,CX - CMOVQNE R11,R12 - CMPQ AX,R8 - CMOVQNE R11,R12 - CMPQ AX,R9 - CMOVQNE R11,R12 - NEGQ R12 - ANDQ R12,AX - ANDQ R12,R10 - SUBQ R10,SI - SUBQ AX,DX - SUBQ AX,CX - SUBQ AX,R8 - SUBQ AX,R9 - MOVQ SI,0(DI) - MOVQ DX,8(DI) - MOVQ CX,16(DI) - MOVQ R8,24(DI) - MOVQ R9,32(DI) - RET diff --git a/vendor/golang.org/x/crypto/curve25519/mul_amd64.s b/vendor/golang.org/x/crypto/curve25519/mul_amd64.s deleted file mode 100644 index 1f76d1a3..00000000 --- a/vendor/golang.org/x/crypto/curve25519/mul_amd64.s +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html - -// +build amd64,!gccgo,!appengine - -#include "const_amd64.h" - -// func mul(dest, a, b *[5]uint64) -TEXT ·mul(SB),0,$16-24 - MOVQ dest+0(FP), DI - MOVQ a+8(FP), SI - MOVQ b+16(FP), DX - - MOVQ DX,CX - MOVQ 24(SI),DX - IMUL3Q $19,DX,AX - MOVQ AX,0(SP) - MULQ 16(CX) - MOVQ AX,R8 - MOVQ DX,R9 - MOVQ 32(SI),DX - IMUL3Q $19,DX,AX - MOVQ AX,8(SP) - MULQ 8(CX) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 0(SI),AX - MULQ 0(CX) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 0(SI),AX - MULQ 8(CX) - MOVQ AX,R10 - MOVQ DX,R11 - MOVQ 0(SI),AX - MULQ 16(CX) - MOVQ AX,R12 - MOVQ DX,R13 - MOVQ 0(SI),AX - MULQ 24(CX) - MOVQ AX,R14 - MOVQ DX,R15 - MOVQ 0(SI),AX - MULQ 32(CX) - MOVQ AX,BX - MOVQ DX,BP - MOVQ 8(SI),AX - MULQ 0(CX) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 8(SI),AX - MULQ 8(CX) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 8(SI),AX - MULQ 16(CX) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 8(SI),AX - MULQ 24(CX) - ADDQ AX,BX - ADCQ DX,BP - MOVQ 8(SI),DX - IMUL3Q $19,DX,AX - MULQ 32(CX) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 16(SI),AX - MULQ 0(CX) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 16(SI),AX - MULQ 8(CX) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 16(SI),AX - MULQ 16(CX) - ADDQ AX,BX - ADCQ DX,BP - MOVQ 16(SI),DX - IMUL3Q $19,DX,AX - MULQ 24(CX) - ADDQ AX,R8 - ADCQ DX,R9 - MOVQ 16(SI),DX - IMUL3Q $19,DX,AX - MULQ 32(CX) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 24(SI),AX - MULQ 0(CX) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ 24(SI),AX - MULQ 8(CX) - ADDQ AX,BX - ADCQ DX,BP - MOVQ 0(SP),AX - MULQ 24(CX) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 0(SP),AX - MULQ 32(CX) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 32(SI),AX - MULQ 0(CX) - ADDQ AX,BX - ADCQ DX,BP - MOVQ 8(SP),AX - MULQ 16(CX) - ADDQ AX,R10 - ADCQ DX,R11 - MOVQ 8(SP),AX - MULQ 24(CX) - ADDQ AX,R12 - ADCQ DX,R13 - MOVQ 8(SP),AX - MULQ 32(CX) - ADDQ AX,R14 - ADCQ DX,R15 - MOVQ $REDMASK51,SI - SHLQ $13,R8,R9 - ANDQ SI,R8 - SHLQ $13,R10,R11 - ANDQ SI,R10 - ADDQ R9,R10 - SHLQ $13,R12,R13 - ANDQ SI,R12 - ADDQ R11,R12 - SHLQ $13,R14,R15 - ANDQ SI,R14 - ADDQ R13,R14 - SHLQ $13,BX,BP - ANDQ SI,BX - ADDQ R15,BX - IMUL3Q $19,BP,DX - ADDQ DX,R8 - MOVQ R8,DX - SHRQ $51,DX - ADDQ R10,DX - MOVQ DX,CX - SHRQ $51,DX - ANDQ SI,R8 - ADDQ R12,DX - MOVQ DX,R9 - SHRQ $51,DX - ANDQ SI,CX - ADDQ R14,DX - MOVQ DX,AX - SHRQ $51,DX - ANDQ SI,R9 - ADDQ BX,DX - MOVQ DX,R10 - SHRQ $51,DX - ANDQ SI,AX - IMUL3Q $19,DX,DX - ADDQ DX,R8 - ANDQ SI,R10 - MOVQ R8,0(DI) - MOVQ CX,8(DI) - MOVQ R9,16(DI) - MOVQ AX,24(DI) - MOVQ R10,32(DI) - RET diff --git a/vendor/golang.org/x/crypto/curve25519/square_amd64.s b/vendor/golang.org/x/crypto/curve25519/square_amd64.s deleted file mode 100644 index 07511a45..00000000 --- a/vendor/golang.org/x/crypto/curve25519/square_amd64.s +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html - -// +build amd64,!gccgo,!appengine - -#include "const_amd64.h" - -// func square(out, in *[5]uint64) -TEXT ·square(SB),7,$0-16 - MOVQ out+0(FP), DI - MOVQ in+8(FP), SI - - MOVQ 0(SI),AX - MULQ 0(SI) - MOVQ AX,CX - MOVQ DX,R8 - MOVQ 0(SI),AX - SHLQ $1,AX - MULQ 8(SI) - MOVQ AX,R9 - MOVQ DX,R10 - MOVQ 0(SI),AX - SHLQ $1,AX - MULQ 16(SI) - MOVQ AX,R11 - MOVQ DX,R12 - MOVQ 0(SI),AX - SHLQ $1,AX - MULQ 24(SI) - MOVQ AX,R13 - MOVQ DX,R14 - MOVQ 0(SI),AX - SHLQ $1,AX - MULQ 32(SI) - MOVQ AX,R15 - MOVQ DX,BX - MOVQ 8(SI),AX - MULQ 8(SI) - ADDQ AX,R11 - ADCQ DX,R12 - MOVQ 8(SI),AX - SHLQ $1,AX - MULQ 16(SI) - ADDQ AX,R13 - ADCQ DX,R14 - MOVQ 8(SI),AX - SHLQ $1,AX - MULQ 24(SI) - ADDQ AX,R15 - ADCQ DX,BX - MOVQ 8(SI),DX - IMUL3Q $38,DX,AX - MULQ 32(SI) - ADDQ AX,CX - ADCQ DX,R8 - MOVQ 16(SI),AX - MULQ 16(SI) - ADDQ AX,R15 - ADCQ DX,BX - MOVQ 16(SI),DX - IMUL3Q $38,DX,AX - MULQ 24(SI) - ADDQ AX,CX - ADCQ DX,R8 - MOVQ 16(SI),DX - IMUL3Q $38,DX,AX - MULQ 32(SI) - ADDQ AX,R9 - ADCQ DX,R10 - MOVQ 24(SI),DX - IMUL3Q $19,DX,AX - MULQ 24(SI) - ADDQ AX,R9 - ADCQ DX,R10 - MOVQ 24(SI),DX - IMUL3Q $38,DX,AX - MULQ 32(SI) - ADDQ AX,R11 - ADCQ DX,R12 - MOVQ 32(SI),DX - IMUL3Q $19,DX,AX - MULQ 32(SI) - ADDQ AX,R13 - ADCQ DX,R14 - MOVQ $REDMASK51,SI - SHLQ $13,CX,R8 - ANDQ SI,CX - SHLQ $13,R9,R10 - ANDQ SI,R9 - ADDQ R8,R9 - SHLQ $13,R11,R12 - ANDQ SI,R11 - ADDQ R10,R11 - SHLQ $13,R13,R14 - ANDQ SI,R13 - ADDQ R12,R13 - SHLQ $13,R15,BX - ANDQ SI,R15 - ADDQ R14,R15 - IMUL3Q $19,BX,DX - ADDQ DX,CX - MOVQ CX,DX - SHRQ $51,DX - ADDQ R9,DX - ANDQ SI,CX - MOVQ DX,R8 - SHRQ $51,DX - ADDQ R11,DX - ANDQ SI,R8 - MOVQ DX,R9 - SHRQ $51,DX - ADDQ R13,DX - ANDQ SI,R9 - MOVQ DX,AX - SHRQ $51,DX - ADDQ R15,DX - ANDQ SI,AX - MOVQ DX,R10 - SHRQ $51,DX - IMUL3Q $19,DX,DX - ADDQ DX,CX - ANDQ SI,R10 - MOVQ CX,0(DI) - MOVQ R8,8(DI) - MOVQ R9,16(DI) - MOVQ AX,24(DI) - MOVQ R10,32(DI) - RET |