diff options
Diffstat (limited to 'vendor/golang.org')
-rw-r--r-- | vendor/golang.org/x/crypto/poly1305/LICENSE | 27 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/poly1305/const_amd64.s | 45 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/poly1305/poly1305.go | 32 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s | 497 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/poly1305/poly1305_arm.s | 379 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/poly1305/sum_amd64.go | 24 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/poly1305/sum_arm.go | 24 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/poly1305/sum_ref.go | 1531 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/salsa20/salsa/LICENSE | 27 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go | 144 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/salsa20/salsa/salsa2020_amd64.s | 902 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go | 199 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go | 23 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go | 234 |
14 files changed, 4088 insertions, 0 deletions
diff --git a/vendor/golang.org/x/crypto/poly1305/LICENSE b/vendor/golang.org/x/crypto/poly1305/LICENSE new file mode 100644 index 00000000..6a66aea5 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/crypto/poly1305/const_amd64.s b/vendor/golang.org/x/crypto/poly1305/const_amd64.s new file mode 100644 index 00000000..8e861f33 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/const_amd64.s @@ -0,0 +1,45 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +DATA ·SCALE(SB)/8, $0x37F4000000000000 +GLOBL ·SCALE(SB), 8, $8 +DATA ·TWO32(SB)/8, $0x41F0000000000000 +GLOBL ·TWO32(SB), 8, $8 +DATA ·TWO64(SB)/8, $0x43F0000000000000 +GLOBL ·TWO64(SB), 8, $8 +DATA ·TWO96(SB)/8, $0x45F0000000000000 +GLOBL ·TWO96(SB), 8, $8 +DATA ·ALPHA32(SB)/8, $0x45E8000000000000 +GLOBL ·ALPHA32(SB), 8, $8 +DATA ·ALPHA64(SB)/8, $0x47E8000000000000 +GLOBL ·ALPHA64(SB), 8, $8 +DATA ·ALPHA96(SB)/8, $0x49E8000000000000 +GLOBL ·ALPHA96(SB), 8, $8 +DATA ·ALPHA130(SB)/8, $0x4C08000000000000 +GLOBL ·ALPHA130(SB), 8, $8 +DATA ·DOFFSET0(SB)/8, $0x4330000000000000 +GLOBL ·DOFFSET0(SB), 8, $8 +DATA ·DOFFSET1(SB)/8, $0x4530000000000000 +GLOBL ·DOFFSET1(SB), 8, $8 +DATA ·DOFFSET2(SB)/8, $0x4730000000000000 +GLOBL ·DOFFSET2(SB), 8, $8 +DATA ·DOFFSET3(SB)/8, $0x4930000000000000 +GLOBL ·DOFFSET3(SB), 8, $8 +DATA ·DOFFSET3MINUSTWO128(SB)/8, $0x492FFFFE00000000 +GLOBL ·DOFFSET3MINUSTWO128(SB), 8, $8 +DATA ·HOFFSET0(SB)/8, $0x43300001FFFFFFFB +GLOBL ·HOFFSET0(SB), 8, $8 +DATA ·HOFFSET1(SB)/8, $0x45300001FFFFFFFE +GLOBL ·HOFFSET1(SB), 8, $8 +DATA ·HOFFSET2(SB)/8, $0x47300001FFFFFFFE +GLOBL ·HOFFSET2(SB), 8, $8 +DATA ·HOFFSET3(SB)/8, $0x49300003FFFFFFFE +GLOBL ·HOFFSET3(SB), 8, $8 +DATA ·ROUNDING(SB)/2, $0x137f +GLOBL ·ROUNDING(SB), 8, $2 diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305.go b/vendor/golang.org/x/crypto/poly1305/poly1305.go new file mode 100644 index 00000000..4a5f826f --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305.go @@ -0,0 +1,32 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package poly1305 implements Poly1305 one-time message authentication code as specified in http://cr.yp.to/mac/poly1305-20050329.pdf. + +Poly1305 is a fast, one-time authentication function. It is infeasible for an +attacker to generate an authenticator for a message without the key. However, a +key must only be used for a single message. Authenticating two different +messages with the same key allows an attacker to forge authenticators for other +messages with the same key. + +Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was +used with a fixed key in order to generate one-time keys from an nonce. +However, in this package AES isn't used and the one-time key is specified +directly. +*/ +package poly1305 // import "golang.org/x/crypto/poly1305" + +import "crypto/subtle" + +// TagSize is the size, in bytes, of a poly1305 authenticator. +const TagSize = 16 + +// Verify returns true if mac is a valid authenticator for m with the given +// key. +func Verify(mac *[16]byte, m []byte, key *[32]byte) bool { + var tmp [16]byte + Sum(&tmp, m, key) + return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1 +} diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s b/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s new file mode 100644 index 00000000..f8d4ee92 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305_amd64.s @@ -0,0 +1,497 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key) +TEXT ·poly1305(SB),0,$224-32 + MOVQ out+0(FP),DI + MOVQ m+8(FP),SI + MOVQ mlen+16(FP),DX + MOVQ key+24(FP),CX + + MOVQ SP,R11 + MOVQ $31,R9 + NOTQ R9 + ANDQ R9,SP + ADDQ $32,SP + + MOVQ R11,32(SP) + MOVQ R12,40(SP) + MOVQ R13,48(SP) + MOVQ R14,56(SP) + MOVQ R15,64(SP) + MOVQ BX,72(SP) + MOVQ BP,80(SP) + FLDCW ·ROUNDING(SB) + MOVL 0(CX),R8 + MOVL 4(CX),R9 + MOVL 8(CX),AX + MOVL 12(CX),R10 + MOVQ DI,88(SP) + MOVQ CX,96(SP) + MOVL $0X43300000,108(SP) + MOVL $0X45300000,116(SP) + MOVL $0X47300000,124(SP) + MOVL $0X49300000,132(SP) + ANDL $0X0FFFFFFF,R8 + ANDL $0X0FFFFFFC,R9 + ANDL $0X0FFFFFFC,AX + ANDL $0X0FFFFFFC,R10 + MOVL R8,104(SP) + MOVL R9,112(SP) + MOVL AX,120(SP) + MOVL R10,128(SP) + FMOVD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + FMOVD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FMOVD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FMOVD 128(SP), F0 + FSUBD ·DOFFSET3(SB), F0 + FXCHD F0, F3 + FMOVDP F0, 136(SP) + FXCHD F0, F1 + FMOVD F0, 144(SP) + FMULD ·SCALE(SB), F0 + FMOVDP F0, 152(SP) + FMOVD F0, 160(SP) + FMULD ·SCALE(SB), F0 + FMOVDP F0, 168(SP) + FMOVD F0, 176(SP) + FMULD ·SCALE(SB), F0 + FMOVDP F0, 184(SP) + FLDZ + FLDZ + FLDZ + FLDZ + CMPQ DX,$16 + JB ADDATMOST15BYTES + INITIALATLEAST16BYTES: + MOVL 12(SI),DI + MOVL 8(SI),CX + MOVL 4(SI),R8 + MOVL 0(SI),R9 + MOVL DI,128(SP) + MOVL CX,120(SP) + MOVL R8,112(SP) + MOVL R9,104(SP) + ADDQ $16,SI + SUBQ $16,DX + FXCHD F0, F3 + FADDD 128(SP), F0 + FSUBD ·DOFFSET3MINUSTWO128(SB), F0 + FXCHD F0, F1 + FADDD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FXCHD F0, F2 + FADDD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FXCHD F0, F3 + FADDD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + CMPQ DX,$16 + JB MULTIPLYADDATMOST15BYTES + MULTIPLYADDATLEAST16BYTES: + MOVL 12(SI),DI + MOVL 8(SI),CX + MOVL 4(SI),R8 + MOVL 0(SI),R9 + MOVL DI,128(SP) + MOVL CX,120(SP) + MOVL R8,112(SP) + MOVL R9,104(SP) + ADDQ $16,SI + SUBQ $16,DX + FMOVD ·ALPHA130(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F2 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FXCHD F0, F2 + FADDDP F0,F1 + FMOVD ·ALPHA64(SB), F0 + FADDD F4,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F4 + FMOVD ·ALPHA96(SB), F0 + FADDD F6,F0 + FSUBD ·ALPHA96(SB), F0 + FSUBD F0,F6 + FXCHD F0, F6 + FADDDP F0,F1 + FXCHD F0, F3 + FADDDP F0,F5 + FXCHD F0, F3 + FADDDP F0,F1 + FMOVD 176(SP), F0 + FMULD F3,F0 + FMOVD 160(SP), F0 + FMULD F4,F0 + FMOVD 144(SP), F0 + FMULD F5,F0 + FMOVD 136(SP), F0 + FMULDP F0,F6 + FMOVD 160(SP), F0 + FMULD F4,F0 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F4,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F4,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULDP F0,F4 + FXCHD F0, F3 + FADDDP F0,F5 + FMOVD 144(SP), F0 + FMULD F4,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F4,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F4,F0 + FADDDP F0,F3 + FMOVD 168(SP), F0 + FMULDP F0,F4 + FXCHD F0, F3 + FADDDP F0,F4 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FXCHD F0, F3 + FMOVD 184(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FXCHD F0, F1 + FMOVD 168(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 152(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F1 + CMPQ DX,$16 + FXCHD F0, F2 + FMOVD 128(SP), F0 + FSUBD ·DOFFSET3MINUSTWO128(SB), F0 + FADDDP F0,F1 + FXCHD F0, F1 + FMOVD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FADDDP F0,F1 + FXCHD F0, F3 + FMOVD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FADDDP F0,F1 + FXCHD F0, F2 + FMOVD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + FADDDP F0,F1 + JAE MULTIPLYADDATLEAST16BYTES + MULTIPLYADDATMOST15BYTES: + FMOVD ·ALPHA130(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F2 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FMOVD ·ALPHA64(SB), F0 + FADDD F5,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F5 + FMOVD ·ALPHA96(SB), F0 + FADDD F7,F0 + FSUBD ·ALPHA96(SB), F0 + FSUBD F0,F7 + FXCHD F0, F7 + FADDDP F0,F1 + FXCHD F0, F5 + FADDDP F0,F1 + FXCHD F0, F3 + FADDDP F0,F5 + FADDDP F0,F1 + FMOVD 176(SP), F0 + FMULD F1,F0 + FMOVD 160(SP), F0 + FMULD F2,F0 + FMOVD 144(SP), F0 + FMULD F3,F0 + FMOVD 136(SP), F0 + FMULDP F0,F4 + FMOVD 160(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F5,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F5,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F5,F0 + FADDDP F0,F4 + FMOVD 168(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F5,F0 + FADDDP F0,F4 + FMOVD 168(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FMOVD 152(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F1 + ADDATMOST15BYTES: + CMPQ DX,$0 + JE NOMOREBYTES + MOVL $0,0(SP) + MOVL $0, 4 (SP) + MOVL $0, 8 (SP) + MOVL $0, 12 (SP) + LEAQ 0(SP),DI + MOVQ DX,CX + REP; MOVSB + MOVB $1,0(DI) + MOVL 12 (SP),DI + MOVL 8 (SP),SI + MOVL 4 (SP),DX + MOVL 0(SP),CX + MOVL DI,128(SP) + MOVL SI,120(SP) + MOVL DX,112(SP) + MOVL CX,104(SP) + FXCHD F0, F3 + FADDD 128(SP), F0 + FSUBD ·DOFFSET3(SB), F0 + FXCHD F0, F2 + FADDD 120(SP), F0 + FSUBD ·DOFFSET2(SB), F0 + FXCHD F0, F1 + FADDD 112(SP), F0 + FSUBD ·DOFFSET1(SB), F0 + FXCHD F0, F3 + FADDD 104(SP), F0 + FSUBD ·DOFFSET0(SB), F0 + FMOVD ·ALPHA130(SB), F0 + FADDD F3,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F3 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FMOVD ·ALPHA64(SB), F0 + FADDD F6,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F6 + FMOVD ·ALPHA96(SB), F0 + FADDD F5,F0 + FSUBD ·ALPHA96(SB), F0 + FSUBD F0,F5 + FXCHD F0, F4 + FADDDP F0,F3 + FXCHD F0, F6 + FADDDP F0,F1 + FXCHD F0, F3 + FADDDP F0,F5 + FXCHD F0, F3 + FADDDP F0,F1 + FMOVD 176(SP), F0 + FMULD F3,F0 + FMOVD 160(SP), F0 + FMULD F4,F0 + FMOVD 144(SP), F0 + FMULD F5,F0 + FMOVD 136(SP), F0 + FMULDP F0,F6 + FMOVD 160(SP), F0 + FMULD F5,F0 + FADDDP F0,F3 + FMOVD 144(SP), F0 + FMULD F5,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F5,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULDP F0,F5 + FXCHD F0, F4 + FADDDP F0,F5 + FMOVD 144(SP), F0 + FMULD F6,F0 + FADDDP F0,F2 + FMOVD 136(SP), F0 + FMULD F6,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F6,F0 + FADDDP F0,F4 + FMOVD 168(SP), F0 + FMULDP F0,F6 + FXCHD F0, F5 + FADDDP F0,F4 + FMOVD 136(SP), F0 + FMULD F2,F0 + FADDDP F0,F1 + FMOVD 184(SP), F0 + FMULD F2,F0 + FADDDP F0,F5 + FMOVD 168(SP), F0 + FMULD F2,F0 + FADDDP F0,F3 + FMOVD 152(SP), F0 + FMULDP F0,F2 + FXCHD F0, F1 + FADDDP F0,F3 + FXCHD F0, F3 + FXCHD F0, F2 + NOMOREBYTES: + MOVL $0,R10 + FMOVD ·ALPHA130(SB), F0 + FADDD F4,F0 + FSUBD ·ALPHA130(SB), F0 + FSUBD F0,F4 + FMULD ·SCALE(SB), F0 + FMOVD ·ALPHA32(SB), F0 + FADDD F2,F0 + FSUBD ·ALPHA32(SB), F0 + FSUBD F0,F2 + FMOVD ·ALPHA64(SB), F0 + FADDD F4,F0 + FSUBD ·ALPHA64(SB), F0 + FSUBD F0,F4 + FMOVD ·ALPHA96(SB), F0 + FADDD F6,F0 + FSUBD ·ALPHA96(SB), F0 + FXCHD F0, F6 + FSUBD F6,F0 + FXCHD F0, F4 + FADDDP F0,F3 + FXCHD F0, F4 + FADDDP F0,F1 + FXCHD F0, F2 + FADDDP F0,F3 + FXCHD F0, F4 + FADDDP F0,F3 + FXCHD F0, F3 + FADDD ·HOFFSET0(SB), F0 + FXCHD F0, F3 + FADDD ·HOFFSET1(SB), F0 + FXCHD F0, F1 + FADDD ·HOFFSET2(SB), F0 + FXCHD F0, F2 + FADDD ·HOFFSET3(SB), F0 + FXCHD F0, F3 + FMOVDP F0, 104(SP) + FMOVDP F0, 112(SP) + FMOVDP F0, 120(SP) + FMOVDP F0, 128(SP) + MOVL 108(SP),DI + ANDL $63,DI + MOVL 116(SP),SI + ANDL $63,SI + MOVL 124(SP),DX + ANDL $63,DX + MOVL 132(SP),CX + ANDL $63,CX + MOVL 112(SP),R8 + ADDL DI,R8 + MOVQ R8,112(SP) + MOVL 120(SP),DI + ADCL SI,DI + MOVQ DI,120(SP) + MOVL 128(SP),DI + ADCL DX,DI + MOVQ DI,128(SP) + MOVL R10,DI + ADCL CX,DI + MOVQ DI,136(SP) + MOVQ $5,DI + MOVL 104(SP),SI + ADDL SI,DI + MOVQ DI,104(SP) + MOVL R10,DI + MOVQ 112(SP),DX + ADCL DX,DI + MOVQ DI,112(SP) + MOVL R10,DI + MOVQ 120(SP),CX + ADCL CX,DI + MOVQ DI,120(SP) + MOVL R10,DI + MOVQ 128(SP),R8 + ADCL R8,DI + MOVQ DI,128(SP) + MOVQ $0XFFFFFFFC,DI + MOVQ 136(SP),R9 + ADCL R9,DI + SARL $16,DI + MOVQ DI,R9 + XORL $0XFFFFFFFF,R9 + ANDQ DI,SI + MOVQ 104(SP),AX + ANDQ R9,AX + ORQ AX,SI + ANDQ DI,DX + MOVQ 112(SP),AX + ANDQ R9,AX + ORQ AX,DX + ANDQ DI,CX + MOVQ 120(SP),AX + ANDQ R9,AX + ORQ AX,CX + ANDQ DI,R8 + MOVQ 128(SP),DI + ANDQ R9,DI + ORQ DI,R8 + MOVQ 88(SP),DI + MOVQ 96(SP),R9 + ADDL 16(R9),SI + ADCL 20(R9),DX + ADCL 24(R9),CX + ADCL 28(R9),R8 + MOVL SI,0(DI) + MOVL DX,4(DI) + MOVL CX,8(DI) + MOVL R8,12(DI) + MOVQ 32(SP),R11 + MOVQ 40(SP),R12 + MOVQ 48(SP),R13 + MOVQ 56(SP),R14 + MOVQ 64(SP),R15 + MOVQ 72(SP),BX + MOVQ 80(SP),BP + MOVQ R11,SP + RET diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305_arm.s b/vendor/golang.org/x/crypto/poly1305/poly1305_arm.s new file mode 100644 index 00000000..c1538674 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305_arm.s @@ -0,0 +1,379 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 5a from the public +// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305. + +// +build arm,!gccgo,!appengine + +DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff +DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03 +DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff +DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff +DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff +GLOBL poly1305_init_constants_armv6<>(SB), 8, $20 + +// Warning: the linker may use R11 to synthesize certain instructions. Please +// take care and verify that no synthetic instructions use it. + +TEXT poly1305_init_ext_armv6<>(SB),4,$-4 + MOVM.DB.W [R4-R11], (R13) + MOVM.IA.W (R1), [R2-R5] + MOVW $poly1305_init_constants_armv6<>(SB), R7 + MOVW R2, R8 + MOVW R2>>26, R9 + MOVW R3>>20, g + MOVW R4>>14, R11 + MOVW R5>>8, R12 + ORR R3<<6, R9, R9 + ORR R4<<12, g, g + ORR R5<<18, R11, R11 + MOVM.IA (R7), [R2-R6] + AND R8, R2, R2 + AND R9, R3, R3 + AND g, R4, R4 + AND R11, R5, R5 + AND R12, R6, R6 + MOVM.IA.W [R2-R6], (R0) + EOR R2, R2, R2 + EOR R3, R3, R3 + EOR R4, R4, R4 + EOR R5, R5, R5 + EOR R6, R6, R6 + MOVM.IA.W [R2-R6], (R0) + MOVM.IA.W (R1), [R2-R5] + MOVM.IA [R2-R6], (R0) + MOVM.IA.W (R13), [R4-R11] + RET + +#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \ + MOVBU (offset+0)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+0)(Rdst); \ + MOVBU (offset+1)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+1)(Rdst); \ + MOVBU (offset+2)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+2)(Rdst); \ + MOVBU (offset+3)(Rsrc), Rtmp; \ + MOVBU Rtmp, (offset+3)(Rdst) + +TEXT poly1305_blocks_armv6<>(SB),4,$-4 + MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) + SUB $128, R13 + MOVW R0, 36(R13) + MOVW R1, 40(R13) + MOVW R2, 44(R13) + MOVW R1, R14 + MOVW R2, R12 + MOVW 56(R0), R8 + WORD $0xe1180008 // TST R8, R8 not working see issue 5921 + EOR R6, R6, R6 + MOVW.EQ $(1<<24), R6 + MOVW R6, 32(R13) + ADD $64, R13, g + MOVM.IA (R0), [R0-R9] + MOVM.IA [R0-R4], (g) + CMP $16, R12 + BLO poly1305_blocks_armv6_done +poly1305_blocks_armv6_mainloop: + WORD $0xe31e0003 // TST R14, #3 not working see issue 5921 + BEQ poly1305_blocks_armv6_mainloop_aligned + ADD $48, R13, g + MOVW_UNALIGNED(R14, g, R0, 0) + MOVW_UNALIGNED(R14, g, R0, 4) + MOVW_UNALIGNED(R14, g, R0, 8) + MOVW_UNALIGNED(R14, g, R0, 12) + MOVM.IA (g), [R0-R3] + ADD $16, R14 + B poly1305_blocks_armv6_mainloop_loaded +poly1305_blocks_armv6_mainloop_aligned: + MOVM.IA.W (R14), [R0-R3] +poly1305_blocks_armv6_mainloop_loaded: + MOVW R0>>26, g + MOVW R1>>20, R11 + MOVW R2>>14, R12 + MOVW R14, 40(R13) + MOVW R3>>8, R4 + ORR R1<<6, g, g + ORR R2<<12, R11, R11 + ORR R3<<18, R12, R12 + BIC $0xfc000000, R0, R0 + BIC $0xfc000000, g, g + MOVW 32(R13), R3 + BIC $0xfc000000, R11, R11 + BIC $0xfc000000, R12, R12 + ADD R0, R5, R5 + ADD g, R6, R6 + ORR R3, R4, R4 + ADD R11, R7, R7 + ADD $64, R13, R14 + ADD R12, R8, R8 + ADD R4, R9, R9 + MOVM.IA (R14), [R0-R4] + MULLU R4, R5, (R11, g) + MULLU R3, R5, (R14, R12) + MULALU R3, R6, (R11, g) + MULALU R2, R6, (R14, R12) + MULALU R2, R7, (R11, g) + MULALU R1, R7, (R14, R12) + ADD R4<<2, R4, R4 + ADD R3<<2, R3, R3 + MULALU R1, R8, (R11, g) + MULALU R0, R8, (R14, R12) + MULALU R0, R9, (R11, g) + MULALU R4, R9, (R14, R12) + MOVW g, 24(R13) + MOVW R11, 28(R13) + MOVW R12, 16(R13) + MOVW R14, 20(R13) + MULLU R2, R5, (R11, g) + MULLU R1, R5, (R14, R12) + MULALU R1, R6, (R11, g) + MULALU R0, R6, (R14, R12) + MULALU R0, R7, (R11, g) + MULALU R4, R7, (R14, R12) + ADD R2<<2, R2, R2 + ADD R1<<2, R1, R1 + MULALU R4, R8, (R11, g) + MULALU R3, R8, (R14, R12) + MULALU R3, R9, (R11, g) + MULALU R2, R9, (R14, R12) + MOVW g, 8(R13) + MOVW R11, 12(R13) + MOVW R12, 0(R13) + MOVW R14, w+4(SP) + MULLU R0, R5, (R11, g) + MULALU R4, R6, (R11, g) + MULALU R3, R7, (R11, g) + MULALU R2, R8, (R11, g) + MULALU R1, R9, (R11, g) + MOVM.IA (R13), [R0-R7] + MOVW g>>26, R12 + MOVW R4>>26, R14 + ORR R11<<6, R12, R12 + ORR R5<<6, R14, R14 + BIC $0xfc000000, g, g + BIC $0xfc000000, R4, R4 + ADD.S R12, R0, R0 + ADC $0, R1, R1 + ADD.S R14, R6, R6 + ADC $0, R7, R7 + MOVW R0>>26, R12 + MOVW R6>>26, R14 + ORR R1<<6, R12, R12 + ORR R7<<6, R14, R14 + BIC $0xfc000000, R0, R0 + BIC $0xfc000000, R6, R6 + ADD R14<<2, R14, R14 + ADD.S R12, R2, R2 + ADC $0, R3, R3 + ADD R14, g, g + MOVW R2>>26, R12 + MOVW g>>26, R14 + ORR R3<<6, R12, R12 + BIC $0xfc000000, g, R5 + BIC $0xfc000000, R2, R7 + ADD R12, R4, R4 + ADD R14, R0, R0 + MOVW R4>>26, R12 + BIC $0xfc000000, R4, R8 + ADD R12, R6, R9 + MOVW w+44(SP), R12 + MOVW w+40(SP), R14 + MOVW R0, R6 + CMP $32, R12 + SUB $16, R12, R12 + MOVW R12, 44(R13) + BHS poly1305_blocks_armv6_mainloop +poly1305_blocks_armv6_done: + MOVW 36(R13), R12 + MOVW R5, 20(R12) + MOVW R6, 24(R12) + MOVW R7, 28(R12) + MOVW R8, 32(R12) + MOVW R9, 36(R12) + ADD $128, R13, R13 + MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] + RET + +#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \ + MOVBU.P 1(Rsrc), Rtmp; \ + MOVBU.P Rtmp, 1(Rdst); \ + MOVBU.P 1(Rsrc), Rtmp; \ + MOVBU.P Rtmp, 1(Rdst) + +#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \ + MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \ + MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) + +TEXT poly1305_finish_ext_armv6<>(SB),4,$-4 + MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) + SUB $16, R13, R13 + MOVW R0, R5 + MOVW R1, R6 + MOVW R2, R7 + MOVW R3, R8 + AND.S R2, R2, R2 + BEQ poly1305_finish_ext_armv6_noremaining + EOR R0, R0 + MOVW R13, R9 + MOVW R0, 0(R13) + MOVW R0, 4(R13) + MOVW R0, 8(R13) + MOVW R0, 12(R13) + WORD $0xe3110003 // TST R1, #3 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_aligned + WORD $0xe3120008 // TST R2, #8 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip8 + MOVWP_UNALIGNED(R1, R9, g) + MOVWP_UNALIGNED(R1, R9, g) +poly1305_finish_ext_armv6_skip8: + WORD $0xe3120004 // TST $4, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip4 + MOVWP_UNALIGNED(R1, R9, g) +poly1305_finish_ext_armv6_skip4: + WORD $0xe3120002 // TST $2, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip2 + MOVHUP_UNALIGNED(R1, R9, g) + B poly1305_finish_ext_armv6_skip2 +poly1305_finish_ext_armv6_aligned: + WORD $0xe3120008 // TST R2, #8 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip8_aligned + MOVM.IA.W (R1), [g-R11] + MOVM.IA.W [g-R11], (R9) +poly1305_finish_ext_armv6_skip8_aligned: + WORD $0xe3120004 // TST $4, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip4_aligned + MOVW.P 4(R1), g + MOVW.P g, 4(R9) +poly1305_finish_ext_armv6_skip4_aligned: + WORD $0xe3120002 // TST $2, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip2 + MOVHU.P 2(R1), g + MOVH.P g, 2(R9) +poly1305_finish_ext_armv6_skip2: + WORD $0xe3120001 // TST $1, R2 not working see issue 5921 + BEQ poly1305_finish_ext_armv6_skip1 + MOVBU.P 1(R1), g + MOVBU.P g, 1(R9) +poly1305_finish_ext_armv6_skip1: + MOVW $1, R11 + MOVBU R11, 0(R9) + MOVW R11, 56(R5) + MOVW R5, R0 + MOVW R13, R1 + MOVW $16, R2 + BL poly1305_blocks_armv6<>(SB) +poly1305_finish_ext_armv6_noremaining: + MOVW 20(R5), R0 + MOVW 24(R5), R1 + MOVW 28(R5), R2 + MOVW 32(R5), R3 + MOVW 36(R5), R4 + MOVW R4>>26, R12 + BIC $0xfc000000, R4, R4 + ADD R12<<2, R12, R12 + ADD R12, R0, R0 + MOVW R0>>26, R12 + BIC $0xfc000000, R0, R0 + ADD R12, R1, R1 + MOVW R1>>26, R12 + BIC $0xfc000000, R1, R1 + ADD R12, R2, R2 + MOVW R2>>26, R12 + BIC $0xfc000000, R2, R2 + ADD R12, R3, R3 + MOVW R3>>26, R12 + BIC $0xfc000000, R3, R3 + ADD R12, R4, R4 + ADD $5, R0, R6 + MOVW R6>>26, R12 + BIC $0xfc000000, R6, R6 + ADD R12, R1, R7 + MOVW R7>>26, R12 + BIC $0xfc000000, R7, R7 + ADD R12, R2, g + MOVW g>>26, R12 + BIC $0xfc000000, g, g + ADD R12, R3, R11 + MOVW $-(1<<26), R12 + ADD R11>>26, R12, R12 + BIC $0xfc000000, R11, R11 + ADD R12, R4, R14 + MOVW R14>>31, R12 + SUB $1, R12 + AND R12, R6, R6 + AND R12, R7, R7 + AND R12, g, g + AND R12, R11, R11 + AND R12, R14, R14 + MVN R12, R12 + AND R12, R0, R0 + AND R12, R1, R1 + AND R12, R2, R2 + AND R12, R3, R3 + AND R12, R4, R4 + ORR R6, R0, R0 + ORR R7, R1, R1 + ORR g, R2, R2 + ORR R11, R3, R3 + ORR R14, R4, R4 + ORR R1<<26, R0, R0 + MOVW R1>>6, R1 + ORR R2<<20, R1, R1 + MOVW R2>>12, R2 + ORR R3<<14, R2, R2 + MOVW R3>>18, R3 + ORR R4<<8, R3, R3 + MOVW 40(R5), R6 + MOVW 44(R5), R7 + MOVW 48(R5), g + MOVW 52(R5), R11 + ADD.S R6, R0, R0 + ADC.S R7, R1, R1 + ADC.S g, R2, R2 + ADC.S R11, R3, R3 + MOVM.IA [R0-R3], (R8) + MOVW R5, R12 + EOR R0, R0, R0 + EOR R1, R1, R1 + EOR R2, R2, R2 + EOR R3, R3, R3 + EOR R4, R4, R4 + EOR R5, R5, R5 + EOR R6, R6, R6 + EOR R7, R7, R7 + MOVM.IA.W [R0-R7], (R12) + MOVM.IA [R0-R7], (R12) + ADD $16, R13, R13 + MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] + RET + +// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key) +TEXT ·poly1305_auth_armv6(SB),0,$280-16 + MOVW out+0(FP), R4 + MOVW m+4(FP), R5 + MOVW mlen+8(FP), R6 + MOVW key+12(FP), R7 + + MOVW R13, R8 + BIC $63, R13 + SUB $64, R13, R13 + MOVW R13, R0 + MOVW R7, R1 + BL poly1305_init_ext_armv6<>(SB) + BIC.S $15, R6, R2 + BEQ poly1305_auth_armv6_noblocks + MOVW R13, R0 + MOVW R5, R1 + ADD R2, R5, R5 + SUB R2, R6, R6 + BL poly1305_blocks_armv6<>(SB) +poly1305_auth_armv6_noblocks: + MOVW R13, R0 + MOVW R5, R1 + MOVW R6, R2 + MOVW R4, R3 + BL poly1305_finish_ext_armv6<>(SB) + MOVW R8, R13 + RET diff --git a/vendor/golang.org/x/crypto/poly1305/sum_amd64.go b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go new file mode 100644 index 00000000..6775c703 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go @@ -0,0 +1,24 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine + +package poly1305 + +// This function is implemented in poly1305_amd64.s + +//go:noescape + +func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + var mPtr *byte + if len(m) > 0 { + mPtr = &m[0] + } + poly1305(out, mPtr, uint64(len(m)), key) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_arm.go b/vendor/golang.org/x/crypto/poly1305/sum_arm.go new file mode 100644 index 00000000..50b979c2 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.go @@ -0,0 +1,24 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build arm,!gccgo,!appengine + +package poly1305 + +// This function is implemented in poly1305_arm.s + +//go:noescape + +func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + var mPtr *byte + if len(m) > 0 { + mPtr = &m[0] + } + poly1305_auth_armv6(out, mPtr, uint32(len(m)), key) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_ref.go b/vendor/golang.org/x/crypto/poly1305/sum_ref.go new file mode 100644 index 00000000..0b24fc78 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_ref.go @@ -0,0 +1,1531 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!arm gccgo appengine + +package poly1305 + +// Based on original, public domain implementation from NaCl by D. J. +// Bernstein. + +import "math" + +const ( + alpham80 = 0.00000000558793544769287109375 + alpham48 = 24.0 + alpham16 = 103079215104.0 + alpha0 = 6755399441055744.0 + alpha18 = 1770887431076116955136.0 + alpha32 = 29014219670751100192948224.0 + alpha50 = 7605903601369376408980219232256.0 + alpha64 = 124615124604835863084731911901282304.0 + alpha82 = 32667107224410092492483962313449748299776.0 + alpha96 = 535217884764734955396857238543560676143529984.0 + alpha112 = 35076039295941670036888435985190792471742381031424.0 + alpha130 = 9194973245195333150150082162901855101712434733101613056.0 + scale = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 + offset0 = 6755408030990331.0 + offset1 = 29014256564239239022116864.0 + offset2 = 124615283061160854719918951570079744.0 + offset3 = 535219245894202480694386063513315216128475136.0 +) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + r := key + s := key[16:] + var ( + y7 float64 + y6 float64 + y1 float64 + y0 float64 + y5 float64 + y4 float64 + x7 float64 + x6 float64 + x1 float64 + x0 float64 + y3 float64 + y2 float64 + x5 float64 + r3lowx0 float64 + x4 float64 + r0lowx6 float64 + x3 float64 + r3highx0 float64 + x2 float64 + r0highx6 float64 + r0lowx0 float64 + sr1lowx6 float64 + r0highx0 float64 + sr1highx6 float64 + sr3low float64 + r1lowx0 float64 + sr2lowx6 float64 + r1highx0 float64 + sr2highx6 float64 + r2lowx0 float64 + sr3lowx6 float64 + r2highx0 float64 + sr3highx6 float64 + r1highx4 float64 + r1lowx4 float64 + r0highx4 float64 + r0lowx4 float64 + sr3highx4 float64 + sr3lowx4 float64 + sr2highx4 float64 + sr2lowx4 float64 + r0lowx2 float64 + r0highx2 float64 + r1lowx2 float64 + r1highx2 float64 + r2lowx2 float64 + r2highx2 float64 + sr3lowx2 float64 + sr3highx2 float64 + z0 float64 + z1 float64 + z2 float64 + z3 float64 + m0 int64 + m1 int64 + m2 int64 + m3 int64 + m00 uint32 + m01 uint32 + m02 uint32 + m03 uint32 + m10 uint32 + m11 uint32 + m12 uint32 + m13 uint32 + m20 uint32 + m21 uint32 + m22 uint32 + m23 uint32 + m30 uint32 + m31 uint32 + m32 uint32 + m33 uint64 + lbelow2 int32 + lbelow3 int32 + lbelow4 int32 + lbelow5 int32 + lbelow6 int32 + lbelow7 int32 + lbelow8 int32 + lbelow9 int32 + lbelow10 int32 + lbelow11 int32 + lbelow12 int32 + lbelow13 int32 + lbelow14 int32 + lbelow15 int32 + s00 uint32 + s01 uint32 + s02 uint32 + s03 uint32 + s10 uint32 + s11 uint32 + s12 uint32 + s13 uint32 + s20 uint32 + s21 uint32 + s22 uint32 + s23 uint32 + s30 uint32 + s31 uint32 + s32 uint32 + s33 uint32 + bits32 uint64 + f uint64 + f0 uint64 + f1 uint64 + f2 uint64 + f3 uint64 + f4 uint64 + g uint64 + g0 uint64 + g1 uint64 + g2 uint64 + g3 uint64 + g4 uint64 + ) + + var p int32 + + l := int32(len(m)) + + r00 := uint32(r[0]) + + r01 := uint32(r[1]) + + r02 := uint32(r[2]) + r0 := int64(2151) + + r03 := uint32(r[3]) + r03 &= 15 + r0 <<= 51 + + r10 := uint32(r[4]) + r10 &= 252 + r01 <<= 8 + r0 += int64(r00) + + r11 := uint32(r[5]) + r02 <<= 16 + r0 += int64(r01) + + r12 := uint32(r[6]) + r03 <<= 24 + r0 += int64(r02) + + r13 := uint32(r[7]) + r13 &= 15 + r1 := int64(2215) + r0 += int64(r03) + + d0 := r0 + r1 <<= 51 + r2 := int64(2279) + + r20 := uint32(r[8]) + r20 &= 252 + r11 <<= 8 + r1 += int64(r10) + + r21 := uint32(r[9]) + r12 <<= 16 + r1 += int64(r11) + + r22 := uint32(r[10]) + r13 <<= 24 + r1 += int64(r12) + + r23 := uint32(r[11]) + r23 &= 15 + r2 <<= 51 + r1 += int64(r13) + + d1 := r1 + r21 <<= 8 + r2 += int64(r20) + + r30 := uint32(r[12]) + r30 &= 252 + r22 <<= 16 + r2 += int64(r21) + + r31 := uint32(r[13]) + r23 <<= 24 + r2 += int64(r22) + + r32 := uint32(r[14]) + r2 += int64(r23) + r3 := int64(2343) + + d2 := r2 + r3 <<= 51 + + r33 := uint32(r[15]) + r33 &= 15 + r31 <<= 8 + r3 += int64(r30) + + r32 <<= 16 + r3 += int64(r31) + + r33 <<= 24 + r3 += int64(r32) + + r3 += int64(r33) + h0 := alpha32 - alpha32 + + d3 := r3 + h1 := alpha32 - alpha32 + + h2 := alpha32 - alpha32 + + h3 := alpha32 - alpha32 + + h4 := alpha32 - alpha32 + + r0low := math.Float64frombits(uint64(d0)) + h5 := alpha32 - alpha32 + + r1low := math.Float64frombits(uint64(d1)) + h6 := alpha32 - alpha32 + + r2low := math.Float64frombits(uint64(d2)) + h7 := alpha32 - alpha32 + + r0low -= alpha0 + + r1low -= alpha32 + + r2low -= alpha64 + + r0high := r0low + alpha18 + + r3low := math.Float64frombits(uint64(d3)) + + r1high := r1low + alpha50 + sr1low := scale * r1low + + r2high := r2low + alpha82 + sr2low := scale * r2low + + r0high -= alpha18 + r0high_stack := r0high + + r3low -= alpha96 + + r1high -= alpha50 + r1high_stack := r1high + + sr1high := sr1low + alpham80 + + r0low -= r0high + + r2high -= alpha82 + sr3low = scale * r3low + + sr2high := sr2low + alpham48 + + r1low -= r1high + r1low_stack := r1low + + sr1high -= alpham80 + sr1high_stack := sr1high + + r2low -= r2high + r2low_stack := r2low + + sr2high -= alpham48 + sr2high_stack := sr2high + + r3high := r3low + alpha112 + r0low_stack := r0low + + sr1low -= sr1high + sr1low_stack := sr1low + + sr3high := sr3low + alpham16 + r2high_stack := r2high + + sr2low -= sr2high + sr2low_stack := sr2low + + r3high -= alpha112 + r3high_stack := r3high + + sr3high -= alpham16 + sr3high_stack := sr3high + + r3low -= r3high + r3low_stack := r3low + + sr3low -= sr3high + sr3low_stack := sr3low + + if l < 16 { + goto addatmost15bytes + } + + m00 = uint32(m[p+0]) + m0 = 2151 + + m0 <<= 51 + m1 = 2215 + m01 = uint32(m[p+1]) + + m1 <<= 51 + m2 = 2279 + m02 = uint32(m[p+2]) + + m2 <<= 51 + m3 = 2343 + m03 = uint32(m[p+3]) + + m10 = uint32(m[p+4]) + m01 <<= 8 + m0 += int64(m00) + + m11 = uint32(m[p+5]) + m02 <<= 16 + m0 += int64(m01) + + m12 = uint32(m[p+6]) + m03 <<= 24 + m0 += int64(m02) + + m13 = uint32(m[p+7]) + m3 <<= 51 + m0 += int64(m03) + + m20 = uint32(m[p+8]) + m11 <<= 8 + m1 += int64(m10) + + m21 = uint32(m[p+9]) + m12 <<= 16 + m1 += int64(m11) + + m22 = uint32(m[p+10]) + m13 <<= 24 + m1 += int64(m12) + + m23 = uint32(m[p+11]) + m1 += int64(m13) + + m30 = uint32(m[p+12]) + m21 <<= 8 + m2 += int64(m20) + + m31 = uint32(m[p+13]) + m22 <<= 16 + m2 += int64(m21) + + m32 = uint32(m[p+14]) + m23 <<= 24 + m2 += int64(m22) + + m33 = uint64(m[p+15]) + m2 += int64(m23) + + d0 = m0 + m31 <<= 8 + m3 += int64(m30) + + d1 = m1 + m32 <<= 16 + m3 += int64(m31) + + d2 = m2 + m33 += 256 + + m33 <<= 24 + m3 += int64(m32) + + m3 += int64(m33) + d3 = m3 + + p += 16 + l -= 16 + + z0 = math.Float64frombits(uint64(d0)) + + z1 = math.Float64frombits(uint64(d1)) + + z2 = math.Float64frombits(uint64(d2)) + + z3 = math.Float64frombits(uint64(d3)) + + z0 -= alpha0 + + z1 -= alpha32 + + z2 -= alpha64 + + z3 -= alpha96 + + h0 += z0 + + h1 += z1 + + h3 += z2 + + h5 += z3 + + if l < 16 { + goto multiplyaddatmost15bytes + } + +multiplyaddatleast16bytes: + + m2 = 2279 + m20 = uint32(m[p+8]) + y7 = h7 + alpha130 + + m2 <<= 51 + m3 = 2343 + m21 = uint32(m[p+9]) + y6 = h6 + alpha130 + + m3 <<= 51 + m0 = 2151 + m22 = uint32(m[p+10]) + y1 = h1 + alpha32 + + m0 <<= 51 + m1 = 2215 + m23 = uint32(m[p+11]) + y0 = h0 + alpha32 + + m1 <<= 51 + m30 = uint32(m[p+12]) + y7 -= alpha130 + + m21 <<= 8 + m2 += int64(m20) + m31 = uint32(m[p+13]) + y6 -= alpha130 + + m22 <<= 16 + m2 += int64(m21) + m32 = uint32(m[p+14]) + y1 -= alpha32 + + m23 <<= 24 + m2 += int64(m22) + m33 = uint64(m[p+15]) + y0 -= alpha32 + + m2 += int64(m23) + m00 = uint32(m[p+0]) + y5 = h5 + alpha96 + + m31 <<= 8 + m3 += int64(m30) + m01 = uint32(m[p+1]) + y4 = h4 + alpha96 + + m32 <<= 16 + m02 = uint32(m[p+2]) + x7 = h7 - y7 + y7 *= scale + + m33 += 256 + m03 = uint32(m[p+3]) + x6 = h6 - y6 + y6 *= scale + + m33 <<= 24 + m3 += int64(m31) + m10 = uint32(m[p+4]) + x1 = h1 - y1 + + m01 <<= 8 + m3 += int64(m32) + m11 = uint32(m[p+5]) + x0 = h0 - y0 + + m3 += int64(m33) + m0 += int64(m00) + m12 = uint32(m[p+6]) + y5 -= alpha96 + + m02 <<= 16 + m0 += int64(m01) + m13 = uint32(m[p+7]) + y4 -= alpha96 + + m03 <<= 24 + m0 += int64(m02) + d2 = m2 + x1 += y7 + + m0 += int64(m03) + d3 = m3 + x0 += y6 + + m11 <<= 8 + m1 += int64(m10) + d0 = m0 + x7 += y5 + + m12 <<= 16 + m1 += int64(m11) + x6 += y4 + + m13 <<= 24 + m1 += int64(m12) + y3 = h3 + alpha64 + + m1 += int64(m13) + d1 = m1 + y2 = h2 + alpha64 + + x0 += x1 + + x6 += x7 + + y3 -= alpha64 + r3low = r3low_stack + + y2 -= alpha64 + r0low = r0low_stack + + x5 = h5 - y5 + r3lowx0 = r3low * x0 + r3high = r3high_stack + + x4 = h4 - y4 + r0lowx6 = r0low * x6 + r0high = r0high_stack + + x3 = h3 - y3 + r3highx0 = r3high * x0 + sr1low = sr1low_stack + + x2 = h2 - y2 + r0highx6 = r0high * x6 + sr1high = sr1high_stack + + x5 += y3 + r0lowx0 = r0low * x0 + r1low = r1low_stack + + h6 = r3lowx0 + r0lowx6 + sr1lowx6 = sr1low * x6 + r1high = r1high_stack + + x4 += y2 + r0highx0 = r0high * x0 + sr2low = sr2low_stack + + h7 = r3highx0 + r0highx6 + sr1highx6 = sr1high * x6 + sr2high = sr2high_stack + + x3 += y1 + r1lowx0 = r1low * x0 + r2low = r2low_stack + + h0 = r0lowx0 + sr1lowx6 + sr2lowx6 = sr2low * x6 + r2high = r2high_stack + + x2 += y0 + r1highx0 = r1high * x0 + sr3low = sr3low_stack + + h1 = r0highx0 + sr1highx6 + sr2highx6 = sr2high * x6 + sr3high = sr3high_stack + + x4 += x5 + r2lowx0 = r2low * x0 + z2 = math.Float64frombits(uint64(d2)) + + h2 = r1lowx0 + sr2lowx6 + sr3lowx6 = sr3low * x6 + + x2 += x3 + r2highx0 = r2high * x0 + z3 = math.Float64frombits(uint64(d3)) + + h3 = r1highx0 + sr2highx6 + sr3highx6 = sr3high * x6 + + r1highx4 = r1high * x4 + z2 -= alpha64 + + h4 = r2lowx0 + sr3lowx6 + r1lowx4 = r1low * x4 + + r0highx4 = r0high * x4 + z3 -= alpha96 + + h5 = r2highx0 + sr3highx6 + r0lowx4 = r0low * x4 + + h7 += r1highx4 + sr3highx4 = sr3high * x4 + + h6 += r1lowx4 + sr3lowx4 = sr3low * x4 + + h5 += r0highx4 + sr2highx4 = sr2high * x4 + + h4 += r0lowx4 + sr2lowx4 = sr2low * x4 + + h3 += sr3highx4 + r0lowx2 = r0low * x2 + + h2 += sr3lowx4 + r0highx2 = r0high * x2 + + h1 += sr2highx4 + r1lowx2 = r1low * x2 + + h0 += sr2lowx4 + r1highx2 = r1high * x2 + + h2 += r0lowx2 + r2lowx2 = r2low * x2 + + h3 += r0highx2 + r2highx2 = r2high * x2 + + h4 += r1lowx2 + sr3lowx2 = sr3low * x2 + + h5 += r1highx2 + sr3highx2 = sr3high * x2 + + p += 16 + l -= 16 + h6 += r2lowx2 + + h7 += r2highx2 + + z1 = math.Float64frombits(uint64(d1)) + h0 += sr3lowx2 + + z0 = math.Float64frombits(uint64(d0)) + h1 += sr3highx2 + + z1 -= alpha32 + + z0 -= alpha0 + + h5 += z3 + + h3 += z2 + + h1 += z1 + + h0 += z0 + + if l >= 16 { + goto multiplyaddatleast16bytes + } + +multiplyaddatmost15bytes: + + y7 = h7 + alpha130 + + y6 = h6 + alpha130 + + y1 = h1 + alpha32 + + y0 = h0 + alpha32 + + y7 -= alpha130 + + y6 -= alpha130 + + y1 -= alpha32 + + y0 -= alpha32 + + y5 = h5 + alpha96 + + y4 = h4 + alpha96 + + x7 = h7 - y7 + y7 *= scale + + x6 = h6 - y6 + y6 *= scale + + x1 = h1 - y1 + + x0 = h0 - y0 + + y5 -= alpha96 + + y4 -= alpha96 + + x1 += y7 + + x0 += y6 + + x7 += y5 + + x6 += y4 + + y3 = h3 + alpha64 + + y2 = h2 + alpha64 + + x0 += x1 + + x6 += x7 + + y3 -= alpha64 + r3low = r3low_stack + + y2 -= alpha64 + r0low = r0low_stack + + x5 = h5 - y5 + r3lowx0 = r3low * x0 + r3high = r3high_stack + + x4 = h4 - y4 + r0lowx6 = r0low * x6 + r0high = r0high_stack + + x3 = h3 - y3 + r3highx0 = r3high * x0 + sr1low = sr1low_stack + + x2 = h2 - y2 + r0highx6 = r0high * x6 + sr1high = sr1high_stack + + x5 += y3 + r0lowx0 = r0low * x0 + r1low = r1low_stack + + h6 = r3lowx0 + r0lowx6 + sr1lowx6 = sr1low * x6 + r1high = r1high_stack + + x4 += y2 + r0highx0 = r0high * x0 + sr2low = sr2low_stack + + h7 = r3highx0 + r0highx6 + sr1highx6 = sr1high * x6 + sr2high = sr2high_stack + + x3 += y1 + r1lowx0 = r1low * x0 + r2low = r2low_stack + + h0 = r0lowx0 + sr1lowx6 + sr2lowx6 = sr2low * x6 + r2high = r2high_stack + + x2 += y0 + r1highx0 = r1high * x0 + sr3low = sr3low_stack + + h1 = r0highx0 + sr1highx6 + sr2highx6 = sr2high * x6 + sr3high = sr3high_stack + + x4 += x5 + r2lowx0 = r2low * x0 + + h2 = r1lowx0 + sr2lowx6 + sr3lowx6 = sr3low * x6 + + x2 += x3 + r2highx0 = r2high * x0 + + h3 = r1highx0 + sr2highx6 + sr3highx6 = sr3high * x6 + + r1highx4 = r1high * x4 + + h4 = r2lowx0 + sr3lowx6 + r1lowx4 = r1low * x4 + + r0highx4 = r0high * x4 + + h5 = r2highx0 + sr3highx6 + r0lowx4 = r0low * x4 + + h7 += r1highx4 + sr3highx4 = sr3high * x4 + + h6 += r1lowx4 + sr3lowx4 = sr3low * x4 + + h5 += r0highx4 + sr2highx4 = sr2high * x4 + + h4 += r0lowx4 + sr2lowx4 = sr2low * x4 + + h3 += sr3highx4 + r0lowx2 = r0low * x2 + + h2 += sr3lowx4 + r0highx2 = r0high * x2 + + h1 += sr2highx4 + r1lowx2 = r1low * x2 + + h0 += sr2lowx4 + r1highx2 = r1high * x2 + + h2 += r0lowx2 + r2lowx2 = r2low * x2 + + h3 += r0highx2 + r2highx2 = r2high * x2 + + h4 += r1lowx2 + sr3lowx2 = sr3low * x2 + + h5 += r1highx2 + sr3highx2 = sr3high * x2 + + h6 += r2lowx2 + + h7 += r2highx2 + + h0 += sr3lowx2 + + h1 += sr3highx2 + +addatmost15bytes: + + if l == 0 { + goto nomorebytes + } + + lbelow2 = l - 2 + + lbelow3 = l - 3 + + lbelow2 >>= 31 + lbelow4 = l - 4 + + m00 = uint32(m[p+0]) + lbelow3 >>= 31 + p += lbelow2 + + m01 = uint32(m[p+1]) + lbelow4 >>= 31 + p += lbelow3 + + m02 = uint32(m[p+2]) + p += lbelow4 + m0 = 2151 + + m03 = uint32(m[p+3]) + m0 <<= 51 + m1 = 2215 + + m0 += int64(m00) + m01 &^= uint32(lbelow2) + + m02 &^= uint32(lbelow3) + m01 -= uint32(lbelow2) + + m01 <<= 8 + m03 &^= uint32(lbelow4) + + m0 += int64(m01) + lbelow2 -= lbelow3 + + m02 += uint32(lbelow2) + lbelow3 -= lbelow4 + + m02 <<= 16 + m03 += uint32(lbelow3) + + m03 <<= 24 + m0 += int64(m02) + + m0 += int64(m03) + lbelow5 = l - 5 + + lbelow6 = l - 6 + lbelow7 = l - 7 + + lbelow5 >>= 31 + lbelow8 = l - 8 + + lbelow6 >>= 31 + p += lbelow5 + + m10 = uint32(m[p+4]) + lbelow7 >>= 31 + p += lbelow6 + + m11 = uint32(m[p+5]) + lbelow8 >>= 31 + p += lbelow7 + + m12 = uint32(m[p+6]) + m1 <<= 51 + p += lbelow8 + + m13 = uint32(m[p+7]) + m10 &^= uint32(lbelow5) + lbelow4 -= lbelow5 + + m10 += uint32(lbelow4) + lbelow5 -= lbelow6 + + m11 &^= uint32(lbelow6) + m11 += uint32(lbelow5) + + m11 <<= 8 + m1 += int64(m10) + + m1 += int64(m11) + m12 &^= uint32(lbelow7) + + lbelow6 -= lbelow7 + m13 &^= uint32(lbelow8) + + m12 += uint32(lbelow6) + lbelow7 -= lbelow8 + + m12 <<= 16 + m13 += uint32(lbelow7) + + m13 <<= 24 + m1 += int64(m12) + + m1 += int64(m13) + m2 = 2279 + + lbelow9 = l - 9 + m3 = 2343 + + lbelow10 = l - 10 + lbelow11 = l - 11 + + lbelow9 >>= 31 + lbelow12 = l - 12 + + lbelow10 >>= 31 + p += lbelow9 + + m20 = uint32(m[p+8]) + lbelow11 >>= 31 + p += lbelow10 + + m21 = uint32(m[p+9]) + lbelow12 >>= 31 + p += lbelow11 + + m22 = uint32(m[p+10]) + m2 <<= 51 + p += lbelow12 + + m23 = uint32(m[p+11]) + m20 &^= uint32(lbelow9) + lbelow8 -= lbelow9 + + m20 += uint32(lbelow8) + lbelow9 -= lbelow10 + + m21 &^= uint32(lbelow10) + m21 += uint32(lbelow9) + + m21 <<= 8 + m2 += int64(m20) + + m2 += int64(m21) + m22 &^= uint32(lbelow11) + + lbelow10 -= lbelow11 + m23 &^= uint32(lbelow12) + + m22 += uint32(lbelow10) + lbelow11 -= lbelow12 + + m22 <<= 16 + m23 += uint32(lbelow11) + + m23 <<= 24 + m2 += int64(m22) + + m3 <<= 51 + lbelow13 = l - 13 + + lbelow13 >>= 31 + lbelow14 = l - 14 + + lbelow14 >>= 31 + p += lbelow13 + lbelow15 = l - 15 + + m30 = uint32(m[p+12]) + lbelow15 >>= 31 + p += lbelow14 + + m31 = uint32(m[p+13]) + p += lbelow15 + m2 += int64(m23) + + m32 = uint32(m[p+14]) + m30 &^= uint32(lbelow13) + lbelow12 -= lbelow13 + + m30 += uint32(lbelow12) + lbelow13 -= lbelow14 + + m3 += int64(m30) + m31 &^= uint32(lbelow14) + + m31 += uint32(lbelow13) + m32 &^= uint32(lbelow15) + + m31 <<= 8 + lbelow14 -= lbelow15 + + m3 += int64(m31) + m32 += uint32(lbelow14) + d0 = m0 + + m32 <<= 16 + m33 = uint64(lbelow15 + 1) + d1 = m1 + + m33 <<= 24 + m3 += int64(m32) + d2 = m2 + + m3 += int64(m33) + d3 = m3 + + z3 = math.Float64frombits(uint64(d3)) + + z2 = math.Float64frombits(uint64(d2)) + + z1 = math.Float64frombits(uint64(d1)) + + z0 = math.Float64frombits(uint64(d0)) + + z3 -= alpha96 + + z2 -= alpha64 + + z1 -= alpha32 + + z0 -= alpha0 + + h5 += z3 + + h3 += z2 + + h1 += z1 + + h0 += z0 + + y7 = h7 + alpha130 + + y6 = h6 + alpha130 + + y1 = h1 + alpha32 + + y0 = h0 + alpha32 + + y7 -= alpha130 + + y6 -= alpha130 + + y1 -= alpha32 + + y0 -= alpha32 + + y5 = h5 + alpha96 + + y4 = h4 + alpha96 + + x7 = h7 - y7 + y7 *= scale + + x6 = h6 - y6 + y6 *= scale + + x1 = h1 - y1 + + x0 = h0 - y0 + + y5 -= alpha96 + + y4 -= alpha96 + + x1 += y7 + + x0 += y6 + + x7 += y5 + + x6 += y4 + + y3 = h3 + alpha64 + + y2 = h2 + alpha64 + + x0 += x1 + + x6 += x7 + + y3 -= alpha64 + r3low = r3low_stack + + y2 -= alpha64 + r0low = r0low_stack + + x5 = h5 - y5 + r3lowx0 = r3low * x0 + r3high = r3high_stack + + x4 = h4 - y4 + r0lowx6 = r0low * x6 + r0high = r0high_stack + + x3 = h3 - y3 + r3highx0 = r3high * x0 + sr1low = sr1low_stack + + x2 = h2 - y2 + r0highx6 = r0high * x6 + sr1high = sr1high_stack + + x5 += y3 + r0lowx0 = r0low * x0 + r1low = r1low_stack + + h6 = r3lowx0 + r0lowx6 + sr1lowx6 = sr1low * x6 + r1high = r1high_stack + + x4 += y2 + r0highx0 = r0high * x0 + sr2low = sr2low_stack + + h7 = r3highx0 + r0highx6 + sr1highx6 = sr1high * x6 + sr2high = sr2high_stack + + x3 += y1 + r1lowx0 = r1low * x0 + r2low = r2low_stack + + h0 = r0lowx0 + sr1lowx6 + sr2lowx6 = sr2low * x6 + r2high = r2high_stack + + x2 += y0 + r1highx0 = r1high * x0 + sr3low = sr3low_stack + + h1 = r0highx0 + sr1highx6 + sr2highx6 = sr2high * x6 + sr3high = sr3high_stack + + x4 += x5 + r2lowx0 = r2low * x0 + + h2 = r1lowx0 + sr2lowx6 + sr3lowx6 = sr3low * x6 + + x2 += x3 + r2highx0 = r2high * x0 + + h3 = r1highx0 + sr2highx6 + sr3highx6 = sr3high * x6 + + r1highx4 = r1high * x4 + + h4 = r2lowx0 + sr3lowx6 + r1lowx4 = r1low * x4 + + r0highx4 = r0high * x4 + + h5 = r2highx0 + sr3highx6 + r0lowx4 = r0low * x4 + + h7 += r1highx4 + sr3highx4 = sr3high * x4 + + h6 += r1lowx4 + sr3lowx4 = sr3low * x4 + + h5 += r0highx4 + sr2highx4 = sr2high * x4 + + h4 += r0lowx4 + sr2lowx4 = sr2low * x4 + + h3 += sr3highx4 + r0lowx2 = r0low * x2 + + h2 += sr3lowx4 + r0highx2 = r0high * x2 + + h1 += sr2highx4 + r1lowx2 = r1low * x2 + + h0 += sr2lowx4 + r1highx2 = r1high * x2 + + h2 += r0lowx2 + r2lowx2 = r2low * x2 + + h3 += r0highx2 + r2highx2 = r2high * x2 + + h4 += r1lowx2 + sr3lowx2 = sr3low * x2 + + h5 += r1highx2 + sr3highx2 = sr3high * x2 + + h6 += r2lowx2 + + h7 += r2highx2 + + h0 += sr3lowx2 + + h1 += sr3highx2 + +nomorebytes: + + y7 = h7 + alpha130 + + y0 = h0 + alpha32 + + y1 = h1 + alpha32 + + y2 = h2 + alpha64 + + y7 -= alpha130 + + y3 = h3 + alpha64 + + y4 = h4 + alpha96 + + y5 = h5 + alpha96 + + x7 = h7 - y7 + y7 *= scale + + y0 -= alpha32 + + y1 -= alpha32 + + y2 -= alpha64 + + h6 += x7 + + y3 -= alpha64 + + y4 -= alpha96 + + y5 -= alpha96 + + y6 = h6 + alpha130 + + x0 = h0 - y0 + + x1 = h1 - y1 + + x2 = h2 - y2 + + y6 -= alpha130 + + x0 += y7 + + x3 = h3 - y3 + + x4 = h4 - y4 + + x5 = h5 - y5 + + x6 = h6 - y6 + + y6 *= scale + + x2 += y0 + + x3 += y1 + + x4 += y2 + + x0 += y6 + + x5 += y3 + + x6 += y4 + + x2 += x3 + + x0 += x1 + + x4 += x5 + + x6 += y5 + + x2 += offset1 + d1 = int64(math.Float64bits(x2)) + + x0 += offset0 + d0 = int64(math.Float64bits(x0)) + + x4 += offset2 + d2 = int64(math.Float64bits(x4)) + + x6 += offset3 + d3 = int64(math.Float64bits(x6)) + + f0 = uint64(d0) + + f1 = uint64(d1) + bits32 = math.MaxUint64 + + f2 = uint64(d2) + bits32 >>= 32 + + f3 = uint64(d3) + f = f0 >> 32 + + f0 &= bits32 + f &= 255 + + f1 += f + g0 = f0 + 5 + + g = g0 >> 32 + g0 &= bits32 + + f = f1 >> 32 + f1 &= bits32 + + f &= 255 + g1 = f1 + g + + g = g1 >> 32 + f2 += f + + f = f2 >> 32 + g1 &= bits32 + + f2 &= bits32 + f &= 255 + + f3 += f + g2 = f2 + g + + g = g2 >> 32 + g2 &= bits32 + + f4 = f3 >> 32 + f3 &= bits32 + + f4 &= 255 + g3 = f3 + g + + g = g3 >> 32 + g3 &= bits32 + + g4 = f4 + g + + g4 = g4 - 4 + s00 = uint32(s[0]) + + f = uint64(int64(g4) >> 63) + s01 = uint32(s[1]) + + f0 &= f + g0 &^= f + s02 = uint32(s[2]) + + f1 &= f + f0 |= g0 + s03 = uint32(s[3]) + + g1 &^= f + f2 &= f + s10 = uint32(s[4]) + + f3 &= f + g2 &^= f + s11 = uint32(s[5]) + + g3 &^= f + f1 |= g1 + s12 = uint32(s[6]) + + f2 |= g2 + f3 |= g3 + s13 = uint32(s[7]) + + s01 <<= 8 + f0 += uint64(s00) + s20 = uint32(s[8]) + + s02 <<= 16 + f0 += uint64(s01) + s21 = uint32(s[9]) + + s03 <<= 24 + f0 += uint64(s02) + s22 = uint32(s[10]) + + s11 <<= 8 + f1 += uint64(s10) + s23 = uint32(s[11]) + + s12 <<= 16 + f1 += uint64(s11) + s30 = uint32(s[12]) + + s13 <<= 24 + f1 += uint64(s12) + s31 = uint32(s[13]) + + f0 += uint64(s03) + f1 += uint64(s13) + s32 = uint32(s[14]) + + s21 <<= 8 + f2 += uint64(s20) + s33 = uint32(s[15]) + + s22 <<= 16 + f2 += uint64(s21) + + s23 <<= 24 + f2 += uint64(s22) + + s31 <<= 8 + f3 += uint64(s30) + + s32 <<= 16 + f3 += uint64(s31) + + s33 <<= 24 + f3 += uint64(s32) + + f2 += uint64(s23) + f3 += uint64(s33) + + out[0] = byte(f0) + f0 >>= 8 + out[1] = byte(f0) + f0 >>= 8 + out[2] = byte(f0) + f0 >>= 8 + out[3] = byte(f0) + f0 >>= 8 + f1 += f0 + + out[4] = byte(f1) + f1 >>= 8 + out[5] = byte(f1) + f1 >>= 8 + out[6] = byte(f1) + f1 >>= 8 + out[7] = byte(f1) + f1 >>= 8 + f2 += f1 + + out[8] = byte(f2) + f2 >>= 8 + out[9] = byte(f2) + f2 >>= 8 + out[10] = byte(f2) + f2 >>= 8 + out[11] = byte(f2) + f2 >>= 8 + f3 += f2 + + out[12] = byte(f3) + f3 >>= 8 + out[13] = byte(f3) + f3 >>= 8 + out[14] = byte(f3) + f3 >>= 8 + out[15] = byte(f3) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/LICENSE b/vendor/golang.org/x/crypto/salsa20/salsa/LICENSE new file mode 100644 index 00000000..6a66aea5 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go new file mode 100644 index 00000000..4c96147c --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go @@ -0,0 +1,144 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package salsa provides low-level access to functions in the Salsa family. +package salsa // import "golang.org/x/crypto/salsa20/salsa" + +// Sigma is the Salsa20 constant for 256-bit keys. +var Sigma = [16]byte{'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'} + +// HSalsa20 applies the HSalsa20 core function to a 16-byte input in, 32-byte +// key k, and 16-byte constant c, and puts the result into the 32-byte array +// out. +func HSalsa20(out *[32]byte, in *[16]byte, k *[32]byte, c *[16]byte) { + x0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24 + x1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24 + x2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24 + x3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24 + x4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24 + x5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24 + x6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + x7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 + x8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 + x9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 + x10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24 + x11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24 + x12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24 + x13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24 + x14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24 + x15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24 + + for i := 0; i < 20; i += 2 { + u := x0 + x12 + x4 ^= u<<7 | u>>(32-7) + u = x4 + x0 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x4 + x12 ^= u<<13 | u>>(32-13) + u = x12 + x8 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x1 + x9 ^= u<<7 | u>>(32-7) + u = x9 + x5 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x9 + x1 ^= u<<13 | u>>(32-13) + u = x1 + x13 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x6 + x14 ^= u<<7 | u>>(32-7) + u = x14 + x10 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x14 + x6 ^= u<<13 | u>>(32-13) + u = x6 + x2 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x11 + x3 ^= u<<7 | u>>(32-7) + u = x3 + x15 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x3 + x11 ^= u<<13 | u>>(32-13) + u = x11 + x7 + x15 ^= u<<18 | u>>(32-18) + + u = x0 + x3 + x1 ^= u<<7 | u>>(32-7) + u = x1 + x0 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x1 + x3 ^= u<<13 | u>>(32-13) + u = x3 + x2 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x4 + x6 ^= u<<7 | u>>(32-7) + u = x6 + x5 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x6 + x4 ^= u<<13 | u>>(32-13) + u = x4 + x7 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x9 + x11 ^= u<<7 | u>>(32-7) + u = x11 + x10 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x11 + x9 ^= u<<13 | u>>(32-13) + u = x9 + x8 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x14 + x12 ^= u<<7 | u>>(32-7) + u = x12 + x15 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x12 + x14 ^= u<<13 | u>>(32-13) + u = x14 + x13 + x15 ^= u<<18 | u>>(32-18) + } + out[0] = byte(x0) + out[1] = byte(x0 >> 8) + out[2] = byte(x0 >> 16) + out[3] = byte(x0 >> 24) + + out[4] = byte(x5) + out[5] = byte(x5 >> 8) + out[6] = byte(x5 >> 16) + out[7] = byte(x5 >> 24) + + out[8] = byte(x10) + out[9] = byte(x10 >> 8) + out[10] = byte(x10 >> 16) + out[11] = byte(x10 >> 24) + + out[12] = byte(x15) + out[13] = byte(x15 >> 8) + out[14] = byte(x15 >> 16) + out[15] = byte(x15 >> 24) + + out[16] = byte(x6) + out[17] = byte(x6 >> 8) + out[18] = byte(x6 >> 16) + out[19] = byte(x6 >> 24) + + out[20] = byte(x7) + out[21] = byte(x7 >> 8) + out[22] = byte(x7 >> 16) + out[23] = byte(x7 >> 24) + + out[24] = byte(x8) + out[25] = byte(x8 >> 8) + out[26] = byte(x8 >> 16) + out[27] = byte(x8 >> 24) + + out[28] = byte(x9) + out[29] = byte(x9 >> 8) + out[30] = byte(x9 >> 16) + out[31] = byte(x9 >> 24) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa2020_amd64.s b/vendor/golang.org/x/crypto/salsa20/salsa/salsa2020_amd64.s new file mode 100644 index 00000000..6e1df963 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa2020_amd64.s @@ -0,0 +1,902 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!appengine,!gccgo + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte) +TEXT ·salsa2020XORKeyStream(SB),0,$512-40 + MOVQ out+0(FP),DI + MOVQ in+8(FP),SI + MOVQ n+16(FP),DX + MOVQ nonce+24(FP),CX + MOVQ key+32(FP),R8 + + MOVQ SP,R11 + MOVQ $31,R9 + NOTQ R9 + ANDQ R9,SP + ADDQ $32,SP + + MOVQ R11,352(SP) + MOVQ R12,360(SP) + MOVQ R13,368(SP) + MOVQ R14,376(SP) + MOVQ R15,384(SP) + MOVQ BX,392(SP) + MOVQ BP,400(SP) + MOVQ DX,R9 + MOVQ CX,DX + MOVQ R8,R10 + CMPQ R9,$0 + JBE DONE + START: + MOVL 20(R10),CX + MOVL 0(R10),R8 + MOVL 0(DX),AX + MOVL 16(R10),R11 + MOVL CX,0(SP) + MOVL R8, 4 (SP) + MOVL AX, 8 (SP) + MOVL R11, 12 (SP) + MOVL 8(DX),CX + MOVL 24(R10),R8 + MOVL 4(R10),AX + MOVL 4(DX),R11 + MOVL CX,16(SP) + MOVL R8, 20 (SP) + MOVL AX, 24 (SP) + MOVL R11, 28 (SP) + MOVL 12(DX),CX + MOVL 12(R10),DX + MOVL 28(R10),R8 + MOVL 8(R10),AX + MOVL DX,32(SP) + MOVL CX, 36 (SP) + MOVL R8, 40 (SP) + MOVL AX, 44 (SP) + MOVQ $1634760805,DX + MOVQ $857760878,CX + MOVQ $2036477234,R8 + MOVQ $1797285236,AX + MOVL DX,48(SP) + MOVL CX, 52 (SP) + MOVL R8, 56 (SP) + MOVL AX, 60 (SP) + CMPQ R9,$256 + JB BYTESBETWEEN1AND255 + MOVOA 48(SP),X0 + PSHUFL $0X55,X0,X1 + PSHUFL $0XAA,X0,X2 + PSHUFL $0XFF,X0,X3 + PSHUFL $0X00,X0,X0 + MOVOA X1,64(SP) + MOVOA X2,80(SP) + MOVOA X3,96(SP) + MOVOA X0,112(SP) + MOVOA 0(SP),X0 + PSHUFL $0XAA,X0,X1 + PSHUFL $0XFF,X0,X2 + PSHUFL $0X00,X0,X3 + PSHUFL $0X55,X0,X0 + MOVOA X1,128(SP) + MOVOA X2,144(SP) + MOVOA X3,160(SP) + MOVOA X0,176(SP) + MOVOA 16(SP),X0 + PSHUFL $0XFF,X0,X1 + PSHUFL $0X55,X0,X2 + PSHUFL $0XAA,X0,X0 + MOVOA X1,192(SP) + MOVOA X2,208(SP) + MOVOA X0,224(SP) + MOVOA 32(SP),X0 + PSHUFL $0X00,X0,X1 + PSHUFL $0XAA,X0,X2 + PSHUFL $0XFF,X0,X0 + MOVOA X1,240(SP) + MOVOA X2,256(SP) + MOVOA X0,272(SP) + BYTESATLEAST256: + MOVL 16(SP),DX + MOVL 36 (SP),CX + MOVL DX,288(SP) + MOVL CX,304(SP) + ADDQ $1,DX + SHLQ $32,CX + ADDQ CX,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX, 292 (SP) + MOVL CX, 308 (SP) + ADDQ $1,DX + SHLQ $32,CX + ADDQ CX,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX, 296 (SP) + MOVL CX, 312 (SP) + ADDQ $1,DX + SHLQ $32,CX + ADDQ CX,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX, 300 (SP) + MOVL CX, 316 (SP) + ADDQ $1,DX + SHLQ $32,CX + ADDQ CX,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX,16(SP) + MOVL CX, 36 (SP) + MOVQ R9,408(SP) + MOVQ $20,DX + MOVOA 64(SP),X0 + MOVOA 80(SP),X1 + MOVOA 96(SP),X2 + MOVOA 256(SP),X3 + MOVOA 272(SP),X4 + MOVOA 128(SP),X5 + MOVOA 144(SP),X6 + MOVOA 176(SP),X7 + MOVOA 192(SP),X8 + MOVOA 208(SP),X9 + MOVOA 224(SP),X10 + MOVOA 304(SP),X11 + MOVOA 112(SP),X12 + MOVOA 160(SP),X13 + MOVOA 240(SP),X14 + MOVOA 288(SP),X15 + MAINLOOP1: + MOVOA X1,320(SP) + MOVOA X2,336(SP) + MOVOA X13,X1 + PADDL X12,X1 + MOVOA X1,X2 + PSLLL $7,X1 + PXOR X1,X14 + PSRLL $25,X2 + PXOR X2,X14 + MOVOA X7,X1 + PADDL X0,X1 + MOVOA X1,X2 + PSLLL $7,X1 + PXOR X1,X11 + PSRLL $25,X2 + PXOR X2,X11 + MOVOA X12,X1 + PADDL X14,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X15 + PSRLL $23,X2 + PXOR X2,X15 + MOVOA X0,X1 + PADDL X11,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X9 + PSRLL $23,X2 + PXOR X2,X9 + MOVOA X14,X1 + PADDL X15,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X13 + PSRLL $19,X2 + PXOR X2,X13 + MOVOA X11,X1 + PADDL X9,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X7 + PSRLL $19,X2 + PXOR X2,X7 + MOVOA X15,X1 + PADDL X13,X1 + MOVOA X1,X2 + PSLLL $18,X1 + PXOR X1,X12 + PSRLL $14,X2 + PXOR X2,X12 + MOVOA 320(SP),X1 + MOVOA X12,320(SP) + MOVOA X9,X2 + PADDL X7,X2 + MOVOA X2,X12 + PSLLL $18,X2 + PXOR X2,X0 + PSRLL $14,X12 + PXOR X12,X0 + MOVOA X5,X2 + PADDL X1,X2 + MOVOA X2,X12 + PSLLL $7,X2 + PXOR X2,X3 + PSRLL $25,X12 + PXOR X12,X3 + MOVOA 336(SP),X2 + MOVOA X0,336(SP) + MOVOA X6,X0 + PADDL X2,X0 + MOVOA X0,X12 + PSLLL $7,X0 + PXOR X0,X4 + PSRLL $25,X12 + PXOR X12,X4 + MOVOA X1,X0 + PADDL X3,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X10 + PSRLL $23,X12 + PXOR X12,X10 + MOVOA X2,X0 + PADDL X4,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X8 + PSRLL $23,X12 + PXOR X12,X8 + MOVOA X3,X0 + PADDL X10,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X5 + PSRLL $19,X12 + PXOR X12,X5 + MOVOA X4,X0 + PADDL X8,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X6 + PSRLL $19,X12 + PXOR X12,X6 + MOVOA X10,X0 + PADDL X5,X0 + MOVOA X0,X12 + PSLLL $18,X0 + PXOR X0,X1 + PSRLL $14,X12 + PXOR X12,X1 + MOVOA 320(SP),X0 + MOVOA X1,320(SP) + MOVOA X4,X1 + PADDL X0,X1 + MOVOA X1,X12 + PSLLL $7,X1 + PXOR X1,X7 + PSRLL $25,X12 + PXOR X12,X7 + MOVOA X8,X1 + PADDL X6,X1 + MOVOA X1,X12 + PSLLL $18,X1 + PXOR X1,X2 + PSRLL $14,X12 + PXOR X12,X2 + MOVOA 336(SP),X12 + MOVOA X2,336(SP) + MOVOA X14,X1 + PADDL X12,X1 + MOVOA X1,X2 + PSLLL $7,X1 + PXOR X1,X5 + PSRLL $25,X2 + PXOR X2,X5 + MOVOA X0,X1 + PADDL X7,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X10 + PSRLL $23,X2 + PXOR X2,X10 + MOVOA X12,X1 + PADDL X5,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X8 + PSRLL $23,X2 + PXOR X2,X8 + MOVOA X7,X1 + PADDL X10,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X4 + PSRLL $19,X2 + PXOR X2,X4 + MOVOA X5,X1 + PADDL X8,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X14 + PSRLL $19,X2 + PXOR X2,X14 + MOVOA X10,X1 + PADDL X4,X1 + MOVOA X1,X2 + PSLLL $18,X1 + PXOR X1,X0 + PSRLL $14,X2 + PXOR X2,X0 + MOVOA 320(SP),X1 + MOVOA X0,320(SP) + MOVOA X8,X0 + PADDL X14,X0 + MOVOA X0,X2 + PSLLL $18,X0 + PXOR X0,X12 + PSRLL $14,X2 + PXOR X2,X12 + MOVOA X11,X0 + PADDL X1,X0 + MOVOA X0,X2 + PSLLL $7,X0 + PXOR X0,X6 + PSRLL $25,X2 + PXOR X2,X6 + MOVOA 336(SP),X2 + MOVOA X12,336(SP) + MOVOA X3,X0 + PADDL X2,X0 + MOVOA X0,X12 + PSLLL $7,X0 + PXOR X0,X13 + PSRLL $25,X12 + PXOR X12,X13 + MOVOA X1,X0 + PADDL X6,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X15 + PSRLL $23,X12 + PXOR X12,X15 + MOVOA X2,X0 + PADDL X13,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X9 + PSRLL $23,X12 + PXOR X12,X9 + MOVOA X6,X0 + PADDL X15,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X11 + PSRLL $19,X12 + PXOR X12,X11 + MOVOA X13,X0 + PADDL X9,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X3 + PSRLL $19,X12 + PXOR X12,X3 + MOVOA X15,X0 + PADDL X11,X0 + MOVOA X0,X12 + PSLLL $18,X0 + PXOR X0,X1 + PSRLL $14,X12 + PXOR X12,X1 + MOVOA X9,X0 + PADDL X3,X0 + MOVOA X0,X12 + PSLLL $18,X0 + PXOR X0,X2 + PSRLL $14,X12 + PXOR X12,X2 + MOVOA 320(SP),X12 + MOVOA 336(SP),X0 + SUBQ $2,DX + JA MAINLOOP1 + PADDL 112(SP),X12 + PADDL 176(SP),X7 + PADDL 224(SP),X10 + PADDL 272(SP),X4 + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + PSHUFL $0X39,X12,X12 + PSHUFL $0X39,X7,X7 + PSHUFL $0X39,X10,X10 + PSHUFL $0X39,X4,X4 + XORL 0(SI),DX + XORL 4(SI),CX + XORL 8(SI),R8 + XORL 12(SI),R9 + MOVL DX,0(DI) + MOVL CX,4(DI) + MOVL R8,8(DI) + MOVL R9,12(DI) + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + PSHUFL $0X39,X12,X12 + PSHUFL $0X39,X7,X7 + PSHUFL $0X39,X10,X10 + PSHUFL $0X39,X4,X4 + XORL 64(SI),DX + XORL 68(SI),CX + XORL 72(SI),R8 + XORL 76(SI),R9 + MOVL DX,64(DI) + MOVL CX,68(DI) + MOVL R8,72(DI) + MOVL R9,76(DI) + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + PSHUFL $0X39,X12,X12 + PSHUFL $0X39,X7,X7 + PSHUFL $0X39,X10,X10 + PSHUFL $0X39,X4,X4 + XORL 128(SI),DX + XORL 132(SI),CX + XORL 136(SI),R8 + XORL 140(SI),R9 + MOVL DX,128(DI) + MOVL CX,132(DI) + MOVL R8,136(DI) + MOVL R9,140(DI) + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + XORL 192(SI),DX + XORL 196(SI),CX + XORL 200(SI),R8 + XORL 204(SI),R9 + MOVL DX,192(DI) + MOVL CX,196(DI) + MOVL R8,200(DI) + MOVL R9,204(DI) + PADDL 240(SP),X14 + PADDL 64(SP),X0 + PADDL 128(SP),X5 + PADDL 192(SP),X8 + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + PSHUFL $0X39,X14,X14 + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X5,X5 + PSHUFL $0X39,X8,X8 + XORL 16(SI),DX + XORL 20(SI),CX + XORL 24(SI),R8 + XORL 28(SI),R9 + MOVL DX,16(DI) + MOVL CX,20(DI) + MOVL R8,24(DI) + MOVL R9,28(DI) + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + PSHUFL $0X39,X14,X14 + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X5,X5 + PSHUFL $0X39,X8,X8 + XORL 80(SI),DX + XORL 84(SI),CX + XORL 88(SI),R8 + XORL 92(SI),R9 + MOVL DX,80(DI) + MOVL CX,84(DI) + MOVL R8,88(DI) + MOVL R9,92(DI) + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + PSHUFL $0X39,X14,X14 + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X5,X5 + PSHUFL $0X39,X8,X8 + XORL 144(SI),DX + XORL 148(SI),CX + XORL 152(SI),R8 + XORL 156(SI),R9 + MOVL DX,144(DI) + MOVL CX,148(DI) + MOVL R8,152(DI) + MOVL R9,156(DI) + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + XORL 208(SI),DX + XORL 212(SI),CX + XORL 216(SI),R8 + XORL 220(SI),R9 + MOVL DX,208(DI) + MOVL CX,212(DI) + MOVL R8,216(DI) + MOVL R9,220(DI) + PADDL 288(SP),X15 + PADDL 304(SP),X11 + PADDL 80(SP),X1 + PADDL 144(SP),X6 + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + PSHUFL $0X39,X15,X15 + PSHUFL $0X39,X11,X11 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X6,X6 + XORL 32(SI),DX + XORL 36(SI),CX + XORL 40(SI),R8 + XORL 44(SI),R9 + MOVL DX,32(DI) + MOVL CX,36(DI) + MOVL R8,40(DI) + MOVL R9,44(DI) + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + PSHUFL $0X39,X15,X15 + PSHUFL $0X39,X11,X11 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X6,X6 + XORL 96(SI),DX + XORL 100(SI),CX + XORL 104(SI),R8 + XORL 108(SI),R9 + MOVL DX,96(DI) + MOVL CX,100(DI) + MOVL R8,104(DI) + MOVL R9,108(DI) + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + PSHUFL $0X39,X15,X15 + PSHUFL $0X39,X11,X11 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X6,X6 + XORL 160(SI),DX + XORL 164(SI),CX + XORL 168(SI),R8 + XORL 172(SI),R9 + MOVL DX,160(DI) + MOVL CX,164(DI) + MOVL R8,168(DI) + MOVL R9,172(DI) + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + XORL 224(SI),DX + XORL 228(SI),CX + XORL 232(SI),R8 + XORL 236(SI),R9 + MOVL DX,224(DI) + MOVL CX,228(DI) + MOVL R8,232(DI) + MOVL R9,236(DI) + PADDL 160(SP),X13 + PADDL 208(SP),X9 + PADDL 256(SP),X3 + PADDL 96(SP),X2 + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + PSHUFL $0X39,X13,X13 + PSHUFL $0X39,X9,X9 + PSHUFL $0X39,X3,X3 + PSHUFL $0X39,X2,X2 + XORL 48(SI),DX + XORL 52(SI),CX + XORL 56(SI),R8 + XORL 60(SI),R9 + MOVL DX,48(DI) + MOVL CX,52(DI) + MOVL R8,56(DI) + MOVL R9,60(DI) + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + PSHUFL $0X39,X13,X13 + PSHUFL $0X39,X9,X9 + PSHUFL $0X39,X3,X3 + PSHUFL $0X39,X2,X2 + XORL 112(SI),DX + XORL 116(SI),CX + XORL 120(SI),R8 + XORL 124(SI),R9 + MOVL DX,112(DI) + MOVL CX,116(DI) + MOVL R8,120(DI) + MOVL R9,124(DI) + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + PSHUFL $0X39,X13,X13 + PSHUFL $0X39,X9,X9 + PSHUFL $0X39,X3,X3 + PSHUFL $0X39,X2,X2 + XORL 176(SI),DX + XORL 180(SI),CX + XORL 184(SI),R8 + XORL 188(SI),R9 + MOVL DX,176(DI) + MOVL CX,180(DI) + MOVL R8,184(DI) + MOVL R9,188(DI) + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + XORL 240(SI),DX + XORL 244(SI),CX + XORL 248(SI),R8 + XORL 252(SI),R9 + MOVL DX,240(DI) + MOVL CX,244(DI) + MOVL R8,248(DI) + MOVL R9,252(DI) + MOVQ 408(SP),R9 + SUBQ $256,R9 + ADDQ $256,SI + ADDQ $256,DI + CMPQ R9,$256 + JAE BYTESATLEAST256 + CMPQ R9,$0 + JBE DONE + BYTESBETWEEN1AND255: + CMPQ R9,$64 + JAE NOCOPY + MOVQ DI,DX + LEAQ 416(SP),DI + MOVQ R9,CX + REP; MOVSB + LEAQ 416(SP),DI + LEAQ 416(SP),SI + NOCOPY: + MOVQ R9,408(SP) + MOVOA 48(SP),X0 + MOVOA 0(SP),X1 + MOVOA 16(SP),X2 + MOVOA 32(SP),X3 + MOVOA X1,X4 + MOVQ $20,CX + MAINLOOP2: + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X3 + PXOR X6,X3 + PADDL X3,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X3,X3 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X1 + PSHUFL $0X4E,X2,X2 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X1,X1 + PXOR X6,X0 + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X1 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X1,X1 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X3 + PSHUFL $0X4E,X2,X2 + PXOR X6,X3 + PADDL X3,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X3,X3 + PXOR X6,X0 + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X3 + PXOR X6,X3 + PADDL X3,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X3,X3 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X1 + PSHUFL $0X4E,X2,X2 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X1,X1 + PXOR X6,X0 + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X1 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X1,X1 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X3 + PSHUFL $0X4E,X2,X2 + PXOR X6,X3 + SUBQ $4,CX + PADDL X3,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PXOR X7,X7 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X3,X3 + PXOR X6,X0 + JA MAINLOOP2 + PADDL 48(SP),X0 + PADDL 0(SP),X1 + PADDL 16(SP),X2 + PADDL 32(SP),X3 + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X2,X2 + PSHUFL $0X39,X3,X3 + XORL 0(SI),CX + XORL 48(SI),R8 + XORL 32(SI),R9 + XORL 16(SI),AX + MOVL CX,0(DI) + MOVL R8,48(DI) + MOVL R9,32(DI) + MOVL AX,16(DI) + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X2,X2 + PSHUFL $0X39,X3,X3 + XORL 20(SI),CX + XORL 4(SI),R8 + XORL 52(SI),R9 + XORL 36(SI),AX + MOVL CX,20(DI) + MOVL R8,4(DI) + MOVL R9,52(DI) + MOVL AX,36(DI) + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X2,X2 + PSHUFL $0X39,X3,X3 + XORL 40(SI),CX + XORL 24(SI),R8 + XORL 8(SI),R9 + XORL 56(SI),AX + MOVL CX,40(DI) + MOVL R8,24(DI) + MOVL R9,8(DI) + MOVL AX,56(DI) + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + XORL 60(SI),CX + XORL 44(SI),R8 + XORL 28(SI),R9 + XORL 12(SI),AX + MOVL CX,60(DI) + MOVL R8,44(DI) + MOVL R9,28(DI) + MOVL AX,12(DI) + MOVQ 408(SP),R9 + MOVL 16(SP),CX + MOVL 36 (SP),R8 + ADDQ $1,CX + SHLQ $32,R8 + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $32,R8 + MOVL CX,16(SP) + MOVL R8, 36 (SP) + CMPQ R9,$64 + JA BYTESATLEAST65 + JAE BYTESATLEAST64 + MOVQ DI,SI + MOVQ DX,DI + MOVQ R9,CX + REP; MOVSB + BYTESATLEAST64: + DONE: + MOVQ 352(SP),R11 + MOVQ 360(SP),R12 + MOVQ 368(SP),R13 + MOVQ 376(SP),R14 + MOVQ 384(SP),R15 + MOVQ 392(SP),BX + MOVQ 400(SP),BP + MOVQ R11,SP + RET + BYTESATLEAST65: + SUBQ $64,R9 + ADDQ $64,DI + ADDQ $64,SI + JMP BYTESBETWEEN1AND255 diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go new file mode 100644 index 00000000..9bfc0927 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go @@ -0,0 +1,199 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package salsa + +// Core208 applies the Salsa20/8 core function to the 64-byte array in and puts +// the result into the 64-byte array out. The input and output may be the same array. +func Core208(out *[64]byte, in *[64]byte) { + j0 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + j1 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 + j2 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 + j3 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 + j4 := uint32(in[16]) | uint32(in[17])<<8 | uint32(in[18])<<16 | uint32(in[19])<<24 + j5 := uint32(in[20]) | uint32(in[21])<<8 | uint32(in[22])<<16 | uint32(in[23])<<24 + j6 := uint32(in[24]) | uint32(in[25])<<8 | uint32(in[26])<<16 | uint32(in[27])<<24 + j7 := uint32(in[28]) | uint32(in[29])<<8 | uint32(in[30])<<16 | uint32(in[31])<<24 + j8 := uint32(in[32]) | uint32(in[33])<<8 | uint32(in[34])<<16 | uint32(in[35])<<24 + j9 := uint32(in[36]) | uint32(in[37])<<8 | uint32(in[38])<<16 | uint32(in[39])<<24 + j10 := uint32(in[40]) | uint32(in[41])<<8 | uint32(in[42])<<16 | uint32(in[43])<<24 + j11 := uint32(in[44]) | uint32(in[45])<<8 | uint32(in[46])<<16 | uint32(in[47])<<24 + j12 := uint32(in[48]) | uint32(in[49])<<8 | uint32(in[50])<<16 | uint32(in[51])<<24 + j13 := uint32(in[52]) | uint32(in[53])<<8 | uint32(in[54])<<16 | uint32(in[55])<<24 + j14 := uint32(in[56]) | uint32(in[57])<<8 | uint32(in[58])<<16 | uint32(in[59])<<24 + j15 := uint32(in[60]) | uint32(in[61])<<8 | uint32(in[62])<<16 | uint32(in[63])<<24 + + x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8 + x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15 + + for i := 0; i < 8; i += 2 { + u := x0 + x12 + x4 ^= u<<7 | u>>(32-7) + u = x4 + x0 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x4 + x12 ^= u<<13 | u>>(32-13) + u = x12 + x8 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x1 + x9 ^= u<<7 | u>>(32-7) + u = x9 + x5 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x9 + x1 ^= u<<13 | u>>(32-13) + u = x1 + x13 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x6 + x14 ^= u<<7 | u>>(32-7) + u = x14 + x10 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x14 + x6 ^= u<<13 | u>>(32-13) + u = x6 + x2 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x11 + x3 ^= u<<7 | u>>(32-7) + u = x3 + x15 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x3 + x11 ^= u<<13 | u>>(32-13) + u = x11 + x7 + x15 ^= u<<18 | u>>(32-18) + + u = x0 + x3 + x1 ^= u<<7 | u>>(32-7) + u = x1 + x0 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x1 + x3 ^= u<<13 | u>>(32-13) + u = x3 + x2 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x4 + x6 ^= u<<7 | u>>(32-7) + u = x6 + x5 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x6 + x4 ^= u<<13 | u>>(32-13) + u = x4 + x7 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x9 + x11 ^= u<<7 | u>>(32-7) + u = x11 + x10 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x11 + x9 ^= u<<13 | u>>(32-13) + u = x9 + x8 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x14 + x12 ^= u<<7 | u>>(32-7) + u = x12 + x15 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x12 + x14 ^= u<<13 | u>>(32-13) + u = x14 + x13 + x15 ^= u<<18 | u>>(32-18) + } + x0 += j0 + x1 += j1 + x2 += j2 + x3 += j3 + x4 += j4 + x5 += j5 + x6 += j6 + x7 += j7 + x8 += j8 + x9 += j9 + x10 += j10 + x11 += j11 + x12 += j12 + x13 += j13 + x14 += j14 + x15 += j15 + + out[0] = byte(x0) + out[1] = byte(x0 >> 8) + out[2] = byte(x0 >> 16) + out[3] = byte(x0 >> 24) + + out[4] = byte(x1) + out[5] = byte(x1 >> 8) + out[6] = byte(x1 >> 16) + out[7] = byte(x1 >> 24) + + out[8] = byte(x2) + out[9] = byte(x2 >> 8) + out[10] = byte(x2 >> 16) + out[11] = byte(x2 >> 24) + + out[12] = byte(x3) + out[13] = byte(x3 >> 8) + out[14] = byte(x3 >> 16) + out[15] = byte(x3 >> 24) + + out[16] = byte(x4) + out[17] = byte(x4 >> 8) + out[18] = byte(x4 >> 16) + out[19] = byte(x4 >> 24) + + out[20] = byte(x5) + out[21] = byte(x5 >> 8) + out[22] = byte(x5 >> 16) + out[23] = byte(x5 >> 24) + + out[24] = byte(x6) + out[25] = byte(x6 >> 8) + out[26] = byte(x6 >> 16) + out[27] = byte(x6 >> 24) + + out[28] = byte(x7) + out[29] = byte(x7 >> 8) + out[30] = byte(x7 >> 16) + out[31] = byte(x7 >> 24) + + out[32] = byte(x8) + out[33] = byte(x8 >> 8) + out[34] = byte(x8 >> 16) + out[35] = byte(x8 >> 24) + + out[36] = byte(x9) + out[37] = byte(x9 >> 8) + out[38] = byte(x9 >> 16) + out[39] = byte(x9 >> 24) + + out[40] = byte(x10) + out[41] = byte(x10 >> 8) + out[42] = byte(x10 >> 16) + out[43] = byte(x10 >> 24) + + out[44] = byte(x11) + out[45] = byte(x11 >> 8) + out[46] = byte(x11 >> 16) + out[47] = byte(x11 >> 24) + + out[48] = byte(x12) + out[49] = byte(x12 >> 8) + out[50] = byte(x12 >> 16) + out[51] = byte(x12 >> 24) + + out[52] = byte(x13) + out[53] = byte(x13 >> 8) + out[54] = byte(x13 >> 16) + out[55] = byte(x13 >> 24) + + out[56] = byte(x14) + out[57] = byte(x14 >> 8) + out[58] = byte(x14 >> 16) + out[59] = byte(x14 >> 24) + + out[60] = byte(x15) + out[61] = byte(x15 >> 8) + out[62] = byte(x15 >> 16) + out[63] = byte(x15 >> 24) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go new file mode 100644 index 00000000..903c7858 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go @@ -0,0 +1,23 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!appengine,!gccgo + +package salsa + +// This function is implemented in salsa2020_amd64.s. + +//go:noescape + +func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte) + +// XORKeyStream crypts bytes from in to out using the given key and counters. +// In and out may be the same slice but otherwise should not overlap. Counter +// contains the raw salsa20 counter bytes (both nonce and block counter). +func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) { + if len(in) == 0 { + return + } + salsa2020XORKeyStream(&out[0], &in[0], uint64(len(in)), &counter[0], &key[0]) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go new file mode 100644 index 00000000..95f8ca5b --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go @@ -0,0 +1,234 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine gccgo + +package salsa + +const rounds = 20 + +// core applies the Salsa20 core function to 16-byte input in, 32-byte key k, +// and 16-byte constant c, and puts the result into 64-byte array out. +func core(out *[64]byte, in *[16]byte, k *[32]byte, c *[16]byte) { + j0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24 + j1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24 + j2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24 + j3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24 + j4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24 + j5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24 + j6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + j7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 + j8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 + j9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 + j10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24 + j11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24 + j12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24 + j13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24 + j14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24 + j15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24 + + x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8 + x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15 + + for i := 0; i < rounds; i += 2 { + u := x0 + x12 + x4 ^= u<<7 | u>>(32-7) + u = x4 + x0 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x4 + x12 ^= u<<13 | u>>(32-13) + u = x12 + x8 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x1 + x9 ^= u<<7 | u>>(32-7) + u = x9 + x5 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x9 + x1 ^= u<<13 | u>>(32-13) + u = x1 + x13 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x6 + x14 ^= u<<7 | u>>(32-7) + u = x14 + x10 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x14 + x6 ^= u<<13 | u>>(32-13) + u = x6 + x2 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x11 + x3 ^= u<<7 | u>>(32-7) + u = x3 + x15 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x3 + x11 ^= u<<13 | u>>(32-13) + u = x11 + x7 + x15 ^= u<<18 | u>>(32-18) + + u = x0 + x3 + x1 ^= u<<7 | u>>(32-7) + u = x1 + x0 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x1 + x3 ^= u<<13 | u>>(32-13) + u = x3 + x2 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x4 + x6 ^= u<<7 | u>>(32-7) + u = x6 + x5 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x6 + x4 ^= u<<13 | u>>(32-13) + u = x4 + x7 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x9 + x11 ^= u<<7 | u>>(32-7) + u = x11 + x10 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x11 + x9 ^= u<<13 | u>>(32-13) + u = x9 + x8 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x14 + x12 ^= u<<7 | u>>(32-7) + u = x12 + x15 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x12 + x14 ^= u<<13 | u>>(32-13) + u = x14 + x13 + x15 ^= u<<18 | u>>(32-18) + } + x0 += j0 + x1 += j1 + x2 += j2 + x3 += j3 + x4 += j4 + x5 += j5 + x6 += j6 + x7 += j7 + x8 += j8 + x9 += j9 + x10 += j10 + x11 += j11 + x12 += j12 + x13 += j13 + x14 += j14 + x15 += j15 + + out[0] = byte(x0) + out[1] = byte(x0 >> 8) + out[2] = byte(x0 >> 16) + out[3] = byte(x0 >> 24) + + out[4] = byte(x1) + out[5] = byte(x1 >> 8) + out[6] = byte(x1 >> 16) + out[7] = byte(x1 >> 24) + + out[8] = byte(x2) + out[9] = byte(x2 >> 8) + out[10] = byte(x2 >> 16) + out[11] = byte(x2 >> 24) + + out[12] = byte(x3) + out[13] = byte(x3 >> 8) + out[14] = byte(x3 >> 16) + out[15] = byte(x3 >> 24) + + out[16] = byte(x4) + out[17] = byte(x4 >> 8) + out[18] = byte(x4 >> 16) + out[19] = byte(x4 >> 24) + + out[20] = byte(x5) + out[21] = byte(x5 >> 8) + out[22] = byte(x5 >> 16) + out[23] = byte(x5 >> 24) + + out[24] = byte(x6) + out[25] = byte(x6 >> 8) + out[26] = byte(x6 >> 16) + out[27] = byte(x6 >> 24) + + out[28] = byte(x7) + out[29] = byte(x7 >> 8) + out[30] = byte(x7 >> 16) + out[31] = byte(x7 >> 24) + + out[32] = byte(x8) + out[33] = byte(x8 >> 8) + out[34] = byte(x8 >> 16) + out[35] = byte(x8 >> 24) + + out[36] = byte(x9) + out[37] = byte(x9 >> 8) + out[38] = byte(x9 >> 16) + out[39] = byte(x9 >> 24) + + out[40] = byte(x10) + out[41] = byte(x10 >> 8) + out[42] = byte(x10 >> 16) + out[43] = byte(x10 >> 24) + + out[44] = byte(x11) + out[45] = byte(x11 >> 8) + out[46] = byte(x11 >> 16) + out[47] = byte(x11 >> 24) + + out[48] = byte(x12) + out[49] = byte(x12 >> 8) + out[50] = byte(x12 >> 16) + out[51] = byte(x12 >> 24) + + out[52] = byte(x13) + out[53] = byte(x13 >> 8) + out[54] = byte(x13 >> 16) + out[55] = byte(x13 >> 24) + + out[56] = byte(x14) + out[57] = byte(x14 >> 8) + out[58] = byte(x14 >> 16) + out[59] = byte(x14 >> 24) + + out[60] = byte(x15) + out[61] = byte(x15 >> 8) + out[62] = byte(x15 >> 16) + out[63] = byte(x15 >> 24) +} + +// XORKeyStream crypts bytes from in to out using the given key and counters. +// In and out may be the same slice but otherwise should not overlap. Counter +// contains the raw salsa20 counter bytes (both nonce and block counter). +func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) { + var block [64]byte + var counterCopy [16]byte + copy(counterCopy[:], counter[:]) + + for len(in) >= 64 { + core(&block, &counterCopy, key, &Sigma) + for i, x := range block { + out[i] = in[i] ^ x + } + u := uint32(1) + for i := 8; i < 16; i++ { + u += uint32(counterCopy[i]) + counterCopy[i] = byte(u) + u >>= 8 + } + in = in[64:] + out = out[64:] + } + + if len(in) > 0 { + core(&block, &counterCopy, key, &Sigma) + for i, v := range in { + out[i] = v ^ block[i] + } + } +} |