summaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/crypto/internal/poly1305/sum_s390x.go
blob: ec95966889691d2d0911277b8fceba1bb716d2f9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego
// +build gc,!purego

package poly1305

import (
	"golang.org/x/sys/cpu"
)

// updateVX is an assembly implementation of Poly1305 that uses vector
// instructions. It must only be called if the vector facility (vx) is
// available.
//
//go:noescape
func updateVX(state *macState, msg []byte)

// mac is a replacement for macGeneric that uses a larger buffer and redirects
// calls that would have gone to updateGeneric to updateVX if the vector
// facility is installed.
//
// A larger buffer is required for good performance because the vector
// implementation has a higher fixed cost per call than the generic
// implementation.
type mac struct {
	macState

	buffer [16 * TagSize]byte // size must be a multiple of block size (16)
	offset int
}

func (h *mac) Write(p []byte) (int, error) {
	nn := len(p)
	if h.offset > 0 {
		n := copy(h.buffer[h.offset:], p)
		if h.offset+n < len(h.buffer) {
			h.offset += n
			return nn, nil
		}
		p = p[n:]
		h.offset = 0
		if cpu.S390X.HasVX {
			updateVX(&h.macState, h.buffer[:])
		} else {
			updateGeneric(&h.macState, h.buffer[:])
		}
	}

	tail := len(p) % len(h.buffer) // number of bytes to copy into buffer
	body := len(p) - tail          // number of bytes to process now
	if body > 0 {
		if cpu.S390X.HasVX {
			updateVX(&h.macState, p[:body])
		} else {
			updateGeneric(&h.macState, p[:body])
		}
	}
	h.offset = copy(h.buffer[:], p[body:]) // copy tail bytes - can be 0
	return nn, nil
}

func (h *mac) Sum(out *[TagSize]byte) {
	state := h.macState
	remainder := h.buffer[:h.offset]

	// Use the generic implementation if we have 2 or fewer blocks left
	// to sum. The vector implementation has a higher startup time.
	if cpu.S390X.HasVX && len(remainder) > 2*TagSize {
		updateVX(&state, remainder)
	} else if len(remainder) > 0 {
		updateGeneric(&state, remainder)
	}
	finalize(out, &state.h, &state.s)
}