Skip to content

Commit 4eb8c2c

Browse files
wgomundaym
authored and committed
poly1305: add optimized s390x SIMD implementation with VMSL
SIMD implementation based on the algorithm outlined in: NEON crypto, Daniel J. Bernstein and Peter Schwabe https://cryptojedi.org/papers/neoncrypto-20120320.pdf and as modified for VMSL as described in Accelerating Poly1305 Cryptographic Message Authentication on the z14 O'Farrell, Gadriwala, et al, CASCON 2017, p48-55 https://ibm.ent.box.com/s/jf9gedj0e9d2vjctfyh186shaztavnht name old new delta 64 485MB/s 1315 MB/s +171.58% 1K 607MB/s 4352 MB/s +616.97% 64Unaligned 485MB/s 1373 MB/s +183.09% 1KUnaligned 606MB/s 4286 MB/s +607.26% 2M 607MB/s 5529 MB/s +810.87% Change-Id: I31ccc25ced09180d99ea5c9233f0dcdc8666fc98 Reviewed-on: https://go-review.googlesource.com/110297 Run-TryBot: Michael Munday <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Michael Munday <[email protected]>
1 parent 2fc4c88 commit 4eb8c2c

File tree

6 files changed

+1405
-10
lines changed

6 files changed

+1405
-10
lines changed

poly1305/poly1305_test.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ var testData = []struct {
7575
},
7676
}
7777

78-
func testSum(t *testing.T, unaligned bool) {
78+
func testSum(t *testing.T, unaligned bool, sumImpl func(tag *[TagSize]byte, msg []byte, key *[32]byte)) {
7979
var out [16]byte
8080
var key [32]byte
8181

@@ -85,7 +85,7 @@ func testSum(t *testing.T, unaligned bool) {
8585
in = unalignBytes(in)
8686
}
8787
copy(key[:], v.k)
88-
Sum(&out, in, &key)
88+
sumImpl(&out, in, &key)
8989
if !bytes.Equal(out[:], v.correct) {
9090
t.Errorf("%d: expected %x, got %x", i, v.correct, out[:])
9191
}
@@ -125,8 +125,10 @@ func TestBurnin(t *testing.T) {
125125
}
126126
}
127127

128-
func TestSum(t *testing.T) { testSum(t, false) }
129-
func TestSumUnaligned(t *testing.T) { testSum(t, true) }
128+
func TestSum(t *testing.T) { testSum(t, false, Sum) }
129+
func TestSumUnaligned(t *testing.T) { testSum(t, true, Sum) }
130+
func TestSumGeneric(t *testing.T) { testSum(t, false, sumGeneric) }
131+
func TestSumGenericUnaligned(t *testing.T) { testSum(t, true, sumGeneric) }
130132

131133
func benchmark(b *testing.B, size int, unaligned bool) {
132134
var out [16]byte
@@ -146,6 +148,7 @@ func Benchmark64(b *testing.B) { benchmark(b, 64, false) }
146148
func Benchmark1K(b *testing.B) { benchmark(b, 1024, false) }
147149
func Benchmark64Unaligned(b *testing.B) { benchmark(b, 64, true) }
148150
func Benchmark1KUnaligned(b *testing.B) { benchmark(b, 1024, true) }
151+
// Benchmark2M measures Sum over a 2 MiB aligned message; only the *Unaligned benchmarks pass unaligned=true.
func Benchmark2M(b *testing.B) { benchmark(b, 2097152, false) }
149152

150153
func unalignBytes(in []byte) []byte {
151154
out := make([]byte, len(in)+1)

poly1305/sum_noasm.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright 2018 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// +build s390x,!go1.11 !arm,!amd64,!s390x gccgo appengine nacl
6+
7+
package poly1305
8+
9+
// Sum generates an authenticator for msg using a one-time key and puts the
10+
// 16-byte result into out. Authenticating two different messages with the same
11+
// key allows an attacker to forge messages at will.
12+
func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
13+
sumGeneric(out, msg, key)
14+
}

poly1305/sum_ref.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
// +build !amd64,!arm gccgo appengine nacl
6-
75
package poly1305
86

97
import "encoding/binary"
108

11-
// Sum generates an authenticator for msg using a one-time key and puts the
12-
// 16-byte result into out. Authenticating two different messages with the same
13-
// key allows an attacker to forge messages at will.
14-
func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
9+
// sumGeneric generates an authenticator for msg using a one-time key and
10+
// puts the 16-byte result into out. This is the generic implementation of
11+
// Sum and should be called if no assembly implementation is available.
12+
func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
1513
var (
1614
h0, h1, h2, h3, h4 uint32 // the hash accumulators
1715
r0, r1, r2, r3, r4 uint64 // the r part of the key

poly1305/sum_s390x.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright 2018 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// +build s390x,go1.11,!gccgo,!appengine
6+
7+
package poly1305
8+
9+
// hasVectorFacility reports whether the machine supports
10+
// the vector facility (vx).
11+
func hasVectorFacility() bool
12+
13+
// hasVMSLFacility reports whether the machine supports
14+
// Vector Multiply Sum Logical (VMSL).
15+
func hasVMSLFacility() bool
16+
17+
var hasVX = hasVectorFacility()
18+
var hasVMSL = hasVMSLFacility()
19+
20+
// poly1305vx is an assembly implementation of Poly1305 that uses vector
21+
// instructions. It must only be called if the vector facility (vx) is
22+
// available.
23+
//go:noescape
24+
func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
25+
26+
// poly1305vmsl is an assembly implementation of Poly1305 that uses vector
27+
// instructions, including VMSL. It must only be called if the vector facility (vx) is
28+
// available and if VMSL is supported.
29+
//go:noescape
30+
func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
31+
32+
// Sum generates an authenticator for m using a one-time key and puts the
33+
// 16-byte result into out. Authenticating two different messages with the same
34+
// key allows an attacker to forge messages at will.
35+
func Sum(out *[16]byte, m []byte, key *[32]byte) {
36+
if hasVX {
37+
var mPtr *byte
38+
if len(m) > 0 {
39+
mPtr = &m[0]
40+
}
41+
if hasVMSL && len(m) > 256 {
42+
poly1305vmsl(out, mPtr, uint64(len(m)), key)
43+
} else {
44+
poly1305vx(out, mPtr, uint64(len(m)), key)
45+
}
46+
} else {
47+
sumGeneric(out, m, key)
48+
}
49+
}

0 commit comments

Comments
 (0)