Skip to content

Commit ac3ad03

Browse files
committed
8350589: Investigate cleaner implementation of AArch64 ML-DSA intrinsic introduced in JDK-8348561
Reviewed-by: dlong
1 parent 8a1c85e commit ac3ad03

File tree

3 files changed

+685
-590
lines changed

3 files changed

+685
-590
lines changed

src/hotspot/cpu/aarch64/register_aarch64.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,23 @@ const char* PRegister::PRegisterImpl::name() const {
5858
};
5959
return is_valid() ? names[encoding()] : "pnoreg";
6060
}
61+
62+
// convenience methods for splitting 8-way vector register sequences
63+
// in half -- needed because vector operations can normally only
64+
// benefit from 4-way instruction parallelism
65+
66+
// Returns the first four elements (indices 0..3) of an 8-way sequence:
// same base register and same delta as the input.
VSeq<4> vs_front(const VSeq<8>& v) {
  const int first = v.base();
  const int step = v.delta();
  return VSeq<4>(first, step);
}
69+
70+
// Returns the last four elements (indices 4..7) of an 8-way sequence:
// the base is advanced by four steps of the input delta and the delta
// itself is unchanged.
VSeq<4> vs_back(const VSeq<8>& v) {
  const int step = v.delta();
  const int first = v.base() + 4 * step;
  return VSeq<4>(first, step);
}
73+
74+
// Returns the even-indexed elements (indices 0, 2, 4, 6) of an 8-way
// sequence: same base register, with the delta doubled so that every
// other element is skipped.
VSeq<4> vs_even(const VSeq<8>& v) {
  const int first = v.base();
  const int step = 2 * v.delta();
  return VSeq<4>(first, step);
}
77+
78+
// Returns the odd-indexed elements (indices 1, 3, 5, 7) of an 8-way
// sequence. Element 1 of the input is register base + delta, so the
// result starts there and uses twice the input delta. Offsetting the
// base by a literal 1 would only be correct for inputs with delta 1
// and would select the wrong registers for delta 0 (constant) or
// delta 2 inputs.
VSeq<4> vs_odd(const VSeq<8>& v) {
  return VSeq<4>(v.base() + v.delta(), v.delta() * 2);
}

src/hotspot/cpu/aarch64/register_aarch64.hpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,4 +412,72 @@ inline Register as_Register(FloatRegister reg) {
412412
// High-level register class of an OptoReg or a VMReg register.
413413
// Enumerators take consecutive values starting at 0 (rc_bad).
enum RC {
  rc_bad,
  rc_int,
  rc_float,
  rc_predicate,
  rc_stack
};
414414

415+
// AArch64 Vector Register Sequence management support
416+
//
417+
// VSeq implements an indexable (by operator[]) vector register
418+
// sequence starting from a fixed base register and with a fixed delta
419+
// (defaulted to 1, but sometimes 0 or 2) e.g. VSeq<4>(16) will return
420+
// registers v16, ... v19 for indices 0, ... 3.
421+
//
422+
// Generator methods may iterate across sets of VSeq<4> to schedule an
423+
// operation 4 times using distinct input and output registers,
424+
// profiting from 4-way instruction parallelism.
425+
//
426+
// A VSeq<2> can be used to specify registers loaded with special
427+
// constants e.g. <v30, v31> --> <MONT_Q, MONT_Q_INV_MOD_R>.
428+
//
429+
// A VSeq with base n and delta 0 can be used to generate code that
430+
// combines values in another VSeq with the constant in register vn.
431+
//
432+
// A VSeq with base n and delta 2 can be used to select an odd or even
433+
// indexed set of registers.
434+
//
435+
// Methods which accept arguments of type VSeq<8>, may split their
436+
// inputs into front and back halves or odd and even halves (see
437+
// convenience methods below).
438+
439+
template<int N> class VSeq {
440+
static_assert(N >= 2, "vector sequence length must be greater than 1");
441+
static_assert(N <= 8, "vector sequence length must not exceed 8");
442+
static_assert((N & (N - 1)) == 0, "vector sequence length must be power of two");
443+
private:
444+
int _base; // index of first register in sequence
445+
int _delta; // increment to derive successive indices
446+
public:
447+
VSeq(FloatRegister base_reg, int delta = 1) : VSeq(base_reg->encoding(), delta) { }
448+
VSeq(int base, int delta = 1) : _base(base), _delta(delta) {
449+
assert (_base >= 0, "invalid base register");
450+
assert (_delta >= 0, "invalid register delta");
451+
assert ((_base + (N - 1) * _delta) < 32, "range exceeded");
452+
}
453+
// indexed access to sequence
454+
FloatRegister operator [](int i) const {
455+
assert (0 <= i && i < N, "index out of bounds");
456+
return as_FloatRegister(_base + i * _delta);
457+
}
458+
int mask() const {
459+
int m = 0;
460+
int bit = 1 << _base;
461+
for (int i = 0; i < N; i++) {
462+
m |= bit << (i * _delta);
463+
}
464+
return m;
465+
}
466+
int base() const { return _base; }
467+
int delta() const { return _delta; }
468+
};
469+
470+
// declare convenience methods for splitting vector register sequences
471+
472+
VSeq<4> vs_front(const VSeq<8>& v);
473+
VSeq<4> vs_back(const VSeq<8>& v);
474+
VSeq<4> vs_even(const VSeq<8>& v);
475+
VSeq<4> vs_odd(const VSeq<8>& v);
476+
477+
// methods for use in asserts to check VSeq inputs and outputs are
478+
// either disjoint or equal
479+
480+
// True when the two sequences have no register in common, i.e. their
// register masks share no set bit. The sequences may differ in length.
template<int N, int M>
bool vs_disjoint(const VSeq<N>& n, const VSeq<M>& m) {
  const int shared = n.mask() & m.mask();
  return shared == 0;
}
481+
// True when both sequences cover the same set of registers. Only the
// register masks are compared, so the order in which the registers
// appear in each sequence is not taken into account.
template<int N>
bool vs_same(const VSeq<N>& n, const VSeq<N>& m) {
  const int lhs = n.mask();
  const int rhs = m.mask();
  return lhs == rhs;
}
482+
415483
#endif // CPU_AARCH64_REGISTER_AARCH64_HPP

0 commit comments

Comments
 (0)