Skip to content

Commit f0936cd

Browse files
committed
Add DXIL Debugger Support for Wave Reduction ops
DXOp::WaveActiveAllEqual, DXOp::WaveActiveBit, DXOp::WaveAllBitCount, DXOp::WaveActiveOp (WaveActiveOp::Product, WaveActiveOp::Min, WaveActiveOp::Max)
1 parent cc32e24 commit f0936cd

File tree

2 files changed

+259
-59
lines changed

2 files changed

+259
-59
lines changed

renderdoc/driver/shaders/dxil/dxil_debug.cpp

Lines changed: 256 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -3794,7 +3794,7 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
37943794
RDCASSERT(GetShaderVariable(inst.args[3], opCode, dxOpCode, arg));
37953795
bool isUnsigned = (arg.value.u32v[0] != (uint32_t)SignedOpKind::Signed);
37963796

3797-
// set the identity
3797+
// set the initial value
37983798
ShaderVariable accum(result);
37993799
switch(waveOpCode)
38003800
{
@@ -3875,17 +3875,22 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
38753875
break;
38763876
}
38773877
case DXOp::WavePrefixBitCount:
3878+
case DXOp::WaveAllBitCount:
38783879
{
38793880
// WavePrefixBitCount(cond)
3881+
// WaveAllBitCount(cond)
3882+
38803883
// determine active lane indices in our subgroup
38813884
rdcarray<uint32_t> activeLanes;
38823885
GetSubgroupActiveLanes(activeMask, workgroup, activeLanes);
38833886

3887+
uint32_t maxLane = (dxOpCode == DXOp::WavePrefixBitCount) ? m_WorkgroupIndex : UINT32_MAX;
3888+
38843889
uint32_t count = 0;
38853890
for(uint32_t lane : activeLanes)
38863891
{
38873892
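// WavePrefixBitCount: stop before processing our lane (WaveAllBitCount sets maxLane to UINT32_MAX so it is not expected to stop early)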
3888-
if(lane == m_WorkgroupIndex)
3893+
if(lane == maxLane)
38893894
break;
38903895

38913896
ShaderVariable x;
@@ -3899,34 +3904,14 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
38993904
case DXOp::WaveAnyTrue:
39003905
case DXOp::WaveAllTrue:
39013906
case DXOp::WaveActiveBallot:
3902-
case DXOp::WaveActiveOp:
3907+
case DXOp::WaveActiveAllEqual:
39033908
{
39043909
ShaderVariable accum(result);
3905-
bool isUnsigned = true;
3906-
WaveOpCode waveOpCode = WaveOpCode::Sum;
39073910

3908-
if(dxOpCode == DXOp::WaveActiveOp)
3909-
{
3910-
// WaveActiveOp(value,op,sop)
3911-
ShaderVariable arg;
3912-
RDCASSERT(GetShaderVariable(inst.args[2], opCode, dxOpCode, arg));
3913-
waveOpCode = (WaveOpCode)arg.value.u32v[0];
3914-
3915-
RDCASSERT(GetShaderVariable(inst.args[3], opCode, dxOpCode, arg));
3916-
isUnsigned = (arg.value.u32v[0] != (uint32_t)SignedOpKind::Signed);
3911+
ShaderVariable refValue;
3912+
RDCASSERT(GetShaderVariable(inst.args[1], opCode, dxOpCode, refValue));
39173913

3918-
// set the identity
3919-
switch(waveOpCode)
3920-
{
3921-
case WaveOpCode::Sum: SetShaderValueZero(accum); break;
3922-
case WaveOpCode::Product: SetShaderValueOne(accum); break;
3923-
default:
3924-
RDCERR("Unhandled ActiveOp wave opcode");
3925-
accum.value = {};
3926-
break;
3927-
}
3928-
}
3929-
else if(dxOpCode == DXOp::WaveAnyTrue)
3914+
if(dxOpCode == DXOp::WaveAnyTrue)
39303915
{
39313916
// WaveAnyTrue(cond)
39323917
accum.value.u32v[0] = 0;
@@ -3936,6 +3921,15 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
39363921
// WaveAllTrue(cond)
39373922
accum.value.u32v[0] = 1;
39383923
}
3924+
else if(dxOpCode == DXOp::WaveActiveAllEqual)
3925+
{
3926+
// WaveActiveAllEqual(value)
3927+
accum.value.u32v[0] = 1;
3928+
}
3929+
else
3930+
{
3931+
RDCERR("Unhandled dxOpCode %s", ToStr(dxOpCode).c_str());
3932+
}
39393933

39403934
// determine active lane indices in our subgroup
39413935
rdcarray<uint32_t> activeLanes;
@@ -3947,52 +3941,261 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
39473941
ShaderVariable x;
39483942
RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x));
39493943

3950-
if(dxOpCode == DXOp::WaveActiveOp)
3944+
if(dxOpCode == DXOp::WaveAnyTrue)
3945+
{
3946+
accum.value.u32v[0] |= x.value.u32v[0];
3947+
}
3948+
else if(dxOpCode == DXOp::WaveAllTrue)
3949+
{
3950+
accum.value.u32v[0] &= x.value.u32v[0];
3951+
}
3952+
else if(dxOpCode == DXOp::WaveActiveBallot)
3953+
{
3954+
uint32_t c = (lane - firstLaneInSub) / 32;
3955+
uint32_t bit = 1U << ((lane - firstLaneInSub) % 32U);
3956+
3957+
if(x.value.u32v[0])
3958+
accum.value.u32v[c] |= bit;
3959+
}
3960+
else if(dxOpCode == DXOp::WaveActiveAllEqual)
39513961
{
3952-
switch(waveOpCode)
3962+
for(uint8_t c = 0; c < x.columns; c++)
39533963
{
3954-
case WaveOpCode::Sum:
3964+
bool matches = false;
3965+
#undef _IMPL
3966+
#define _IMPL(I, S, U) matches = (comp<I>(x, c) == comp<I>(refValue, c));
3967+
3968+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
3969+
3970+
#undef _IMPL
3971+
#define _IMPL(T) matches = (comp<T>(x, c) == comp<T>(refValue, c));
3972+
3973+
IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, x.type);
3974+
3975+
accum.value.u32v[c] &= (matches ? 1 : 0);
3976+
}
3977+
}
3978+
}
3979+
3980+
result.value = accum.value;
3981+
break;
3982+
}
3983+
case DXOp::WaveActiveOp:
3984+
{
3985+
// WaveActiveOp(value,op,sop)
3986+
ShaderVariable accum(result);
3987+
3988+
ShaderVariable refValue;
3989+
RDCASSERT(GetShaderVariable(inst.args[1], opCode, dxOpCode, refValue));
3990+
3991+
ShaderVariable arg;
3992+
RDCASSERT(GetShaderVariable(inst.args[2], opCode, dxOpCode, arg));
3993+
WaveOpCode waveOpCode = (WaveOpCode)arg.value.u32v[0];
3994+
3995+
RDCASSERT(GetShaderVariable(inst.args[3], opCode, dxOpCode, arg));
3996+
bool isUnsigned = (arg.value.u32v[0] != (uint32_t)SignedOpKind::Signed);
3997+
3998+
// set the initial value
3999+
switch(waveOpCode)
4000+
{
4001+
case WaveOpCode::Sum: SetShaderValueZero(accum); break;
4002+
case WaveOpCode::Product: SetShaderValueOne(accum); break;
4003+
case WaveOpCode::Min:
4004+
case WaveOpCode::Max:
4005+
{
4006+
accum.value = refValue.value;
4007+
break;
4008+
}
4009+
default:
4010+
RDCERR("Unhandled ActiveOp wave opcode");
4011+
accum.value = {};
4012+
break;
4013+
}
4014+
4015+
// determine active lane indices in our subgroup
4016+
rdcarray<uint32_t> activeLanes;
4017+
GetSubgroupActiveLanes(activeMask, workgroup, activeLanes);
4018+
4019+
for(uint32_t lane : activeLanes)
4020+
{
4021+
ShaderVariable x;
4022+
RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x));
4023+
4024+
switch(waveOpCode)
4025+
{
4026+
case WaveOpCode::Sum:
4027+
{
4028+
for(uint8_t c = 0; c < x.columns; c++)
39554029
{
3956-
for(uint8_t c = 0; c < x.columns; c++)
4030+
if(isUnsigned)
39574031
{
3958-
if(isUnsigned)
3959-
{
39604032
#undef _IMPL
39614033
#define _IMPL(I, S, U) comp<U>(accum, c) = comp<U>(accum, c) + comp<U>(x, c)
3962-
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
3963-
}
3964-
else
3965-
{
4034+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4035+
}
4036+
else
4037+
{
39664038
#undef _IMPL
39674039
#define _IMPL(I, S, U) comp<S>(accum, c) = comp<S>(accum, c) + comp<S>(x, c)
3968-
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4040+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
39694041

39704042
#undef _IMPL
39714043
#define _IMPL(T) comp<T>(accum, c) = comp<T>(accum, c) + comp<T>(x, c)
39724044

3973-
IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, x.type);
3974-
}
4045+
IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, x.type);
39754046
}
3976-
break;
39774047
}
3978-
default: RDCERR("Unhandled ActiveOp wave opcode"); break;
4048+
break;
39794049
}
4050+
case WaveOpCode::Product:
4051+
{
4052+
for(uint8_t c = 0; c < x.columns; c++)
4053+
{
4054+
if(isUnsigned)
4055+
{
4056+
#undef _IMPL
4057+
#define _IMPL(I, S, U) comp<U>(accum, c) = comp<U>(accum, c) * comp<U>(x, c)
4058+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4059+
}
4060+
else
4061+
{
4062+
#undef _IMPL
4063+
#define _IMPL(I, S, U) comp<S>(accum, c) = comp<S>(accum, c) * comp<S>(x, c)
4064+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4065+
4066+
#undef _IMPL
4067+
#define _IMPL(T) comp<T>(accum, c) = comp<T>(accum, c) * comp<T>(x, c)
4068+
4069+
IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, x.type);
4070+
}
4071+
}
4072+
break;
4073+
}
4074+
case WaveOpCode::Min:
4075+
{
4076+
for(uint8_t c = 0; c < x.columns; c++)
4077+
{
4078+
if(isUnsigned)
4079+
{
4080+
#undef _IMPL
4081+
#define _IMPL(I, S, U) comp<U>(accum, c) = RDCMIN(comp<U>(accum, c), comp<U>(x, c))
4082+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4083+
}
4084+
else
4085+
{
4086+
#undef _IMPL
4087+
#define _IMPL(I, S, U) comp<S>(accum, c) = RDCMIN(comp<S>(accum, c), comp<S>(x, c))
4088+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4089+
4090+
#undef _IMPL
4091+
#define _IMPL(T) comp<T>(accum, c) = RDCMIN(comp<T>(accum, c), comp<T>(x, c))
4092+
4093+
IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, x.type);
4094+
}
4095+
}
4096+
break;
4097+
}
4098+
case WaveOpCode::Max:
4099+
{
4100+
for(uint8_t c = 0; c < x.columns; c++)
4101+
{
4102+
if(isUnsigned)
4103+
{
4104+
#undef _IMPL
4105+
#define _IMPL(I, S, U) comp<U>(accum, c) = RDCMAX(comp<U>(accum, c), comp<U>(x, c))
4106+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4107+
}
4108+
else
4109+
{
4110+
#undef _IMPL
4111+
#define _IMPL(I, S, U) comp<S>(accum, c) = RDCMAX(comp<S>(accum, c), comp<S>(x, c))
4112+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4113+
4114+
#undef _IMPL
4115+
#define _IMPL(T) comp<T>(accum, c) = RDCMAX(comp<T>(accum, c), comp<T>(x, c))
4116+
4117+
IMPL_FOR_FLOAT_TYPES_FOR_TYPE(_IMPL, x.type);
4118+
}
4119+
}
4120+
break;
4121+
}
4122+
default: RDCERR("Unhandled ActiveOp wave opcode"); break;
39804123
}
3981-
else if(dxOpCode == DXOp::WaveAnyTrue)
3982-
{
3983-
accum.value.u32v[0] |= x.value.u32v[0];
3984-
}
3985-
else if(dxOpCode == DXOp::WaveAllTrue)
4124+
}
4125+
4126+
result.value = accum.value;
4127+
break;
4128+
}
4129+
case DXOp::WaveActiveBit:
4130+
{
4131+
// WaveActiveBit(value,op)
4132+
ShaderVariable accum(result);
4133+
4134+
ShaderVariable refValue;
4135+
RDCASSERT(GetShaderVariable(inst.args[1], opCode, dxOpCode, refValue));
4136+
4137+
ShaderVariable arg;
4138+
RDCASSERT(GetShaderVariable(inst.args[2], opCode, dxOpCode, arg));
4139+
WaveBitOpCode waveBitOpCode = (WaveBitOpCode)arg.value.u32v[0];
4140+
4141+
// set the initial value
4142+
switch(waveBitOpCode)
4143+
{
4144+
case WaveBitOpCode::Or:
4145+
case WaveBitOpCode::Xor: SetShaderValueZero(accum); break;
4146+
case WaveBitOpCode::And:
39864147
{
3987-
accum.value.u32v[0] &= x.value.u32v[0];
4148+
accum.value = refValue.value;
4149+
break;
39884150
}
3989-
else if(dxOpCode == DXOp::WaveActiveBallot)
3990-
{
3991-
uint32_t c = (lane - firstLaneInSub) / 32;
3992-
uint32_t bit = 1U << ((lane - firstLaneInSub) % 32U);
4151+
default:
4152+
RDCERR("Unhandled ActiveBitOp wave opcode");
4153+
accum.value = {};
4154+
break;
4155+
}
39934156

3994-
if(x.value.u32v[0])
3995-
accum.value.u32v[c] |= bit;
4157+
// determine active lane indices in our subgroup
4158+
rdcarray<uint32_t> activeLanes;
4159+
GetSubgroupActiveLanes(activeMask, workgroup, activeLanes);
4160+
4161+
for(uint32_t lane : activeLanes)
4162+
{
4163+
ShaderVariable x;
4164+
RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x));
4165+
4166+
switch(waveBitOpCode)
4167+
{
4168+
case WaveBitOpCode::And:
4169+
{
4170+
for(uint8_t c = 0; c < x.columns; c++)
4171+
{
4172+
#undef _IMPL
4173+
#define _IMPL(I, S, U) comp<S>(accum, c) = comp<I>(accum, c) & comp<I>(x, c)
4174+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4175+
}
4176+
break;
4177+
}
4178+
case WaveBitOpCode::Or:
4179+
{
4180+
for(uint8_t c = 0; c < x.columns; c++)
4181+
{
4182+
#undef _IMPL
4183+
#define _IMPL(I, S, U) comp<S>(accum, c) = comp<I>(accum, c) | comp<I>(x, c)
4184+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4185+
}
4186+
break;
4187+
}
4188+
case WaveBitOpCode::Xor:
4189+
{
4190+
for(uint8_t c = 0; c < x.columns; c++)
4191+
{
4192+
#undef _IMPL
4193+
#define _IMPL(I, S, U) comp<S>(accum, c) = comp<I>(accum, c) ^ comp<I>(x, c)
4194+
IMPL_FOR_INT_TYPES_FOR_TYPE(_IMPL, x.type);
4195+
}
4196+
break;
4197+
}
4198+
default: RDCERR("Unhandled ActiveBitOp wave opcode"); break;
39964199
}
39974200
}
39984201

@@ -4306,9 +4509,6 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
43064509
case DXOp::EmitThenCutStream:
43074510

43084511
// Wave Operations
4309-
case DXOp::WaveActiveAllEqual:
4310-
case DXOp::WaveActiveBit:
4311-
case DXOp::WaveAllBitCount:
43124512
case DXOp::WaveMatch:
43134513
case DXOp::WaveMultiPrefixOp:
43144514
case DXOp::WaveMultiPrefixBitCount:

0 commit comments

Comments
 (0)