@@ -3794,7 +3794,7 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3794
3794
RDCASSERT (GetShaderVariable (inst.args [3 ], opCode, dxOpCode, arg));
3795
3795
bool isUnsigned = (arg.value .u32v [0 ] != (uint32_t )SignedOpKind::Signed);
3796
3796
3797
- // set the identity
3797
+ // set the initial value
3798
3798
ShaderVariable accum (result);
3799
3799
switch (waveOpCode)
3800
3800
{
@@ -3875,17 +3875,22 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3875
3875
break ;
3876
3876
}
3877
3877
case DXOp::WavePrefixBitCount:
3878
+ case DXOp::WaveAllBitCount:
3878
3879
{
3879
3880
// WavePrefixBitCount(cond)
3881
+ // WaveAllBitCount(cond)
3882
+
3880
3883
// determine active lane indices in our subgroup
3881
3884
rdcarray<uint32_t > activeLanes;
3882
3885
GetSubgroupActiveLanes (activeMask, workgroup, activeLanes);
3883
3886
3887
+ uint32_t maxLane = (dxOpCode == DXOp::WavePrefixBitCount) ? m_WorkgroupIndex : UINT32_MAX;
3888
+
3884
3889
uint32_t count = 0 ;
3885
3890
for (uint32_t lane : activeLanes)
3886
3891
{
3887
3892
// stop before processing our lane
3888
- if (lane == m_WorkgroupIndex )
3893
+ if (lane == maxLane )
3889
3894
break ;
3890
3895
3891
3896
ShaderVariable x;
@@ -3899,34 +3904,14 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3899
3904
case DXOp::WaveAnyTrue:
3900
3905
case DXOp::WaveAllTrue:
3901
3906
case DXOp::WaveActiveBallot:
3902
- case DXOp::WaveActiveOp :
3907
+ case DXOp::WaveActiveAllEqual :
3903
3908
{
3904
3909
ShaderVariable accum (result);
3905
- bool isUnsigned = true ;
3906
- WaveOpCode waveOpCode = WaveOpCode::Sum;
3907
3910
3908
- if (dxOpCode == DXOp::WaveActiveOp)
3909
- {
3910
- // WaveActiveOp(value,op,sop)
3911
- ShaderVariable arg;
3912
- RDCASSERT (GetShaderVariable (inst.args [2 ], opCode, dxOpCode, arg));
3913
- waveOpCode = (WaveOpCode)arg.value .u32v [0 ];
3914
-
3915
- RDCASSERT (GetShaderVariable (inst.args [3 ], opCode, dxOpCode, arg));
3916
- isUnsigned = (arg.value .u32v [0 ] != (uint32_t )SignedOpKind::Signed);
3911
+ ShaderVariable refValue;
3912
+ RDCASSERT (GetShaderVariable (inst.args [1 ], opCode, dxOpCode, refValue));
3917
3913
3918
- // set the identity
3919
- switch (waveOpCode)
3920
- {
3921
- case WaveOpCode::Sum: SetShaderValueZero (accum); break ;
3922
- case WaveOpCode::Product: SetShaderValueOne (accum); break ;
3923
- default :
3924
- RDCERR (" Unhandled ActiveOp wave opcode" );
3925
- accum.value = {};
3926
- break ;
3927
- }
3928
- }
3929
- else if (dxOpCode == DXOp::WaveAnyTrue)
3914
+ if (dxOpCode == DXOp::WaveAnyTrue)
3930
3915
{
3931
3916
// WaveAnyTrue(cond)
3932
3917
accum.value .u32v [0 ] = 0 ;
@@ -3936,6 +3921,15 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3936
3921
// WaveAllTrue(cond)
3937
3922
accum.value .u32v [0 ] = 1 ;
3938
3923
}
3924
+ else if (dxOpCode == DXOp::WaveActiveAllEqual)
3925
+ {
3926
+ // WaveActiveAllEqual(value)
3927
+ accum.value .u32v [0 ] = 1 ;
3928
+ }
3929
+ else
3930
+ {
3931
+ RDCERR (" Unhandled dxOpCode %s" , ToStr (dxOpCode).c_str ());
3932
+ }
3939
3933
3940
3934
// determine active lane indices in our subgroup
3941
3935
rdcarray<uint32_t > activeLanes;
@@ -3947,52 +3941,261 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3947
3941
ShaderVariable x;
3948
3942
RDCASSERT (workgroup[lane].GetShaderVariable (inst.args [1 ], opCode, dxOpCode, x));
3949
3943
3950
- if (dxOpCode == DXOp::WaveActiveOp)
3944
+ if (dxOpCode == DXOp::WaveAnyTrue)
3945
+ {
3946
+ accum.value .u32v [0 ] |= x.value .u32v [0 ];
3947
+ }
3948
+ else if (dxOpCode == DXOp::WaveAllTrue)
3949
+ {
3950
+ accum.value .u32v [0 ] &= x.value .u32v [0 ];
3951
+ }
3952
+ else if (dxOpCode == DXOp::WaveActiveBallot)
3953
+ {
3954
+ uint32_t c = (lane - firstLaneInSub) / 32 ;
3955
+ uint32_t bit = 1U << ((lane - firstLaneInSub) % 32U );
3956
+
3957
+ if (x.value .u32v [0 ])
3958
+ accum.value .u32v [c] |= bit;
3959
+ }
3960
+ else if (dxOpCode == DXOp::WaveActiveAllEqual)
3951
3961
{
3952
- switch (waveOpCode )
3962
+ for ( uint8_t c = 0 ; c < x. columns ; c++ )
3953
3963
{
3954
- case WaveOpCode::Sum:
3964
+ bool matches = false ;
3965
+ #undef _IMPL
3966
+ #define _IMPL (I, S, U ) matches = (comp<I>(x, c) == comp<I>(refValue, c));
3967
+
3968
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3969
+
3970
+ #undef _IMPL
3971
+ #define _IMPL (T ) matches = (comp<T>(x, c) == comp<T>(refValue, c));
3972
+
3973
+ IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
3974
+
3975
+ accum.value .u32v [c] &= (matches ? 1 : 0 );
3976
+ }
3977
+ }
3978
+ }
3979
+
3980
+ result.value = accum.value ;
3981
+ break ;
3982
+ }
3983
+ case DXOp::WaveActiveOp:
3984
+ {
3985
+ // WaveActiveOp(value,op,sop)
3986
+ ShaderVariable accum (result);
3987
+
3988
+ ShaderVariable refValue;
3989
+ RDCASSERT (GetShaderVariable (inst.args [1 ], opCode, dxOpCode, refValue));
3990
+
3991
+ ShaderVariable arg;
3992
+ RDCASSERT (GetShaderVariable (inst.args [2 ], opCode, dxOpCode, arg));
3993
+ WaveOpCode waveOpCode = (WaveOpCode)arg.value .u32v [0 ];
3994
+
3995
+ RDCASSERT (GetShaderVariable (inst.args [3 ], opCode, dxOpCode, arg));
3996
+ bool isUnsigned = (arg.value .u32v [0 ] != (uint32_t )SignedOpKind::Signed);
3997
+
3998
+ // set the initial value
3999
+ switch (waveOpCode)
4000
+ {
4001
+ case WaveOpCode::Sum: SetShaderValueZero (accum); break ;
4002
+ case WaveOpCode::Product: SetShaderValueOne (accum); break ;
4003
+ case WaveOpCode::Min:
4004
+ case WaveOpCode::Max:
4005
+ {
4006
+ accum.value = refValue.value ;
4007
+ break ;
4008
+ }
4009
+ default :
4010
+ RDCERR (" Unhandled ActiveOp wave opcode" );
4011
+ accum.value = {};
4012
+ break ;
4013
+ }
4014
+
4015
+ // determine active lane indices in our subgroup
4016
+ rdcarray<uint32_t > activeLanes;
4017
+ GetSubgroupActiveLanes (activeMask, workgroup, activeLanes);
4018
+
4019
+ for (uint32_t lane : activeLanes)
4020
+ {
4021
+ ShaderVariable x;
4022
+ RDCASSERT (workgroup[lane].GetShaderVariable (inst.args [1 ], opCode, dxOpCode, x));
4023
+
4024
+ switch (waveOpCode)
4025
+ {
4026
+ case WaveOpCode::Sum:
4027
+ {
4028
+ for (uint8_t c = 0 ; c < x.columns ; c++)
3955
4029
{
3956
- for ( uint8_t c = 0 ; c < x. columns ; c++ )
4030
+ if (isUnsigned )
3957
4031
{
3958
- if (isUnsigned)
3959
- {
3960
4032
#undef _IMPL
3961
4033
#define _IMPL (I, S, U ) comp<U>(accum, c) = comp<U>(accum, c) + comp<U>(x, c)
3962
- IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3963
- }
3964
- else
3965
- {
4034
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4035
+ }
4036
+ else
4037
+ {
3966
4038
#undef _IMPL
3967
4039
#define _IMPL (I, S, U ) comp<S>(accum, c) = comp<S>(accum, c) + comp<S>(x, c)
3968
- IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4040
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3969
4041
3970
4042
#undef _IMPL
3971
4043
#define _IMPL (T ) comp<T>(accum, c) = comp<T>(accum, c) + comp<T>(x, c)
3972
4044
3973
- IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
3974
- }
4045
+ IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
3975
4046
}
3976
- break ;
3977
4047
}
3978
- default : RDCERR ( " Unhandled ActiveOp wave opcode " ); break ;
4048
+ break ;
3979
4049
}
4050
+ case WaveOpCode::Product:
4051
+ {
4052
+ for (uint8_t c = 0 ; c < x.columns ; c++)
4053
+ {
4054
+ if (isUnsigned)
4055
+ {
4056
+ #undef _IMPL
4057
+ #define _IMPL (I, S, U ) comp<U>(accum, c) = comp<U>(accum, c) * comp<U>(x, c)
4058
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4059
+ }
4060
+ else
4061
+ {
4062
+ #undef _IMPL
4063
+ #define _IMPL (I, S, U ) comp<S>(accum, c) = comp<S>(accum, c) * comp<S>(x, c)
4064
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4065
+
4066
+ #undef _IMPL
4067
+ #define _IMPL (T ) comp<T>(accum, c) = comp<T>(accum, c) * comp<T>(x, c)
4068
+
4069
+ IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
4070
+ }
4071
+ }
4072
+ break ;
4073
+ }
4074
+ case WaveOpCode::Min:
4075
+ {
4076
+ for (uint8_t c = 0 ; c < x.columns ; c++)
4077
+ {
4078
+ if (isUnsigned)
4079
+ {
4080
+ #undef _IMPL
4081
+ #define _IMPL (I, S, U ) comp<U>(accum, c) = RDCMIN(comp<U>(accum, c), comp<U>(x, c))
4082
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4083
+ }
4084
+ else
4085
+ {
4086
+ #undef _IMPL
4087
+ #define _IMPL (I, S, U ) comp<S>(accum, c) = RDCMIN(comp<S>(accum, c), comp<S>(x, c))
4088
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4089
+
4090
+ #undef _IMPL
4091
+ #define _IMPL (T ) comp<T>(accum, c) = RDCMIN(comp<T>(accum, c), comp<T>(x, c))
4092
+
4093
+ IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
4094
+ }
4095
+ }
4096
+ break ;
4097
+ }
4098
+ case WaveOpCode::Max:
4099
+ {
4100
+ for (uint8_t c = 0 ; c < x.columns ; c++)
4101
+ {
4102
+ if (isUnsigned)
4103
+ {
4104
+ #undef _IMPL
4105
+ #define _IMPL (I, S, U ) comp<U>(accum, c) = RDCMAX(comp<U>(accum, c), comp<U>(x, c))
4106
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4107
+ }
4108
+ else
4109
+ {
4110
+ #undef _IMPL
4111
+ #define _IMPL (I, S, U ) comp<S>(accum, c) = RDCMAX(comp<S>(accum, c), comp<S>(x, c))
4112
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4113
+
4114
+ #undef _IMPL
4115
+ #define _IMPL (T ) comp<T>(accum, c) = RDCMAX(comp<T>(accum, c), comp<T>(x, c))
4116
+
4117
+ IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
4118
+ }
4119
+ }
4120
+ break ;
4121
+ }
4122
+ default : RDCERR (" Unhandled ActiveOp wave opcode" ); break ;
3980
4123
}
3981
- else if (dxOpCode == DXOp::WaveAnyTrue)
3982
- {
3983
- accum.value .u32v [0 ] |= x.value .u32v [0 ];
3984
- }
3985
- else if (dxOpCode == DXOp::WaveAllTrue)
4124
+ }
4125
+
4126
+ result.value = accum.value ;
4127
+ break ;
4128
+ }
4129
+ case DXOp::WaveActiveBit:
4130
+ {
4131
+ // WaveActiveBit(value,op)
4132
+ ShaderVariable accum (result);
4133
+
4134
+ ShaderVariable refValue;
4135
+ RDCASSERT (GetShaderVariable (inst.args [1 ], opCode, dxOpCode, refValue));
4136
+
4137
+ ShaderVariable arg;
4138
+ RDCASSERT (GetShaderVariable (inst.args [2 ], opCode, dxOpCode, arg));
4139
+ WaveBitOpCode waveBitOpCode = (WaveBitOpCode)arg.value .u32v [0 ];
4140
+
4141
+ // set the initial value
4142
+ switch (waveBitOpCode)
4143
+ {
4144
+ case WaveBitOpCode::Or:
4145
+ case WaveBitOpCode::Xor: SetShaderValueZero (accum); break ;
4146
+ case WaveBitOpCode::And:
3986
4147
{
3987
- accum.value .u32v [0 ] &= x.value .u32v [0 ];
4148
+ accum.value = refValue.value ;
4149
+ break ;
3988
4150
}
3989
- else if (dxOpCode == DXOp::WaveActiveBallot)
3990
- {
3991
- uint32_t c = (lane - firstLaneInSub) / 32 ;
3992
- uint32_t bit = 1U << ((lane - firstLaneInSub) % 32U );
4151
+ default :
4152
+ RDCERR (" Unhandled ActiveBitOp wave opcode" );
4153
+ accum.value = {};
4154
+ break ;
4155
+ }
3993
4156
3994
- if (x.value .u32v [0 ])
3995
- accum.value .u32v [c] |= bit;
4157
+ // determine active lane indices in our subgroup
4158
+ rdcarray<uint32_t > activeLanes;
4159
+ GetSubgroupActiveLanes (activeMask, workgroup, activeLanes);
4160
+
4161
+ for (uint32_t lane : activeLanes)
4162
+ {
4163
+ ShaderVariable x;
4164
+ RDCASSERT (workgroup[lane].GetShaderVariable (inst.args [1 ], opCode, dxOpCode, x));
4165
+
4166
+ switch (waveBitOpCode)
4167
+ {
4168
+ case WaveBitOpCode::And:
4169
+ {
4170
+ for (uint8_t c = 0 ; c < x.columns ; c++)
4171
+ {
4172
+ #undef _IMPL
4173
+ #define _IMPL (I, S, U ) comp<S>(accum, c) = comp<I>(accum, c) & comp<I>(x, c)
4174
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4175
+ }
4176
+ break ;
4177
+ }
4178
+ case WaveBitOpCode::Or:
4179
+ {
4180
+ for (uint8_t c = 0 ; c < x.columns ; c++)
4181
+ {
4182
+ #undef _IMPL
4183
+ #define _IMPL (I, S, U ) comp<S>(accum, c) = comp<I>(accum, c) | comp<I>(x, c)
4184
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4185
+ }
4186
+ break ;
4187
+ }
4188
+ case WaveBitOpCode::Xor:
4189
+ {
4190
+ for (uint8_t c = 0 ; c < x.columns ; c++)
4191
+ {
4192
+ #undef _IMPL
4193
+ #define _IMPL (I, S, U ) comp<S>(accum, c) = comp<I>(accum, c) ^ comp<I>(x, c)
4194
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
4195
+ }
4196
+ break ;
4197
+ }
4198
+ default : RDCERR (" Unhandled ActiveBitOp wave opcode" ); break ;
3996
4199
}
3997
4200
}
3998
4201
@@ -4306,9 +4509,6 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
4306
4509
case DXOp::EmitThenCutStream:
4307
4510
4308
4511
// Wave Operations
4309
- case DXOp::WaveActiveAllEqual:
4310
- case DXOp::WaveActiveBit:
4311
- case DXOp::WaveAllBitCount:
4312
4512
case DXOp::WaveMatch:
4313
4513
case DXOp::WaveMultiPrefixOp:
4314
4514
case DXOp::WaveMultiPrefixBitCount:
0 commit comments