@@ -124,6 +124,68 @@ static bool DecodePointer(DXILDebug::Id &ptrId, uint64_t &offset, uint64_t &size
124
124
return true ;
125
125
}
126
126
127
+ static void SetFloatValue (float val, ShaderVariable &var)
128
+ {
129
+ for (uint8_t c = 0 ; c < var.columns ; c++)
130
+ {
131
+ #undef _IMPL
132
+ #define _IMPL (T ) comp<T>(var, c) = val;
133
+
134
+ IMPL_FOR_FLOAT_TYPES (_IMPL);
135
+ }
136
+ }
137
+
138
+ static void SetUIntValue (uint64_t val, ShaderVariable &var)
139
+ {
140
+ for (uint8_t c = 0 ; c < var.columns ; c++)
141
+ {
142
+ #undef _IMPL
143
+ #define _IMPL (I, S, U ) comp<U>(var, c) = U(val);
144
+
145
+ IMPL_FOR_INT_TYPES (_IMPL);
146
+ }
147
+ }
148
+
149
+ static void SetIntValue (int64_t val, ShaderVariable &var)
150
+ {
151
+ for (uint8_t c = 0 ; c < var.columns ; c++)
152
+ {
153
+ #undef _IMPL
154
+ #define _IMPL (I, S, U ) comp<I>(var, c) = I(val);
155
+
156
+ IMPL_FOR_INT_TYPES (_IMPL);
157
+ }
158
+ }
159
+
160
+ static void SetShaderValue (float fVal , uint64_t uVal, int64_t iVal, ShaderVariable &var)
161
+ {
162
+ switch (var.type )
163
+ {
164
+ case VarType::Half:
165
+ case VarType::Float:
166
+ case VarType::Double: SetFloatValue (fVal , var); break ;
167
+ case VarType::SByte:
168
+ case VarType::SShort:
169
+ case VarType::SInt:
170
+ case VarType::SLong: SetIntValue (iVal, var); break ;
171
+ case VarType::UByte:
172
+ case VarType::UShort:
173
+ case VarType::UInt:
174
+ case VarType::ULong: SetUIntValue (uVal, var); break ;
175
+ default : RDCERR (" Unknown type %s" , ToStr (var.type ).c_str ());
176
+ }
177
+ }
178
+
179
+ static void SetShaderValueZero (ShaderVariable &var)
180
+ {
181
+ SetShaderValue (0 .0f , 0 , 0 , var);
182
+ }
183
+
184
+ static void SetShaderValueOne (ShaderVariable &var)
185
+ {
186
+ SetShaderValue (1 .0f , 1 , 1 , var);
187
+ }
188
+
127
189
static bool OperationFlushing (const Operation op, DXOp dxOpCode)
128
190
{
129
191
if (dxOpCode != DXOp::NumOpCodes)
@@ -3721,12 +3783,125 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3721
3783
}
3722
3784
break ;
3723
3785
}
3786
+ case DXOp::WavePrefixOp:
3787
+ {
3788
+ // WavePrefixOp(value,op,sop)
3789
+
3790
+ ShaderVariable arg;
3791
+ RDCASSERT (GetShaderVariable (inst.args [2 ], opCode, dxOpCode, arg));
3792
+ WaveOpCode waveOpCode = (WaveOpCode)arg.value .u32v [0 ];
3793
+
3794
+ RDCASSERT (GetShaderVariable (inst.args [3 ], opCode, dxOpCode, arg));
3795
+ bool isUnsigned = (arg.value .u32v [0 ] != (uint32_t )SignedOpKind::Signed);
3796
+
3797
+ // set the identity
3798
+ ShaderVariable accum (result);
3799
+ switch (waveOpCode)
3800
+ {
3801
+ case WaveOpCode::Sum: SetShaderValueZero (accum); break ;
3802
+ case WaveOpCode::Product: SetShaderValueOne (accum); break ;
3803
+ default :
3804
+ RDCERR (" Unhandled PrefixOp wave opcode" );
3805
+ accum.value = {};
3806
+ break ;
3807
+ }
3808
+
3809
+ // determine active lane indices in our subgroup
3810
+ rdcarray<uint32_t > activeLanes;
3811
+ GetSubgroupActiveLanes (activeMask, workgroup, activeLanes);
3812
+ for (uint32_t lane : activeLanes)
3813
+ {
3814
+ // stop before processing our lane
3815
+ if (lane == m_WorkgroupIndex)
3816
+ break ;
3817
+
3818
+ ShaderVariable x;
3819
+ RDCASSERT (workgroup[lane].GetShaderVariable (inst.args [1 ], opCode, dxOpCode, x));
3820
+
3821
+ switch (waveOpCode)
3822
+ {
3823
+ case WaveOpCode::Sum:
3824
+ {
3825
+ for (uint8_t c = 0 ; c < x.columns ; c++)
3826
+ {
3827
+ if (isUnsigned)
3828
+ {
3829
+ #undef _IMPL
3830
+ #define _IMPL (I, S, U ) comp<U>(accum, c) = comp<U>(accum, c) + comp<U>(x, c)
3831
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3832
+ }
3833
+ else
3834
+ {
3835
+ #undef _IMPL
3836
+ #define _IMPL (I, S, U ) comp<S>(accum, c) = comp<S>(accum, c) + comp<S>(x, c)
3837
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3838
+
3839
+ #undef _IMPL
3840
+ #define _IMPL (T ) comp<T>(accum, c) = comp<T>(accum, c) + comp<T>(x, c)
3841
+
3842
+ IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
3843
+ }
3844
+ }
3845
+ break ;
3846
+ }
3847
+ case WaveOpCode::Product:
3848
+ {
3849
+ for (uint8_t c = 0 ; c < x.columns ; c++)
3850
+ {
3851
+ if (isUnsigned)
3852
+ {
3853
+ #undef _IMPL
3854
+ #define _IMPL (I, S, U ) comp<U>(accum, c) = comp<U>(accum, c) * comp<U>(x, c)
3855
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3856
+ }
3857
+ else
3858
+ {
3859
+ #undef _IMPL
3860
+ #define _IMPL (I, S, U ) comp<S>(accum, c) = comp<S>(accum, c) * comp<S>(x, c)
3861
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3862
+
3863
+ #undef _IMPL
3864
+ #define _IMPL (T ) comp<T>(accum, c) = comp<T>(accum, c) * comp<T>(x, c)
3865
+
3866
+ IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
3867
+ }
3868
+ }
3869
+ break ;
3870
+ }
3871
+ default : RDCERR (" Unhandled PrefixOp wave opcode" ); break ;
3872
+ }
3873
+ }
3874
+ result.value = accum.value ;
3875
+ break ;
3876
+ }
3877
+ case DXOp::WavePrefixBitCount:
3878
+ {
3879
+ // WavePrefixBitCount(cond)
3880
+ // determine active lane indices in our subgroup
3881
+ rdcarray<uint32_t > activeLanes;
3882
+ GetSubgroupActiveLanes (activeMask, workgroup, activeLanes);
3883
+
3884
+ uint32_t count = 0 ;
3885
+ for (uint32_t lane : activeLanes)
3886
+ {
3887
+ // stop before processing our lane
3888
+ if (lane == m_WorkgroupIndex)
3889
+ break ;
3890
+
3891
+ ShaderVariable x;
3892
+ RDCASSERT (workgroup[lane].GetShaderVariable (inst.args [1 ], opCode, dxOpCode, x));
3893
+ count += x.value .u32v [0 ];
3894
+ }
3895
+
3896
+ result.value .u32v [0 ] = count;
3897
+ break ;
3898
+ }
3724
3899
case DXOp::WaveAnyTrue:
3725
3900
case DXOp::WaveAllTrue:
3726
3901
case DXOp::WaveActiveBallot:
3727
3902
case DXOp::WaveActiveOp:
3728
3903
{
3729
- ShaderVariable accum;
3904
+ ShaderVariable accum (result) ;
3730
3905
bool isUnsigned = true ;
3731
3906
WaveOpCode waveOpCode = WaveOpCode::Sum;
3732
3907
@@ -3743,11 +3918,12 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3743
3918
// set the identity
3744
3919
switch (waveOpCode)
3745
3920
{
3921
+ case WaveOpCode::Sum: SetShaderValueZero (accum); break ;
3922
+ case WaveOpCode::Product: SetShaderValueOne (accum); break ;
3746
3923
default :
3747
- RDCERR (" Unhandled wave opcode" );
3924
+ RDCERR (" Unhandled ActiveOp wave opcode" );
3748
3925
accum.value = {};
3749
3926
break ;
3750
- case WaveOpCode::Sum: accum.value = {}; break ;
3751
3927
}
3752
3928
}
3753
3929
else if (dxOpCode == DXOp::WaveAnyTrue)
@@ -3775,7 +3951,6 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3775
3951
{
3776
3952
switch (waveOpCode)
3777
3953
{
3778
- default : RDCERR (" Unhandled wave opcode" ); break ;
3779
3954
case WaveOpCode::Sum:
3780
3955
{
3781
3956
for (uint8_t c = 0 ; c < x.columns ; c++)
@@ -3800,6 +3975,7 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3800
3975
}
3801
3976
break ;
3802
3977
}
3978
+ default : RDCERR (" Unhandled ActiveOp wave opcode" ); break ;
3803
3979
}
3804
3980
}
3805
3981
else if (dxOpCode == DXOp::WaveAnyTrue)
@@ -4132,9 +4308,7 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
4132
4308
// Wave Operations
4133
4309
case DXOp::WaveActiveAllEqual:
4134
4310
case DXOp::WaveActiveBit:
4135
- case DXOp::WavePrefixOp:
4136
4311
case DXOp::WaveAllBitCount:
4137
- case DXOp::WavePrefixBitCount:
4138
4312
case DXOp::WaveMatch:
4139
4313
case DXOp::WaveMultiPrefixOp:
4140
4314
case DXOp::WaveMultiPrefixBitCount:
0 commit comments