@@ -3682,74 +3682,106 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
3682
3682
result.value .u32v [0 ] = (m_WorkgroupIndex == activeLanes[0 ]) ? 1 : 0 ;
3683
3683
break ;
3684
3684
}
3685
+ case DXOp::WaveAnyTrue:
3686
+ case DXOp::WaveAllTrue:
3687
+ case DXOp::WaveActiveBallot:
3685
3688
case DXOp::WaveActiveOp:
3686
3689
{
3687
- // WaveActiveOp(value,op,sop)
3690
+ ShaderVariable accum;
3691
+ bool isUnsigned = true ;
3692
+ WaveOpCode waveOpCode = WaveOpCode::Sum;
3688
3693
3689
- ShaderVariable arg;
3690
- RDCASSERT (GetShaderVariable (inst.args [2 ], opCode, dxOpCode, arg));
3691
- WaveOpCode waveOpCode = (WaveOpCode)arg.value .u32v [0 ];
3694
+ if (dxOpCode == DXOp::WaveActiveOp)
3695
+ {
3696
+ // WaveActiveOp(value,op,sop)
3697
+ ShaderVariable arg;
3698
+ RDCASSERT (GetShaderVariable (inst.args [2 ], opCode, dxOpCode, arg));
3699
+ waveOpCode = (WaveOpCode)arg.value .u32v [0 ];
3692
3700
3693
- RDCASSERT (GetShaderVariable (inst.args [3 ], opCode, dxOpCode, arg));
3694
- bool isUnsigned = (arg.value .u32v [0 ] != (uint32_t )SignedOpKind::Signed);
3701
+ RDCASSERT (GetShaderVariable (inst.args [3 ], opCode, dxOpCode, arg));
3702
+ isUnsigned = (arg.value .u32v [0 ] != (uint32_t )SignedOpKind::Signed);
3703
+
3704
+ // set the identity
3705
+ switch (waveOpCode)
3706
+ {
3707
+ default :
3708
+ RDCERR (" Unhandled wave opcode" );
3709
+ accum.value = {};
3710
+ break ;
3711
+ case WaveOpCode::Sum: accum.value = {}; break ;
3712
+ }
3713
+ }
3714
+ else if (dxOpCode == DXOp::WaveAnyTrue)
3715
+ {
3716
+ // WaveAnyTrue(cond)
3717
+ accum.value .u32v [0 ] = 0 ;
3718
+ }
3719
+ else if (dxOpCode == DXOp::WaveAllTrue)
3720
+ {
3721
+ // WaveAllTrue(cond)
3722
+ accum.value .u32v [0 ] = 1 ;
3723
+ }
3695
3724
3696
3725
// determine active lane indices in our subgroup
3697
3726
rdcarray<uint32_t > activeLanes;
3698
3727
GetSubgroupActiveLanes (activeMask, workgroup, activeLanes);
3699
-
3700
- ShaderVariable accum;
3701
- RDCASSERT (GetShaderVariable (inst.args [1 ], opCode, dxOpCode, accum));
3702
-
3703
- // set the identity
3704
- switch (waveOpCode)
3705
- {
3706
- default :
3707
- RDCERR (" Unhandled wave opcode" );
3708
- accum.value = {};
3709
- break ;
3710
- case WaveOpCode::Sum: accum.value = {}; break ;
3711
- }
3728
+ const uint32_t firstLaneInSub = m_WorkgroupIndex - m_SubgroupIdx;
3712
3729
3713
3730
for (uint32_t lane : activeLanes)
3714
3731
{
3715
3732
ShaderVariable x;
3716
3733
RDCASSERT (workgroup[lane].GetShaderVariable (inst.args [1 ], opCode, dxOpCode, x));
3717
3734
3718
- switch (waveOpCode )
3735
+ if (dxOpCode == DXOp::WaveActiveOp )
3719
3736
{
3720
- default : RDCERR (" Unhandled wave opcode" ); break ;
3721
- case WaveOpCode::Sum:
3737
+ switch (waveOpCode)
3722
3738
{
3723
- for (uint8_t c = 0 ; c < x.columns ; c++)
3739
+ default : RDCERR (" Unhandled wave opcode" ); break ;
3740
+ case WaveOpCode::Sum:
3724
3741
{
3725
- if (isUnsigned )
3742
+ for ( uint8_t c = 0 ; c < x. columns ; c++ )
3726
3743
{
3744
+ if (isUnsigned)
3745
+ {
3727
3746
#undef _IMPL
3728
3747
#define _IMPL (I, S, U ) comp<U>(accum, c) = comp<U>(accum, c) + comp<U>(x, c)
3729
- IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3730
- }
3731
- else
3732
- {
3748
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3749
+ }
3750
+ else
3751
+ {
3733
3752
#undef _IMPL
3734
3753
#define _IMPL (I, S, U ) comp<S>(accum, c) = comp<S>(accum, c) + comp<S>(x, c)
3735
- IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3754
+ IMPL_FOR_INT_TYPES_FOR_TYPE (_IMPL, x.type );
3736
3755
3737
3756
#undef _IMPL
3738
3757
#define _IMPL (T ) comp<T>(accum, c) = comp<T>(accum, c) + comp<T>(x, c)
3739
3758
3740
- IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
3759
+ IMPL_FOR_FLOAT_TYPES_FOR_TYPE (_IMPL, x.type );
3760
+ }
3741
3761
}
3762
+ break ;
3742
3763
}
3743
- break ;
3744
3764
}
3745
3765
}
3746
- }
3766
+ else if (dxOpCode == DXOp::WaveAnyTrue)
3767
+ {
3768
+ accum.value .u32v [0 ] |= x.value .u32v [0 ];
3769
+ }
3770
+ else if (dxOpCode == DXOp::WaveAllTrue)
3771
+ {
3772
+ accum.value .u32v [0 ] &= x.value .u32v [0 ];
3773
+ }
3774
+ else if (dxOpCode == DXOp::WaveActiveBallot)
3775
+ {
3776
+ uint32_t c = (lane - firstLaneInSub) / 32 ;
3777
+ uint32_t bit = 1U << ((lane - firstLaneInSub) % 32U );
3747
3778
3748
- // Copy the whole variable to ensure we get the correct type information
3749
- rdcstr name = result. name ;
3750
- result = accum;
3751
- result. name = name;
3779
+ if (x. value . u32v [ 0 ])
3780
+ accum. value . u32v [c] |= bit ;
3781
+ }
3782
+ }
3752
3783
3784
+ result.value = accum.value ;
3753
3785
break ;
3754
3786
}
3755
3787
// Quad Operations
@@ -4058,11 +4090,8 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper,
4058
4090
case DXOp::CutStream:
4059
4091
case DXOp::EmitThenCutStream:
4060
4092
4061
- // Wave/Subgroup Operations
4062
- case DXOp::WaveAnyTrue:
4063
- case DXOp::WaveAllTrue:
4093
+ // Wave Operations
4064
4094
case DXOp::WaveActiveAllEqual:
4065
- case DXOp::WaveActiveBallot:
4066
4095
case DXOp::WaveReadLaneAt:
4067
4096
case DXOp::WaveReadLaneFirst:
4068
4097
case DXOp::WaveActiveBit:
0 commit comments