@@ -279,7 +279,7 @@ class CDetourDis
279
279
PBYTE CopyVex2 (REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
280
280
PBYTE CopyVex3 (REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
281
281
PBYTE CopyVexCommon (BYTE m, PBYTE pbDst, PBYTE pbSrc);
282
- PBYTE CopyVexEvexCommon (BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p);
282
+ PBYTE CopyVexEvexCommon (BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p, BYTE fp16 = 0 );
283
283
PBYTE CopyEvex (REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
284
284
PBYTE CopyXop (REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
285
285
@@ -745,7 +745,7 @@ PBYTE CDetourDis::CopyFF(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
745
745
return pbOut;
746
746
}
747
747
748
- PBYTE CDetourDis::CopyVexEvexCommon (BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p)
748
+ PBYTE CDetourDis::CopyVexEvexCommon (BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p, BYTE fp16 )
749
749
// m is first instead of last in the hopes of pbDst/pbSrc being
750
750
// passed along efficiently in the registers they were already in.
751
751
{
@@ -762,10 +762,13 @@ PBYTE CDetourDis::CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p)
762
762
763
763
REFCOPYENTRY pEntry;
764
764
765
- switch (m) {
765
+ // AVX512-FP16 adds EVEX opcode maps 5 and 6; see https://software.intel.com/content/www/us/en/develop/download/intel-avx512-fp16-architecture-specification.html
766
+ switch (m | fp16) {
766
767
default : return Invalid (&ceInvalid, pbDst, pbSrc);
767
768
case 1 : pEntry = &s_rceCopyTable0F[pbSrc[0 ]];
768
769
return (this ->*pEntry->pfCopy )(pEntry, pbDst, pbSrc);
770
+ case 5 : // fallthrough
771
+ case 6 : // fallthrough
769
772
case 2 : return CopyBytes (&ceF38, pbDst, pbSrc);
770
773
case 3 : return CopyBytes (&ceF3A, pbDst, pbSrc);
771
774
}
@@ -859,7 +862,9 @@ PBYTE CDetourDis::CopyEvex(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
859
862
860
863
static const COPYENTRY ceInvalid = /* 62 */ ENTRY_Invalid;
861
864
862
- if ((p0 & 0x0C ) != 0 )
865
+ // p0 bit 3 set is rejected as invalid here. It could instead be left to the
866
+ // default case in CopyVexEvexCommon if the 4u mask passed below became (4u | 8u).
867
+ if (p0 & 8u )
863
868
return Invalid (&ceInvalid, pbDst, pbSrc);
864
869
865
870
BYTE const p1 = pbSrc[2 ];
@@ -876,7 +881,7 @@ PBYTE CDetourDis::CopyEvex(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
876
881
m_bRaxOverride |= !!(p1 & 0x80 ); // w
877
882
#endif
878
883
879
- return CopyVexEvexCommon (p0 & 3u , pbDst + 4 , pbSrc + 4 , p1 & 3u );
884
+ return CopyVexEvexCommon (p0 & 3u , pbDst + 4 , pbSrc + 4 , p1 & 3u , p0 & 4u );
880
885
}
881
886
882
887
PBYTE CDetourDis::CopyXop (REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
0 commit comments