Skip to content

Commit 05623a5

Browse files
authored
Disx86: Add support for FP16. (microsoft#209)
See https://software.intel.com/content/www/us/en/develop/download/intel-avx512-fp16-architecture-specification.html for the specification. For 16-bit floating-point (FP16) instructions, EVEX maps 5 and 6 become defined. EVEX includes a 4-bit map field; previously only the low 2 bits were allowed, now the low 3 bits are. EVEX and VEX share decoding logic, but VEX cannot encode FP16. Tests are lacking.
1 parent 84d4356 commit 05623a5

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

src/disasm.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ class CDetourDis
279279
PBYTE CopyVex2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
280280
PBYTE CopyVex3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
281281
PBYTE CopyVexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc);
282-
PBYTE CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p);
282+
PBYTE CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p, BYTE fp16 = 0);
283283
PBYTE CopyEvex(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
284284
PBYTE CopyXop(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
285285

@@ -745,7 +745,7 @@ PBYTE CDetourDis::CopyFF(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc)
745745
return pbOut;
746746
}
747747

748-
PBYTE CDetourDis::CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p)
748+
PBYTE CDetourDis::CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p, BYTE fp16)
749749
// m is first instead of last in the hopes of pbDst/pbSrc being
750750
// passed along efficiently in the registers they were already in.
751751
{
@@ -762,10 +762,13 @@ PBYTE CDetourDis::CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p)
762762

763763
REFCOPYENTRY pEntry;
764764

765-
switch (m) {
765+
// see https://software.intel.com/content/www/us/en/develop/download/intel-avx512-fp16-architecture-specification.html
766+
switch (m | fp16) {
766767
default: return Invalid(&ceInvalid, pbDst, pbSrc);
767768
case 1: pEntry = &s_rceCopyTable0F[pbSrc[0]];
768769
return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc);
770+
case 5: // fallthrough
771+
case 6: // fallthrough
769772
case 2: return CopyBytes(&ceF38, pbDst, pbSrc);
770773
case 3: return CopyBytes(&ceF3A, pbDst, pbSrc);
771774
}
@@ -859,7 +862,9 @@ PBYTE CDetourDis::CopyEvex(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
859862

860863
static const COPYENTRY ceInvalid = /* 62 */ ENTRY_Invalid;
861864

862-
if ((p0 & 0x0C) != 0)
865+
// This could also be handled by default in CopyVexEvexCommon
866+
// if 4u changed to 4|8.
867+
if (p0 & 8u)
863868
return Invalid(&ceInvalid, pbDst, pbSrc);
864869

865870
BYTE const p1 = pbSrc[2];
@@ -876,7 +881,7 @@ PBYTE CDetourDis::CopyEvex(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)
876881
m_bRaxOverride |= !!(p1 & 0x80); // w
877882
#endif
878883

879-
return CopyVexEvexCommon(p0 & 3u, pbDst + 4, pbSrc + 4, p1 & 3u);
884+
return CopyVexEvexCommon(p0 & 3u, pbDst + 4, pbSrc + 4, p1 & 3u, p0 & 4u);
880885
}
881886

882887
PBYTE CDetourDis::CopyXop(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc)

0 commit comments

Comments
 (0)