throw exception if not supported amx sibmem

This commit is contained in:
MITSUNARI Shigeo 2020-06-30 18:10:31 +09:00
parent 6f93fe3511
commit 34ea5c1642
4 changed files with 92 additions and 26 deletions

View file

@ -732,24 +732,24 @@ void putV4FMA()
void putAMX_TILE() void putAMX_TILE()
{ {
puts("void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0, 0x49); }"); puts("void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }");
puts("void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }"); puts("void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }");
puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_F2 | T_0F38 | T_W0, 0x4b); }"); puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }");
puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_66 | T_0F38 | T_W0, 0x4b); }"); puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }");
puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }"); puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }");
puts("void tilestored(const Address& addr, const Tmm& tm) { opAMX(tm, tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }"); puts("void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }");
puts("void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }"); puts("void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }");
} }
void putAMX_INT8() void putAMX_INT8()
{ {
puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }"); puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }");
puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }"); puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }");
puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }"); puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }");
puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0, 0x5e); }"); puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }");
} }
void putAMX_BF16() void putAMX_BF16()
{ {
puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }"); puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }");
} }
int main(int argc, char *[]) int main(int argc, char *[])
@ -761,8 +761,8 @@ int main(int argc, char *[])
putAMX_TILE(); putAMX_TILE();
putAMX_INT8(); putAMX_INT8();
putAMX_BF16(); putAMX_BF16();
return 0;
} }
if (only64bit) return 0;
putVcmp(); putVcmp();
putX_XM(); putX_XM();
putM_X(); putM_X();

View file

@ -751,4 +751,63 @@ CYBOZU_TEST_AUTO(bf16)
CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
} }
CYBOZU_TEST_AUTO(AMX)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
ldtilecfg(ptr[rax + rcx * 4 + 64]);
sttilecfg(ptr[rsp + rax * 8 + 128]);
tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
tilerelease();
tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
tilezero(tmm7);
tdpbssd(tmm1, tmm2, tmm3);
tdpbsud(tmm2, tmm3, tmm4);
tdpbusd(tmm3, tmm4, tmm5);
tdpbuud(tmm4, tmm5, tmm6);
tdpbf16ps(tmm5, tmm6, tmm7);
}
} c;
// generated code by patch
const uint8_t tbl[] = {
0xc4, 0xe2, 0x78, 0x49, 0x44, 0x88, 0x40, 0xc4, 0xe2, 0x79, 0x49, 0x84, 0xc4, 0x80, 0x00, 0x00,
0x00, 0xc4, 0xe2, 0x7b, 0x4b, 0x5c, 0x57, 0x08, 0xc4, 0x82, 0x79, 0x4b, 0x64, 0x08, 0x20, 0xc4,
0xe2, 0x78, 0x49, 0xc0, 0xc4, 0x82, 0x7a, 0x4b, 0x54, 0x5a, 0x20, 0xc4, 0xe2, 0x7b, 0x49, 0xf8,
0xc4, 0xe2, 0x63, 0x5e, 0xca, 0xc4, 0xe2, 0x5a, 0x5e, 0xd3, 0xc4, 0xe2, 0x51, 0x5e, 0xdc, 0xc4,
0xe2, 0x48, 0x5e, 0xe5, 0xc4, 0xe2, 0x42, 0x5c, 0xee,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
CYBOZU_TEST_AUTO(tileloadd)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
tileloadd(tmm1, ptr[r8+r8]);
tileloadd(tmm1, ptr[rax+rcx*4]);
tileloadd(tmm1, ptr[r8+r9*1+0x40]);
}
void notSupported()
{
tileloadd(tmm1, ptr[r8]);
}
} c;
const uint8_t tbl[] = {
0xC4, 0x82, 0x7B, 0x4B, 0x0C, 0x00,
0xC4, 0xE2, 0x7B, 0x4B, 0x0C, 0x88,
0xC4, 0x82, 0x7B, 0x4B, 0x4C, 0x08, 0x40,
};
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
// current version does not support this sibmem format
CYBOZU_TEST_EXCEPTION(c.notSupported(), std::exception);
}
#endif #endif

View file

@ -194,6 +194,7 @@ enum {
ERR_INVALID_RIP_IN_AUTO_GROW, ERR_INVALID_RIP_IN_AUTO_GROW,
ERR_INVALID_MIB_ADDRESS, ERR_INVALID_MIB_ADDRESS,
ERR_X2APIC_IS_NOT_SUPPORTED, ERR_X2APIC_IS_NOT_SUPPORTED,
ERR_NOT_SUPPORTED,
ERR_INTERNAL // Put it at last. ERR_INTERNAL // Put it at last.
}; };
@ -255,6 +256,7 @@ public:
"invalid rip in AutoGrow", "invalid rip in AutoGrow",
"invalid mib address", "invalid mib address",
"x2APIC is not supported", "x2APIC is not supported",
"not supported",
"internal error" "internal error"
}; };
assert(err_ <= ERR_INTERNAL); assert(err_ <= ERR_INTERNAL);
@ -682,9 +684,11 @@ struct Zmm : public Ymm {
Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; } Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
}; };
#ifdef XBYAK64
struct Tmm : public Reg { struct Tmm : public Reg {
explicit Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { } explicit Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { }
}; };
#endif
struct Opmask : public Reg { struct Opmask : public Reg {
explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {} explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
@ -2262,11 +2266,14 @@ private:
} }
throw Error(ERR_BAD_COMBINATION); throw Error(ERR_BAD_COMBINATION);
} }
void opAMX(const Tmm& t1, const Tmm& t2, const Operand& op, int type, int code0, int imm8 = NONE) #ifdef XBYAK64
void opAMX(const Tmm& t1, const Address& addr, int type, int code0)
{ {
if (!t1.isTMM() || !t2.isTMM()) throw Error(ERR_BAD_COMBINATION); // addressing without index such as ptr[r8]
opVex(t1, &t2, op, type, code0, imm8); if (addr.getRegExp().getIndex().getBit() == 0) throw Error(ERR_NOT_SUPPORTED);
opVex(t1, &tmm0, addr, type, code0);
} }
#endif
public: public:
unsigned int getVersion() const { return VERSION; } unsigned int getVersion() const { return VERSION; }
using CodeArray::db; using CodeArray::db;

View file

@ -2033,18 +2033,18 @@ void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { op
#ifdef XBYAK64 #ifdef XBYAK64
void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); } void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); } void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0, 0x49); } void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); } void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); } void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); } void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); } void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0, 0x5e); } void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0, 0x5e); } void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_F2 | T_0F38 | T_W0, 0x4b); } void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_66 | T_0F38 | T_W0, 0x4b); } void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }
void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); } void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
void tilestored(const Address& addr, const Tmm& tm) { opAMX(tm, tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); } void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }
void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); } void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); } void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
#endif #endif
#endif #endif