Throw an exception when an unsupported AMX sibmem addressing form is used
This commit is contained in:
parent
6f93fe3511
commit
34ea5c1642
4 changed files with 92 additions and 26 deletions
|
@ -732,24 +732,24 @@ void putV4FMA()
|
||||||
|
|
||||||
void putAMX_TILE()
|
void putAMX_TILE()
|
||||||
{
|
{
|
||||||
puts("void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0, 0x49); }");
|
puts("void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }");
|
||||||
puts("void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }");
|
puts("void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }");
|
||||||
puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_F2 | T_0F38 | T_W0, 0x4b); }");
|
puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }");
|
||||||
puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_66 | T_0F38 | T_W0, 0x4b); }");
|
puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }");
|
||||||
puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }");
|
puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }");
|
||||||
puts("void tilestored(const Address& addr, const Tmm& tm) { opAMX(tm, tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }");
|
puts("void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }");
|
||||||
puts("void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }");
|
puts("void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }");
|
||||||
}
|
}
|
||||||
void putAMX_INT8()
|
void putAMX_INT8()
|
||||||
{
|
{
|
||||||
puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }");
|
||||||
puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }");
|
||||||
puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }");
|
||||||
puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }");
|
||||||
}
|
}
|
||||||
void putAMX_BF16()
|
void putAMX_BF16()
|
||||||
{
|
{
|
||||||
puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }");
|
puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }");
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *[])
|
int main(int argc, char *[])
|
||||||
|
@ -761,8 +761,8 @@ int main(int argc, char *[])
|
||||||
putAMX_TILE();
|
putAMX_TILE();
|
||||||
putAMX_INT8();
|
putAMX_INT8();
|
||||||
putAMX_BF16();
|
putAMX_BF16();
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
if (only64bit) return 0;
|
|
||||||
putVcmp();
|
putVcmp();
|
||||||
putX_XM();
|
putX_XM();
|
||||||
putM_X();
|
putM_X();
|
||||||
|
|
|
@ -751,4 +751,63 @@ CYBOZU_TEST_AUTO(bf16)
|
||||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(AMX)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
ldtilecfg(ptr[rax + rcx * 4 + 64]);
|
||||||
|
sttilecfg(ptr[rsp + rax * 8 + 128]);
|
||||||
|
tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
|
||||||
|
tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
|
||||||
|
tilerelease();
|
||||||
|
tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
|
||||||
|
tilezero(tmm7);
|
||||||
|
tdpbssd(tmm1, tmm2, tmm3);
|
||||||
|
tdpbsud(tmm2, tmm3, tmm4);
|
||||||
|
tdpbusd(tmm3, tmm4, tmm5);
|
||||||
|
tdpbuud(tmm4, tmm5, tmm6);
|
||||||
|
tdpbf16ps(tmm5, tmm6, tmm7);
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
// generated code by patch
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
0xc4, 0xe2, 0x78, 0x49, 0x44, 0x88, 0x40, 0xc4, 0xe2, 0x79, 0x49, 0x84, 0xc4, 0x80, 0x00, 0x00,
|
||||||
|
0x00, 0xc4, 0xe2, 0x7b, 0x4b, 0x5c, 0x57, 0x08, 0xc4, 0x82, 0x79, 0x4b, 0x64, 0x08, 0x20, 0xc4,
|
||||||
|
0xe2, 0x78, 0x49, 0xc0, 0xc4, 0x82, 0x7a, 0x4b, 0x54, 0x5a, 0x20, 0xc4, 0xe2, 0x7b, 0x49, 0xf8,
|
||||||
|
0xc4, 0xe2, 0x63, 0x5e, 0xca, 0xc4, 0xe2, 0x5a, 0x5e, 0xd3, 0xc4, 0xe2, 0x51, 0x5e, 0xdc, 0xc4,
|
||||||
|
0xe2, 0x48, 0x5e, 0xe5, 0xc4, 0xe2, 0x42, 0x5c, 0xee,
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(tileloadd)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
tileloadd(tmm1, ptr[r8+r8]);
|
||||||
|
tileloadd(tmm1, ptr[rax+rcx*4]);
|
||||||
|
tileloadd(tmm1, ptr[r8+r9*1+0x40]);
|
||||||
|
}
|
||||||
|
void notSupported()
|
||||||
|
{
|
||||||
|
tileloadd(tmm1, ptr[r8]);
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
0xC4, 0x82, 0x7B, 0x4B, 0x0C, 0x00,
|
||||||
|
0xC4, 0xE2, 0x7B, 0x4B, 0x0C, 0x88,
|
||||||
|
0xC4, 0x82, 0x7B, 0x4B, 0x4C, 0x08, 0x40,
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
|
||||||
|
// The current version does not support sibmem addressing without an index register (e.g. ptr[r8]), so this must throw.
|
||||||
|
CYBOZU_TEST_EXCEPTION(c.notSupported(), std::exception);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -194,6 +194,7 @@ enum {
|
||||||
ERR_INVALID_RIP_IN_AUTO_GROW,
|
ERR_INVALID_RIP_IN_AUTO_GROW,
|
||||||
ERR_INVALID_MIB_ADDRESS,
|
ERR_INVALID_MIB_ADDRESS,
|
||||||
ERR_X2APIC_IS_NOT_SUPPORTED,
|
ERR_X2APIC_IS_NOT_SUPPORTED,
|
||||||
|
ERR_NOT_SUPPORTED,
|
||||||
ERR_INTERNAL // Put it at last.
|
ERR_INTERNAL // Put it at last.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -255,6 +256,7 @@ public:
|
||||||
"invalid rip in AutoGrow",
|
"invalid rip in AutoGrow",
|
||||||
"invalid mib address",
|
"invalid mib address",
|
||||||
"x2APIC is not supported",
|
"x2APIC is not supported",
|
||||||
|
"not supported",
|
||||||
"internal error"
|
"internal error"
|
||||||
};
|
};
|
||||||
assert(err_ <= ERR_INTERNAL);
|
assert(err_ <= ERR_INTERNAL);
|
||||||
|
@ -682,9 +684,11 @@ struct Zmm : public Ymm {
|
||||||
Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
|
Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef XBYAK64
|
||||||
struct Tmm : public Reg {
|
struct Tmm : public Reg {
|
||||||
explicit Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { }
|
explicit Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { }
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
struct Opmask : public Reg {
|
struct Opmask : public Reg {
|
||||||
explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
|
explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
|
||||||
|
@ -2262,11 +2266,14 @@ private:
|
||||||
}
|
}
|
||||||
throw Error(ERR_BAD_COMBINATION);
|
throw Error(ERR_BAD_COMBINATION);
|
||||||
}
|
}
|
||||||
void opAMX(const Tmm& t1, const Tmm& t2, const Operand& op, int type, int code0, int imm8 = NONE)
|
#ifdef XBYAK64
|
||||||
|
void opAMX(const Tmm& t1, const Address& addr, int type, int code0)
|
||||||
{
|
{
|
||||||
if (!t1.isTMM() || !t2.isTMM()) throw Error(ERR_BAD_COMBINATION);
|
// Reject addressing without an index register, such as ptr[r8].
|
||||||
opVex(t1, &t2, op, type, code0, imm8);
|
if (addr.getRegExp().getIndex().getBit() == 0) throw Error(ERR_NOT_SUPPORTED);
|
||||||
|
opVex(t1, &tmm0, addr, type, code0);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
public:
|
public:
|
||||||
unsigned int getVersion() const { return VERSION; }
|
unsigned int getVersion() const { return VERSION; }
|
||||||
using CodeArray::db;
|
using CodeArray::db;
|
||||||
|
|
|
@ -2033,18 +2033,18 @@ void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { op
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
|
void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
|
||||||
void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
|
void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
|
||||||
void ldtilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_0F38 | T_W0, 0x49); }
|
void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
|
||||||
void sttilecfg(const Address& addr) { opAMX(tmm0, tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
|
void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
|
||||||
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
|
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
|
||||||
void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }
|
void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }
|
||||||
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
|
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
|
||||||
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
|
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
|
||||||
void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opAMX(x1, x3, x2, T_0F38 | T_W0, 0x5e); }
|
void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
|
||||||
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
|
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
|
||||||
void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, tmm0, addr, T_66 | T_0F38 | T_W0, 0x4b); }
|
void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }
|
||||||
void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
|
void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
|
||||||
void tilestored(const Address& addr, const Tmm& tm) { opAMX(tm, tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }
|
void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }
|
||||||
void tilezero(const Tmm& Tmm) { opAMX(Tmm, tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }
|
void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }
|
||||||
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
|
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue