add vfmaddsub{132,213,231}ph
This commit is contained in:
parent
ab9481b4c6
commit
9b8802cba2
4 changed files with 73 additions and 0 deletions
|
@ -40,6 +40,8 @@
|
||||||
T_VSIB = 1 << 29,
|
T_VSIB = 1 << 29,
|
||||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||||
T_FP16 = 1 << 31,
|
T_FP16 = 1 << 31,
|
||||||
|
T_MAP5 = T_FP16 | T_0F,
|
||||||
|
T_MAP6 = T_FP16 | T_0F38,
|
||||||
T_XXX
|
T_XXX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -764,9 +764,56 @@ void putFP16_1()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void putFP16_FMA()
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
uint8_t code;
|
||||||
|
const char *name;
|
||||||
|
bool isPH;
|
||||||
|
} tbl[] = {
|
||||||
|
{ 0x06, "vfmaddsub", true },
|
||||||
|
/*
|
||||||
|
{ 0x, "vfmadd", false },
|
||||||
|
{ 0x06, "vfmaddsub", true },
|
||||||
|
{ 0x07, "vfmsubadd", true },
|
||||||
|
{ 0x0A, "vfmsub", true },
|
||||||
|
{ 0x0B, "vfmsub", false },
|
||||||
|
{ 0x0C, "vfnmadd", true },
|
||||||
|
{ 0x0D, "vfnmadd", false },
|
||||||
|
{ 0x0E, "vfnmsub", true },
|
||||||
|
{ 0x0F, "vfnmsub", false },
|
||||||
|
*/
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
for (int k = 0; k < 3; k++) {
|
||||||
|
const struct Ord {
|
||||||
|
const char *str;
|
||||||
|
uint8_t code;
|
||||||
|
} ord[] = {
|
||||||
|
{ "132", 0x90 },
|
||||||
|
{ "213", 0xA0 },
|
||||||
|
{ "231", 0xB0 },
|
||||||
|
};
|
||||||
|
int t = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||||
|
const char *suf = 0;
|
||||||
|
if (tbl[i].isPH) {
|
||||||
|
t |= T_ER_Z | T_YMM | T_B16;
|
||||||
|
suf = "ph";
|
||||||
|
} else {
|
||||||
|
t |= T_ER_X | T_N2;
|
||||||
|
suf = "sh";
|
||||||
|
}
|
||||||
|
std::string type = type2String(t);
|
||||||
|
printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
|
||||||
|
, tbl[i].name, ord[k].str, suf, type.c_str(), tbl[i].code | ord[k].code);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void putFP16()
|
void putFP16()
|
||||||
{
|
{
|
||||||
putFP16_1();
|
putFP16_1();
|
||||||
|
putFP16_FMA();
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *[])
|
int main(int argc, char *[])
|
||||||
|
|
|
@ -894,6 +894,16 @@ CYBOZU_TEST_AUTO(vaddph)
|
||||||
|
|
||||||
vcomish(xmm1, ptr[rax+64]);
|
vcomish(xmm1, ptr[rax+64]);
|
||||||
vcomish(xmm1|T_sae, xmm15);
|
vcomish(xmm1|T_sae, xmm15);
|
||||||
|
|
||||||
|
vfmaddsub213ph(xmm1, xmm2, ptr [rax+0x40]);
|
||||||
|
vfmaddsub213ph(xmm1, xmm2, ptr_b [rax+0x40]);
|
||||||
|
vfmaddsub213ph(xmm1|k3, xmm2, xmm5);
|
||||||
|
vfmaddsub213ph(ymm1, ymm2, ptr [rax+0x40]);
|
||||||
|
vfmaddsub213ph(ymm1, ymm2, ptr_b[rax+0x40]);
|
||||||
|
vfmaddsub213ph(ymm1|k3, ymm2, ymm5);
|
||||||
|
vfmaddsub213ph(zmm1, zmm2, ptr [rax+0x40]);
|
||||||
|
vfmaddsub213ph(zmm1, zmm2, ptr_b [rax+0x40]);
|
||||||
|
vfmaddsub213ph(zmm1|T_ru_sae, zmm2, zmm5);
|
||||||
}
|
}
|
||||||
} c;
|
} c;
|
||||||
const uint8_t tbl[] = {
|
const uint8_t tbl[] = {
|
||||||
|
@ -925,6 +935,17 @@ CYBOZU_TEST_AUTO(vaddph)
|
||||||
// vcomish
|
// vcomish
|
||||||
0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x48, 0x20,
|
0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x48, 0x20,
|
||||||
0x62, 0xd5, 0x7c, 0x18, 0x2f, 0xcf,
|
0x62, 0xd5, 0x7c, 0x18, 0x2f, 0xcf,
|
||||||
|
|
||||||
|
// vfmaddsub213ph
|
||||||
|
0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x48, 0x04,
|
||||||
|
0x62, 0xf6, 0x6d, 0x18, 0xa6, 0x48, 0x20,
|
||||||
|
0x62, 0xf6, 0x6d, 0x0b, 0xa6, 0xcd,
|
||||||
|
0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x48, 0x02,
|
||||||
|
0x62, 0xf6, 0x6d, 0x38, 0xa6, 0x48, 0x20,
|
||||||
|
0x62, 0xf6, 0x6d, 0x2b, 0xa6, 0xcd,
|
||||||
|
0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x48, 0x01,
|
||||||
|
0x62, 0xf6, 0x6d, 0x58, 0xa6, 0x48, 0x20,
|
||||||
|
0x62, 0xf6, 0x6d, 0x58, 0xa6, 0xcd,
|
||||||
};
|
};
|
||||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
|
|
@ -1942,6 +1942,9 @@ void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) {
|
||||||
void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
|
void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
|
||||||
void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
||||||
void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
||||||
|
// vfmaddsub132ph: forwards (x1, x2, op) to opAVX_X_X_XM with opcode byte 0x96 and the MAP6 / EVEX-only attribute set. NOTE(review): this line has the shape of generator output - presumably regenerate it rather than hand-edit; confirm.
void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x96); }
|
||||||
|
// vfmaddsub213ph: forwards (x1, x2, op) to opAVX_X_X_XM with opcode byte 0xA6 and the MAP6 / EVEX-only attribute set. NOTE(review): this line has the shape of generator output - presumably regenerate it rather than hand-edit; confirm.
void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA6); }
|
||||||
|
// vfmaddsub231ph: forwards (x1, x2, op) to opAVX_X_X_XM with opcode byte 0xB6 and the MAP6 / EVEX-only attribute set. NOTE(review): this line has the shape of generator output - presumably regenerate it rather than hand-edit; confirm.
void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB6); }
|
||||||
void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
|
void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
|
||||||
void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
||||||
void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
|
void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue