add vrcpph, vrcpsh

This commit is contained in:
MITSUNARI Shigeo 2021-09-07 09:33:07 +09:00
parent 4f543ca0e4
commit b1ff7891d4
3 changed files with 26 additions and 0 deletions

View file

@ -331,6 +331,8 @@ void putX_X_XM_IMM()
{ 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false }, { 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false },
{ 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false }, { 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false },
{ 0x4D, "vrcpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false },
{ 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false }, { 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false },
{ 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false }, { 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false },
@ -652,6 +654,8 @@ void putX_XM_IMM()
{ 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x4C, "vrcpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, false },
{ 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },

View file

@ -967,6 +967,15 @@ CYBOZU_TEST_AUTO(vaddph)
vfmulcph(xmm1, xmm2, ptr [rax+0x40]); vfmulcph(xmm1, xmm2, ptr [rax+0x40]);
vfmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]); vfmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
vfmulcph(zmm1, zmm2, ptr_b [rax+0x40]); vfmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
vrcpph(xmm1, ptr [rax+0x40]);
vrcpph(xmm1, ptr_b [rax+0x40]);
vrcpph(ymm1, ptr [rax+0x40]);
vrcpph(ymm1, ptr_b [rax+0x40]);
vrcpph(zmm1, ptr [rax+0x40]);
vrcpph(zmm1, ptr_b [rax+0x40]);
vrcpsh(xmm1, xmm3, ptr [rax+0x40]);
} }
} c; } c;
const uint8_t tbl[] = { const uint8_t tbl[] = {
@ -1086,6 +1095,17 @@ CYBOZU_TEST_AUTO(vaddph)
0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x48, 0x04, 0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x48, 0x04,
0x62, 0xf6, 0x6e, 0xb9, 0xd6, 0x48, 0x10, 0x62, 0xf6, 0x6e, 0xb9, 0xd6, 0x48, 0x10,
0x62, 0xf6, 0x6e, 0x58, 0xd6, 0x48, 0x10, 0x62, 0xf6, 0x6e, 0x58, 0xd6, 0x48, 0x10,
// vrcpph
0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x48, 0x04,
0x62, 0xf6, 0x7d, 0x18, 0x4c, 0x48, 0x20,
0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x48, 0x02,
0x62, 0xf6, 0x7d, 0x38, 0x4c, 0x48, 0x20,
0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x48, 0x01,
0x62, 0xf6, 0x7d, 0x58, 0x4c, 0x48, 0x20,
// vrcpsh
0x62, 0xf6, 0x65, 0x08, 0x4d, 0x48, 0x20,
}; };
const size_t n = sizeof(tbl) / sizeof(tbl[0]); const size_t n = sizeof(tbl) / sizeof(tbl[0]);
CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL(c.getSize(), n);

View file

@ -2183,6 +2183,8 @@ void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_
void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); } void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }
void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); } void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); }
void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); } void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); }
void vrcpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16, 0x4C); }
void vrcpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX, 0x4D); }
void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); } void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); }
void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); } void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); }
void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); } void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }