// Patch summary: adds the `vrsqrtph` mnemonic in three places.
//   * gen/gen_avx512.cpp   - new row in the table inside putX_XM_IMM():
//                            opcode 0x4E, flags T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16.
//   * test/misc.cpp        - six new vrsqrtph calls in the vaddph test (xmm1 / ymm2 / zmm2, each with
//                            `ptr` and `ptr_b` [rax+0x40]) and six matching 7-byte rows in the expected `tbl`.
//   * xbyak/xbyak_mnemonic.h - vrsqrtph(const Xmm&, const Operand&), which forwards to opAVX_X_XM_IMM
//                            with the same flag set and opcode 0x4E.
// NOTE(review): the hunks below appear with their line breaks collapsed; they are kept verbatim.
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 91a16a9..429fece 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -659,6 +659,8 @@ void putX_XM_IMM() { 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + { 0x4E, "vrsqrtph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16, false }, + { 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true }, { 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true }, diff --git a/test/misc.cpp b/test/misc.cpp index 1c8039a..59b64c1 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -976,6 +976,13 @@ CYBOZU_TEST_AUTO(vaddph) vrcpph(zmm1, ptr_b [rax+0x40]); vrcpsh(xmm1, xmm3, ptr [rax+0x40]); + + vrsqrtph(xmm1, ptr [rax+0x40]); + vrsqrtph(xmm1, ptr_b [rax+0x40]); + vrsqrtph(ymm2, ptr [rax+0x40]); + vrsqrtph(ymm2, ptr_b [rax+0x40]); + vrsqrtph(zmm2, ptr [rax+0x40]); + vrsqrtph(zmm2, ptr_b [rax+0x40]); } } c; const uint8_t tbl[] = { @@ -1106,6 +1113,14 @@ CYBOZU_TEST_AUTO(vaddph) // vrcpsh 0x62, 0xf6, 0x65, 0x08, 0x4d, 0x48, 0x20, + + // vrsqrtph + 0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x48, 0x04, + 0x62, 0xf6, 0x7d, 0x18, 0x4e, 0x48, 0x20, + 0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x50, 0x02, + 0x62, 0xf6, 0x7d, 0x38, 0x4e, 0x50, 0x20, + 0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x50, 0x01, + 0x62, 0xf6, 0x7d, 0x58, 0x4e, 0x50, 0x20, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 90ca71d..7cb1840 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -2201,6 +2201,7 @@ void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); } void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { 
opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCD); } void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCD); } +void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16, 0x4E); } void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x2C); } void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); } void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }