diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 6df5405..dc086b7 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -337,6 +337,7 @@ void putX_X_XM_IMM() { 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false }, { 0x4F, "vrsqrtsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false }, + { 0x51, "vsqrtsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false }, { 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, true }, { 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, true }, diff --git a/test/misc.cpp b/test/misc.cpp index bb002ea..a435892 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -991,6 +991,9 @@ CYBOZU_TEST_AUTO(vaddph) vsqrtph(ymm1|k4|T_z, ptr_b [rax+0x40]); vsqrtph(zmm1|k4|T_z, ptr [rax+0x40]); vsqrtph(zmm1|k4|T_z, ptr_b [rax+0x40]); + + vsqrtsh(xmm1|k4|T_z, xmm5, ptr [rax+0x40]); + vsqrtsh(xmm1|k4|T_z|T_rd_sae, xmm5, xmm7); } } c; const uint8_t tbl[] = { @@ -1139,6 +1142,10 @@ CYBOZU_TEST_AUTO(vaddph) 0x62, 0xf5, 0x7c, 0xbc, 0x51, 0x48, 0x20, 0x62, 0xf5, 0x7c, 0xcc, 0x51, 0x48, 0x01, 0x62, 0xf5, 0x7c, 0xdc, 0x51, 0x48, 0x20, + + // vsqrtsh + 0x62, 0xf5, 0x56, 0x8c, 0x51, 0x48, 0x20, + 0x62, 0xf5, 0x56, 0xbc, 0x51, 0xcf, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index c45db3c..5aac280 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -2224,6 +2224,7 @@ void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); } void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); } void vsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x51); } +void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x51); } void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); } void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); } void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }