From c16f91c59b625e95dbb0ff989be5b238348d0d16 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 9 Sep 2021 10:50:43 +0900 Subject: [PATCH] add vscalefph --- gen/gen_avx512.cpp | 2 ++ test/misc.cpp | 24 ++++++++++++++++++++++++ xbyak/xbyak_mnemonic.h | 1 + 3 files changed, 27 insertions(+) diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index dc086b7..edc0571 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -347,6 +347,8 @@ void putX_X_XM_IMM() { 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false }, { 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false }, + { 0x2C, "vscalefph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Z, false }, + { 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true }, { 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, diff --git a/test/misc.cpp b/test/misc.cpp index a435892..665af2a 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -994,6 +994,21 @@ CYBOZU_TEST_AUTO(vaddph) vsqrtsh(xmm1|k4|T_z, xmm5, ptr [rax+0x40]); vsqrtsh(xmm1|k4|T_z|T_rd_sae, xmm5, xmm7); + + vscalefph(xmm1, xmm5, ptr [rax+0x40]); + vscalefph(xmm1, xmm5, ptr_b [rax+0x40]); + vscalefph(ymm1, ymm5, ptr [rax+0x40]); + vscalefph(ymm1, ymm5, ptr_b [rax+0x40]); + vscalefph(zmm1, zmm5, ptr [rax+0x40]); + vscalefph(zmm1, zmm5, ptr_b [rax+0x40]); + vscalefph(zmm1|k1|T_z|T_rd_sae, zmm5, zmm7); + + + + + + + } } c; const uint8_t tbl[] = { @@ -1146,6 +1161,15 @@ CYBOZU_TEST_AUTO(vaddph) // vsqrtsh 0x62, 0xf5, 0x56, 0x8c, 0x51, 0x48, 0x20, 0x62, 0xf5, 0x56, 0xbc, 0x51, 0xcf, + + // vscalefph + 0x62, 0xf6, 0x55, 0x08, 0x2c, 0x48, 0x04, + 0x62, 0xf6, 0x55, 0x18, 0x2c, 0x48, 0x20, + 0x62, 0xf6, 0x55, 0x28, 0x2c, 0x48, 0x02, + 0x62, 0xf6, 0x55, 0x38, 0x2c, 0x48, 0x20, + 0x62, 0xf6, 0x55, 0x48, 0x2c, 0x48, 0x01, + 0x62, 0xf6, 0x55, 0x58, 0x2c, 0x48, 0x20, + 0x62, 0xf6, 0x55, 0xb9, 0x2c, 0xcf, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 5aac280..a94a1a5 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -2204,6 +2204,7 @@ void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM( void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16, 0x4E); } void vrsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX, 0x4F); } void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x2C); } +void vscalefph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x2C); } void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); } void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); } void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }