// Patch summary: git unified diff touching three files, adding the
// vpexpandb / vpexpandw mnemonics.
// NOTE(review): the line breaks of this patch appear to have been collapsed
// into spaces; they presumably need restoring before it can be applied.
//
// 1. gen/gen_avx512.cpp (hunk context: putX_XM_IMM):
//    appends two rows to the table, after the vpopcnt* rows:
//      { 0x62, "vpexpandb", ... | T_EW0 | T_SAE_Z | T_N1, false }
//      { 0x62, "vpexpandw", ... | T_EW1 | T_SAE_Z | T_N2, false }
//    Both rows share opcode 0x62 and differ only in T_EW0/T_EW1 and T_N1/T_N2.
//
// 2. test/misc.cpp:
//    adds CYBOZU_TEST_AUTO(vexpand). The Code constructor emits vpexpandb and
//    vpexpandw with destinations xmm5/ymm5/zmm5 (each with |k3|T_z) and
//    sources xmm30/ymm30/zmm30 or ptr [rax + 0x40], i.e. 12 instructions.
//    The test then compares c.getSize() and c.getCode() against the literal
//    byte table tbl.
//    In tbl the memory forms end in 0x40 for vpexpandb and 0x20 for
//    vpexpandw; presumably this is the disp8 value scaled according to
//    T_N1 / T_N2 -- TODO confirm against the encoder.
//
// 3. xbyak/xbyak_mnemonic.h:
//    adds vpexpandb and vpexpandw next to the existing vpexpandd / vpexpandq.
//    Both forward to opAVX_X_XM_IMM with opcode 0x62 and the same flag sets
//    as the generator table rows in (1).
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 0379e2a..6f3ea78 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -653,6 +653,9 @@ void putX_XM_IMM() { 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, { 0x55, "vpopcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x55, "vpopcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, + + { 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false }, + { 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/test/misc.cpp b/test/misc.cpp index afa3cfd..231ff32 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -522,4 +522,43 @@ CYBOZU_TEST_AUTO(vpdpbus) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } +CYBOZU_TEST_AUTO(vexpand) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + vpexpandb(xmm5|k3|T_z, xmm30); + vpexpandb(ymm5|k3|T_z, ymm30); + vpexpandb(zmm5|k3|T_z, zmm30); + vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]); + vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]); + vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]); + + vpexpandw(xmm5|k3|T_z, xmm30); + vpexpandw(ymm5|k3|T_z, ymm30); + vpexpandw(zmm5|k3|T_z, zmm30); + vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]); + vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]); + vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]); + } + } c; + const uint8_t tbl[] = { + 0x62, 0x92, 0x7d, 0x8b, 0x62, 0xee, + 0x62, 0x92, 0x7d, 0xab, 0x62, 0xee, + 0x62, 0x92, 0x7d, 0xcb, 0x62, 0xee, + 0x62, 0xf2, 0x7d, 0x8b, 0x62, 0x68, 0x40, + 0x62, 0xf2, 0x7d, 0xab, 0x62, 0x68, 0x40, + 0x62, 0xf2, 0x7d, 0xcb, 0x62, 0x68, 0x40, + + 0x62, 0x92, 0xfd, 0x8b, 0x62, 0xee, + 0x62, 0x92, 0xfd, 0xab, 0x62, 0xee, + 0x62, 0x92, 0xfd, 0xcb, 0x62, 0xee, + 0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20, + 0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20, + 0x62, 0xf2, 
0xfd, 0xcb, 0x62, 0x68, 0x20, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} #endif diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 5ccd7a1..b3c9398 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1805,8 +1805,10 @@ void vpermt2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x void vpermt2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7E); } void vpermt2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7D); } void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8D); } +void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); } void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); } void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); } +void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); } void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x90, 0); } void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x90, 1); } void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x91, 2); }