diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 37db58d..86b2218 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -231,6 +231,7 @@ void putM_X() { 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K }, + { 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -873,6 +874,12 @@ void putFP16_2() printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", type.c_str()); printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", type.c_str()); } + { + int t = T_66 | T_MAP5 | T_MUST_EVEX | T_N2; + std::string type = type2String(t); + printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", type.c_str()); + printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", type.c_str()); + } } void putFP16() diff --git a/test/misc.cpp b/test/misc.cpp index 3c3e0be..c448c94 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -1057,6 +1057,10 @@ CYBOZU_TEST_AUTO(vaddph) vmovsh(ptr [rax+0x40]|k1, xmm1); vmovsh(xmm1|k2|T_z, xmm3, xmm5); + vmovw(xmm1, r13d); + vmovw(xmm3, ptr [rax+0x40]); + vmovw(r9d, xmm1); + vmovw(ptr [rax+0x40], xmm7); } } c; const uint8_t tbl[] = { @@ -1285,6 +1289,12 @@ CYBOZU_TEST_AUTO(vaddph) 0x62, 0xf5, 0x7e, 0x89, 0x10, 0x48, 0x20, 0x62, 0xf5, 0x7e, 0x09, 0x11, 0x48, 0x20, 0x62, 0xf5, 0x66, 0x8a, 0x10, 0xcd, + + // vmovw + 0x62, 0xd5, 0x7d, 0x08, 0x6e, 0xcd, + 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x58, 0x20, + 0x62, 0xd5, 0x7d, 0x08, 0x7e, 0xc9, + 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x78, 0x20, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 5c7b66a..b98ea70 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -2033,6 +2033,9 @@ void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_ void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX | T_M_K, 0x11); } void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX, 0x10); } void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX, 0x10); } +void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x7E); } +void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x7E); } +void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x6E); } void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); } void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); } void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }