diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 9dd56bb..e28c818 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1204,15 +1204,15 @@ void put() { 0x5B, "cvtdq2ps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false }, { 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM, false }, { 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM, false }, - { 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false }, - { 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false }, + { 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL, false }, + { 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, false }, { 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z, false }, - { 0x6F, "movdqa", T_0F | T_66 | T_YMM, false }, - { 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false }, + { 0x6F, "movdqa", T_0F | T_66 | T_YMM | T_N16 | T_N_VL | T_N16 | T_N_VL, false }, + { 0x6F, "movdqu", T_0F | T_F3 | T_YMM | T_N16 | T_N_VL | T_N16 | T_N_VL, false }, { 0x16, "movshdup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false }, { 0x12, "movsldup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false }, - { 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false }, - { 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false }, + { 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL, false }, + { 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, false }, { 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX, false }, { 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX, false }, @@ -1261,12 +1261,12 @@ void put() const char *name; int type; } tbl[] = { - { 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1}, - { 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0}, - { 0x7F, "movdqa", T_0F | T_66 | T_YMM}, - { 0x7F, "movdqu", T_0F | T_F3 | T_YMM}, - { 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1}, - { 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0}, + { 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL }, + { 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL }, + { 0x7F, "movdqa", T_0F | T_66 | T_YMM | T_N16 | T_N_VL }, + { 0x7F, "movdqu", T_0F | T_F3 | T_YMM | T_N16 | T_N_VL }, + { 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL }, + { 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/test/make_512.cpp b/test/make_512.cpp index 68e6b18..5b0a4e6 100644 --- a/test/make_512.cpp +++ b/test/make_512.cpp @@ -761,8 +761,8 @@ public: }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; - put(name, MEM, ZMM); - put(name, ZMM, MEM); + put(name, MEM, _XMM3|ZMM); + put(name, _XMM3|ZMM, MEM); } } void put_vmov() @@ -1429,8 +1429,8 @@ public: separateFunc(); #endif putAVX512_M_X(); -#if 0 separateFunc(); +#if 0 put_vmov(); separateFunc(); put512_X_XM(); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 30672fe..e92c45a 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -978,15 +978,15 @@ void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x5B); } void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x5B); } void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x5B); } -void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); } -void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); } +void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x28); } +void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x28); } void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z, 0x12); } -void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x6F); } -void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x6F); } +void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM | T_N16 | T_N_VL, 0x6F); } +void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_N16 | T_N_VL, 0x6F); } void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x16); } void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x12); } -void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); } -void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); } +void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x10); } +void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x10); } void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1C); } void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1D); } void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x1E); } @@ -1013,12 +1013,12 @@ void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x51); } void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X | T_N8, 0x2E); } void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_EVEX | T_SAE_X | T_N4, 0x2E); } -void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x29); } -void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x29); } -void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM, 0x7F); } -void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM, 0x7F); } -void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x11); } -void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x11); } +void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x29); } +void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x29); } +void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM | T_N16 | T_N_VL, 0x7F); } +void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM | T_N16 | T_N_VL, 0x7F); } +void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x11); } +void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x11); } void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0); } void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0); } void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); }