diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index d07605d..963454f 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1144,15 +1144,15 @@ void put() { 0xD8, "psubusb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, { 0xD9, "psubusw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, - { 0x68, "punpckhbw", T_0F | T_66 | T_YMM, false, true }, - { 0x69, "punpckhwd", T_0F | T_66 | T_YMM, false, true }, - { 0x6A, "punpckhdq", T_0F | T_66 | T_YMM, false, true }, - { 0x6D, "punpckhqdq", T_0F | T_66 | T_YMM, false, true }, + { 0x68, "punpckhbw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x69, "punpckhwd", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x6A, "punpckhdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, + { 0x6D, "punpckhqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, - { 0x60, "punpcklbw", T_0F | T_66 | T_YMM, false, true }, - { 0x61, "punpcklwd", T_0F | T_66 | T_YMM, false, true }, - { 0x62, "punpckldq", T_0F | T_66 | T_YMM, false, true }, - { 0x6C, "punpcklqdq", T_0F | T_66 | T_YMM, false, true }, + { 0x60, "punpcklbw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x61, "punpcklwd", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x62, "punpckldq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, + { 0x6C, "punpcklqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, { 0xEF, "pxor", T_0F | T_66 | T_YMM, false, true }, diff --git a/test/make_nm.cpp b/test/make_nm.cpp index a27e53a..dc2cd0f 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -2946,6 +2946,30 @@ public: { "vpmuludq", _XMM3, _XMM, M_1to2 }, { "vpmuludq", ZMM_KZ, _ZMM, M_1to8 }, + + { "vpunpckhbw", _XMM3, _XMM, _XMM }, + { "vpunpckhbw", _ZMM, _ZMM, _MEM }, + + { "vpunpckhwd", _XMM3, _XMM, _XMM }, + { "vpunpckhwd", _ZMM, _ZMM, _MEM }, + + { "vpunpckhdq", _XMM3, _XMM, M_1to4 }, + { "vpunpckhdq", _ZMM, _ZMM, M_1to16 }, + + { "vpunpckhqdq", _XMM3, _XMM, M_1to2 }, + { "vpunpckhqdq", _ZMM, _ZMM, M_1to8 }, + + { "vpunpcklbw", _XMM3, _XMM, _XMM }, + { "vpunpcklbw", _ZMM, _ZMM, _MEM }, + + { "vpunpcklwd", _XMM3, _XMM, _XMM }, + { "vpunpcklwd", _ZMM, _ZMM, _MEM }, + + { "vpunpckldq", _XMM3, _XMM, M_1to4 }, + { "vpunpckldq", _ZMM, _ZMM, M_1to16 }, + + { "vpunpcklqdq", _XMM3, _XMM, M_1to2 }, + { "vpunpcklqdq", _ZMM, _ZMM, M_1to8 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 50ef2ef..8bab831 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -924,21 +924,21 @@ void vpsubusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1 void vpsubusb(const Xmm& x, const Operand& op) { vpsubusb(x, x, op); } void vpsubusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0xD9); } void vpsubusw(const Xmm& x, const Operand& op) { vpsubusw(x, x, op); } -void vpunpckhbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x68); } +void vpunpckhbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0x68); } void vpunpckhbw(const Xmm& x, const Operand& op) { vpunpckhbw(x, x, op); } -void vpunpckhwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x69); } +void vpunpckhwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0x69); } void vpunpckhwd(const Xmm& x, const Operand& op) { vpunpckhwd(x, x, op); } -void vpunpckhdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x6A); } +void vpunpckhdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x6A); } void vpunpckhdq(const Xmm& x, const Operand& op) { vpunpckhdq(x, x, op); } -void vpunpckhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x6D); } +void vpunpckhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x6D); } void vpunpckhqdq(const Xmm& x, const Operand& op) { vpunpckhqdq(x, x, op); } -void vpunpcklbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x60); } +void vpunpcklbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0x60); } void vpunpcklbw(const Xmm& x, const Operand& op) { vpunpcklbw(x, x, op); } -void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x61); } +void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0x61); } void vpunpcklwd(const Xmm& x, const Operand& op) { vpunpcklwd(x, x, op); } -void vpunpckldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x62); } +void vpunpckldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x62); } void vpunpckldq(const Xmm& x, const Operand& op) { vpunpckldq(x, x, op); } -void vpunpcklqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0x6C); } +void vpunpcklqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x6C); } void vpunpcklqdq(const Xmm& x, const Operand& op) { vpunpcklqdq(x, x, op); } void vpxor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xEF); } void vpxor(const Xmm& x, const Operand& op) { vpxor(x, x, op); }