diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 85720d5..1b0d489 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -196,6 +196,8 @@ void putX_X_XM_IMM() { 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, { 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + + { 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index d9cf45f..d07605d 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1105,14 +1105,14 @@ void put() { 0x3A, "pminuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true }, { 0x3B, "pminud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, - { 0xE4, "pmulhuw", T_0F | T_66 | T_YMM, false, true }, - { 0x0B, "pmulhrsw", T_0F38 | T_66 | T_YMM, false, true }, - { 0xE5, "pmulhw", T_0F | T_66 | T_YMM, false, true }, - { 0xD5, "pmullw", T_0F | T_66 | T_YMM, false, true }, - { 0x40, "pmulld", T_0F38 | T_66 | T_YMM, false, true }, + { 0xE4, "pmulhuw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x0B, "pmulhrsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true }, + { 0xE5, "pmulhw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xD5, "pmullw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x40, "pmulld", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, - { 0xF4, "pmuludq", T_0F | T_66, false, true }, - { 0x28, "pmuldq", T_0F38 | T_66 | T_YMM, false, true }, + { 0xF4, "pmuludq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, + { 0x28, "pmuldq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, { 0xEB, "por", T_0F | T_66 | T_YMM, false, true }, { 0xF6, "psadbw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, diff --git a/test/make_nm.cpp b/test/make_nm.cpp index d878dfb..a27e53a 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -1537,7 +1537,7 @@ class Test { { "vpmullw", true }, { "vpmulld", true }, - { "vpmuludq", false }, + { "vpmuludq", true }, { "vpmuldq", true }, { "vpor", true }, @@ -2921,7 +2921,31 @@ public: { "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 }, { "vpsadbw", _XMM3, _XMM, _XMM }, - { "vpsadbw", _ZMM, _ZMM, _MEM } + { "vpsadbw", _ZMM, _ZMM, _MEM }, + + { "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 }, + { "vpmuldq", ZMM_KZ, _ZMM, M_1to8 }, + + { "vpmulhrsw", _XMM3, _XMM, _XMM }, + { "vpmulhrsw", ZMM_KZ, _ZMM, _MEM }, + + { "vpmulhuw", _XMM3, _XMM, _XMM }, + { "vpmulhuw", ZMM_KZ, _ZMM, _MEM }, + + { "vpmulhw", _XMM3, _XMM, _XMM }, + { "vpmulhw", ZMM_KZ, _ZMM, _MEM }, + + { "vpmullw", _XMM3, _XMM, _XMM }, + { "vpmullw", ZMM_KZ, _ZMM, _MEM }, + + { "vpmulld", _XMM3, _XMM, M_1to4 }, + { "vpmulld", ZMM_KZ, _ZMM, M_1to16 }, + + { "vpmullq", _XMM3, _XMM, M_1to2 }, + { "vpmullq", ZMM_KZ, _ZMM, M_1to8 }, + + { "vpmuludq", _XMM3, _XMM, M_1to2 }, + { "vpmuludq", ZMM_KZ, _ZMM, M_1to8 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h index 0c16bba..d65cbb0 100644 --- a/xbyak/xbyak_avx512.h +++ b/xbyak/xbyak_avx512.h @@ -108,5 +108,6 @@ void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); } void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEF); } void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEF); } +void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); } void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_0F | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } #endif diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 9d0e9b5..50ef2ef 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -867,19 +867,19 @@ void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, void vpminuw(const Xmm& x, const Operand& op) { vpminuw(x, x, op); } void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3B); } void vpminud(const Xmm& x, const Operand& op) { vpminud(x, x, op); } -void vpmulhuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xE4); } +void vpmulhuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0xE4); } void vpmulhuw(const Xmm& x, const Operand& op) { vpmulhuw(x, x, op); } -void vpmulhrsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x0B); } +void vpmulhrsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM | T_EVEX, 0x0B); } void vpmulhrsw(const Xmm& x, const Operand& op) { vpmulhrsw(x, x, op); } -void vpmulhw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xE5); } +void vpmulhw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0xE5); } void vpmulhw(const Xmm& x, const Operand& op) { vpmulhw(x, x, op); } -void vpmullw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xD5); } +void vpmullw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM | T_EVEX, 0xD5); } void vpmullw(const Xmm& x, const Operand& op) { vpmullw(x, x, op); } -void vpmulld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x40); } +void vpmulld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x40); } void vpmulld(const Xmm& x, const Operand& op) { vpmulld(x, x, op); } -void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66, 0xF4); } +void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xF4); } void vpmuludq(const Xmm& x, const Operand& op) { vpmuludq(x, x, op); } -void vpmuldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_YMM, 0x28); } +void vpmuldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x28); } void vpmuldq(const Xmm& x, const Operand& op) { vpmuldq(x, x, op); } void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_YMM, 0xEB); } void vpor(const Xmm& x, const Operand& op) { vpor(x, x, op); }