From b7d044b7bc5a31f099b8b1ddc31e8c912eceec09 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 14 Jul 2016 12:02:41 +0900 Subject: [PATCH] add vperm{b,w,d} --- gen/gen_avx512.cpp | 3 +++ gen/gen_code.cpp | 13 ++++++++----- test/make_nm.cpp | 8 ++++++++ xbyak/xbyak_avx512.h | 2 ++ xbyak/xbyak_mnemonic.h | 2 +- 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 7f88c7c..3ab5e29 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -198,6 +198,9 @@ void putX_X_XM_IMM() { 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + + { 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, + { 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 2b6c57f..48fbeea 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1319,15 +1319,18 @@ void put() } // vpermd, vpermps { - const struct { - const char *suf; + const struct Tbl { uint8 code; + const char *name; + int type; } tbl[] = { - { "d", 0x36 }, - { "ps", 0x16 }, + { 0x36, "vpermd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 }, + { 0x16, "vpermps", T_0F38 | T_66 | T_W0 | T_YMM }, }; for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) { - printf("void vperm%s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", tbl[i].suf, tbl[i].code); + const Tbl& p = tbl[i]; + std::string type = type2String(p.type); + printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); } } // vpermq, vpermpd diff --git a/test/make_nm.cpp b/test/make_nm.cpp index ae00f21..96fb9bd 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -2983,6 +2983,14 @@ public: { "vextractps", REG32 | _MEM, _XMM3, IMM8 }, + { "vpermb", XMM_KZ, _XMM, _XMM }, + { "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM }, + + { "vpermw", XMM_KZ, _XMM, _XMM }, + { "vpermw", ZMM_KZ, _ZMM, _ZMM | _MEM }, + + { "vpermd", YMM_KZ, _YMM, _YMM | M_1to8 }, + { "vpermd", ZMM_KZ, _ZMM, _ZMM | M_1to16 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h index 3afcee2..7c62e9c 100644 --- a/xbyak/xbyak_avx512.h +++ b/xbyak/xbyak_avx512.h @@ -109,6 +109,8 @@ void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEF); } void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEF); } void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); } +void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8D); } +void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_66 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8D); } void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_0F | T_66 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_0F3A | T_66 | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); } void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_0F3A | T_66 | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); } diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index e9e477b..809861a 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1037,7 +1037,7 @@ void vpmaskmovd(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_X void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); } void vpmaskmovq(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W1 | T_YMM, 0x8C); } void vpmaskmovq(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W1 | T_YMM, 0x8E); } -void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_0F38 | T_66 | T_W0 | T_YMM, 0x36); } +void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_0F38 | T_66 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); } void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_0F38 | T_66 | T_W0 | T_YMM, 0x16); } void vpermq(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_0F3A | T_66 | T_W1 | T_YMM, 0x00, imm); } void vpermpd(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_0F3A | T_66 | T_W1 | T_YMM, 0x01, imm); }