From dbe06b4142e620c1d99ebdfb6265cc47e5032906 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo
Date: Mon, 18 Jul 2016 11:05:43 +0900
Subject: [PATCH] vpmov* are ok

---
 gen/gen_code.cpp       | 24 +++++++++++-----------
 test/make_512.cpp      | 45 +++++++++++++++++++++++++-----------------
 xbyak/xbyak_mnemonic.h | 24 +++++++++++-----------
 3 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 6170483..379300c 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1219,19 +1219,19 @@ void put()
 			{ 0x1E, "pabsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false },
 			{ 0x41, "phminposuw", T_0F38 | T_66, false },
 
-			{ 0x20, "pmovsxbw", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x21, "pmovsxbd", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x22, "pmovsxbq", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x23, "pmovsxwd", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x24, "pmovsxwq", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x25, "pmovsxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, false },
+			{ 0x20, "pmovsxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false },
+			{ 0x21, "pmovsxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false },
+			{ 0x22, "pmovsxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false },
+			{ 0x23, "pmovsxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false },
+			{ 0x24, "pmovsxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false },
+			{ 0x25, "pmovsxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false },
 
-			{ 0x30, "pmovzxbw", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x31, "pmovzxbd", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x32, "pmovzxbq", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x33, "pmovzxwd", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x34, "pmovzxwq", T_0F38 | T_66 | T_YMM | T_EVEX, false },
-			{ 0x35, "pmovzxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, false },
+			{ 0x30, "pmovzxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false },
+			{ 0x31, "pmovzxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false },
+			{ 0x32, "pmovzxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false },
+			{ 0x33, "pmovzxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false },
+			{ 0x34, "pmovzxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false },
+			{ 0x35, "pmovzxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false },
 
 			{ 0x70, "pshufd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true },
 			{ 0x70, "pshufhw", T_0F | T_F3 | T_YMM | T_EVEX, true },
diff --git a/test/make_512.cpp b/test/make_512.cpp
index 80ed9a2..9ddfd7d 100644
--- a/test/make_512.cpp
+++ b/test/make_512.cpp
@@ -1280,24 +1280,33 @@ public:
 	}
 	void put512_Y_XM()
 	{
-		const char *tbl[] = {
-			"vpmovsxbw",
-			"vpmovsxbd",
-			"vpmovsxbq",
-			"vpmovsxwd",
-			"vpmovsxwq",
-			"vpmovsxdq",
-			"vpmovzxbw",
-			"vpmovzxbd",
-			"vpmovzxbq",
-			"vpmovzxwd",
-			"vpmovzxwq",
-			"vpmovzxdq",
+		const struct Tbl {
+			const char *name;
+			bool all_xmm; // 2nd param
+		} tbl[] = {
+			{ "vpmovsxbw", false },
+			{ "vpmovsxbd", true },
+			{ "vpmovsxbq", true },
+			{ "vpmovsxwd", false },
+			{ "vpmovsxwq", true },
+			{ "vpmovsxdq", false },
+
+			{ "vpmovzxbw", false },
+			{ "vpmovzxbd", true },
+			{ "vpmovzxbq", true },
+			{ "vpmovzxwd", false },
+			{ "vpmovzxwq", true },
+			{ "vpmovzxdq", false },
 		};
 		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
-			const char *name = tbl[i];
-			put(name, XMM_KZ, _XMM);
-			put(name, _ZMM, _MEM);
+			const Tbl& p = tbl[i];
+			const char *name = p.name;
+			put(name, XMM_KZ | YMM, _XMM | _MEM);
+			if (p.all_xmm) {
+				put(name, ZMM, _XMM | _MEM);
+			} else {
+				put(name, ZMM, YMM | _MEM);
+			}
 		}
 	}
 	void put512_AVX1()
@@ -1443,12 +1452,12 @@ public:
 		separateFunc();
 		put512_X3_I();
 		separateFunc();
-#endif
 		put512_FMA();
 		separateFunc();
-#if 0
+#endif
 		put512_Y_XM();
 		separateFunc();
+#if 0
 		put512_AVX1();
 		separateFunc();
 		put512_cvt();
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 0c520c1..0e7f90a 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -991,18 +991,18 @@ void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_
 void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1D); }
 void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x1E); }
 void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38, 0x41); }
-void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x20); }
-void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x21); }
-void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x22); }
-void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x23); }
-void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x24); }
-void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX, 0x25); }
-void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x30); }
-void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x31); }
-void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x32); }
-void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x33); }
-void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x34); }
-void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX, 0x35); }
+void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N8 | T_N_VL, 0x20); }
+void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N4 | T_N_VL, 0x21); }
+void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N2 | T_N_VL, 0x22); }
+void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N8 | T_N_VL, 0x23); }
+void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N4 | T_N_VL, 0x24); }
+void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_N8 | T_N_VL, 0x25); }
+void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N8 | T_N_VL, 0x30); }
+void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N4 | T_N_VL, 0x31); }
+void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N2 | T_N_VL, 0x32); }
+void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N8 | T_N_VL, 0x33); }
+void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N4 | T_N_VL, 0x34); }
+void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_N8 | T_N_VL, 0x35); }
 void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); }
 void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); }
 void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); }