diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index d724edb..6625e4e 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -461,6 +461,43 @@ void putShuff() puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }"); } +void putMov() +{ + puts("void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }"); + puts("void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }"); + puts("void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }"); + puts("void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }"); + + puts("void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }"); + puts("void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }"); + puts("void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }"); + puts("void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }"); + + puts("void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x32, false); }"); + puts("void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x22, false); }"); + puts("void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x12, false); }"); + + puts("void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x34, false); }"); + puts("void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x24, false); }"); + puts("void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x14, false); }"); + + puts("void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x35, true); }"); + puts("void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x25, true); }"); + puts("void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x15, true); }"); + + puts("void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x31, false); }"); + puts("void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x21, false); }"); + puts("void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x11, false); }"); + + puts("void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x33, true); }"); + puts("void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x23, true); }"); + puts("void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x13, true); }"); + + puts("void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x30, true); }"); + puts("void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x20, true); }"); + puts("void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x10, true); }"); +} + int main() { puts("#ifndef XBYAK_DISABLE_AVX512"); @@ -478,5 +515,6 @@ int main() putCvt(); putGather(); putShuff(); + putMov(); puts("#endif"); } diff --git a/test/make_512.cpp b/test/make_512.cpp index 9a40304..55f2f57 100644 --- a/test/make_512.cpp +++ b/test/make_512.cpp @@ -1752,6 +1752,57 @@ public: put("vpternlogq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8); put("vpternlogq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8); } + void putMov() + { + put("vpmovm2b", _XMM | _YMM | _ZMM, K); + put("vpmovm2w", _XMM | _YMM | _ZMM, K); + put("vpmovm2d", _XMM | _YMM | _ZMM, K); + put("vpmovm2q", _XMM | _YMM | _ZMM, K); + + put("vpmovb2m", K, _XMM | _YMM | _ZMM); + put("vpmovw2m", K, _XMM | _YMM | _ZMM); + put("vpmovd2m", K, _XMM | _YMM | _ZMM); + put("vpmovq2m", K, _XMM | _YMM | _ZMM); + + put("vpmovqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM); + put("vpmovsqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM); + put("vpmovusqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM); + + put("vpmovqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM); + put("vpmovsqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM); + put("vpmovusqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM); + + put("vpmovqd", XMM_KZ | _MEM, _XMM | _YMM); + put("vpmovqd", YMM_KZ | _MEM, _ZMM); + + put("vpmovsqd", XMM_KZ | _MEM, _XMM | _YMM); + put("vpmovsqd", YMM_KZ | _MEM, _ZMM); + + put("vpmovusqd", XMM_KZ | _MEM, _XMM | _YMM); + put("vpmovusqd", YMM_KZ | _MEM, _ZMM); + + put("vpmovdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM); + put("vpmovsdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM); + put("vpmovusdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM); + + put("vpmovdw", XMM_KZ | _MEM, _XMM | _YMM); + put("vpmovdw", YMM_KZ | _MEM, _ZMM); + + put("vpmovsdw", XMM_KZ | _MEM, _XMM | _YMM); + put("vpmovsdw", YMM_KZ | _MEM, _ZMM); + + put("vpmovusdw", XMM_KZ | _MEM, _XMM | _YMM); + put("vpmovusdw", YMM_KZ | _MEM, _ZMM); + + put("vpmovwb", XMM_KZ | _MEM, _XMM | _YMM); + put("vpmovwb", YMM_KZ | _MEM, _ZMM); + + put("vpmovswb", XMM_KZ | _MEM, _XMM | _YMM); + put("vpmovswb", YMM_KZ | _MEM, _ZMM); + + put("vpmovuswb", XMM_KZ | _MEM, _XMM | _YMM); + put("vpmovuswb", YMM_KZ | _MEM, _ZMM); + } void putMin() { #ifdef XBYAK64 @@ -1807,6 +1858,8 @@ public: putShuff(); separateFunc(); putMisc2(); + separateFunc(); + putMov(); #endif } }; diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 156000f..3e9ae03 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -1936,6 +1936,19 @@ private: addr.permitVsib(); opVex(x, 0, addr, type, code); } + /* + xx_xy_yz ; mode = true + xx_xy_xz ; mode = false + */ + void opVmov(const Operand& op, const Xmm& x, int type, uint8 code, int mode) + { + if (mode) { + if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) throw Error(ERR_BAD_COMBINATION); + } else { + if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); + } + opVex(x, 0, op, type, code); + } public: unsigned int getVersion() const { return VERSION; } using CodeArray::db; diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h index e22bd05..88a6959 100644 --- a/xbyak/xbyak_avx512.h +++ b/xbyak/xbyak_avx512.h @@ -228,4 +228,30 @@ void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { op void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); } void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); } void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); } +void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); } +void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); } +void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); } +void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); } +void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); } +void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); } +void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); } +void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); } +void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x32, false); } +void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x22, false); } +void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, 0x12, false); } +void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x34, false); } +void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x24, false); } +void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x14, false); } +void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x35, true); } +void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x25, true); } +void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x15, true); } +void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x31, false); } +void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x21, false); } +void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, 0x11, false); } +void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x33, true); } +void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x23, true); } +void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x13, true); } +void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x30, true); } +void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x20, true); } +void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, 0x10, true); } #endif