diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 69f4e7f..94ba8f6 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -101,20 +101,20 @@ void putVcmp() int type; bool hasIMM; } tbl[] = { - { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_N16 | T_N_VL, true }, - { 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_N16 | T_N_VL, true }, + { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66, true }, + { 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM, true }, { 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true }, { 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true }, - { 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_N16 | T_N_VL, false }, - { 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_N16 | T_N_VL, false }, - { 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32 | T_N16 | T_N_VL, false }, - { 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, + { 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, + { 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, + { 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32, false }, + { 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - { 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_N16 | T_N_VL, false }, - { 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_N16 | T_N_VL, false }, - { 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_N16 | T_N_VL, false }, - { 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, + { 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, + { 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, + { 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, + { 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -132,19 +132,19 @@ void putX_XM() const char *name; int type; } tbl[] = { - { 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, + { 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); } - puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM | T_N16 | T_N_VL, 0x1F); }"); + puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }"); } void putM_X() @@ -154,12 +154,12 @@ void putM_X() const char *name; int type; } tbl[] = { - { 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, - { 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_N16 | T_N_VL }, + { 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -176,31 +176,31 @@ void putX_X_XM_IMM() int type; bool hasIMM; } tbl[] = { - { 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM | T_N16 | T_N_VL, true }, - { 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM | T_N16 | T_N_VL, true }, - { 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32 | T_N16 | T_N_VL, false }, - { 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, - { 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32 | T_N16 | T_N_VL, false }, - { 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, - { 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, - { 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, - { 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, - { 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, + { 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true }, + { 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM, true }, + { 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, + { 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, + { 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, + { 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, + { 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + { 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + { 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + { 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0xE2, "vpsraq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_N16, false }, - { 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, - { 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N16 | T_N_VL, false }, - { 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N16 | T_N_VL, false }, - { 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N16 | T_N_VL, false }, - { 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_N16 | T_N_VL, false }, - { 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, + { 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + { 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, + { 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, + { 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, + { 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, + { 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - { 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_N16 | T_N_VL, false }, - { 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, + { 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, + { 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - { 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_N16 | T_N_VL, false }, + { 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, - { 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N16 | T_N_VL, false }, - { 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N16 | T_N_VL, false }, + { 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, + { 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -218,7 +218,7 @@ void putShift() int idx; int type; } tbl[] = { - { "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 | T_N16 | T_N_VL }, + { "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 13ffd15..05fb20c 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -95,16 +95,16 @@ void put() const char *name; } tbl[] = { { 0xFC, B|W|D, "padd" }, - { 0xEC, B|W , "padds" }, - { 0xDC, B|W , "paddus" }, + { 0xEC, B|W , "padds" }, + { 0xDC, B|W , "paddus" }, { 0x74, B|W|D, "pcmpeq" }, { 0x64, B|W|D, "pcmpgt" }, { 0xF0, W|D|Q, "psll" }, - { 0xE0, W|D , "psra" }, + { 0xE0, W|D , "psra" }, { 0xD0, W|D|Q, "psrl" }, { 0xF8, B|W|D, "psub" }, - { 0xE8, B|W , "psubs" }, - { 0xD8, B|W , "psubus" }, + { 0xE8, B|W , "psubs" }, + { 0xD8, B|W , "psubus" }, }; for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -130,7 +130,7 @@ void put() const char *name; } tbl[] = { { 0x70, 6, W|D|Q, "psll" }, - { 0x70, 4, W|D , "psra" }, + { 0x70, 4, W|D , "psra" }, { 0x70, 2, W|D|Q, "psrl" }, }; for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) { @@ -165,7 +165,7 @@ void put() }; for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void %s(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n" , p->name, p->code, p->pref); + printf("void %s(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref); } } { @@ -187,10 +187,10 @@ void put() }; for (int i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) { const MmxTbl6 *p = &mmxTbl6[i]; - printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, 0x%02X); }\n" , p->name, p->code, p->pref); + printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref); printf("void %s(const Address& addr, const Xmm& xmm) { ", p->name); if (p->pref != NO) printf("db(0x%02X); ", p->pref); - printf("opModM(addr, xmm, 0x0F, 0x%02X); }\n" , p->code2); + printf("opModM(addr, xmm, 0x0F, 0x%02X); }\n", p->code2); } } { @@ -216,22 +216,22 @@ void put() bool hasImm; } tbl[] = { { 0x58, PS|SS|PD|SD, "add" }, - { 0x55, PS|PD , "andn" }, - { 0x54, PS|PD , "and" }, + { 0x55, PS|PD , "andn" }, + { 0x54, PS|PD , "and" }, { 0xC2, PS|SS|PD|SD, "cmp", true }, { 0x5E, PS|SS|PD|SD, "div" }, { 0x5F, PS|SS|PD|SD, "max" }, { 0x5D, PS|SS|PD|SD, "min" }, { 0x59, PS|SS|PD|SD, "mul" }, - { 0x56, PS|PD , "or" }, - { 0x53, PS|SS , "rcp" }, - { 0x52, PS|SS , "rsqrt" }, - { 0xC6, PS|PD , "shuf", true }, + { 0x56, PS|PD , "or" }, + { 0x53, PS|SS , "rcp" }, + { 0x52, PS|SS , "rsqrt" }, + { 0xC6, PS|PD , "shuf", true }, { 0x51, PS|SS|PD|SD, "sqrt" }, { 0x5C, PS|SS|PD|SD, "sub" }, - { 0x15, PS|PD , "unpckh" }, - { 0x14, PS|PD , "unpckl" }, - { 0x57, PS|PD , "xor" }, + { 0x15, PS|PD , "unpckh" }, + { 0x14, PS|PD , "unpckl" }, + { 0x57, PS|PD , "xor" }, // }; for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) { @@ -255,8 +255,8 @@ void put() const char *name; } tbl[] = { { 0xF7, 0x66, "maskmovdqu" }, - { 0x12, 0 , "movhlps" }, - { 0x16, 0 , "movlhps" }, + { 0x12, 0 , "movhlps" }, + { 0x16, 0 , "movlhps" }, }; for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -275,13 +275,13 @@ void put() { 0x6D, 0x66, "punpckhqdq" }, { 0x6C, 0x66, "punpcklqdq" }, - { 0x2F, NO , "comiss" }, - { 0x2E, NO , "ucomiss" }, + { 0x2F, NO , "comiss" }, + { 0x2E, NO , "ucomiss" }, { 0x2F, 0x66, "comisd" }, { 0x2E, 0x66, "ucomisd" }, { 0x5A, 0x66, "cvtpd2ps" }, - { 0x5A, NO , "cvtps2pd" }, + { 0x5A, NO , "cvtps2pd" }, { 0x5A, 0xF2, "cvtsd2ss" }, { 0x5A, 0xF3, "cvtss2sd" }, { 0xE6, 0xF2, "cvtpd2dq" }, @@ -289,7 +289,7 @@ void put() { 0xE6, 0xF3, "cvtdq2pd" }, { 0x5B, 0x66, "cvtps2dq" }, { 0x5B, 0xF3, "cvttps2dq" }, - { 0x5B, NO , "cvtdq2ps" }, + { 0x5B, NO , "cvtdq2ps" }, // SSE3 { 0xD0, 0x66, "addsubpd" }, @@ -317,11 +317,11 @@ void put() const char *name; const char *cond; } tbl[] = { - { 0x2A, NO , "cvtpi2ps", "isXMM_MMXorMEM" }, - { 0x2D, NO , "cvtps2pi", "isMMX_XMMorMEM" }, + { 0x2A, NO , "cvtpi2ps", "isXMM_MMXorMEM" }, + { 0x2D, NO , "cvtps2pi", "isMMX_XMMorMEM" }, { 0x2A, 0xF3, "cvtsi2ss", "isXMM_REG32orMEM" }, { 0x2D, 0xF3, "cvtss2si", "isREG32_XMMorMEM" }, - { 0x2C, NO , "cvttps2pi", "isMMX_XMMorMEM" }, + { 0x2C, NO , "cvttps2pi", "isMMX_XMMorMEM" }, { 0x2C, 0xF3, "cvttss2si", "isREG32_XMMorMEM" }, { 0x2A, 0x66, "cvtpi2pd", "isXMM_MMXorMEM" }, { 0x2D, 0x66, "cvtpd2pi", "isMMX_XMMorMEM" }, @@ -1003,8 +1003,8 @@ void put() }; for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x%02X); }\n", p->name, p->code); - printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x%02X); }\n", p->name, p->code); + printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code); + printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code); if (p->only_pd_ps) continue; printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x%02X); }\n", p->name, p->code); printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x%02X); }\n", p->name, p->code); @@ -1032,11 +1032,11 @@ void put() { 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false }, { 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false }, - { 0x47, "psllvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, false }, - { 0x47, "psllvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL, false, false }, - { 0x46, "psravd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, false }, - { 0x45, "psrlvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, false }, - { 0x45, "psrlvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL, false, false }, + { 0x47, "psllvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false }, + { 0x47, "psllvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false }, + { 0x46, "psravd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false }, + { 0x45, "psrlvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false }, + { 0x45, "psrlvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false }, { 0xC2, "cmppd", T_0F | T_66 | T_YMM, true, true }, { 0xC2, "cmpps", T_0F | T_YMM, true, true }, @@ -1045,26 +1045,26 @@ void put() { 0x5A, "cvtsd2ss", T_0F | T_F2, false, true }, { 0x5A, "cvtss2sd", T_0F | T_F3, false, true }, { 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0, true, true }, - { 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, - { 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x2B, "packusdw", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, + { 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, + { 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x2B, "packusdw", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, - { 0xFC, "paddb", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xFD, "paddw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xFE, "paddd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, - { 0xD4, "paddq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL, false, true }, + { 0xFC, "paddb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xFD, "paddw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xFE, "paddd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, + { 0xD4, "paddq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, - { 0xEC, "paddsb", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xED, "paddsw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, + { 0xEC, "paddsb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xED, "paddsw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, - { 0xDC, "paddusb", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xDD, "paddusw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, + { 0xDC, "paddusb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xDD, "paddusw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, { 0x0F, "palignr", T_0F3A | T_66 | T_YMM | T_EVEX, true, true }, - { 0xDB, "pand", T_0F | T_66 | T_YMM | T_N16 | T_N_VL, false, true }, - { 0xDF, "pandn", T_0F | T_66 | T_YMM | T_N16 | T_N_VL, false, true }, + { 0xDB, "pand", T_0F | T_66 | T_YMM, false, true }, + { 0xDF, "pandn", T_0F | T_66 | T_YMM, false, true }, { 0xE0, "pavgb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, { 0xE3, "pavgw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, @@ -1086,38 +1086,38 @@ void put() { 0x05, "phsubw", T_0F38 | T_66 | T_YMM, false, true }, { 0x06, "phsubd", T_0F38 | T_66 | T_YMM, false, true }, { 0x07, "phsubsw", T_0F38 | T_66 | T_YMM, false, true }, - { 0xF5, "pmaddwd", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, + { 0xF5, "pmaddwd", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true }, - { 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xEE, "pmaxsw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, + { 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true }, + { 0xEE, "pmaxsw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, - { 0xDE, "pmaxub", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x3F, "pmaxud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, + { 0xDE, "pmaxub", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true }, + { 0x3F, "pmaxud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, - { 0x38, "pminsb", T_0F38 | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xEA, "pminsw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x39, "pminsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, + { 0x38, "pminsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true }, + { 0xEA, "pminsw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x39, "pminsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, - { 0xDA, "pminub", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x3A, "pminuw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x3B, "pminud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, + { 0xDA, "pminub", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x3A, "pminuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true }, + { 0x3B, "pminud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, - { 0xE4, "pmulhuw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x0B, "pmulhrsw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xE5, "pmulhw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xD5, "pmullw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0x40, "pmulld", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, + { 0xE4, "pmulhuw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x0B, "pmulhrsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true }, + { 0xE5, "pmulhw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xD5, "pmullw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0x40, "pmulld", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, - { 0xF4, "pmuludq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL, false, true }, - { 0x28, "pmuldq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL, false, true }, + { 0xF4, "pmuludq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, + { 0x28, "pmuldq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, { 0xEB, "por", T_0F | T_66 | T_YMM, false, true }, - { 0xF6, "psadbw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, + { 0xF6, "psadbw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, - { 0x00, "pshufb", T_0F38 | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, false }, + { 0x00, "pshufb", T_0F38 | T_66 | T_YMM | T_EVEX, false, false }, { 0x08, "psignb", T_0F38 | T_66 | T_YMM, false, true }, { 0x09, "psignw", T_0F38 | T_66 | T_YMM, false, true }, @@ -1133,16 +1133,16 @@ void put() { 0xD2, "psrld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true }, { 0xD3, "psrlq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true }, - { 0xF8, "psubb", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xF9, "psubw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xFA, "psubd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, - { 0xFB, "psubq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL, false, true }, + { 0xF8, "psubb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xF9, "psubw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xFA, "psubd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, + { 0xFB, "psubq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, - { 0xE8, "psubsb", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xE9, "psubsw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, + { 0xE8, "psubsb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xE9, "psubsw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, - { 0xD8, "psubusb", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, - { 0xD9, "psubusw", T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false, true }, + { 0xD8, "psubusb", T_0F | T_66 | T_YMM | T_EVEX, false, true }, + { 0xD9, "psubusw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, { 0x68, "punpckhbw", T_0F | T_66 | T_YMM | T_EVEX, false, true }, { 0x69, "punpckhwd", T_0F | T_66 | T_YMM | T_EVEX, false, true }, @@ -1165,11 +1165,11 @@ void put() { 0x51, "sqrtsd", T_0F | T_F2 | T_EVEX | T_EW1 | T_ER_X | T_N8, false, true }, { 0x51, "sqrtss", T_0F | T_F3 | T_EVEX | T_EW0 | T_ER_X | T_N4, false, true }, - { 0x15, "unpckhpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL, false, true }, - { 0x15, "unpckhps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, + { 0x15, "unpckhpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, + { 0x15, "unpckhps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, - { 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL, false, true }, - { 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false, true }, + { 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true }, + { 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -1204,19 +1204,19 @@ void put() { 0x5B, "cvtdq2ps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false }, { 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM, false }, { 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM, false }, - { 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL, false }, - { 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, false }, + { 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false }, + { 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false }, { 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_DUP, false }, - { 0x6F, "movdqa", T_0F | T_66 | T_YMM | T_N16 | T_N_VL, false }, - { 0x6F, "movdqu", T_0F | T_F3 | T_YMM | T_N16 | T_N_VL, false }, + { 0x6F, "movdqa", T_0F | T_66 | T_YMM, false }, + { 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false }, { 0x16, "movshdup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false }, { 0x12, "movsldup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false }, - { 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL, false }, - { 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, false }, + { 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false }, + { 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false }, - { 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false }, - { 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL, false }, - { 0x1E, "pabsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, false }, + { 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX, false }, + { 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX, false }, + { 0x1E, "pabsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false }, { 0x41, "phminposuw", T_0F38 | T_66, false }, { 0x20, "pmovsxbw", T_0F38 | T_66 | T_YMM | T_EVEX, false }, @@ -1233,16 +1233,16 @@ void put() { 0x34, "pmovzxwq", T_0F38 | T_66 | T_YMM | T_EVEX, false }, { 0x35, "pmovzxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, false }, - { 0x70, "pshufd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL, true }, - { 0x70, "pshufhw", T_0F | T_F3 | T_YMM | T_EVEX | T_N16 | T_N_VL, true }, - { 0x70, "pshuflw", T_0F | T_F2 | T_YMM | T_EVEX | T_N16 | T_N_VL, true }, + { 0x70, "pshufd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true }, + { 0x70, "pshufhw", T_0F | T_F3 | T_YMM | T_EVEX, true }, + { 0x70, "pshuflw", T_0F | T_F2 | T_YMM | T_EVEX, true }, { 0x17, "ptest", T_0F38 | T_66, false }, { 0x53, "rcpps", T_0F | T_YMM, false }, { 0x52, "rsqrtps", T_0F | T_YMM, false }, - { 0x51, "sqrtpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_ER_Z | T_B64 | T_N16 | T_N_VL, false }, - { 0x51, "sqrtps", T_0F | T_YMM | T_EVEX | T_EW0 | T_ER_Z | T_B32 | T_N16 | T_N_VL, false }, + { 0x51, "sqrtpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_ER_Z | T_B64, false }, + { 0x51, "sqrtps", T_0F | T_YMM | T_EVEX | T_EW0 | T_ER_Z | T_B32, false }, { 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false }, { 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false }, @@ -1261,12 +1261,12 @@ void put() const char *name; int type; } tbl[] = { - { 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL }, - { 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL }, - { 0x7F, "movdqa", T_0F | T_66 | T_YMM | T_N16 | T_N_VL }, - { 0x7F, "movdqu", T_0F | T_F3 | T_YMM | T_N16 | T_N_VL }, - { 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL }, - { 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL }, + { 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 }, + { 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 }, + { 0x7F, "movdqa", T_0F | T_66 | T_YMM }, + { 0x7F, "movdqu", T_0F | T_F3 | T_YMM }, + { 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 }, + { 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; @@ -1507,16 +1507,16 @@ void put() int idx; int type; } tbl[] = { - { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL }, - { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL }, - { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL }, - { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL }, - { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL }, - { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL }, - { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL }, - { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_N16 | T_N_VL }, - { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N16 | T_N_VL }, - { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_N16 | T_N_VL }, + { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX }, + { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX }, + { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX }, + { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, + { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 }, + { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX }, + { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, + { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX }, + { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, + { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; @@ -1556,10 +1556,10 @@ void put() printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n"); printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n"); - printf("void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, 0xE7); }\n"); - printf("void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL, 0x2B); }\n"); - printf("void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, 0x2B); }\n"); - printf("void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, 0x2A); }\n"); + printf("void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }\n"); + printf("void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }\n"); + printf("void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); }\n"); + printf("void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); }\n"); // vmovsd, vmovss for (int i = 0; i < 2; i++) { diff --git a/test/make_512.cpp b/test/make_512.cpp index 1ba7cb9..4f46fb0 100644 --- a/test/make_512.cpp +++ b/test/make_512.cpp @@ -244,7 +244,7 @@ class Test { } #endif case _MEM: - return isXbyak_ ? "ptr[eax+ecx+64]" : "[eax+ecx+64]"; + return isXbyak_ ? "ptr[eax+ecx+64]" : "[eax+ecx+64]"; // QQQ // return isXbyak_ ? "ptr[eax+ecx+6]" : "[eax+ecx+6]"; case _MEMe: { diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h index 1fe1e30..e91e1be 100644 --- a/xbyak/xbyak_avx512.h +++ b/xbyak/xbyak_avx512.h @@ -64,54 +64,54 @@ void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); } void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); } void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); } -void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0xC2, imm); } -void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0xC2, imm); } +void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } +void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX | T_N8, 0xC2, imm); } void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX | T_N4, 0xC2, imm); } -void vpcmpeqb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x74); } -void vpcmpeqw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x75); } -void vpcmpeqd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_B32 | T_N16 | T_N_VL, 0x76); } -void vpcmpeqq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0x29); } -void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x64); } -void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x65); } -void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32 | T_N16 | T_N_VL, 0x66); } -void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0x37); } -void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x6F); } -void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x6F); } -void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x6F); } -void vmovdqu16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x6F); } -void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x6F); } -void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x6F); } -void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM | T_N16 | T_N_VL, 0x1F); } -void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x7F); } -void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x7F); } -void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x7F); } -void vmovdqu16(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x7F); } -void vmovdqu32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x7F); } -void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_N16 | T_N_VL, 0x7F); } -void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x03, imm); } -void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x03, imm); } -void vpandd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32 | T_N16 | T_N_VL, 0xDB); } -void vpandq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0xDB); } -void vpandnd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32 | T_N16 | T_N_VL, 0xDF); } -void vpandnq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0xDF); } -void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0x3D); } -void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0x3F); } -void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0x39); } -void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0x3B); } +void vpcmpeqb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x74); } +void vpcmpeqw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x75); } +void vpcmpeqd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_B32, 0x76); } +void vpcmpeqq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x29); } +void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x64); } +void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x65); } +void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x66); } +void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x37); } +void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } +void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } +void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } +void vmovdqu16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } +void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } +void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } +void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); } +void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); } +void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); } +void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); } +void vmovdqu16(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); } +void vmovdqu32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); } +void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); } +void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); } +void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); } +void vpandd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xDB); } +void vpandq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xDB); } +void vpandnd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xDF); } +void vpandnq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xDF); } +void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3D); } +void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3F); } +void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x39); } +void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3B); } void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_N16, 0xE2); } -void vpsravq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0x46); } -void vpsravw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x11); } -void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x12); } -void vpsrlvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x10); } -void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32 | T_N16 | T_N_VL, 0xEB); } -void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0xEB); } -void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32 | T_N16 | T_N_VL, 0xEF); } -void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0xEF); } -void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0x40); } -void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x8D); } -void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N16 | T_N_VL, 0x8D); } -void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64 | T_N16 | T_N_VL, 0x72, imm); } +void vpsravq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x46); } +void vpsravw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x11); } +void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x12); } +void vpsrlvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x10); } +void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEB); } +void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); } +void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEF); } +void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEF); } +void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); } +void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8D); } +void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8D); } +void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); } void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); } void vextractf32x8(const Operand& op, const Zmm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_N32, 0x1B, imm); } diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 0269f63..da61db4 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -704,38 +704,38 @@ void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); } void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); } void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); } void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); } -void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x58); } -void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x58); } +void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x58); } +void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x58); } void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x58); } void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x58); } -void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x5C); } -void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x5C); } +void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5C); } +void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5C); } void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5C); } void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5C); } -void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x59); } -void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x59); } +void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); } +void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); } void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x59); } void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x59); } -void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x5E); } -void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x5E); } +void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5E); } +void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); } void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5E); } void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5E); } -void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x5F); } -void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x5F); } +void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5F); } +void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5F); } void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5F); } void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5F); } -void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x5D); } -void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x5D); } +void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5D); } +void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); } void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5D); } void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5D); } -void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x54); } -void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x54); } -void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x55); } -void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x55); } -void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x56); } -void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x56); } -void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x57); } -void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x57); } +void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); } +void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); } +void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55); } +void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); } +void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x56); } +void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x56); } +void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x57); } +void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x57); } void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); } void vblendpd(const Xmm& x, const Operand& op, uint8 imm) { vblendpd(x, x, op, imm); } void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); } @@ -758,11 +758,11 @@ void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { op void vpclmulqdq(const Xmm& x, const Operand& op, uint8 imm) { vpclmulqdq(x, x, op, imm); } void vpermilps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x0C); } void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x0D); } -void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x47); } -void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0x47); } -void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x46); } -void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x45); } -void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0x45); } +void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); } +void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); } +void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); } +void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); } +void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); } void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xC2, imm); } void vcmppd(const Xmm& x, const Operand& op, uint8 imm) { vcmppd(x, x, op, imm); } void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0xC2, imm); } @@ -777,35 +777,35 @@ void vcvtss2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x void vcvtss2sd(const Xmm& x, const Operand& op) { vcvtss2sd(x, x, op); } void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); } void vinsertps(const Xmm& x, const Operand& op, uint8 imm) { vinsertps(x, x, op, imm); } -void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x63); } +void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x63); } void vpacksswb(const Xmm& x, const Operand& op) { vpacksswb(x, x, op); } -void vpackssdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x6B); } +void vpackssdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x6B); } void vpackssdw(const Xmm& x, const Operand& op) { vpackssdw(x, x, op); } -void vpackuswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x67); } +void vpackuswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x67); } void vpackuswb(const Xmm& x, const Operand& op) { vpackuswb(x, x, op); } -void vpackusdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x2B); } +void vpackusdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x2B); } void vpackusdw(const Xmm& x, const Operand& op) { vpackusdw(x, x, op); } -void vpaddb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xFC); } +void vpaddb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xFC); } void vpaddb(const Xmm& x, const Operand& op) { vpaddb(x, x, op); } -void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xFD); } +void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xFD); } void vpaddw(const Xmm& x, const Operand& op) { vpaddw(x, x, op); } -void vpaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0xFE); } +void vpaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFE); } void vpaddd(const Xmm& x, const Operand& op) { vpaddd(x, x, op); } -void vpaddq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0xD4); } +void vpaddq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xD4); } void vpaddq(const Xmm& x, const Operand& op) { vpaddq(x, x, op); } -void vpaddsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xEC); } +void vpaddsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xEC); } void vpaddsb(const Xmm& x, const Operand& op) { vpaddsb(x, x, op); } -void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xED); } +void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xED); } void vpaddsw(const Xmm& x, const Operand& op) { vpaddsw(x, x, op); } -void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xDC); } +void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDC); } void vpaddusb(const Xmm& x, const Operand& op) { vpaddusb(x, x, op); } -void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xDD); } +void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDD); } void vpaddusw(const Xmm& x, const Operand& op) { vpaddusw(x, x, op); } void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_YMM | T_EVEX, 0x0F, imm); } void vpalignr(const Xmm& x, const Operand& op, uint8 imm) { vpalignr(x, x, op, imm); } -void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_N16 | T_N_VL, 0xDB); } +void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDB); } void vpand(const Xmm& x, const Operand& op) { vpand(x, x, op); } -void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_N16 | T_N_VL, 0xDF); } +void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDF); } void vpandn(const Xmm& x, const Operand& op) { vpandn(x, x, op); } void vpavgb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE0); } void vpavgb(const Xmm& x, const Operand& op) { vpavgb(x, x, op); } @@ -839,53 +839,53 @@ void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, void vphsubd(const Xmm& x, const Operand& op) { vphsubd(x, x, op); } void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x07); } void vphsubsw(const Xmm& x, const Operand& op) { vphsubsw(x, x, op); } -void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xF5); } +void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); } void vpmaddwd(const Xmm& x, const Operand& op) { vpmaddwd(x, x, op); } -void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x04); } +void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); } void vpmaddubsw(const Xmm& x, const Operand& op) { vpmaddubsw(x, x, op); } -void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x3C); } +void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3C); } void vpmaxsb(const Xmm& x, const Operand& op) { vpmaxsb(x, x, op); } -void vpmaxsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xEE); } +void vpmaxsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xEE); } void vpmaxsw(const Xmm& x, const Operand& op) { vpmaxsw(x, x, op); } -void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x3D); } +void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3D); } void vpmaxsd(const Xmm& x, const Operand& op) { vpmaxsd(x, x, op); } -void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xDE); } +void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDE); } void vpmaxub(const Xmm& x, const Operand& op) { vpmaxub(x, x, op); } -void vpmaxuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x3E); } +void vpmaxuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3E); } void vpmaxuw(const Xmm& x, const Operand& op) { vpmaxuw(x, x, op); } -void vpmaxud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x3F); } +void vpmaxud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3F); } void vpmaxud(const Xmm& x, const Operand& op) { vpmaxud(x, x, op); } -void vpminsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x38); } +void vpminsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x38); } void vpminsb(const Xmm& x, const Operand& op) { vpminsb(x, x, op); } -void vpminsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xEA); } +void vpminsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xEA); } void vpminsw(const Xmm& x, const Operand& op) { vpminsw(x, x, op); } -void vpminsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x39); } +void vpminsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x39); } void vpminsd(const Xmm& x, const Operand& op) { vpminsd(x, x, op); } -void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xDA); } +void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDA); } void vpminub(const Xmm& x, const Operand& op) { vpminub(x, x, op); } -void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x3A); } +void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3A); } void vpminuw(const Xmm& x, const Operand& op) { vpminuw(x, x, op); } -void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x3B); } +void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3B); } void vpminud(const Xmm& x, const Operand& op) { vpminud(x, x, op); } -void vpmulhuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xE4); } +void vpmulhuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE4); } void vpmulhuw(const Xmm& x, const Operand& op) { vpmulhuw(x, x, op); } -void vpmulhrsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x0B); } +void vpmulhrsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x0B); } void vpmulhrsw(const Xmm& x, const Operand& op) { vpmulhrsw(x, x, op); } -void vpmulhw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xE5); } +void vpmulhw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE5); } void vpmulhw(const Xmm& x, const Operand& op) { vpmulhw(x, x, op); } -void vpmullw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xD5); } +void vpmullw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xD5); } void vpmullw(const Xmm& x, const Operand& op) { vpmullw(x, x, op); } -void vpmulld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x40); } +void vpmulld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x40); } void vpmulld(const Xmm& x, const Operand& op) { vpmulld(x, x, op); } -void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0xF4); } +void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xF4); } void vpmuludq(const Xmm& x, const Operand& op) { vpmuludq(x, x, op); } -void vpmuldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0x28); } +void vpmuldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x28); } void vpmuldq(const Xmm& x, const Operand& op) { vpmuldq(x, x, op); } void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEB); } void vpor(const Xmm& x, const Operand& op) { vpor(x, x, op); } -void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xF6); } +void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF6); } void vpsadbw(const Xmm& x, const Operand& op) { vpsadbw(x, x, op); } -void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x00); } +void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x00); } void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); } void vpsignb(const Xmm& x, const Operand& op) { vpsignb(x, x, op); } void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); } @@ -908,21 +908,21 @@ void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, void vpsrld(const Xmm& x, const Operand& op) { vpsrld(x, x, op); } void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16, 0xD3); } void vpsrlq(const Xmm& x, const Operand& op) { vpsrlq(x, x, op); } -void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xF8); } +void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); } void vpsubb(const Xmm& x, const Operand& op) { vpsubb(x, x, op); } -void vpsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xF9); } +void vpsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF9); } void vpsubw(const Xmm& x, const Operand& op) { vpsubw(x, x, op); } -void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0xFA); } +void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); } void vpsubd(const Xmm& x, const Operand& op) { vpsubd(x, x, op); } -void vpsubq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0xFB); } +void vpsubq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xFB); } void vpsubq(const Xmm& x, const Operand& op) { vpsubq(x, x, op); } -void vpsubsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xE8); } +void vpsubsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE8); } void vpsubsb(const Xmm& x, const Operand& op) { vpsubsb(x, x, op); } -void vpsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xE9); } +void vpsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE9); } void vpsubsw(const Xmm& x, const Operand& op) { vpsubsw(x, x, op); } -void vpsubusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xD8); } +void vpsubusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xD8); } void vpsubusb(const Xmm& x, const Operand& op) { vpsubusb(x, x, op); } -void vpsubusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0xD9); } +void vpsubusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xD9); } void vpsubusw(const Xmm& x, const Operand& op) { vpsubusw(x, x, op); } void vpunpckhbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x68); } void vpunpckhbw(const Xmm& x, const Operand& op) { vpunpckhbw(x, x, op); } @@ -954,13 +954,13 @@ void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, void vsqrtsd(const Xmm& x, const Operand& op) { vsqrtsd(x, x, op); } void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x51); } void vsqrtss(const Xmm& x, const Operand& op) { vsqrtss(x, x, op); } -void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0x15); } +void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x15); } void vunpckhpd(const Xmm& x, const Operand& op) { vunpckhpd(x, x, op); } -void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x15); } +void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x15); } void vunpckhps(const Xmm& x, const Operand& op) { vunpckhps(x, x, op); } -void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0x14); } +void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x14); } void vunpcklpd(const Xmm& x, const Operand& op) { vunpcklpd(x, x, op); } -void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x14); } +void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x14); } void vunpcklps(const Xmm& x, const Operand& op) { vunpcklps(x, x, op); } void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); } void vroundpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x09, imm); } @@ -978,18 +978,18 @@ void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x5B); } void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x5B); } void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x5B); } -void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x28); } -void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x28); } +void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); } +void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); } void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z | T_DUP, 0x12); } -void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM | T_N16 | T_N_VL, 0x6F); } -void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_N16 | T_N_VL, 0x6F); } +void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x6F); } +void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x6F); } void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x16); } void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x12); } -void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x10); } -void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x10); } -void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x1C); } -void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x1D); } -void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x1E); } +void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); } +void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); } +void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1C); } +void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1D); } +void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x1E); } void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38, 0x41); } void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x20); } void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x21); } @@ -1003,22 +1003,22 @@ void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x33); } void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x34); } void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX, 0x35); } -void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x70, imm); } -void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x70, imm); } -void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x70, imm); } +void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); } +void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); } +void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); } void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38, 0x17); } void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x53); } void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x52); } -void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64 | T_N16 | T_N_VL, 0x51); } -void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32 | T_N16 | T_N_VL, 0x51); } +void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x51); } +void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x51); } void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X | T_N8, 0x2E); } void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_EVEX | T_SAE_X | T_N4, 0x2E); } -void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x29); } -void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x29); } -void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM | T_N16 | T_N_VL, 0x7F); } -void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM | T_N16 | T_N_VL, 0x7F); } -void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x11); } -void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x11); } +void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x29); } +void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x29); } +void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM, 0x7F); } +void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM, 0x7F); } +void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x11); } +void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX, 0x11); } void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0); } void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0); } void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); } @@ -1429,25 +1429,25 @@ void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { vpinsrw(x, x, op, imm void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x1, false, x2, op, !op.isMEM(), Operand::XMM, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0, 0x22, imm); } void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { vpinsrd(x, x, op, imm); } void vpmovmskb(const Reg32e& r, const Xmm& x) { bool isYMM= x.isYMM(); opAVX_X_X_XM(isYMM ? Ymm(r.getIdx()) : Xmm(r.getIdx()), isYMM ? ym0 : xm0, x, T_0F | T_66 | T_YMM, 0xD7); } -void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x73, imm); } +void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); } void vpslldq(const Xmm& x, uint8 imm) { vpslldq(x, x, imm); } -void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x73, imm); } +void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); } void vpsrldq(const Xmm& x, uint8 imm) { vpsrldq(x, x, imm); } -void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x71, imm); } +void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); } void vpsllw(const Xmm& x, uint8 imm) { vpsllw(x, x, imm); } -void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x72, imm); } +void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); } void vpslld(const Xmm& x, uint8 imm) { vpslld(x, x, imm); } -void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0x73, imm); } +void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); } void vpsllq(const Xmm& x, uint8 imm) { vpsllq(x, x, imm); } -void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x71, imm); } +void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); } void vpsraw(const Xmm& x, uint8 imm) { vpsraw(x, x, imm); } -void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x72, imm); } +void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); } void vpsrad(const Xmm& x, uint8 imm) { vpsrad(x, x, imm); } -void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_N16 | T_N_VL, 0x71, imm); } +void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); } void vpsrlw(const Xmm& x, uint8 imm) { vpsrlw(x, x, imm); } -void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_N16 | T_N_VL, 0x72, imm); } +void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); } void vpsrld(const Xmm& x, uint8 imm) { vpsrld(x, x, imm); } -void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_N16 | T_N_VL, 0x73, imm); } +void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(x.copyAndSetIdx(2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); } void vpsrlq(const Xmm& x, uint8 imm) { vpsrlq(x, x, imm); } void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); } void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) { vblendvpd(x1, x1, op, x4); } @@ -1464,10 +1464,10 @@ void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); } void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); } void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); } -void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, 0xE7); } -void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16 | T_N_VL, 0x2B); } -void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, 0x2B); } -void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16 | T_N_VL, 0x2A); } +void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); } +void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); } +void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); } +void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); } void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F | T_EW1 | T_EVEX | T_N8, 0x10); } void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_F2 | T_0F | T_EW1 | T_EVEX | T_N8, 0x10); } void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_F2 | T_0F | T_EW1 | T_EVEX | T_N8 | T_M_K, 0x11); }