diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp index 0374c96..4436f28 100644 --- a/gen/avx_type.hpp +++ b/gen/avx_type.hpp @@ -15,15 +15,9 @@ std::string type2String(uint64_t type) assert(low < int(sizeof(tbl) / sizeof(tbl[0]))); str = tbl[low - 1]; } - if (type & T_N_VL) { - str += "|T_N_VL"; - } - if (type & T_VEX) { - str += "|T_VEX"; - } - if ((type & T_NX_MASK) == T_DUP) { - str += "|T_DUP"; - } + if (type & T_N_VL) str += "|T_N_VL"; + if (type & T_VEX) str += "|T_VEX"; + if ((type & T_NX_MASK) == T_DUP) str += "|T_DUP"; switch (type & T_F2) { // T_F2 = T_66|T_F3 case T_66: str += "|T_66"; break; case T_F3: str += "|T_F3"; break; @@ -44,75 +38,36 @@ std::string type2String(uint64_t type) str += "|T_0F38"; } } - if (type & T_0F3A) { - str += "|T_0F3A"; - } - if (type & T_L0) { - str += "|T_L0"; - } - if (type & T_L1) { - str += "|T_L1"; - } - if (type & T_W0) { - str += "|T_W0"; - } - if (type & T_W1) { - str += "|T_W1"; - } - if (type & T_EW0) { - str += "|T_EW0"; - } - if (type & T_EW1) { - str += "|T_EW1"; - } - if (type & T_YMM) { - str += "|T_YMM"; - } - if (type & T_EVEX) { - str += "|T_EVEX"; - } - if (type & T_ER_X) { - str += "|T_ER_X"; - } - if (type & T_ER_Y) { - str += "|T_ER_Y"; - } - if (type & T_ER_Z) { - str += "|T_ER_Z"; - } - if (type & T_ER_R) { - str += "|T_ER_R"; - } - if (type & T_SAE_X) { - str += "|T_SAE_X"; - } - if (type & T_SAE_Y) { - str += "|T_SAE_Y"; - } - if (type & T_SAE_Z) { - str += "|T_SAE_Z"; - } - if (type & T_MUST_EVEX) { - str += "|T_MUST_EVEX"; - } + if (type & T_0F3A) str += "|T_0F3A"; + if (type & T_L0) str += "|T_L0"; + if (type & T_L1) str += "|T_L1"; + if (type & T_W0) str += "|T_W0"; + if (type & T_W1) str += "|T_W1"; + if (type & T_EW0) str += "|T_EW0"; + if (type & T_EW1) str += "|T_EW1"; + if (type & T_YMM) str += "|T_YMM"; + if (type & T_EVEX) str += "|T_EVEX"; + if (type & T_ER_X) str += "|T_ER_X"; + if (type & T_ER_Y) str += "|T_ER_Y"; + if (type & T_ER_Z) str += "|T_ER_Z"; + if (type & T_ER_R) str += "|T_ER_R"; + if (type & T_SAE_X) str += "|T_SAE_X"; + if (type & T_SAE_Y) str += "|T_SAE_Y"; + if (type & T_SAE_Z) str += "|T_SAE_Z"; + if (type & T_MUST_EVEX) str += "|T_MUST_EVEX"; + switch (type & T_B16) { // T_B16 = T_B32 | T_B64 case T_B16: str += "|T_B16"; break; case T_B32: str += "|T_B32"; break; case T_B64: str += "|T_B64"; break; default: break; } - if (type & T_M_K) { - str += "|T_M_K"; - } - if (type & T_VSIB) { - str += "|T_VSIB"; - } - if (type & T_MEM_EVEX) { - str += "|T_MEM_EVEX"; - } - if (type & T_NF) { - str += "|T_NF"; - } + if (type & T_M_K) str += "|T_M_K"; + if (type & T_VSIB) str += "|T_VSIB"; + if (type & T_MEM_EVEX) str += "|T_MEM_EVEX"; + if (type & T_NF) str += "|T_NF"; + if (type & T_NO_OR1) str += "|T_NO_OR1"; + if (str[0] == '|') str = str.substr(1); return str; } diff --git a/gen/avx_type_def.h b/gen/avx_type_def.h index 094ef8f..5e74a93 100644 --- a/gen/avx_type_def.h +++ b/gen/avx_type_def.h @@ -1,51 +1,49 @@ // @@@begin of avx_type_def.h - enum AVXtype { - T_NONE = 0, + static const uint64_t T_NONE = 0ull; // low 3 bit - T_N1 = 1, - T_N2 = 2, - T_N4 = 3, - T_N8 = 4, - T_N16 = 5, - T_N32 = 6, - T_NX_MASK = 7, - T_DUP = T_NX_MASK,//1 << 4, // N = (8, 32, 64) - T_N_VL = 1 << 3, // N * (1, 2, 4) for VL - T_VEX = 1 << 4, - T_66 = 1 << 5, // pp = 1 - T_F3 = 1 << 6, // pp = 2 - T_F2 = T_66 | T_F3, // pp = 3 - T_ER_R = 1 << 7, // reg{er} - T_0F = 1 << 8, - T_0F38 = 1 << 9, - T_0F3A = 1 << 10, - T_L0 = 1 << 11, - T_L1 = 1 << 12, - T_W0 = 1 << 13, - T_W1 = 1 << 14, - T_EW0 = 1 << 15, - T_EW1 = 1 << 16, - T_YMM = 1 << 17, // support YMM, ZMM - T_EVEX = 1 << 18, - T_ER_X = 1 << 19, // xmm{er} - T_ER_Y = 1 << 20, // ymm{er} - T_ER_Z = 1 << 21, // zmm{er} - T_SAE_X = 1 << 22, // xmm{sae} - T_SAE_Y = 1 << 23, // ymm{sae} - T_SAE_Z = 1 << 24, // zmm{sae} - T_MUST_EVEX = 1 << 25, // contains T_EVEX - T_B32 = 1 << 26, // m32bcst - T_B64 = 1 << 27, // m64bcst - T_B16 = T_B32 | T_B64, // m16bcst (Be careful) - T_M_K = 1 << 28, // mem{k} - T_VSIB = 1 << 29, - T_MEM_EVEX = 1u << 30, // use evex if mem - T_FP16 = 1u << 31, // avx512-fp16 - T_MAP5 = T_FP16 | T_0F, - T_MAP6 = T_FP16 | T_0F38, - T_NF = 1ull << 32, // T_nf - T_XXX - }; + static const uint64_t T_N1 = 1ull; + static const uint64_t T_N2 = 2ull; + static const uint64_t T_N4 = 3ull; + static const uint64_t T_N8 = 4ull; + static const uint64_t T_N16 = 5ull; + static const uint64_t T_N32 = 6ull; + static const uint64_t T_NX_MASK = 7ull; + static const uint64_t T_DUP = T_NX_MASK;//1 << 4, // N = (8, 32, 64) + static const uint64_t T_N_VL = 1ull << 3; // N * (1, 2, 4) for VL + static const uint64_t T_VEX = 1ull << 4; + static const uint64_t T_66 = 1ull << 5; // pp = 1 + static const uint64_t T_F3 = 1ull << 6; // pp = 2 + static const uint64_t T_F2 = T_66 | T_F3; // pp = 3 + static const uint64_t T_ER_R = 1ull << 7; // reg{er} + static const uint64_t T_0F = 1ull << 8; + static const uint64_t T_0F38 = 1ull << 9; + static const uint64_t T_0F3A = 1ull << 10; + static const uint64_t T_L0 = 1ull << 11; + static const uint64_t T_L1 = 1ull << 12; + static const uint64_t T_W0 = 1ull << 13; + static const uint64_t T_W1 = 1ull << 14; + static const uint64_t T_EW0 = 1ull << 15; + static const uint64_t T_EW1 = 1ull << 16; + static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM + static const uint64_t T_EVEX = 1ull << 18; + static const uint64_t T_ER_X = 1ull << 19; // xmm{er} + static const uint64_t T_ER_Y = 1ull << 20; // ymm{er} + static const uint64_t T_ER_Z = 1ull << 21; // zmm{er} + static const uint64_t T_SAE_X = 1ull << 22; // xmm{sae} + static const uint64_t T_SAE_Y = 1ull << 23; // ymm{sae} + static const uint64_t T_SAE_Z = 1ull << 24; // zmm{sae} + static const uint64_t T_MUST_EVEX = 1ull << 25; // contains T_EVEX + static const uint64_t T_B32 = 1ull << 26; // m32bcst + static const uint64_t T_B64 = 1ull << 27; // m64bcst + static const uint64_t T_B16 = T_B32 | T_B64; // m16bcst (Be careful) + static const uint64_t T_M_K = 1ull << 28; // mem{k} + static const uint64_t T_VSIB = 1ull << 29; + static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem + static const uint64_t T_FP16 = 1ull << 31; // avx512-fp16 + static const uint64_t T_MAP5 = T_FP16 | T_0F; + static const uint64_t T_MAP6 = T_FP16 | T_0F38; + static const uint64_t T_NF = 1ull << 32; // T_nf + static const uint64_t T_NO_OR1 = 1ull << 33; // does not "code | 1" // T_66 = 1, T_F3 = 2, T_F2 = 3 static inline uint32_t getPP(uint64_t type) { return (type >> 5) & 3; } // @@@end of avx_type_def.h diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 49d438a..7c4d9ba 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -142,9 +142,9 @@ void putVcmp() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); + std::string s = type2String(p->type); printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : ""); } puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }"); puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }"); @@ -210,8 +210,8 @@ void putX_XM() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); + std::string s = type2String(p->type); + printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); } puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }"); @@ -242,8 +242,8 @@ void putM_X() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); + std::string s = type2String(p->type); + printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); } } @@ -265,8 +265,8 @@ void putXM_X() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); + std::string s = type2String(p->type); + printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); } } @@ -413,9 +413,9 @@ void putX_X_XM_IMM() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); + std::string s = type2String(p->type); printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } @@ -435,8 +435,8 @@ void putShift() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); + std::string s = type2String(p.type); + printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, s.c_str(), p.code); } } @@ -461,9 +461,9 @@ void putExtractInsert() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); + std::string s = type2String(p.type); const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM"; - printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code); + printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, s.c_str(), p.code); } } { @@ -485,12 +485,12 @@ void putExtractInsert() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); + std::string s = type2String(p.type); const char *x = p.isZMM ? "Zmm" : "Ymm"; const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))"; printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {" "if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) " - "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code); + "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, s.c_str(), p.code); } } } @@ -511,9 +511,9 @@ void putBroadcast(bool only64bit) }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); + std::string s = type2String(p.type); if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) { - printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, s.c_str(), p.code); } } } @@ -583,28 +583,28 @@ void putCvt() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); + std::string s = type2String(p.type); switch (p.ptn) { case 0: - printf("void %s(const Reg32e& r, const Operand& op) { uint64_t type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Reg32e& r, const Operand& op) { uint64_t type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code); break; case 1: - printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); break; case 2: - printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); break; case 3: - printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); break; case 4: - printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); break; case 5: - printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); break; case 6: - printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code); break; } } @@ -636,8 +636,8 @@ void putGather() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type | T_VSIB); - printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode); + std::string s = type2String(p.type | T_VSIB); + printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode); } } void putScatter() @@ -660,8 +660,8 @@ void putScatter() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type | T_VSIB); - printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode); + std::string s = type2String(p.type | T_VSIB); + printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode); } } @@ -718,8 +718,8 @@ void putMov() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? "true" : "false"); + std::string s = type2String(p.type); + printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, s.c_str(), p.code, p.mode ? "true" : "false"); } } } @@ -770,9 +770,9 @@ void putX_XM_IMM() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); + std::string s = type2String(p->type); printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } @@ -810,9 +810,9 @@ void putMisc() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB); + std::string s = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB); printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n" - , p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM"); + , p.name, p.zm, s.c_str(), p.code, p.isZmm ? "ZMM" : "YMM"); } } @@ -887,18 +887,18 @@ void putFP16_FMA() { "213", 0xA0 }, { "231", 0xB0 }, }; - int t = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX; + uint64_t type = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX; const char *suf = 0; if (tbl[i].isPH) { - t |= T_ER_Z | T_YMM | T_B16; + type |= T_ER_Z | T_YMM | T_B16; suf = "ph"; } else { - t |= T_ER_X | T_N2; + type |= T_ER_X | T_N2; suf = "sh"; } - std::string type = type2String(t); + std::string s = type2String(type); printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" - , tbl[i].name, ord[k].str, suf, type.c_str(), tbl[i].code | ord[k].code); + , tbl[i].name, ord[k].str, suf, s.c_str(), tbl[i].code | ord[k].code); } } } @@ -928,9 +928,9 @@ void putFP16_FMA2() t |= T_ER_X | T_N2; suf = "sh"; } - std::string type = type2String(t); + std::string s = type2String(t); printf("void vf%s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" - , j == 0 ? "c" : "", tbl[i].name, suf, type.c_str(), tbl[i].code); + , j == 0 ? "c" : "", tbl[i].name, suf, s.c_str(), tbl[i].code); } } } @@ -938,16 +938,16 @@ void putFP16_FMA2() void putFP16_2() { { - int t = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2; - std::string type = type2String(t); - printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", type.c_str()); - printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", type.c_str()); + uint64_t type = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2; + std::string s = type2String(type); + printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", s.c_str()); + printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", s.c_str()); } { - int t = T_66 | T_MAP5 | T_MUST_EVEX | T_N2; - std::string type = type2String(t); - printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", type.c_str()); - printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", type.c_str()); + uint64_t type = T_66 | T_MAP5 | T_MUST_EVEX | T_N2; + std::string s = type2String(type); + printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", s.c_str()); + printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", s.c_str()); } } diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 272184f..fd60773 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -216,7 +216,7 @@ void putX_X_XM(bool omitOnly) }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); + std::string s = type2String(p->type); if (omitOnly) { if (p->enableOmit) { printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8_t imm" : "", p->name, p->hasIMM ? ", imm" : ""); @@ -231,7 +231,7 @@ void putX_X_XM(bool omitOnly) } if (p->mode & 2) { printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } } @@ -483,8 +483,8 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); + std::string s = type2String(p->type); + printf("void %s(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); } } { @@ -515,11 +515,10 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type | T_0F); - printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, type.c_str(), p->code); + std::string s = type2String(p->type | T_0F); + printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, s.c_str(), p->code); } } - { // special type const struct Tbl { @@ -543,8 +542,8 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type | T_0F); - printf("void %s(const Reg& reg, const Operand& op) { opSSE(reg, op, %s, 0x%02X, %s); }\n", p->name, type.c_str(), p->code, p->cond); + std::string s = type2String(p->type | T_0F); + printf("void %s(const Reg& reg, const Operand& op) { opSSE(reg, op, %s, 0x%02X, %s); }\n", p->name, s.c_str(), p->code, p->cond); } } { @@ -581,8 +580,8 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); + std::string s = type2String(p->type); + printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); } } { @@ -812,9 +811,9 @@ void put() printf("void %s(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x%02X); }\n", p->name, p->code); printf("void %s(const Operand& op, uint32_t imm) { opOI(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext); if (!p->support3op) continue; - std::string type = type2String(0);//p->type); - printf("void %s(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); - printf("void %s(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, %s, %d); }\n", p->name, type.c_str(), p->ext); + std::string s = type2String(0);//p->type); + printf("void %s(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); + printf("void %s(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, %s, %d); }\n", p->name, s.c_str(), p->ext); } } { @@ -1354,15 +1353,15 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); + std::string s = type2String(p->type); if (p->mode & 1) { const char *immS1 = p->hasIMM ? ", uint8_t imm" : ""; const char *immS2 = p->hasIMM ? ", imm" : ", NONE"; - printf("void %s(const Xmm& xmm, const Operand& op%s) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, immS1, type.c_str(), p->code, immS2); + printf("void %s(const Xmm& xmm, const Operand& op%s) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, immS1, s.c_str(), p->code, immS2); } if (p->mode & 2) { printf("void v%s(const Xmm& xm, const Operand& op%s) { opAVX_X_XM_IMM(xm, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } } @@ -1382,9 +1381,9 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); + std::string s = type2String(p->type); printf("void v%s(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, %s, 0x%02X); }\n" - , p->name, type.c_str(), p->code); + , p->name, s.c_str(), p->code); } } // (x, x/m), (y, y/m), (x, x, x/m), (y, y, y/m) @@ -1409,13 +1408,13 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); + std::string s = type2String(p->type); if (p->mode & 1) { - printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, type.c_str(), p->code); + printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, s.c_str(), p->code); } if (p->mode & 2) { printf("void v%s(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, %s, 0x%02X); }\n" - , p->name, type.c_str(), p->code); + , p->name, s.c_str(), p->code); } } } @@ -1449,8 +1448,8 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + std::string s = type2String(p.type); + printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); } } // vpermq, vpermpd @@ -1465,8 +1464,8 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code); + std::string s = type2String(p.type); + printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, s.c_str(), p.code); } } // vcmpeqps @@ -1504,11 +1503,11 @@ void put() const Tbl& p = tbl[i]; char c = p.isH ? 'h' : 'l'; const char *suf = p.isPd ? "pd" : "ps"; - const char *type = p.isPd ? "T_0F | T_66 | T_EVEX | T_EW1 | T_N8" : "T_0F | T_EVEX | T_EW0 | T_N8"; + std::string s = type2String(p.isPd ? (T_0F | T_66 | T_EVEX | T_EW1 | T_N8) : (T_0F | T_EVEX | T_EW0 | T_N8)); printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n" - , c, suf, type, p.code); + , c, suf, s.c_str(), p.code); printf("void vmov%c%s(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s, 0x%02X); }\n" - , c, suf, type, p.code + 1); + , c, suf, s.c_str(), p.code + 1); } } // FMA @@ -1557,9 +1556,9 @@ void put() } else { // ss t |= T_ER_X | T_N4; } - std::string type = type2String(t); + std::string s = type2String(t); printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" - , tbl[i].name, ord[k].str, suf.c_str(), type.c_str(), tbl[i].code + ord[k].code); + , tbl[i].name, ord[k].str, suf.c_str(), s.c_str(), tbl[i].code + ord[k].code); } } } @@ -1583,8 +1582,8 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + std::string s = type2String(p.type); + printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code); } puts("void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }"); @@ -1634,8 +1633,8 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - std::string type = type2String(p.type); - printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); + std::string s = type2String(p.type); + printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, s.c_str(), p.code); } } // 4-op @@ -1724,7 +1723,7 @@ void put() const Tbl& p = tbl[i]; printf("void %s(const Xmm& x, const Address& addr) { opVex(x, 0, addr, %s, 0x%02X); }\n", p.name, type2String(p.type).c_str(), p.code); } - puts("void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); }"); + printf("void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, %s|orEvexIf(encoding), 0x72); }", type2String(T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32).c_str()); } // haswell gpr(reg, reg, r/m) { @@ -1820,8 +1819,8 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code); + std::string s = type2String(p->type); + printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, s.c_str(), p->code); } } // avx-vnni-int8 @@ -1848,8 +1847,8 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - std::string type = type2String(p->type); - printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); + std::string s = type2String(p->type); + printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); } } } diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 1560cd1..e0e9f49 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -1756,53 +1756,51 @@ private: if (rex) db(rex); } // @@@begin of avx_type_def.h - enum AVXtype { - T_NONE = 0, + static const uint64_t T_NONE = 0ull; // low 3 bit - T_N1 = 1, - T_N2 = 2, - T_N4 = 3, - T_N8 = 4, - T_N16 = 5, - T_N32 = 6, - T_NX_MASK = 7, - T_DUP = T_NX_MASK,//1 << 4, // N = (8, 32, 64) - T_N_VL = 1 << 3, // N * (1, 2, 4) for VL - T_VEX = 1 << 4, - T_66 = 1 << 5, // pp = 1 - T_F3 = 1 << 6, // pp = 2 - T_F2 = T_66 | T_F3, // pp = 3 - T_ER_R = 1 << 7, // reg{er} - T_0F = 1 << 8, - T_0F38 = 1 << 9, - T_0F3A = 1 << 10, - T_L0 = 1 << 11, - T_L1 = 1 << 12, - T_W0 = 1 << 13, - T_W1 = 1 << 14, - T_EW0 = 1 << 15, - T_EW1 = 1 << 16, - T_YMM = 1 << 17, // support YMM, ZMM - T_EVEX = 1 << 18, - T_ER_X = 1 << 19, // xmm{er} - T_ER_Y = 1 << 20, // ymm{er} - T_ER_Z = 1 << 21, // zmm{er} - T_SAE_X = 1 << 22, // xmm{sae} - T_SAE_Y = 1 << 23, // ymm{sae} - T_SAE_Z = 1 << 24, // zmm{sae} - T_MUST_EVEX = 1 << 25, // contains T_EVEX - T_B32 = 1 << 26, // m32bcst - T_B64 = 1 << 27, // m64bcst - T_B16 = T_B32 | T_B64, // m16bcst (Be careful) - T_M_K = 1 << 28, // mem{k} - T_VSIB = 1 << 29, - T_MEM_EVEX = 1u << 30, // use evex if mem - T_FP16 = 1u << 31, // avx512-fp16 - T_MAP5 = T_FP16 | T_0F, - T_MAP6 = T_FP16 | T_0F38, - T_NF = 1ull << 32, // T_nf - T_XXX - }; + static const uint64_t T_N1 = 1ull; + static const uint64_t T_N2 = 2ull; + static const uint64_t T_N4 = 3ull; + static const uint64_t T_N8 = 4ull; + static const uint64_t T_N16 = 5ull; + static const uint64_t T_N32 = 6ull; + static const uint64_t T_NX_MASK = 7ull; + static const uint64_t T_DUP = T_NX_MASK;//1 << 4, // N = (8, 32, 64) + static const uint64_t T_N_VL = 1ull << 3; // N * (1, 2, 4) for VL + static const uint64_t T_VEX = 1ull << 4; + static const uint64_t T_66 = 1ull << 5; // pp = 1 + static const uint64_t T_F3 = 1ull << 6; // pp = 2 + static const uint64_t T_F2 = T_66 | T_F3; // pp = 3 + static const uint64_t T_ER_R = 1ull << 7; // reg{er} + static const uint64_t T_0F = 1ull << 8; + static const uint64_t T_0F38 = 1ull << 9; + static const uint64_t T_0F3A = 1ull << 10; + static const uint64_t T_L0 = 1ull << 11; + static const uint64_t T_L1 = 1ull << 12; + static const uint64_t T_W0 = 1ull << 13; + static const uint64_t T_W1 = 1ull << 14; + static const uint64_t T_EW0 = 1ull << 15; + static const uint64_t T_EW1 = 1ull << 16; + static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM + static const uint64_t T_EVEX = 1ull << 18; + static const uint64_t T_ER_X = 1ull << 19; // xmm{er} + static const uint64_t T_ER_Y = 1ull << 20; // ymm{er} + static const uint64_t T_ER_Z = 1ull << 21; // zmm{er} + static const uint64_t T_SAE_X = 1ull << 22; // xmm{sae} + static const uint64_t T_SAE_Y = 1ull << 23; // ymm{sae} + static const uint64_t T_SAE_Z = 1ull << 24; // zmm{sae} + static const uint64_t T_MUST_EVEX = 1ull << 25; // contains T_EVEX + static const uint64_t T_B32 = 1ull << 26; // m32bcst + static const uint64_t T_B64 = 1ull << 27; // m64bcst + static const uint64_t T_B16 = T_B32 | T_B64; // m16bcst (Be careful) + static const uint64_t T_M_K = 1ull << 28; // mem{k} + static const uint64_t T_VSIB = 1ull << 29; + static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem + static const uint64_t T_FP16 = 1ull << 31; // avx512-fp16 + static const uint64_t T_MAP5 = T_FP16 | T_0F; + static const uint64_t T_MAP6 = T_FP16 | T_0F38; + static const uint64_t T_NF = 1ull << 32; // T_nf + static const uint64_t T_NO_OR1 = 1ull << 33; // does not "code | 1" // T_66 = 1, T_F3 = 2, T_F2 = 3 static inline uint32_t getPP(uint64_t type) { return (type >> 5) & 3; } // @@@end of avx_type_def.h @@ -1995,6 +1993,7 @@ private: } else if (type & T_0F3A) { db(0x0F); db(0x3A); } +// db(code | (!(type & T_NO_OR1) && !r.isBit(8))); db(code | (type == 0 && !r.isBit(8))); } void opRR(const Reg& reg1, const Reg& reg2, uint64_t type, int code) diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 44afcff..62e49db 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -29,7 +29,6 @@ void and_(const Operand& op, uint32_t imm) { opOI(op, imm, 0x20, 4); } void and_(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x20); } void and_(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NONE, 4); } void and_(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NONE, 0x20); } -void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_0F38, 0xf2); } void andnpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x55, isXMM_XMMorMEM); } void andnps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x55, isXMM_XMMorMEM); } void andpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x54, isXMM_XMMorMEM); } @@ -1015,7 +1014,7 @@ void vcvtneebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3| void vcvtneeph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66|T_0F38|T_W0|T_YMM, 0xB0); } void vcvtneobf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2|T_0F38|T_W0|T_YMM, 0xB0); } void vcvtneoph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38|T_W0|T_YMM, 0xB0); } -void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); } +void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32|orEvexIf(encoding), 0x72); }void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_0F38, 0xf2); } void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); } void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); } void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); } @@ -1142,15 +1141,15 @@ void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0 void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3|T_0F|T_YMM, 0x7F); } void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_YMM, 0x6F); } void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); } -void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x17); } -void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x16); } -void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x17); } -void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x16); } +void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x17); } +void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x16); } +void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_0F|T_EW0|T_EVEX, 0x17); } +void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_0F|T_EW0|T_EVEX, 0x16); } void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); } -void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x13); } -void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x12); } -void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x13); } -void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x12); } +void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x13); } +void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x12); } +void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_0F|T_EW0|T_EVEX, 0x13); } +void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_0F|T_EW0|T_EVEX, 0x12); } void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); } void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); } void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); } @@ -2059,44 +2058,44 @@ void vextracti32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Op void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x3B, imm); } void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x39, imm); } void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3B, imm); } -void vfcmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32|T_NF, 0x56); } -void vfcmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32|T_NF, 0xD6); } +void vfcmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32|T_NF|T_NO_OR1, 0x56); } +void vfcmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32|T_NF|T_NO_OR1, 0xD6); } void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x54, imm); } void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x54, imm); } void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_Z|T_MUST_EVEX, 0x55, imm); } void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_Z|T_MUST_EVEX, 0x55, imm); } -void vfmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0x98); } -void vfmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0x99); } -void vfmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xA8); } -void vfmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0xA9); } -void vfmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xB8); } -void vfmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0xB9); } -void vfmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32|T_NF, 0x56); } -void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0x96); } -void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xA6); } -void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xB6); } -void vfmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0x9A); } -void vfmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0x9B); } -void vfmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xAA); } -void vfmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0xAB); } -void vfmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xBA); } -void vfmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0xBB); } -void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0x97); } -void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xA7); } -void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xB7); } -void vfmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32|T_NF, 0xD6); } -void vfnmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0x9C); } -void vfnmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0x9D); } -void vfnmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xAC); } -void vfnmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0xAD); } -void vfnmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xBC); } -void vfnmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0xBD); } -void vfnmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0x9E); } -void vfnmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0x9F); } -void vfnmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xAE); } -void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0xAF); } -void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16|T_NF, 0xBE); } -void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX|T_NF, 0xBF); } +void vfmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x98); } +void vfmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x99); } +void vfmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA8); } +void vfmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xA9); } +void vfmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB8); } +void vfmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xB9); } +void vfmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32|T_NF|T_NO_OR1, 0x56); } +void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x96); } +void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA6); } +void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB6); } +void vfmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9A); } +void vfmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x9B); } +void vfmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAA); } +void vfmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xAB); } +void vfmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBA); } +void vfmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBB); } +void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x97); } +void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA7); } +void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB7); } +void vfmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32|T_NF|T_NO_OR1, 0xD6); } +void vfnmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9C); } +void vfnmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x9D); } +void vfnmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAC); } +void vfnmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xAD); } +void vfnmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBC); } +void vfnmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBD); } +void vfnmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9E); } +void vfnmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x9F); } +void vfnmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAE); } +void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xAF); } +void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBE); } +void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBF); } void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); } void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); } void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); } @@ -2152,11 +2151,11 @@ void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_0 void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); } void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX, 0x6F); } void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX|T_M_K, 0x11); } -void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX|T_NF, 0x10); } -void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX|T_NF, 0x10); } +void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); } +void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); } void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); } -void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX|T_NF, 0x7E); } -void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX|T_NF, 0x6E); } +void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); } +void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); } void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); } void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); } void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }