diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp index 5610fbe..b7f438c 100644 --- a/gen/avx_type.hpp +++ b/gen/avx_type.hpp @@ -18,6 +18,10 @@ std::string type2String(int type) if (!str.empty()) str += " | "; str += "T_N_VL"; } + if (type & T_VEX) { + if (!str.empty()) str += " | "; + str += "T_VEX"; + } if ((type & T_NX_MASK) == T_DUP) { if (!str.empty()) str += " | "; str += "T_DUP"; diff --git a/gen/avx_type_def.h b/gen/avx_type_def.h index d34cca6..489d5a0 100644 --- a/gen/avx_type_def.h +++ b/gen/avx_type_def.h @@ -10,7 +10,7 @@ T_NX_MASK = 7, T_DUP = T_NX_MASK,//1 << 4, // N = (8, 32, 64) T_N_VL = 1 << 3, // N * (1, 2, 4) for VL - // 1 << 4 is free + T_VEX = 1 << 4, T_66 = 1 << 5, // pp = 1 T_F3 = 1 << 6, // pp = 2 T_F2 = T_66 | T_F3, // pp = 3 diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 9144c0e..24dd35e 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -479,19 +479,18 @@ void put() { // (XMM, XMM) const struct Tbl { + int type; uint8_t code; - uint8_t pref; const char *name; } tbl[] = { - { 0xF7, 0x66, "maskmovdqu" }, - { 0x12, 0 , "movhlps" }, - { 0x16, 0 , "movlhps" }, + { T_66 | T_0F, 0xF7, "maskmovdqu" }, + { T_0F, 0x12, "movhlps" }, + { T_0F, 0x16, "movlhps" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void %s(const Xmm& reg1, const Xmm& reg2) { ", p->name); - if (p->pref) printf("db(0x%02X); ", p->pref); - printf(" opModR(reg1, reg2, 0x0F, 0x%02X); }\n", p->code); + std::string type = type2String(p->type); + printf("void %s(const Xmm& reg1, const Xmm& reg2) { opModR2(reg1, reg2, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); } } { diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 8294e48..6f41137 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -1748,6 +1748,51 @@ private: } if (rex) db(rex); } + void rexA(int type, const Operand& op1, const Operand& op2 = Operand()) + { + if (op1.getNF() | op2.getNF()) XBYAK_THROW(ERR_INVALID_NF) + uint8_t rex = 0; + const Operand *p1 = &op1, *p2 = 
&op2; + if (p1->isMEM()) std::swap(p1, p2); + if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) + // except movsx(16bit, 32/64bit) + uint8_t p66 = (op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e)) ? 0x66 : 0; + if (p66) db(p66); + if ((type & (T_VEX|T_EVEX|T_MUST_EVEX)) == 0) { + if ((type & T_F2) == T_F2) { + db(0xF2); + } else if (type & T_66) { + if (!p66) db(0x66); // only once + } else if (type & T_F3) { + db(0xF3); + } + } + if (p2->isMEM()) { + const Reg& r = *static_cast<const Reg*>(p1); + const Address& addr = p2->getAddress(); + const RegExp e = addr.getRegExp(); + const Reg& base = e.getBase(); + const Reg& idx = e.getIndex(); + if (BIT == 64 && addr.is32bit()) db(0x67); + rex = rexRXB(3, r.isREG(64), r, base, idx); + if (r.hasRex2() || addr.hasRex2()) { + rex2(0, rex, r, base, idx); + return; + } + if (rex || r.isExt8bit()) rex |= 0x40; + } else { + const Reg& r1 = static_cast<const Reg&>(op1); + const Reg& r2 = static_cast<const Reg&>(op2); + // ModRM(reg, base); + rex = rexRXB(3, r1.isREG(64) || r2.isREG(64), r2, r1); + if (r1.hasRex2() || r2.hasRex2()) { + rex2(0, rex, r2, r1); + return; + } + if (rex || r1.isExt8bit() || r2.isExt8bit()) rex |= 0x40; + } + if (rex) db(rex); + } // @@@begin of avx_type_def.h enum AVXtype { // low 3 bit @@ -1760,7 +1805,7 @@ private: T_NX_MASK = 7, T_DUP = T_NX_MASK,//1 << 4, // N = (8, 32, 64) T_N_VL = 1 << 3, // N * (1, 2, 4) for VL - // 1 << 4 is free + T_VEX = 1 << 4, T_66 = 1 << 5, // pp = 1 T_F3 = 1 << 6, // pp = 2 T_F2 = T_66 | T_F3, // pp = 3 @@ -1980,12 +2025,31 @@ private: { db(code0 | (type == 0 && !r.isBit(8))); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2); } + void writeCode2(int type, const Reg& r, int code) + { + if ((type & (T_VEX|T_EVEX|T_MUST_EVEX)) == 0) { + if (type & T_0F) { + db(0x0F); + } else if (type & T_0F38) { + db(0x0F); db(0x38); + } else if (type & T_0F3A) { + db(0x0F); db(0x3A); + } + } + db(code | (type == 0 && !r.isBit(8))); + } void opModR(const Reg& reg1, const Reg& reg2, int 
code0, int code1 = NONE, int code2 = NONE) { rex(reg2, reg1); writeCode(0, reg1, code0, code1, code2); setModRM(3, reg1.getIdx(), reg2.getIdx()); } + void opModR2(const Reg& reg1, const Reg& reg2, int type, int code) + { + rexA(type, reg2, reg1); + writeCode2(type, reg1, code); + setModRM(3, reg1.getIdx(), reg2.getIdx()); + } void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0) { if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 00818b8..822f4bb 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -491,7 +491,7 @@ void loopne(const char *label) { loopne(std::string(label)); } void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); } void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); } void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); } -void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); } +void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { opModR2(reg1, reg2, T_66 | T_0F, 0xF7); } void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); } void maxpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM); } void maxps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM); } @@ -522,10 +522,10 @@ void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0 void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); } void movdqu(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x7F); } void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0xF3); } -void movhlps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x12); } +void movhlps(const Xmm& reg1, const Xmm& reg2) { 
opModR2(reg1, reg2, T_0F, 0x12); } void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x66); } void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x100); } -void movlhps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x16); } +void movlhps(const Xmm& reg1, const Xmm& reg2) { opModR2(reg1, reg2, T_0F, 0x16); } void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x66); } void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x100); } void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }