diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 609ed8d..e530752 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -11,9 +11,10 @@ using namespace Xbyak; #endif enum { - PP_66 = 1 << 0, - PP_F3 = 1 << 1, - PP_F2 = 1 << 2, + PP_NONE = 1 << 0, + PP_66 = 1 << 1, + PP_F3 = 1 << 2, + PP_F2 = 1 << 3, MM_0F = 1 << 5, MM_0F38 = 1 << 6, MM_0F3A = 1 << 7 @@ -1523,6 +1524,23 @@ void put() printf("void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 1); }\n"); printf("#endif\n"); } + // haswell gpr(reg, reg, r/m) + { + const struct Tbl { + const char *name; + int type; + uint8 code; + } tbl[] = { + { "andn", MM_0F38, 0xF2 }, + { "mulx", PP_F2 | MM_0F38, 0xF6 }, + { "pdep", PP_F2 | MM_0F38, 0xF5 }, + { "pext", PP_F3 | MM_0F38, 0xF5 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + printf("void %s(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, %s, 0x%x); }\n", p.name, type2String(p.type).c_str(), p.code); + } + } } int main() diff --git a/test/make_nm.cpp b/test/make_nm.cpp index e60b771..4fc5da0 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -1843,10 +1843,29 @@ public: printf( " }\n"); } + void putGpr() + { + const char *tbl1[] = { + "andn", + "mulx", + "pdep", + "pext", + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl1); i++) { + const char *name = tbl1[i]; + put(name, REG32, REG32, REG32 | MEM); +#ifdef XBYAK64 + put(name, REG64, REG64, REG64 | MEM); +#endif + } + } void put() { #ifdef USE_AVX -#ifndef USE_YASM +#ifdef USE_YASM + putFMA2(); + putGpr(); +#else putAVX1(); putAVX2(); putAVX_X_X_XM_omit(); @@ -1858,8 +1877,7 @@ public: putAVX_X_X_IMM_omit(); putFMA(); #endif - putFMA2(); -#else +#else // USE_AVX putJmp(); #ifndef USE_YASM putSIMPLE(); @@ -1897,7 +1915,7 @@ public: putFpu(); putFpuFpu(); putCmp(); -#else +#else // USE_YASM putSSSE3(); putSSE4_1(); separateFunc(); @@ -1905,9 +1923,9 @@ public: putMov64(); #ifdef XBYAK64 putRip(); -#endif -#endif -#endif +#endif // XBYAK64 +#endif // USE_YASM +#endif // USE_AVX } }; diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index c179d12..3c4a061 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -1237,6 +1237,83 @@ private: { db(code1); db(code2 | reg.getIdx()); } + void opGpr(const Reg32e& r1, const Reg32e& r2, const Operand& op, int type, uint8 code) + { + const unsigned int bit = r1.getBit(); + if (r2.getBit() != bit || (op.isREG() && op.getBit() != bit)) throw ERR_BAD_COMBINATION; + int w = bit == 64; + bool x, b; + if (op.isMEM()) { + const Address& addr = static_cast(op); + uint8 rex = addr.getRex(); + x = (rex & 2) != 0; + b = (rex & 1) != 0; + if (BIT == 64 && addr.is32bit_) db(0x67); + if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0; + } else { + x = false; + b = static_cast(op).isExtIdx(); + } + if (w == -1) w = 0; + vex(r1.isExtIdx(), r2.getIdx(), false, type, x, b, w); + db(code); + if (op.isMEM()) { + const Address& addr = static_cast(op); + addr.updateRegField(static_cast(r1.getIdx())); + db(addr.getCode(), static_cast(addr.getSize())); + } else { + db(getModRM(3, r1.getIdx(), op.getIdx())); + } + } + // support (x, x, x/m), (y, y, y/m) + void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, bool supportYMM, int w = -1) + { + const Xmm *x2; + const Operand *op; + if (op2.isNone()) { + x2 = &x1; + op = &op1; + } else { + if (!(op1.isXMM() || (supportYMM && op1.isYMM()))) throw ERR_BAD_COMBINATION; + x2 = static_cast(&op1); + op = &op2; + } + // (x1, x2, op) + if (!((x1.isXMM() && x2->isXMM()) || (supportYMM && x1.isYMM() && x2->isYMM()))) throw ERR_BAD_COMBINATION; + bool x, b; + if (op->isMEM()) { + const Address& addr = *static_cast(op); + uint8 rex = addr.getRex(); + x = (rex & 2) != 0; + b = (rex & 1) != 0; + if (BIT == 64 && addr.is32bit_) db(0x67); + if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0; + } else { + x = false; + b = static_cast(op)->isExtIdx(); + } + if (w == -1) w = 0; + vex(x1.isExtIdx(), x2->getIdx(), x1.isYMM(), type, x, b, w); + db(code0); + if (op->isMEM()) { + const Address& addr = *static_cast(op); + addr.updateRegField(static_cast(x1.getIdx())); + db(addr.getCode(), static_cast(addr.getSize())); + } else { + db(getModRM(3, x1.getIdx(), op->getIdx())); + } + } + // if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op + void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, bool supportYMM, int w = -1) + { + // use static_cast to avoid calling unintentional copy constructor on gcc + opAVX_X_X_XM(x1, op1, cvt ? kind == Operand::XMM ? static_cast(Xmm(op2.getIdx())) : static_cast(Ymm(op2.getIdx())) : op2, type, code0, supportYMM, w); + } + // support (x, x/m, imm), (y, y/m, imm) + void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, bool supportYMM, int w = -1, int imm = NONE) + { + opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, op, type, code, supportYMM, w); if (imm != NONE) db((uint8)imm); + } public: unsigned int getVersion() const { return VERSION; } using CodeArray::db; @@ -1608,57 +1685,7 @@ public: db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); } - // support (x, x, x/m), (y, y, y/m) - void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, bool supportYMM, int w = -1) - { - const Xmm *x2; - const Operand *op; - if (op2.isNone()) { - x2 = &x1; - op = &op1; - } else { - if (!(op1.isXMM() || (supportYMM && op1.isYMM()))) throw ERR_BAD_COMBINATION; - x2 = static_cast(&op1); - op = &op2; - } - // (x1, x2, op) - if (!((x1.isXMM() && x2->isXMM()) || (supportYMM && x1.isYMM() && x2->isYMM()))) throw ERR_BAD_COMBINATION; - bool x, b; - if (op->isMEM()) { - const Address& addr = *static_cast(op); - uint8 rex = addr.getRex(); - x = (rex & 2) != 0; - b = (rex & 1) != 0; - if (BIT == 64 && addr.is32bit_) db(0x67); - if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0; - } else { - x = false; - b = static_cast(op)->isExtIdx(); - } - if (w == -1) w = 0; - vex(x1.isExtIdx(), x2->getIdx(), x1.isYMM(), type, x, b, w); - db(code0); - if (op->isMEM()) { - const Address& addr = *static_cast(op); - addr.updateRegField(static_cast(x1.getIdx())); - db(addr.getCode(), static_cast(addr.getSize())); - } else { - db(getModRM(3, x1.getIdx(), op->getIdx())); - } - } - // if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op - void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, bool supportYMM, int w = -1) - { - // use static_cast to avoid calling unintentional copy constructor on gcc - opAVX_X_X_XM(x1, op1, cvt ? kind == Operand::XMM ? static_cast(Xmm(op2.getIdx())) : static_cast(Ymm(op2.getIdx())) : op2, type, code0, supportYMM, w); - } - // support (x, x/m, imm), (y, y/m, imm) - void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, bool supportYMM, int w = -1, int imm = NONE) - { - opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, op, type, code, supportYMM, w); if (imm != NONE) db((uint8)imm); - } enum { NONE = 256 }; -public: CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0) : CodeArray(maxSize, userPtr, allocator) , mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7) diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 0506eb2..0edbaf3 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1368,3 +1368,7 @@ void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx() void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 1); } void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 1); } #endif +void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38, 0xf2); } +void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf6); } +void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf5); } +void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F3, 0xf5); }