support ptr_b as {1toX}

This commit is contained in:
MITSUNARI Shigeo 2016-07-01 15:49:50 +09:00
parent ba4ab3283f
commit 6adff3f938
7 changed files with 131 additions and 82 deletions

View file

@ -21,7 +21,10 @@ enum {
T_SAE_X = 1 << 20, // xmm{sae} T_SAE_X = 1 << 20, // xmm{sae}
T_SAE_Y = 1 << 21, // ymm{sae} T_SAE_Y = 1 << 21, // ymm{sae}
T_SAE_Z = 1 << 22, // zmm{sae} T_SAE_Z = 1 << 22, // zmm{sae}
T_MUST_EVEX = 1 << 23 T_MUST_EVEX = 1 << 23,
T_B32 = 1 << 24, // m32bcst
T_B64 = 1 << 25, // m64bcst
T_XXX
}; };
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
@ -113,5 +116,13 @@ std::string type2String(int type)
if (!str.empty()) str += " | "; if (!str.empty()) str += " | ";
str += "T_MUST_EVEX"; str += "T_MUST_EVEX";
} }
if (type & T_B32) {
if (!str.empty()) str += " | ";
str += "T_B32";
}
if (type & T_B64) {
if (!str.empty()) str += " | ";
str += "T_B64";
}
return str; return str;
} }

View file

@ -1003,8 +1003,8 @@ void put()
}; };
for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) { for (int i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i]; const Tbl *p = &tbl[i];
printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code); printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code);
printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code); printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code);
if (p->only_pd_ps) continue; if (p->only_pd_ps) continue;
printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code); printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code);
printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code); printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x%02X); }\n", p->name, p->code);

View file

@ -137,11 +137,17 @@ vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2); vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae); vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5); vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
``` ```
Remark Remark
* k1, ..., k7 are new opmask registers. * k1, ..., k7 are new opmask registers.
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively. * use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
* `k4 | k3` is different from `k3 | k4`. * `k4 | k3` is different from `k3 | k4`.
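* use `ptr_b` for broadcast `{1toX}`. X is determined automatically from the vector register width and the element size (e.g. `vaddpd` with zmm gives `{1to8}`, `vaddps` with zmm gives `{1to16}`).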
### Label ### Label

View file

@ -133,10 +133,16 @@ vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae); vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5); vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
注意 注意
* k1, ..., k7 は新しいopmaskレジスタです。 * k1, ..., k7 は新しいopmaskレジスタです。
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。 * z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
* `k4 | k3`と`k3 | k4`は意味が異なります。 * `k4 | k3`と`k3 | k4`は意味が異なります。
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
・ラベル ・ラベル

View file

@ -2551,11 +2551,30 @@ public:
put("vcomiss", XMM, XMM_SAE); put("vcomiss", XMM, XMM_SAE);
#endif #endif
} }
// Prints three vaddpd test lines (zmm, ymm, xmm destination) whose memory
// operand is [rax+disp] with broadcast.
// When isXbyak_ is set the lines are Xbyak calls using ptr_b, each followed by
// dump(); otherwise they are plain assembly text with an explicit
// {1to8}/{1to4}/{1to2} suffix.
void putBroadcastSub(int disp)
{
	if (!isXbyak_) {
		// assembly text: the broadcast count is spelled out per vector width
		printf("vaddpd zmm0, zmm1, [rax+%d]{1to8}\n", disp);
		printf("vaddpd ymm0, ymm1, [rax+%d]{1to4}\n", disp);
		printf("vaddpd xmm0, xmm1, [rax+%d]{1to2}\n", disp);
		return;
	}
	// Xbyak source: ptr_b carries the broadcast, no explicit count is written
	printf("vaddpd(zmm0, zmm1, ptr_b[rax+%d]);dump();\n", disp);
	printf("vaddpd(ymm0, ymm1, ptr_b[rax+%d]);dump();\n", disp);
	printf("vaddpd(xmm0, xmm1, ptr_b[rax+%d]);dump();\n", disp);
}
// Emits the broadcast test lines for every displacement from 0 through 8
// by calling putBroadcastSub once per value, in ascending order.
void putBroadcast()
{
	const int dispEnd = 9; // exclusive upper bound of the displacements
	int disp = 0;
	while (disp < dispEnd) {
		putBroadcastSub(disp);
		++disp;
	}
}
void putAVX512() void putAVX512()
{ {
putOpmask(); putOpmask();
putCombi(); putCombi();
putCmpK(); putCmpK();
putBroadcast();
} }
#endif #endif
}; };

View file

@ -171,7 +171,7 @@ enum {
ERR_EVEX_IS_INVALID, ERR_EVEX_IS_INVALID,
ERR_SAE_IS_INVALID, ERR_SAE_IS_INVALID,
ERR_ER_IS_INVALID, ERR_ER_IS_INVALID,
ERR_BROADCAST_IS_ALREADY_SET, ERR_INVALID_BROADCAST,
ERR_INTERNAL ERR_INTERNAL
}; };
@ -228,7 +228,7 @@ public:
"evex is invalid", "evex is invalid",
"sae(suppress all exceptions) is invalid", "sae(suppress all exceptions) is invalid",
"er(embedded rounding) is invalid", "er(embedded rounding) is invalid",
"broadcast is alerady set", "invalid broadcast",
"internal error", "internal error",
}; };
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl)); assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
@ -522,26 +522,7 @@ struct EvexModifierRounding {
explicit EvexModifierRounding(int rounding) : rounding(rounding) {} explicit EvexModifierRounding(int rounding) : rounding(rounding) {}
int rounding; int rounding;
}; };
struct EvexModifierZero{};
namespace inner {
enum SAEtype {
T_RN_SAE = 1,
T_RD_SAE = 2,
T_RU_SAE = 3,
T_RZ_SAE = 4,
T_SAE = 5
};
} // inner
static const EvexModifierRounding T_sae(inner::T_SAE); // {sae}
static const EvexModifierRounding T_rn_sae(inner::T_RN_SAE); // {rn-sae}
static const EvexModifierRounding T_rd_sae(inner::T_RD_SAE); // {rd-sae}
static const EvexModifierRounding T_ru_sae(inner::T_RU_SAE); // {ru-sae}
static const EvexModifierRounding T_rz_sae(inner::T_RZ_SAE); // {rz-sae}
static const struct EvexModifierZero{} T_z; // {z}
static const struct EvexModifierBroadcast{} T_b; // {1to2},{1to4},{1to8},{1to16},{b}
struct Xmm : public Mmx { struct Xmm : public Mmx {
explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
@ -966,17 +947,17 @@ public:
M_64bitDisp, M_64bitDisp,
M_rip M_rip
}; };
Address(uint32 sizeBit, const RegExp& e) Address(uint32 sizeBit, bool broadcast, const RegExp& e)
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), permitVsib_(false), broadcast_(false) : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), permitVsib_(false), broadcast_(broadcast)
{ {
e_.verify(); e_.verify();
e_.optimize(); e_.optimize();
} }
#ifdef XBYAK64 #ifdef XBYAK64
explicit Address(size_t disp) explicit Address(size_t disp)
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false) { } : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false) { }
Address(uint32 sizeBit, const RegRip& addr) Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(M_rip), permitVsib_(false) { } : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(M_rip), permitVsib_(false), broadcast_(broadcast) { }
#endif #endif
void permitVsib() const { permitVsib_ = true; } void permitVsib() const { permitVsib_ = true; }
const RegExp& getRegExp() const { return e_; } const RegExp& getRegExp() const { return e_; }
@ -1000,13 +981,6 @@ public:
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && permitVsib_ == rhs.permitVsib_ && broadcast_ == rhs.broadcast_; return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && permitVsib_ == rhs.permitVsib_ && broadcast_ == rhs.broadcast_;
} }
bool operator!=(const Address& rhs) const { return !operator==(rhs); } bool operator!=(const Address& rhs) const { return !operator==(rhs); }
Address operator|(const EvexModifierBroadcast&) const
{
if (broadcast_) throw Error(ERR_BROADCAST_IS_ALREADY_SET);
Address r(*this);
r.broadcast_ = true;
return r;
}
private: private:
RegExp e_; RegExp e_;
const Label* label_; const Label* label_;
@ -1026,18 +1000,19 @@ class AddressFrame {
void operator=(const AddressFrame&); void operator=(const AddressFrame&);
public: public:
const uint32 bit_; const uint32 bit_;
explicit AddressFrame(uint32 bit) : bit_(bit) { } const bool broadcast_;
explicit AddressFrame(uint32 bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { }
Address operator[](const RegExp& e) const Address operator[](const RegExp& e) const
{ {
return Address(bit_, e); return Address(bit_, broadcast_, e);
} }
Address operator[](const void *disp) const Address operator[](const void *disp) const
{ {
return Address(bit_, RegExp(reinterpret_cast<size_t>(disp))); return Address(bit_, broadcast_, RegExp(reinterpret_cast<size_t>(disp)));
} }
#ifdef XBYAK64 #ifdef XBYAK64
Address operator[](uint64 disp) const { return Address(disp); } Address operator[](uint64 disp) const { return Address(disp); }
Address operator[](const RegRip& addr) const { return Address(bit_, addr); } Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); }
#endif #endif
}; };
@ -1386,7 +1361,10 @@ private:
T_SAE_X = 1 << 20, // xmm{sae} T_SAE_X = 1 << 20, // xmm{sae}
T_SAE_Y = 1 << 21, // ymm{sae} T_SAE_Y = 1 << 21, // ymm{sae}
T_SAE_Z = 1 << 22, // zmm{sae} T_SAE_Z = 1 << 22, // zmm{sae}
T_MUST_EVEX = 1 << 23 T_MUST_EVEX = 1 << 23,
T_B32 = 1 << 24, // m32bcst
T_B64 = 1 << 25, // m64bcst
T_XXX
}; };
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false) void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
{ {
@ -1416,7 +1394,14 @@ private:
if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return; if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
throw Error(ERR_ER_IS_INVALID); throw Error(ERR_ER_IS_INVALID);
} }
void evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool broadcast = false) enum {
T_RN_SAE = 1,
T_RD_SAE = 2,
T_RU_SAE = 3,
T_RZ_SAE = 4,
T_SAE = 5,
};
void evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false)
{ {
if (!(type & T_EVEX)) throw Error(ERR_EVEX_IS_INVALID); if (!(type & T_EVEX)) throw Error(ERR_EVEX_IS_INVALID);
int w = (type & T_EW1) ? 1 : 0; int w = (type & T_EW1) ? 1 : 0;
@ -1431,13 +1416,14 @@ private:
bool X = x ? false : !base.isExtIdx2(); bool X = x ? false : !base.isExtIdx2();
bool B = !base.isExtIdx(); bool B = !base.isExtIdx();
bool Rp = !reg.isExtIdx2(); bool Rp = !reg.isExtIdx2();
bool b = broadcast;
int LL; int LL;
int rounding = base.getRounding(); int rounding = base.getRounding();
if (rounding) { if (rounding) {
if (rounding == inner::T_SAE) verifySAE(base, type); if (rounding == T_SAE){
if (rounding != inner::T_SAE) verifyER(base, type); verifySAE(base, type); LL = 0;
LL = rounding - 1; } else {
verifyER(base, type); LL = rounding - 1;
}
b = true; b = true;
} else { } else {
int bit = Max(Max(reg.getBit(), base.getBit()), (v ? v->getBit() : 0)); int bit = Max(Max(reg.getBit(), base.getBit()), (v ? v->getBit() : 0));
@ -1456,7 +1442,7 @@ private:
{ {
db(static_cast<uint8>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7))); db(static_cast<uint8>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)));
} }
void setSIB(const RegExp& e, int reg, bool disp32 = false) void setSIB(const RegExp& e, int reg, int disp8N = 0)
{ {
size_t disp64 = e.getDisp(); size_t disp64 = e.getDisp();
#ifdef XBYAK64 #ifdef XBYAK64
@ -1472,13 +1458,21 @@ private:
enum { enum {
mod00 = 0, mod01 = 1, mod10 = 2 mod00 = 0, mod01 = 1, mod10 = 2
}; };
int mod; int mod = mod10; // disp32
if (!baseBit || ((baseIdx & 7) != Operand::EBP && disp == 0)) { if (!baseBit || ((baseIdx & 7) != Operand::EBP && disp == 0)) {
mod = mod00; mod = mod00;
} else if (!disp32 && inner::IsInDisp8(disp)) {
mod = mod01;
} else { } else {
mod = mod10; if (disp8N == 0) {
if (inner::IsInDisp8(disp)) {
mod = mod01;
}
} else if (disp8N > 1) {
uint32_t t = disp / disp8N;
if (t * disp8N == disp && inner::IsInDisp8(t)) {
disp = t;
mod = mod01;
}
}
} }
const int newBaseIdx = baseBit ? (baseIdx & 7) : Operand::EBP; const int newBaseIdx = baseBit ? (baseIdx & 7) : Operand::EBP;
/* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */ /* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */
@ -1567,10 +1561,11 @@ private:
} }
// reg is reg field of ModRM // reg is reg field of ModRM
// immSize is the size for immediate value // immSize is the size for immediate value
void opAddr(const Address &addr, int reg, int immSize = 0, bool disp32 = false) // disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0)
{ {
if (addr.getMode() == Address::M_ModRM) { if (addr.getMode() == Address::M_ModRM) {
setSIB(addr.getRegExp(), reg, disp32); setSIB(addr.getRegExp(), reg, disp8N);
} else if (addr.getMode() == Address::M_rip) { } else if (addr.getMode() == Address::M_rip) {
setModRM(0, reg, 5); setModRM(0, reg, 5);
if (addr.getLabel()) { // [rip + Label] if (addr.getLabel()) { // [rip + Label]
@ -1759,15 +1754,22 @@ private:
const Address& addr = static_cast<const Address&>(op2); const Address& addr = static_cast<const Address&>(op2);
const Reg& base = addr.getRegExp().getBase(); const Reg& base = addr.getRegExp().getBase();
if (BIT == 64 && addr.is32bit()) db(0x67); if (BIT == 64 && addr.is32bit()) db(0x67);
bool disp32 = false; int disp8N = 0;
bool x = addr.getRegExp().getIndex().isExtIdx(); bool x = addr.getRegExp().getIndex().isExtIdx();
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex())) { if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast()) {
evex(r, base, p1, type, code, x, addr.isBroadcast()); bool b = false;
disp32 = true; if (addr.isBroadcast()) {
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
disp8N = (type & T_B32) ? 4 : 8;
b = true;
} else {
disp8N = 1;
}
evex(r, base, p1, type, code, x, b);
} else { } else {
vex(r, base, p1, type, code, x); vex(r, base, p1, type, code, x);
} }
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp32); opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N);
} else { } else {
const Reg& base = static_cast<const Reg&>(op2); const Reg& base = static_cast<const Reg&>(op2);
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) { if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
@ -1807,10 +1809,10 @@ private:
if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) throw Error(ERR_BAD_COMBINATION); if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) throw Error(ERR_BAD_COMBINATION);
opVex(x1, x2, *op, type, code0, imm8); opVex(x1, x2, *op, type, code0, imm8);
} }
void opAVX_K_X_XM(const Opmask& k1, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE) void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE)
{ {
if (!op3.isMEM() && (x2.getKind() != op3.getKind())) throw Error(ERR_BAD_COMBINATION); if (!op3.isMEM() && (x2.getKind() != op3.getKind())) throw Error(ERR_BAD_COMBINATION);
opVex(k1, &x2, op3, type, code0, imm8); opVex(k, &x2, op3, type, code0, imm8);
} }
// if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op // if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op
void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, int imm8 = NONE) void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, int imm8 = NONE)
@ -1867,8 +1869,11 @@ public:
const Reg16 ax, cx, dx, bx, sp, bp, si, di; const Reg16 ax, cx, dx, bx, sp, bp, si, di;
const Reg8 al, cl, dl, bl, ah, ch, dh, bh; const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
const AddressFrame ptr, byte, word, dword, qword; const AddressFrame ptr, byte, word, dword, qword;
const AddressFrame ptr_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b}
const Fpu st0, st1, st2, st3, st4, st5, st6, st7; const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
const Opmask k0, k1, k2, k3, k4, k5, k6, k7; const Opmask k0, k1, k2, k3, k4, k5, k6, k7;
const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}
const EvexModifierZero T_z; // {z}
#ifdef XBYAK64 #ifdef XBYAK64
const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15; const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d; const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
@ -2346,9 +2351,11 @@ public:
, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI) , eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
, ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI) , ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI)
, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH) , al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
, ptr(0), byte(8), word(16), dword(32), qword(64) , ptr(0), byte(8), word(16), dword(32), qword(64), ptr_b(0, true)
, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7) , st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
, k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7) , k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7)
, T_sae(T_SAE), T_rn_sae(T_RN_SAE), T_rd_sae(T_RD_SAE), T_ru_sae(T_RU_SAE), T_rz_sae(T_RZ_SAE)
, T_z()
#ifdef XBYAK64 #ifdef XBYAK64
, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15) , rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
, r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15) , r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15)

View file

@ -704,38 +704,38 @@ void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); } void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); }
void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); } void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); }
void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); } void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x58); } void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x58); }
void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x58); } void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x58); }
void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x58); } void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x58); }
void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x58); } void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x58); }
void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x5C); } void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5C); }
void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x5C); } void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5C); }
void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5C); } void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5C); }
void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5C); } void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5C); }
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x59); } void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x59); } void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x59); } void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x59); }
void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x59); } void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x59); }
void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x5E); } void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5E); }
void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x5E); } void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); }
void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5E); } void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5E); }
void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5E); } void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5E); }
void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x5F); } void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5F); }
void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x5F); } void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5F); }
void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5F); } void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5F); }
void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5F); } void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5F); }
void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x5D); } void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5D); }
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x5D); } void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); }
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5D); } void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z, 0x5D); }
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5D); } void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z, 0x5D); }
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x54); } void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x54); } void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x55); } void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55); }
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x55); } void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x56); } void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x56); }
void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x56); } void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x56); }
void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z, 0x57); } void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x57); }
void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z, 0x57); } void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x57); }
void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x0D, imm); } void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x0D, imm); }
void vblendpd(const Xmm& x, const Operand& op, uint8 imm) { vblendpd(x, x, op, imm); } void vblendpd(const Xmm& x, const Operand& op, uint8 imm) { vblendpd(x, x, op, imm); }
void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x0C, imm); } void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x0C, imm); }