add vgather
This commit is contained in:
parent
94c0cbbbcd
commit
ec8d36b5d3
4 changed files with 232 additions and 40 deletions
|
@ -1636,6 +1636,27 @@ void put()
|
||||||
printf("void %s(const Reg32e& r, const Operand& op) { opGpr(Reg32e(%d, r.getBit()), op, r, %s, 0x%x, false); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code);
|
printf("void %s(const Reg32e& r, const Operand& op) { opGpr(Reg32e(%d, r.getBit()), op, r, %s, 0x%x, false); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// gather
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
uint8 code;
|
||||||
|
int w;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "vgatherdpd", 0x92, 1 },
|
||||||
|
{ "vgatherqpd", 0x93, 1 },
|
||||||
|
{ "vgatherdps", 0x92, 0 },
|
||||||
|
{ "vgatherqps", 0x93, 0 },
|
||||||
|
{ "vpgatherdd", 0x90, 0 },
|
||||||
|
{ "vpgatherqd", 0x91, 0 },
|
||||||
|
{ "vpgatherdq", 0x90, 1 },
|
||||||
|
{ "vpgatherqq", 0x91, 1 },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl& p = tbl[i];
|
||||||
|
printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x%x, %d); }\n", p.name, p.code, p.w);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
|
|
|
@ -31,6 +31,10 @@ const uint64 CL = 1ULL << 20;
|
||||||
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
||||||
const uint64 NEG32 = 1ULL << 23;
|
const uint64 NEG32 = 1ULL << 23;
|
||||||
const uint64 _YMM = 1ULL << 24;
|
const uint64 _YMM = 1ULL << 24;
|
||||||
|
// Operand-type flags for the vector-indexed memory operands used by the gather tests.
// X / Y: the sample operand uses an xmm / ymm register as the index.
// _32 / _64: the _64 samples reference xmm13 / ymm13 / r13 and are only
// OR'ed into VM32X / VM32Y when XBYAK64 is defined.
const uint64 VM32X_32 = 1ULL << 39;
|
||||||
|
const uint64 VM32X_64 = 1ULL << 40;
|
||||||
|
const uint64 VM32Y_32 = 1ULL << 41;
|
||||||
|
const uint64 VM32Y_64 = 1ULL << 42; // max value
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
const uint64 _MEMe = 1ULL << 25;
|
const uint64 _MEMe = 1ULL << 25;
|
||||||
const uint64 REG32_2 = 1ULL << 26; // r8d, ...
|
const uint64 REG32_2 = 1ULL << 26; // r8d, ...
|
||||||
|
@ -42,6 +46,8 @@ const uint64 _REG64_2 = 1ULL << 31; // r8, ...
|
||||||
const uint64 RAX = 1ULL << 32;
|
const uint64 RAX = 1ULL << 32;
|
||||||
const uint64 _XMM2 = 1ULL << 33;
|
const uint64 _XMM2 = 1ULL << 33;
|
||||||
const uint64 _YMM2 = 1ULL << 34;
|
const uint64 _YMM2 = 1ULL << 34;
|
||||||
|
// XBYAK64 build: the combined flags cover both the _32 and the _64 sample operands.
const uint64 VM32X = VM32X_32 | VM32X_64;
|
||||||
|
const uint64 VM32Y = VM32Y_32 | VM32Y_64;
|
||||||
#else
|
#else
|
||||||
const uint64 _MEMe = 0;
|
const uint64 _MEMe = 0;
|
||||||
const uint64 REG32_2 = 0;
|
const uint64 REG32_2 = 0;
|
||||||
|
@ -53,6 +59,8 @@ const uint64 _REG64_2 = 0;
|
||||||
const uint64 RAX = 0;
|
const uint64 RAX = 0;
|
||||||
const uint64 _XMM2 = 0;
|
const uint64 _XMM2 = 0;
|
||||||
const uint64 _YMM2 = 0;
|
const uint64 _YMM2 = 0;
|
||||||
|
// Non-XBYAK64 build: only the _32 sample operands are included.
const uint64 VM32X = VM32X_32;
|
||||||
|
const uint64 VM32Y = VM32Y_32;
|
||||||
#endif
|
#endif
|
||||||
const uint64 REG64 = _REG64 | _REG64_2 | RAX;
|
const uint64 REG64 = _REG64 | _REG64_2 | RAX;
|
||||||
const uint64 REG32 = _REG32 | REG32_2 | EAX;
|
const uint64 REG32 = _REG32 | REG32_2 | EAX;
|
||||||
|
@ -291,6 +299,14 @@ class Test {
|
||||||
return isXbyak_ ? "0xda" : "0xda";
|
return isXbyak_ ? "0xda" : "0xda";
|
||||||
case NEG:
|
case NEG:
|
||||||
return "-5";
|
return "-5";
|
||||||
|
case VM32X_32:
|
||||||
|
return isXbyak_ ? "ptr [ebp+4+xmm1*8]" : "[ebp+4+xmm1*8]";
|
||||||
|
case VM32X_64:
|
||||||
|
return isXbyak_ ? "ptr [12345+xmm13*2]" : "[12345+xmm13*2]";
|
||||||
|
case VM32Y_32:
|
||||||
|
return isXbyak_ ? "ptr [ymm4]" : "[ymm4]";
|
||||||
|
case VM32Y_64:
|
||||||
|
return isXbyak_ ? "ptr [12345+ymm13*2+r13]" : "[12345+ymm13*2+r13]";
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1963,6 +1979,47 @@ public:
|
||||||
put("rorx", REG64, REG64 | MEM, IMM8);
|
put("rorx", REG64, REG64 | MEM, IMM8);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
void putGather()
|
||||||
|
{
|
||||||
|
const int y_vx_y = 0;
|
||||||
|
const int y_vy_y = 1;
|
||||||
|
const int x_vy_x = 2;
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
int mode;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "vgatherdpd", y_vx_y },
|
||||||
|
{ "vgatherqpd", y_vy_y },
|
||||||
|
{ "vgatherdps", y_vy_y },
|
||||||
|
{ "vgatherqps", x_vy_x },
|
||||||
|
{ "vpgatherdd", y_vy_y },
|
||||||
|
{ "vpgatherqd", x_vy_x },
|
||||||
|
{ "vpgatherdq", y_vx_y },
|
||||||
|
{ "vpgatherqq", y_vy_y },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl& p = tbl[i];
|
||||||
|
const char *name = p.name;
|
||||||
|
put(name, XMM, VM32X, XMM);
|
||||||
|
switch (p.mode) {
|
||||||
|
case y_vx_y:
|
||||||
|
put(name, YMM, VM32X, YMM);
|
||||||
|
break;
|
||||||
|
case y_vy_y:
|
||||||
|
put(name, YMM, VM32Y, YMM);
|
||||||
|
break;
|
||||||
|
case x_vy_x:
|
||||||
|
put(name, XMM, VM32Y, XMM);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
printf("ERR mode=%d\n", p.mode);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// all pattern
|
||||||
|
const char *name = "vgatherdpd";
|
||||||
|
put(name, "xmm1, ptr [xmm2], xmm3", "xmm1, [xmm2], xmm3");
|
||||||
|
}
|
||||||
void put()
|
void put()
|
||||||
{
|
{
|
||||||
#ifdef USE_AVX
|
#ifdef USE_AVX
|
||||||
|
@ -1972,6 +2029,7 @@ public:
|
||||||
putGprR_RM_R();
|
putGprR_RM_R();
|
||||||
putGprR_RM();
|
putGprR_RM();
|
||||||
putGprOtherwise();
|
putGprOtherwise();
|
||||||
|
putGather();
|
||||||
#else
|
#else
|
||||||
putAVX1();
|
putAVX1();
|
||||||
putAVX2();
|
putAVX2();
|
||||||
|
|
185
xbyak/xbyak.h
185
xbyak/xbyak.h
|
@ -128,6 +128,7 @@ enum Error {
|
||||||
ERR_BAD_PROTECT_MODE,
|
ERR_BAD_PROTECT_MODE,
|
||||||
ERR_BAD_PNUM,
|
ERR_BAD_PNUM,
|
||||||
ERR_BAD_TNUM,
|
ERR_BAD_TNUM,
|
||||||
|
ERR_BAD_VSIB_ADDRESSING,
|
||||||
ERR_INTERNAL
|
ERR_INTERNAL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -161,6 +162,7 @@ inline const char *ConvertErrorToString(Error err)
|
||||||
"bad protect mode",
|
"bad protect mode",
|
||||||
"bad pNum",
|
"bad pNum",
|
||||||
"bad tNum",
|
"bad tNum",
|
||||||
|
"bad vsib addressing",
|
||||||
"internal error",
|
"internal error",
|
||||||
};
|
};
|
||||||
if (err < 0 || err > ERR_INTERNAL) return 0;
|
if (err < 0 || err > ERR_INTERNAL) return 0;
|
||||||
|
@ -410,7 +412,7 @@ public:
|
||||||
, disp_(0)
|
, disp_(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
Reg32e(const Reg& base, const Reg& index, int scale, unsigned int disp)
|
Reg32e(const Reg& base, const Reg& index, int scale, unsigned int disp, bool allowUseEspIndex = false)
|
||||||
: Reg(base)
|
: Reg(base)
|
||||||
, index_(index)
|
, index_(index)
|
||||||
, scale_(scale)
|
, scale_(scale)
|
||||||
|
@ -418,7 +420,7 @@ public:
|
||||||
{
|
{
|
||||||
if (scale != 0 && scale != 1 && scale != 2 && scale != 4 && scale != 8) throw ERR_BAD_SCALE;
|
if (scale != 0 && scale != 1 && scale != 2 && scale != 4 && scale != 8) throw ERR_BAD_SCALE;
|
||||||
if (!base.isNone() && !index.isNone() && base.getBit() != index.getBit()) throw ERR_BAD_COMBINATION;
|
if (!base.isNone() && !index.isNone() && base.getBit() != index.getBit()) throw ERR_BAD_COMBINATION;
|
||||||
if (index.getIdx() == Operand::ESP) throw ERR_ESP_CANT_BE_INDEX;
|
if (!allowUseEspIndex && index.getIdx() == Operand::ESP) throw ERR_ESP_CANT_BE_INDEX;
|
||||||
}
|
}
|
||||||
Reg32e optimize() const // select smaller size
|
Reg32e optimize() const // select smaller size
|
||||||
{
|
{
|
||||||
|
@ -455,6 +457,70 @@ struct RegRip {
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// QQQ:need to refactor
|
||||||
|
struct Vsib {
|
||||||
|
// [index_ * scale_ + base_ + disp_]
|
||||||
|
uint8 indexIdx_; // xmm reg idx
|
||||||
|
uint8 scale_; // 0(none), 1, 2, 4, 8
|
||||||
|
uint8 baseIdx_; // base reg idx
|
||||||
|
uint8 baseBit_; // 0(none), 32, 64
|
||||||
|
uint32 disp_;
|
||||||
|
bool isYMM_; // idx is YMM
|
||||||
|
public:
|
||||||
|
static inline void verifyScale(int scale)
|
||||||
|
{
|
||||||
|
if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw ERR_BAD_SCALE;
|
||||||
|
}
|
||||||
|
int getIndexIdx() const { return indexIdx_; }
|
||||||
|
int getScale() const { return scale_; }
|
||||||
|
int getBaseIdx() const { return baseIdx_; }
|
||||||
|
int getBaseBit() const { return baseBit_; }
|
||||||
|
bool isYMM() const { return isYMM_; }
|
||||||
|
uint32 getDisp() const { return disp_; }
|
||||||
|
Vsib(int indexIdx, int scale, bool isYMM, int baseIdx = 0, int baseBit = 0, uint32 disp = 0)
|
||||||
|
: indexIdx_((uint8)indexIdx)
|
||||||
|
, scale_((uint8)scale)
|
||||||
|
, baseIdx_((uint8)baseIdx)
|
||||||
|
, baseBit_((uint8)baseBit)
|
||||||
|
, disp_(disp)
|
||||||
|
, isYMM_(isYMM)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// xmm/ymm * scale : starts a Vsib expression with the given index register.
// Throws ERR_BAD_SCALE when scale is not 1, 2, 4 or 8.
inline Vsib operator*(const Xmm& x, int scale)
{
	Vsib::verifyScale(scale);
	Vsib scaled(x.getIdx(), scale, x.isYMM());
	return scaled;
}
|
||||||
|
// xmm/ymm + disp : Vsib with scale 1, no base register and the given displacement.
inline Vsib operator+(const Xmm& x, uint32 disp)
{
	// Vsib's constructor takes (indexIdx, scale, isYMM, baseIdx, baseBit, disp).
	// baseIdx and baseBit must be spelled out as 0 (= no base); otherwise disp
	// lands in the baseIdx slot, is truncated to uint8, and disp_ stays 0.
	return Vsib(x.getIdx(), 1, x.isYMM(), 0, 0, disp);
}
|
||||||
|
// xmm/ymm + r : Vsib with scale 1 whose base register and displacement come from r.
// Throws ERR_BAD_COMBINATION when r already carries an index register.
inline Vsib operator+(const Xmm& x, const Reg32e& r)
{
	const bool hasIndex = !r.index_.isNone();
	if (hasIndex) {
		throw ERR_BAD_COMBINATION;
	}
	Vsib combined(x.getIdx(), 1, x.isYMM(), r.getIdx(), r.getBit(), r.disp_);
	return combined;
}
|
||||||
|
// vs + disp : copy of vs with disp added to its displacement.
inline Vsib operator+(const Vsib& vs, uint32 disp)
{
	Vsib shifted = vs;
	shifted.disp_ = shifted.disp_ + disp;
	return shifted;
}
|
||||||
|
// vs + r : copy of vs that takes its base register from r and adds r's displacement.
// Throws ERR_BAD_COMBINATION when vs already has a base (baseBit != 0)
// or when r carries an index register.
inline Vsib operator+(const Vsib& vs, const Reg32e& r)
{
	if (vs.getBaseBit() != 0) {
		throw ERR_BAD_COMBINATION;
	}
	if (!r.index_.isNone()) {
		throw ERR_BAD_COMBINATION;
	}
	Vsib withBase = vs;
	withBase.baseIdx_ = static_cast<uint8>(r.getIdx());
	withBase.baseBit_ = static_cast<uint8>(r.getBit());
	withBase.disp_ = withBase.disp_ + r.disp_;
	return withBase;
}
|
||||||
|
// Reversed-operand forms: each forwards to the overload with the operands swapped.
inline Vsib operator+(uint32 disp, const Xmm& x)
{
	return x + disp;
}
inline Vsib operator+(uint32 disp, const Vsib& vs)
{
	return vs + disp;
}
inline Vsib operator+(const Reg32e& r, const Xmm& x)
{
	return x + r;
}
inline Vsib operator+(const Reg32e& r, const Vsib& vs)
{
	return vs + r;
}
|
||||||
|
|
||||||
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
|
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
|
||||||
void *const AutoGrow = (void*)1;
|
void *const AutoGrow = (void*)1;
|
||||||
|
|
||||||
|
@ -678,60 +744,43 @@ public:
|
||||||
class Address : public Operand, public CodeArray {
|
class Address : public Operand, public CodeArray {
|
||||||
void operator=(const Address&);
|
void operator=(const Address&);
|
||||||
uint64 disp_;
|
uint64 disp_;
|
||||||
|
uint8 rex_;
|
||||||
bool isOnlyDisp_;
|
bool isOnlyDisp_;
|
||||||
bool is64bitDisp_;
|
bool is64bitDisp_;
|
||||||
uint8 rex_;
|
mutable bool isVsib_;
|
||||||
public:
|
bool isYMM_;
|
||||||
|
void verify() const { if (isVsib_) throw ERR_BAD_VSIB_ADDRESSING; }
|
||||||
const bool is32bit_;
|
const bool is32bit_;
|
||||||
Address(uint32 sizeBit, bool isOnlyDisp, uint64 disp, bool is32bit, bool is64bitDisp = false)
|
public:
|
||||||
|
Address(uint32 sizeBit, bool isOnlyDisp, uint64 disp, bool is32bit, bool is64bitDisp = false, bool isVsib = false, bool isYMM = false)
|
||||||
: Operand(0, MEM, sizeBit)
|
: Operand(0, MEM, sizeBit)
|
||||||
, CodeArray(6) // 6 = 1(ModRM) + 1(SIB) + 4(disp)
|
, CodeArray(6) // 6 = 1(ModRM) + 1(SIB) + 4(disp)
|
||||||
, disp_(disp)
|
, disp_(disp)
|
||||||
|
, rex_(0)
|
||||||
, isOnlyDisp_(isOnlyDisp)
|
, isOnlyDisp_(isOnlyDisp)
|
||||||
, is64bitDisp_(is64bitDisp)
|
, is64bitDisp_(is64bitDisp)
|
||||||
, rex_(0)
|
, isVsib_(isVsib)
|
||||||
|
, isYMM_(isYMM)
|
||||||
, is32bit_(is32bit)
|
, is32bit_(is32bit)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
bool isOnlyDisp() const { return isOnlyDisp_; } // for mov eax
|
void setVsib(bool isVsib) const { isVsib_ = isVsib; }
|
||||||
uint64 getDisp() const { return disp_; }
|
bool isVsib() const { return isVsib_; }
|
||||||
uint8 getRex() const { return rex_; }
|
bool isYMM() const { return isYMM_; }
|
||||||
bool is64bitDisp() const { return is64bitDisp_; } // for moffset
|
bool is32bit() const { verify(); return is32bit_; }
|
||||||
|
bool isOnlyDisp() const { verify(); return isOnlyDisp_; } // for mov eax
|
||||||
|
uint64 getDisp() const { verify(); return disp_; }
|
||||||
|
uint8 getRex() const { verify(); return rex_; }
|
||||||
|
bool is64bitDisp() const { verify(); return is64bitDisp_; } // for moffset
|
||||||
void setRex(uint8 rex) { rex_ = rex; }
|
void setRex(uint8 rex) { rex_ = rex; }
|
||||||
};
|
};
|
||||||
|
|
||||||
class AddressFrame {
|
class AddressFrame {
|
||||||
private:
|
private:
|
||||||
void operator=(const AddressFrame&);
|
void operator=(const AddressFrame&);
|
||||||
public:
|
Address makeAddress(const Reg32e& r, bool isVsib, bool isYMM) const
|
||||||
const uint32 bit_;
|
|
||||||
explicit AddressFrame(uint32 bit) : bit_(bit) { }
|
|
||||||
Address operator[](const void *disp) const
|
|
||||||
{
|
{
|
||||||
size_t adr = reinterpret_cast<size_t>(disp);
|
Address frame(bit_, (r.isNone() && r.index_.isNone()), r.disp_, r.isBit(32) || r.index_.isBit(32), false, isVsib, isYMM);
|
||||||
#ifdef XBYAK64
|
|
||||||
if (adr > 0xFFFFFFFFU) throw ERR_OFFSET_IS_TOO_BIG;
|
|
||||||
#endif
|
|
||||||
Reg32e r(Reg(), Reg(), 0, static_cast<uint32>(adr));
|
|
||||||
return operator[](r);
|
|
||||||
}
|
|
||||||
#ifdef XBYAK64
|
|
||||||
Address operator[](uint64 disp) const
|
|
||||||
{
|
|
||||||
return Address(64, true, disp, false, true);
|
|
||||||
}
|
|
||||||
Address operator[](const RegRip& addr) const
|
|
||||||
{
|
|
||||||
Address frame(bit_, true, addr.disp_, false);
|
|
||||||
frame.db(B00000101);
|
|
||||||
frame.dd(addr.disp_);
|
|
||||||
return frame;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
Address operator[](const Reg32e& in) const
|
|
||||||
{
|
|
||||||
const Reg32e& r = in.optimize();
|
|
||||||
Address frame(bit_, (r.isNone() && r.index_.isNone()), r.disp_, r.isBit(32) || r.index_.isBit(32));
|
|
||||||
enum {
|
enum {
|
||||||
mod00 = 0, mod01 = 1, mod10 = 2
|
mod00 = 0, mod01 = 1, mod10 = 2
|
||||||
};
|
};
|
||||||
|
@ -767,6 +816,54 @@ public:
|
||||||
frame.setRex(rex);
|
frame.setRex(rex);
|
||||||
return frame;
|
return frame;
|
||||||
}
|
}
|
||||||
|
public:
|
||||||
|
const uint32 bit_;
|
||||||
|
explicit AddressFrame(uint32 bit) : bit_(bit) { }
|
||||||
|
// ptr[pointer] : displacement-only address built from a raw pointer value.
// Under XBYAK64, throws ERR_OFFSET_IS_TOO_BIG when the value exceeds 0xFFFFFFFF.
Address operator[](const void *disp) const
{
	const size_t rawAddr = reinterpret_cast<size_t>(disp);
#ifdef XBYAK64
	if (rawAddr > 0xFFFFFFFFU) {
		throw ERR_OFFSET_IS_TOO_BIG;
	}
#endif
	// no base, no index, scale 0: only the displacement is set
	const Reg32e dispOnly(Reg(), Reg(), 0, static_cast<uint32>(rawAddr));
	return operator[](dispOnly);
}
|
||||||
|
#ifdef XBYAK64
|
||||||
|
// ptr[imm64] : displacement-only Address flagged as carrying a 64-bit displacement.
Address operator[](uint64 disp) const
{
	const bool isOnlyDisp = true;
	const bool is32bit = false;
	const bool is64bitDisp = true;
	return Address(64, isOnlyDisp, disp, is32bit, is64bitDisp);
}
|
||||||
|
// ptr[RegRip] : emits the byte B00000101 followed by the 32-bit displacement of addr.
Address operator[](const RegRip& addr) const
{
	Address ripFrame(bit_, true, addr.disp_, false);
	// NOTE(review): B00000101 looks like ModRM mod=00, r/m=101 (disp32 form) - confirm
	ripFrame.db(B00000101);
	ripFrame.dd(addr.disp_);
	return ripFrame;
}
|
||||||
|
#endif
|
||||||
|
// ptr[reg expression] : ordinary (non-VSIB) address built from the optimized form of in.
Address operator[](const Reg32e& in) const
{
	const bool isVsib = false;
	const bool isYMM = false;
	return makeAddress(in.optimize(), isVsib, isYMM);
}
|
||||||
|
// ptr[vsib expression] : builds a VSIB-flagged Address.
// The vector index is passed to makeAddress as a Reg32e with the same index
// number; the ESP-as-index check is disabled for it (last constructor argument).
Address operator[](const Vsib& vs) const
{
	const int baseBit = vs.getBaseBit();
	if (baseBit != 0) {
		// a base register is present: the index is given the base's bit width
		const Reg32e base(vs.getBaseIdx(), baseBit);
		const Reg32e index(vs.getIndexIdx(), baseBit);
		const Reg32e r(base, index, vs.getScale(), vs.getDisp(), true);
		return makeAddress(r, true, vs.isYMM());
	}
	// no base register: the index width follows the build mode
#ifdef XBYAK64
	const int bit = 64;
#else
	const int bit = 32;
#endif
	const Reg32e index(vs.getIndexIdx(), bit);
	const Reg32e r(Reg(), index, vs.getScale(), vs.getDisp(), true);
	return makeAddress(r, true, vs.isYMM());
}
|
||||||
|
// ptr[xmm/ymm] : a bare vector register is treated as the Vsib expression x + 0.
Address operator[](const Xmm& x) const
{
	const Vsib vs = x + 0;
	return operator[](vs);
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct JmpLabel {
|
struct JmpLabel {
|
||||||
|
@ -975,7 +1072,7 @@ private:
|
||||||
if (p1->isMEM()) throw ERR_BAD_COMBINATION;
|
if (p1->isMEM()) throw ERR_BAD_COMBINATION;
|
||||||
if (p2->isMEM()) {
|
if (p2->isMEM()) {
|
||||||
const Address& addr = static_cast<const Address&>(*p2);
|
const Address& addr = static_cast<const Address&>(*p2);
|
||||||
if (BIT == 64 && addr.is32bit_) db(0x67);
|
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||||
rex = addr.getRex() | static_cast<const Reg&>(*p1).getRex();
|
rex = addr.getRex() | static_cast<const Reg&>(*p1).getRex();
|
||||||
} else {
|
} else {
|
||||||
// ModRM(reg, base);
|
// ModRM(reg, base);
|
||||||
|
@ -1249,7 +1346,7 @@ private:
|
||||||
uint8 rex = addr.getRex();
|
uint8 rex = addr.getRex();
|
||||||
x = (rex & 2) != 0;
|
x = (rex & 2) != 0;
|
||||||
b = (rex & 1) != 0;
|
b = (rex & 1) != 0;
|
||||||
if (BIT == 64 && addr.is32bit_) db(0x67);
|
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||||
if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0;
|
if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0;
|
||||||
} else {
|
} else {
|
||||||
x = false;
|
x = false;
|
||||||
|
@ -1314,6 +1411,14 @@ private:
|
||||||
if (is16bit) db(0x66);
|
if (is16bit) db(0x66);
|
||||||
db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
|
db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
|
||||||
}
|
}
|
||||||
|
void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int w)
|
||||||
|
{
|
||||||
|
if (!addr.isVsib()) throw ERR_BAD_VSIB_ADDRESSING;
|
||||||
|
bool isYMM = addr.isYMM();
|
||||||
|
addr.setVsib(false);
|
||||||
|
opAVX_X_X_XM(isYMM ? Ymm(x1.getIdx()) : x1, isYMM ? Ymm(x2.getIdx()) : x2, addr, type, code, true, w);
|
||||||
|
addr.setVsib(true);
|
||||||
|
}
|
||||||
public:
|
public:
|
||||||
unsigned int getVersion() const { return VERSION; }
|
unsigned int getVersion() const { return VERSION; }
|
||||||
using CodeArray::db;
|
using CodeArray::db;
|
||||||
|
|
|
@ -1402,3 +1402,11 @@ void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op,
|
||||||
void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, MM_0F38, 0xf3, false); }
|
void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, MM_0F38, 0xf3, false); }
|
||||||
void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, MM_0F38, 0xf3, false); }
|
void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, MM_0F38, 0xf3, false); }
|
||||||
void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, MM_0F38, 0xf3, false); }
|
void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, MM_0F38, 0xf3, false); }
|
||||||
|
// Gather mnemonics. Each one forwards to opGather with MM_0F38 | PP_66,
// its opcode byte (0x90..0x93) and its w value (0 or 1).
// These lines match the format string printed by the generator's gather table,
// so changes belong in that table rather than here.
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 1); }
|
||||||
|
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 1); }
|
||||||
|
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 0); }
|
||||||
|
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 0); }
|
||||||
|
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 0); }
|
||||||
|
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 0); }
|
||||||
|
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 1); }
|
||||||
|
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 1); }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue