add vgather
This commit is contained in:
parent
94c0cbbbcd
commit
ec8d36b5d3
4 changed files with 232 additions and 40 deletions
|
@ -1636,6 +1636,27 @@ void put()
|
|||
printf("void %s(const Reg32e& r, const Operand& op) { opGpr(Reg32e(%d, r.getBit()), op, r, %s, 0x%x, false); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code);
|
||||
}
|
||||
}
|
||||
// gather
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8 code;
|
||||
int w;
|
||||
} tbl[] = {
|
||||
{ "vgatherdpd", 0x92, 1 },
|
||||
{ "vgatherqpd", 0x93, 1 },
|
||||
{ "vgatherdps", 0x92, 0 },
|
||||
{ "vgatherqps", 0x93, 0 },
|
||||
{ "vpgatherdd", 0x90, 0 },
|
||||
{ "vpgatherqd", 0x91, 0 },
|
||||
{ "vpgatherdq", 0x90, 1 },
|
||||
{ "vpgatherqq", 0x91, 1 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x%x, %d); }\n", p.name, p.code, p.w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
|
|
|
@ -31,6 +31,10 @@ const uint64 CL = 1ULL << 20;
|
|||
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
||||
const uint64 NEG32 = 1ULL << 23;
|
||||
const uint64 _YMM = 1ULL << 24;
|
||||
// Operand-kind bits for the vector-index memory operands used by the gather tests.
// VM32X_* select an address sample whose index is an xmm register, VM32Y_* one whose
// index is a ymm register. The _64 samples reference xmm13/ymm13 (and r13) and are only
// folded into VM32X/VM32Y when XBYAK64 is defined.
const uint64 VM32X_32 = 1ULL << 39;
|
||||
const uint64 VM32X_64 = 1ULL << 40;
|
||||
const uint64 VM32Y_32 = 1ULL << 41;
|
||||
const uint64 VM32Y_64 = 1ULL << 42; // max value
|
||||
#ifdef XBYAK64
|
||||
const uint64 _MEMe = 1ULL << 25;
|
||||
const uint64 REG32_2 = 1ULL << 26; // r8d, ...
|
||||
|
@ -42,6 +46,8 @@ const uint64 _REG64_2 = 1ULL << 31; // r8, ...
|
|||
const uint64 RAX = 1ULL << 32;
|
||||
const uint64 _XMM2 = 1ULL << 33;
|
||||
const uint64 _YMM2 = 1ULL << 34;
|
||||
const uint64 VM32X = VM32X_32 | VM32X_64;
|
||||
const uint64 VM32Y = VM32Y_32 | VM32Y_64;
|
||||
#else
|
||||
const uint64 _MEMe = 0;
|
||||
const uint64 REG32_2 = 0;
|
||||
|
@ -53,6 +59,8 @@ const uint64 _REG64_2 = 0;
|
|||
const uint64 RAX = 0;
|
||||
const uint64 _XMM2 = 0;
|
||||
const uint64 _YMM2 = 0;
|
||||
const uint64 VM32X = VM32X_32;
|
||||
const uint64 VM32Y = VM32Y_32;
|
||||
#endif
|
||||
const uint64 REG64 = _REG64 | _REG64_2 | RAX;
|
||||
const uint64 REG32 = _REG32 | REG32_2 | EAX;
|
||||
|
@ -291,6 +299,14 @@ class Test {
|
|||
return isXbyak_ ? "0xda" : "0xda";
|
||||
case NEG:
|
||||
return "-5";
|
||||
case VM32X_32:
|
||||
return isXbyak_ ? "ptr [ebp+4+xmm1*8]" : "[ebp+4+xmm1*8]";
|
||||
case VM32X_64:
|
||||
return isXbyak_ ? "ptr [12345+xmm13*2]" : "[12345+xmm13*2]";
|
||||
case VM32Y_32:
|
||||
return isXbyak_ ? "ptr [ymm4]" : "[ymm4]";
|
||||
case VM32Y_64:
|
||||
return isXbyak_ ? "ptr [12345+ymm13*2+r13]" : "[12345+ymm13*2+r13]";
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -1963,6 +1979,47 @@ public:
|
|||
put("rorx", REG64, REG64 | MEM, IMM8);
|
||||
#endif
|
||||
}
|
||||
void putGather()
|
||||
{
|
||||
const int y_vx_y = 0;
|
||||
const int y_vy_y = 1;
|
||||
const int x_vy_x = 2;
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int mode;
|
||||
} tbl[] = {
|
||||
{ "vgatherdpd", y_vx_y },
|
||||
{ "vgatherqpd", y_vy_y },
|
||||
{ "vgatherdps", y_vy_y },
|
||||
{ "vgatherqps", x_vy_x },
|
||||
{ "vpgatherdd", y_vy_y },
|
||||
{ "vpgatherqd", x_vy_x },
|
||||
{ "vpgatherdq", y_vx_y },
|
||||
{ "vpgatherqq", y_vy_y },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
const char *name = p.name;
|
||||
put(name, XMM, VM32X, XMM);
|
||||
switch (p.mode) {
|
||||
case y_vx_y:
|
||||
put(name, YMM, VM32X, YMM);
|
||||
break;
|
||||
case y_vy_y:
|
||||
put(name, YMM, VM32Y, YMM);
|
||||
break;
|
||||
case x_vy_x:
|
||||
put(name, XMM, VM32Y, XMM);
|
||||
break;
|
||||
default:
|
||||
printf("ERR mode=%d\n", p.mode);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
// all pattern
|
||||
const char *name = "vgatherdpd";
|
||||
put(name, "xmm1, ptr [xmm2], xmm3", "xmm1, [xmm2], xmm3");
|
||||
}
|
||||
void put()
|
||||
{
|
||||
#ifdef USE_AVX
|
||||
|
@ -1972,6 +2029,7 @@ public:
|
|||
putGprR_RM_R();
|
||||
putGprR_RM();
|
||||
putGprOtherwise();
|
||||
putGather();
|
||||
#else
|
||||
putAVX1();
|
||||
putAVX2();
|
||||
|
|
185
xbyak/xbyak.h
185
xbyak/xbyak.h
|
@ -128,6 +128,7 @@ enum Error {
|
|||
ERR_BAD_PROTECT_MODE,
|
||||
ERR_BAD_PNUM,
|
||||
ERR_BAD_TNUM,
|
||||
ERR_BAD_VSIB_ADDRESSING,
|
||||
ERR_INTERNAL
|
||||
};
|
||||
|
||||
|
@ -161,6 +162,7 @@ inline const char *ConvertErrorToString(Error err)
|
|||
"bad protect mode",
|
||||
"bad pNum",
|
||||
"bad tNum",
|
||||
"bad vsib addressing",
|
||||
"internal error",
|
||||
};
|
||||
if (err < 0 || err > ERR_INTERNAL) return 0;
|
||||
|
@ -410,7 +412,7 @@ public:
|
|||
, disp_(0)
|
||||
{
|
||||
}
|
||||
Reg32e(const Reg& base, const Reg& index, int scale, unsigned int disp)
|
||||
Reg32e(const Reg& base, const Reg& index, int scale, unsigned int disp, bool allowUseEspIndex = false)
|
||||
: Reg(base)
|
||||
, index_(index)
|
||||
, scale_(scale)
|
||||
|
@ -418,7 +420,7 @@ public:
|
|||
{
|
||||
if (scale != 0 && scale != 1 && scale != 2 && scale != 4 && scale != 8) throw ERR_BAD_SCALE;
|
||||
if (!base.isNone() && !index.isNone() && base.getBit() != index.getBit()) throw ERR_BAD_COMBINATION;
|
||||
if (index.getIdx() == Operand::ESP) throw ERR_ESP_CANT_BE_INDEX;
|
||||
if (!allowUseEspIndex && index.getIdx() == Operand::ESP) throw ERR_ESP_CANT_BE_INDEX;
|
||||
}
|
||||
Reg32e optimize() const // select smaller size
|
||||
{
|
||||
|
@ -455,6 +457,70 @@ struct RegRip {
|
|||
};
|
||||
#endif
|
||||
|
||||
// QQQ:need to refactor
|
||||
struct Vsib {
|
||||
// [index_ * scale_ + base_ + disp_]
|
||||
uint8 indexIdx_; // xmm reg idx
|
||||
uint8 scale_; // 0(none), 1, 2, 4, 8
|
||||
uint8 baseIdx_; // base reg idx
|
||||
uint8 baseBit_; // 0(none), 32, 64
|
||||
uint32 disp_;
|
||||
bool isYMM_; // idx is YMM
|
||||
public:
|
||||
static inline void verifyScale(int scale)
|
||||
{
|
||||
if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw ERR_BAD_SCALE;
|
||||
}
|
||||
int getIndexIdx() const { return indexIdx_; }
|
||||
int getScale() const { return scale_; }
|
||||
int getBaseIdx() const { return baseIdx_; }
|
||||
int getBaseBit() const { return baseBit_; }
|
||||
bool isYMM() const { return isYMM_; }
|
||||
uint32 getDisp() const { return disp_; }
|
||||
Vsib(int indexIdx, int scale, bool isYMM, int baseIdx = 0, int baseBit = 0, uint32 disp = 0)
|
||||
: indexIdx_((uint8)indexIdx)
|
||||
, scale_((uint8)scale)
|
||||
, baseIdx_((uint8)baseIdx)
|
||||
, baseBit_((uint8)baseBit)
|
||||
, disp_(disp)
|
||||
, isYMM_(isYMM)
|
||||
{
|
||||
}
|
||||
};
|
||||
// xmm/ymm * scale -> scaled vector index with no base and no displacement.
// Throws ERR_BAD_SCALE (via Vsib::verifyScale) for scales other than 1, 2, 4, 8.
inline Vsib operator*(const Xmm& x, int scale)
{
	Vsib::verifyScale(scale);
	Vsib scaledIndex(x.getIdx(), scale, x.isYMM());
	return scaledIndex;
}
|
||||
// xmm/ymm + disp -> unscaled (scale 1) vector index with a displacement and no base.
inline Vsib operator+(const Xmm& x, uint32 disp)
{
	// Vsib's constructor order is (indexIdx, scale, isYMM, baseIdx, baseBit, disp).
	// The base slots must be passed explicitly as 0 so that disp lands in disp_
	// instead of being narrowed into baseIdx_ and dropped.
	return Vsib(x.getIdx(), 1, x.isYMM(), 0, 0, disp);
}
|
||||
// xmm/ymm + general register expression -> vector index (scale 1) using r as
// base register and r's displacement. r must not carry its own index register.
inline Vsib operator+(const Xmm& x, const Reg32e& r)
{
	const bool hasScalarIndex = !r.index_.isNone();
	if (hasScalarIndex) {
		throw ERR_BAD_COMBINATION;
	}
	return Vsib(x.getIdx(), 1, x.isYMM(), r.getIdx(), r.getBit(), r.disp_);
}
|
||||
// Returns a copy of vs whose displacement is increased by disp
// (uint32 arithmetic, so it wraps on overflow).
inline Vsib operator+(const Vsib& vs, uint32 disp)
{
	return Vsib(vs.getIndexIdx(), vs.getScale(), vs.isYMM(), vs.getBaseIdx(), vs.getBaseBit(), vs.getDisp() + disp);
}
|
||||
// Attaches base register r (and r's displacement) to a vector-index expression.
// Throws ERR_BAD_COMBINATION if vs already has a base or r has its own index.
inline Vsib operator+(const Vsib& vs, const Reg32e& r)
{
	if (vs.getBaseBit() != 0) {
		throw ERR_BAD_COMBINATION;
	}
	if (!r.index_.isNone()) {
		throw ERR_BAD_COMBINATION;
	}
	return Vsib(vs.getIndexIdx(), vs.getScale(), vs.isYMM(), r.getIdx(), r.getBit(), vs.getDisp() + r.disp_);
}
|
||||
// Commuted spellings: each one forwards to the overload that takes the
// vector operand on the left-hand side.
inline Vsib operator+(uint32 disp, const Xmm& x)
{
	return x + disp;
}
inline Vsib operator+(uint32 disp, const Vsib& vs)
{
	return vs + disp;
}
inline Vsib operator+(const Reg32e& r, const Xmm& x)
{
	return x + r;
}
inline Vsib operator+(const Reg32e& r, const Vsib& vs)
{
	return vs + r;
}
|
||||
|
||||
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
|
||||
void *const AutoGrow = (void*)1;
|
||||
|
||||
|
@ -678,60 +744,43 @@ public:
|
|||
class Address : public Operand, public CodeArray {
|
||||
void operator=(const Address&);
|
||||
uint64 disp_;
|
||||
uint8 rex_;
|
||||
bool isOnlyDisp_;
|
||||
bool is64bitDisp_;
|
||||
uint8 rex_;
|
||||
public:
|
||||
mutable bool isVsib_;
|
||||
bool isYMM_;
|
||||
void verify() const { if (isVsib_) throw ERR_BAD_VSIB_ADDRESSING; }
|
||||
const bool is32bit_;
|
||||
Address(uint32 sizeBit, bool isOnlyDisp, uint64 disp, bool is32bit, bool is64bitDisp = false)
|
||||
public:
|
||||
Address(uint32 sizeBit, bool isOnlyDisp, uint64 disp, bool is32bit, bool is64bitDisp = false, bool isVsib = false, bool isYMM = false)
|
||||
: Operand(0, MEM, sizeBit)
|
||||
, CodeArray(6) // 6 = 1(ModRM) + 1(SIB) + 4(disp)
|
||||
, disp_(disp)
|
||||
, rex_(0)
|
||||
, isOnlyDisp_(isOnlyDisp)
|
||||
, is64bitDisp_(is64bitDisp)
|
||||
, rex_(0)
|
||||
, isVsib_(isVsib)
|
||||
, isYMM_(isYMM)
|
||||
, is32bit_(is32bit)
|
||||
{
|
||||
}
|
||||
bool isOnlyDisp() const { return isOnlyDisp_; } // for mov eax
|
||||
uint64 getDisp() const { return disp_; }
|
||||
uint8 getRex() const { return rex_; }
|
||||
bool is64bitDisp() const { return is64bitDisp_; } // for moffset
|
||||
void setVsib(bool isVsib) const { isVsib_ = isVsib; }
|
||||
bool isVsib() const { return isVsib_; }
|
||||
bool isYMM() const { return isYMM_; }
|
||||
bool is32bit() const { verify(); return is32bit_; }
|
||||
bool isOnlyDisp() const { verify(); return isOnlyDisp_; } // for mov eax
|
||||
uint64 getDisp() const { verify(); return disp_; }
|
||||
uint8 getRex() const { verify(); return rex_; }
|
||||
bool is64bitDisp() const { verify(); return is64bitDisp_; } // for moffset
|
||||
void setRex(uint8 rex) { rex_ = rex; }
|
||||
};
|
||||
|
||||
class AddressFrame {
|
||||
private:
|
||||
void operator=(const AddressFrame&);
|
||||
public:
|
||||
const uint32 bit_;
|
||||
explicit AddressFrame(uint32 bit) : bit_(bit) { }
|
||||
Address operator[](const void *disp) const
|
||||
Address makeAddress(const Reg32e& r, bool isVsib, bool isYMM) const
|
||||
{
|
||||
size_t adr = reinterpret_cast<size_t>(disp);
|
||||
#ifdef XBYAK64
|
||||
if (adr > 0xFFFFFFFFU) throw ERR_OFFSET_IS_TOO_BIG;
|
||||
#endif
|
||||
Reg32e r(Reg(), Reg(), 0, static_cast<uint32>(adr));
|
||||
return operator[](r);
|
||||
}
|
||||
#ifdef XBYAK64
|
||||
Address operator[](uint64 disp) const
|
||||
{
|
||||
return Address(64, true, disp, false, true);
|
||||
}
|
||||
Address operator[](const RegRip& addr) const
|
||||
{
|
||||
Address frame(bit_, true, addr.disp_, false);
|
||||
frame.db(B00000101);
|
||||
frame.dd(addr.disp_);
|
||||
return frame;
|
||||
}
|
||||
#endif
|
||||
Address operator[](const Reg32e& in) const
|
||||
{
|
||||
const Reg32e& r = in.optimize();
|
||||
Address frame(bit_, (r.isNone() && r.index_.isNone()), r.disp_, r.isBit(32) || r.index_.isBit(32));
|
||||
Address frame(bit_, (r.isNone() && r.index_.isNone()), r.disp_, r.isBit(32) || r.index_.isBit(32), false, isVsib, isYMM);
|
||||
enum {
|
||||
mod00 = 0, mod01 = 1, mod10 = 2
|
||||
};
|
||||
|
@ -767,6 +816,54 @@ public:
|
|||
frame.setRex(rex);
|
||||
return frame;
|
||||
}
|
||||
public:
|
||||
const uint32 bit_;
|
||||
explicit AddressFrame(uint32 bit) : bit_(bit) { }
|
||||
// Address given as a raw pointer value: encoded as a displacement-only operand.
// With XBYAK64 defined, throws ERR_OFFSET_IS_TOO_BIG if the value exceeds 32 bits.
Address operator[](const void *disp) const
{
	const size_t rawAddr = reinterpret_cast<size_t>(disp);
#ifdef XBYAK64
	if (rawAddr > 0xFFFFFFFFU) throw ERR_OFFSET_IS_TOO_BIG;
#endif
	// no base, no index, scale 0: only the (truncated) displacement remains
	const Reg32e dispOnly(Reg(), Reg(), 0, static_cast<uint32>(rawAddr));
	return operator[](dispOnly);
}
|
||||
#ifdef XBYAK64
// 64-bit absolute displacement: no ModRM bytes are emitted here; the Address
// only records the displacement with is64bitDisp set.
Address operator[](uint64 disp) const
{
	Address absolute(64, true, disp, false, true);
	return absolute;
}
// RegRip operand: writes the byte B00000101 followed by the 32-bit displacement.
Address operator[](const RegRip& addr) const
{
	Address ripFrame(bit_, true, addr.disp_, false);
	ripFrame.db(B00000101);
	ripFrame.dd(addr.disp_);
	return ripFrame;
}
#endif
|
||||
// Ordinary (non-VSIB) register expression: optimize it first, then encode it
// with both the isVsib and isYMM flags off.
Address operator[](const Reg32e& in) const
{
	const Reg32e optimized = in.optimize();
	return makeAddress(optimized, false, false);
}
|
||||
// Vector-index expression: re-expresses it as a Reg32e whose index carries the
// vector register number, then encodes it with the isVsib flag set.
// The index is given the base register's width, or the native width
// (64 with XBYAK64, otherwise 32) when there is no base.
Address operator[](const Vsib& vs) const
{
	const bool indexIsYmm = vs.isYMM();
	const int baseBit = vs.getBaseBit();
	if (baseBit != 0) {
		// last ctor argument lifts the "esp can't be index" check,
		// since index number 4 is a valid vector register here
		const Reg32e withBase(Reg32e(vs.getBaseIdx(), baseBit), Reg32e(vs.getIndexIdx(), baseBit), vs.getScale(), vs.getDisp(), true);
		return makeAddress(withBase, true, indexIsYmm);
	}
#ifdef XBYAK64
	const int nativeBit = 64;
#else
	const int nativeBit = 32;
#endif
	const Reg32e indexOnly(Reg(), Reg32e(vs.getIndexIdx(), nativeBit), vs.getScale(), vs.getDisp(), true);
	return makeAddress(indexOnly, true, indexIsYmm);
}
|
||||
// Bare vector register as address: treated as "x + 0" and routed through the
// Vsib overload.
Address operator[](const Xmm& x) const
{
	const Vsib bareIndex = x + 0;
	return operator[](bareIndex);
}
|
||||
};
|
||||
|
||||
struct JmpLabel {
|
||||
|
@ -975,7 +1072,7 @@ private:
|
|||
if (p1->isMEM()) throw ERR_BAD_COMBINATION;
|
||||
if (p2->isMEM()) {
|
||||
const Address& addr = static_cast<const Address&>(*p2);
|
||||
if (BIT == 64 && addr.is32bit_) db(0x67);
|
||||
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||
rex = addr.getRex() | static_cast<const Reg&>(*p1).getRex();
|
||||
} else {
|
||||
// ModRM(reg, base);
|
||||
|
@ -1249,7 +1346,7 @@ private:
|
|||
uint8 rex = addr.getRex();
|
||||
x = (rex & 2) != 0;
|
||||
b = (rex & 1) != 0;
|
||||
if (BIT == 64 && addr.is32bit_) db(0x67);
|
||||
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||
if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0;
|
||||
} else {
|
||||
x = false;
|
||||
|
@ -1314,6 +1411,14 @@ private:
|
|||
if (is16bit) db(0x66);
|
||||
db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
|
||||
}
|
||||
void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int w)
|
||||
{
|
||||
if (!addr.isVsib()) throw ERR_BAD_VSIB_ADDRESSING;
|
||||
bool isYMM = addr.isYMM();
|
||||
addr.setVsib(false);
|
||||
opAVX_X_X_XM(isYMM ? Ymm(x1.getIdx()) : x1, isYMM ? Ymm(x2.getIdx()) : x2, addr, type, code, true, w);
|
||||
addr.setVsib(true);
|
||||
}
|
||||
public:
|
||||
unsigned int getVersion() const { return VERSION; }
|
||||
using CodeArray::db;
|
||||
|
|
|
@ -1402,3 +1402,11 @@ void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op,
|
|||
void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, MM_0F38, 0xf3, false); }
|
||||
void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, MM_0F38, 0xf3, false); }
|
||||
void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, MM_0F38, 0xf3, false); }
|
||||
// Gather mnemonics (vgather* / vpgather*). Each takes (x1, addr, x2) and forwards to
// opGather with MM_0F38 | PP_66; only the opcode byte (0x90-0x93) and the final
// w argument (0 or 1) differ between them.
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 1); }
|
||||
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 1); }
|
||||
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 0); }
|
||||
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 0); }
|
||||
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 0); }
|
||||
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 0); }
|
||||
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 1); }
|
||||
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 1); }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue