add vgather

This commit is contained in:
MITSUNARI Shigeo 2013-05-30 13:07:26 +09:00
parent 94c0cbbbcd
commit ec8d36b5d3
4 changed files with 232 additions and 40 deletions

View file

@ -1636,6 +1636,27 @@ void put()
printf("void %s(const Reg32e& r, const Operand& op) { opGpr(Reg32e(%d, r.getBit()), op, r, %s, 0x%x, false); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code);
}
}
// gather
{
	// One row per gather mnemonic: the opcode byte and the w value that the
	// emitted wrapper forwards to opGather.
	const struct Tbl {
		const char *name;
		uint8 code;
		int w;
	} tbl[] = {
		{ "vgatherdpd", 0x92, 1 },
		{ "vgatherqpd", 0x93, 1 },
		{ "vgatherdps", 0x92, 0 },
		{ "vgatherqps", 0x93, 0 },
		{ "vpgatherdd", 0x90, 0 },
		{ "vpgatherqd", 0x91, 0 },
		{ "vpgatherdq", 0x90, 1 },
		{ "vpgatherqq", 0x91, 1 },
	};
	// Print one wrapper definition per row, in table order.
	const size_t entryNum = NUM_OF_ARRAY(tbl);
	for (const Tbl *row = tbl; row != tbl + entryNum; ++row) {
		printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x%x, %d); }\n", row->name, row->code, row->w);
	}
}
}
int main()

View file

@ -31,6 +31,10 @@ const uint64 CL = 1ULL << 20;
const uint64 MEM_ONLY_DISP = 1ULL << 21;
const uint64 NEG32 = 1ULL << 23;
const uint64 _YMM = 1ULL << 24;
// Operand-class bits for vector-index (VSIB) memory operands used by the gather tests.
// X/Y selects an xmm or ymm index register in the sample operand text; the _64 variants
// use registers numbered 13 (xmm13 / ymm13 / r13) in their sample text.
const uint64 VM32X_32 = 1ULL << 39;
const uint64 VM32X_64 = 1ULL << 40;
const uint64 VM32Y_32 = 1ULL << 41;
const uint64 VM32Y_64 = 1ULL << 42; // max value
#ifdef XBYAK64
const uint64 _MEMe = 1ULL << 25;
const uint64 REG32_2 = 1ULL << 26; // r8d, ...
@ -42,6 +46,8 @@ const uint64 _REG64_2 = 1ULL << 31; // r8, ...
const uint64 RAX = 1ULL << 32;
const uint64 _XMM2 = 1ULL << 33;
const uint64 _YMM2 = 1ULL << 34;
// XBYAK64 build: each VSIB operand class covers both its _32 and its _64 sample operand.
const uint64 VM32X = VM32X_32 | VM32X_64;
const uint64 VM32Y = VM32Y_32 | VM32Y_64;
#else
const uint64 _MEMe = 0;
const uint64 REG32_2 = 0;
@ -53,6 +59,8 @@ const uint64 _REG64_2 = 0;
const uint64 RAX = 0;
const uint64 _XMM2 = 0;
const uint64 _YMM2 = 0;
// Non-XBYAK64 build: only the _32 sample operands are included in each VSIB operand class.
const uint64 VM32X = VM32X_32;
const uint64 VM32Y = VM32Y_32;
#endif
const uint64 REG64 = _REG64 | _REG64_2 | RAX;
const uint64 REG32 = _REG32 | REG32_2 | EAX;
@ -291,6 +299,14 @@ class Test {
return isXbyak_ ? "0xda" : "0xda";
case NEG:
return "-5";
case VM32X_32:
return isXbyak_ ? "ptr [ebp+4+xmm1*8]" : "[ebp+4+xmm1*8]";
case VM32X_64:
return isXbyak_ ? "ptr [12345+xmm13*2]" : "[12345+xmm13*2]";
case VM32Y_32:
return isXbyak_ ? "ptr [ymm4]" : "[ymm4]";
case VM32Y_64:
return isXbyak_ ? "ptr [12345+ymm13*2+r13]" : "[12345+ymm13*2+r13]";
}
return 0;
}
@ -1963,6 +1979,47 @@ public:
put("rorx", REG64, REG64 | MEM, IMM8);
#endif
}
// Emit test patterns for the gather instructions.
// Every mnemonic gets the (XMM, VM32X, XMM) form first, followed by one
// additional form chosen by the per-mnemonic mode in the table below.
void putGather()
{
	// Shape of the second form: <dst>_<index vector>_<mask>.
	enum {
		y_vx_y = 0, // YMM, xmm-indexed memory, YMM
		y_vy_y = 1, // YMM, ymm-indexed memory, YMM
		x_vy_x = 2  // XMM, ymm-indexed memory, XMM
	};
	const struct Tbl {
		const char *name;
		int mode;
	} tbl[] = {
		{ "vgatherdpd", y_vx_y },
		{ "vgatherqpd", y_vy_y },
		{ "vgatherdps", y_vy_y },
		{ "vgatherqps", x_vy_x },
		{ "vpgatherdd", y_vy_y },
		{ "vpgatherqd", x_vy_x },
		{ "vpgatherdq", y_vx_y },
		{ "vpgatherqq", y_vy_y },
	};
	const size_t entryNum = NUM_OF_ARRAY(tbl);
	for (size_t idx = 0; idx != entryNum; ++idx) {
		const char *mnemonic = tbl[idx].name;
		const int mode = tbl[idx].mode;
		// Form shared by all gather mnemonics.
		put(mnemonic, XMM, VM32X, XMM);
		// Mode-specific second form.
		switch (mode) {
		case y_vx_y:
			put(mnemonic, YMM, VM32X, YMM);
			break;
		case y_vy_y:
			put(mnemonic, YMM, VM32Y, YMM);
			break;
		case x_vy_x:
			put(mnemonic, XMM, VM32Y, XMM);
			break;
		default:
			// A table entry with an unknown mode is a bug in this test generator.
			printf("ERR mode=%d\n", mode);
			exit(1);
		}
	}
	// all pattern
	const char *fixedName = "vgatherdpd";
	put(fixedName, "xmm1, ptr [xmm2], xmm3", "xmm1, [xmm2], xmm3");
}
void put()
{
#ifdef USE_AVX
@ -1972,6 +2029,7 @@ public:
putGprR_RM_R();
putGprR_RM();
putGprOtherwise();
putGather();
#else
putAVX1();
putAVX2();

View file

@ -128,6 +128,7 @@ enum Error {
ERR_BAD_PROTECT_MODE,
ERR_BAD_PNUM,
ERR_BAD_TNUM,
ERR_BAD_VSIB_ADDRESSING,
ERR_INTERNAL
};
@ -161,6 +162,7 @@ inline const char *ConvertErrorToString(Error err)
"bad protect mode",
"bad pNum",
"bad tNum",
"bad vsib addressing",
"internal error",
};
if (err < 0 || err > ERR_INTERNAL) return 0;
@ -410,7 +412,7 @@ public:
, disp_(0)
{
}
Reg32e(const Reg& base, const Reg& index, int scale, unsigned int disp)
Reg32e(const Reg& base, const Reg& index, int scale, unsigned int disp, bool allowUseEspIndex = false)
: Reg(base)
, index_(index)
, scale_(scale)
@ -418,7 +420,7 @@ public:
{
if (scale != 0 && scale != 1 && scale != 2 && scale != 4 && scale != 8) throw ERR_BAD_SCALE;
if (!base.isNone() && !index.isNone() && base.getBit() != index.getBit()) throw ERR_BAD_COMBINATION;
if (index.getIdx() == Operand::ESP) throw ERR_ESP_CANT_BE_INDEX;
if (!allowUseEspIndex && index.getIdx() == Operand::ESP) throw ERR_ESP_CANT_BE_INDEX;
}
Reg32e optimize() const // select smaller size
{
@ -455,6 +457,70 @@ struct RegRip {
};
#endif
// QQQ:need to refactor
// Intermediate value describing a vector-indexed memory operand:
//   [index_ * scale_ + base_ + disp_]
// The fields are public because the free operator+ overloads update them directly.
struct Vsib {
	uint8 indexIdx_; // xmm reg idx
	uint8 scale_; // 0(none), 1, 2, 4, 8
	uint8 baseIdx_; // base reg idx
	uint8 baseBit_; // 0(none), 32, 64
	uint32 disp_;
	bool isYMM_; // idx is YMM

	// Stores the arguments as given; no validation is done here.
	// Callers that accept a user-supplied scale call verifyScale() first.
	Vsib(int indexIdx, int scale, bool isYMM, int baseIdx = 0, int baseBit = 0, uint32 disp = 0)
		: indexIdx_(static_cast<uint8>(indexIdx))
		, scale_(static_cast<uint8>(scale))
		, baseIdx_(static_cast<uint8>(baseIdx))
		, baseBit_(static_cast<uint8>(baseBit))
		, disp_(disp)
		, isYMM_(isYMM)
	{
	}

	// Throws ERR_BAD_SCALE unless scale is one of 1, 2, 4, 8.
	static inline void verifyScale(int scale)
	{
		switch (scale) {
		case 1:
		case 2:
		case 4:
		case 8:
			return;
		default:
			throw ERR_BAD_SCALE;
		}
	}

	// Read-only accessors.
	int getIndexIdx() const { return indexIdx_; }
	int getScale() const { return scale_; }
	int getBaseIdx() const { return baseIdx_; }
	int getBaseBit() const { return baseBit_; }
	uint32 getDisp() const { return disp_; }
	bool isYMM() const { return isYMM_; }
};
// xmm/ymm * scale: starts a Vsib with the given index register and scale.
// Throws ERR_BAD_SCALE (via Vsib::verifyScale) unless scale is 1, 2, 4 or 8.
inline Vsib operator*(const Xmm& x, int scale)
{
	Vsib::verifyScale(scale);
	Vsib scaledIndex(x.getIdx(), scale, x.isYMM());
	return scaledIndex;
}
// xmm/ymm + disp: a Vsib with scale 1, no base register and the given displacement.
// The displacement is the 6th constructor argument. The 4th and 5th (baseIdx, baseBit)
// are passed as 0 explicitly, so disp is not mistaken for a base register index and
// silently dropped.
inline Vsib operator+(const Xmm& x, uint32 disp)
{
	return Vsib(x.getIdx(), 1, x.isYMM(), 0, 0, disp);
}
// xmm/ymm + general-purpose address expression: r supplies the base register and
// displacement. Throws ERR_BAD_COMBINATION if r already carries an index register.
inline Vsib operator+(const Xmm& x, const Reg32e& r)
{
	const bool hasGprIndex = !r.index_.isNone();
	if (hasGprIndex) {
		throw ERR_BAD_COMBINATION;
	}
	const int scale = 1;
	return Vsib(x.getIdx(), scale, x.isYMM(), r.getIdx(), r.getBit(), r.disp_);
}
// Vsib + disp: same operand with disp added to the stored displacement.
inline Vsib operator+(const Vsib& vs, uint32 disp)
{
	return Vsib(vs.getIndexIdx(), vs.getScale(), vs.isYMM(), vs.getBaseIdx(), vs.getBaseBit(), vs.getDisp() + disp);
}
// Vsib + general-purpose address expression: r becomes the base register and its
// displacement is added to the stored one.
// Throws ERR_BAD_COMBINATION if vs already has a base or r carries an index register.
inline Vsib operator+(const Vsib& vs, const Reg32e& r)
{
	if (vs.getBaseBit() != 0) throw ERR_BAD_COMBINATION;
	if (!r.index_.isNone()) throw ERR_BAD_COMBINATION;
	return Vsib(vs.getIndexIdx(), vs.getScale(), vs.isYMM(), r.getIdx(), r.getBit(), vs.getDisp() + r.disp_);
}
// Commuted forms: each forwards to the overload that takes the vector operand first.
inline Vsib operator+(uint32 disp, const Xmm& x)
{
	return x + disp;
}
inline Vsib operator+(uint32 disp, const Vsib& vs)
{
	return vs + disp;
}
inline Vsib operator+(const Reg32e& r, const Xmm& x)
{
	return x + r;
}
inline Vsib operator+(const Reg32e& r, const Vsib& vs)
{
	return vs + r;
}
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
void *const AutoGrow = (void*)1;
@ -678,60 +744,43 @@ public:
class Address : public Operand, public CodeArray {
void operator=(const Address&);
uint64 disp_;
uint8 rex_;
bool isOnlyDisp_;
bool is64bitDisp_;
uint8 rex_;
public:
mutable bool isVsib_;
bool isYMM_;
void verify() const { if (isVsib_) throw ERR_BAD_VSIB_ADDRESSING; }
const bool is32bit_;
Address(uint32 sizeBit, bool isOnlyDisp, uint64 disp, bool is32bit, bool is64bitDisp = false)
public:
Address(uint32 sizeBit, bool isOnlyDisp, uint64 disp, bool is32bit, bool is64bitDisp = false, bool isVsib = false, bool isYMM = false)
: Operand(0, MEM, sizeBit)
, CodeArray(6) // 6 = 1(ModRM) + 1(SIB) + 4(disp)
, disp_(disp)
, rex_(0)
, isOnlyDisp_(isOnlyDisp)
, is64bitDisp_(is64bitDisp)
, rex_(0)
, isVsib_(isVsib)
, isYMM_(isYMM)
, is32bit_(is32bit)
{
}
bool isOnlyDisp() const { return isOnlyDisp_; } // for mov eax
uint64 getDisp() const { return disp_; }
uint8 getRex() const { return rex_; }
bool is64bitDisp() const { return is64bitDisp_; } // for moffset
void setVsib(bool isVsib) const { isVsib_ = isVsib; }
bool isVsib() const { return isVsib_; }
bool isYMM() const { return isYMM_; }
bool is32bit() const { verify(); return is32bit_; }
bool isOnlyDisp() const { verify(); return isOnlyDisp_; } // for mov eax
uint64 getDisp() const { verify(); return disp_; }
uint8 getRex() const { verify(); return rex_; }
bool is64bitDisp() const { verify(); return is64bitDisp_; } // for moffset
void setRex(uint8 rex) { rex_ = rex; }
};
class AddressFrame {
private:
void operator=(const AddressFrame&);
public:
const uint32 bit_;
explicit AddressFrame(uint32 bit) : bit_(bit) { }
Address operator[](const void *disp) const
Address makeAddress(const Reg32e& r, bool isVsib, bool isYMM) const
{
size_t adr = reinterpret_cast<size_t>(disp);
#ifdef XBYAK64
if (adr > 0xFFFFFFFFU) throw ERR_OFFSET_IS_TOO_BIG;
#endif
Reg32e r(Reg(), Reg(), 0, static_cast<uint32>(adr));
return operator[](r);
}
#ifdef XBYAK64
Address operator[](uint64 disp) const
{
return Address(64, true, disp, false, true);
}
Address operator[](const RegRip& addr) const
{
Address frame(bit_, true, addr.disp_, false);
frame.db(B00000101);
frame.dd(addr.disp_);
return frame;
}
#endif
Address operator[](const Reg32e& in) const
{
const Reg32e& r = in.optimize();
Address frame(bit_, (r.isNone() && r.index_.isNone()), r.disp_, r.isBit(32) || r.index_.isBit(32));
Address frame(bit_, (r.isNone() && r.index_.isNone()), r.disp_, r.isBit(32) || r.index_.isBit(32), false, isVsib, isYMM);
enum {
mod00 = 0, mod01 = 1, mod10 = 2
};
@ -767,6 +816,54 @@ public:
frame.setRex(rex);
return frame;
}
public:
const uint32 bit_;
explicit AddressFrame(uint32 bit) : bit_(bit) { }
// ptr [absolute address]: the pointer value is used as a 32-bit displacement with
// no base or index register. In XBYAK64 builds, throws ERR_OFFSET_IS_TOO_BIG when
// the pointer value is greater than 0xFFFFFFFF.
Address operator[](const void *disp) const
{
	const size_t addrValue = reinterpret_cast<size_t>(disp);
#ifdef XBYAK64
	if (addrValue > 0xFFFFFFFFU) throw ERR_OFFSET_IS_TOO_BIG;
#endif
	const Reg32e dispOnly(Reg(), Reg(), 0, static_cast<uint32>(addrValue));
	return operator[](dispOnly);
}
#ifdef XBYAK64
// ptr [64-bit displacement]: a displacement-only Address flagged as a 64-bit
// displacement. No ModRM/SIB bytes are written into the returned object here.
Address operator[](uint64 disp) const
{
	// Constructor arguments in order: sizeBit, isOnlyDisp, disp, is32bit, is64bitDisp.
	Address moffs(64, true, disp, false, true);
	return moffs;
}
// ptr [rip + disp]: writes the ModRM byte B00000101 (mod=00, rm=101) followed by
// the 32-bit displacement into the returned Address.
Address operator[](const RegRip& addr) const
{
	Address ripFrame(bit_, true, addr.disp_, false);
	ripFrame.db(B00000101); // ModRM byte
	ripFrame.dd(addr.disp_); // displacement
	return ripFrame;
}
#endif
// ptr [general-purpose address expression]: the expression is run through
// optimize() and encoded as a non-VSIB address.
Address operator[](const Reg32e& in) const
{
	const Reg32e& optimized = in.optimize();
	return makeAddress(optimized, false, false);
}
// ptr [vector-indexed expression]: builds a VSIB Address.
// The vector index is passed to makeAddress as a Reg32e with the same register
// number. Its bit width is copied from the base register when there is one,
// and otherwise is the build's native width (64 with XBYAK64, 32 without).
// The final 'true' lets the Reg32e constructor accept index number 4, which it
// otherwise rejects with ERR_ESP_CANT_BE_INDEX.
Address operator[](const Vsib& vs) const
{
#ifdef XBYAK64
	const int nativeBit = 64;
#else
	const int nativeBit = 32;
#endif
	const int baseBit = vs.getBaseBit();
	if (baseBit != 0) {
		// Base register present: base and index share the base's width.
		const Reg32e withBase(Reg32e(vs.getBaseIdx(), baseBit), Reg32e(vs.getIndexIdx(), baseBit), vs.getScale(), vs.getDisp(), true);
		return makeAddress(withBase, true, vs.isYMM());
	}
	// No base register: index (and displacement) only.
	const Reg32e indexOnly(Reg(), Reg32e(vs.getIndexIdx(), nativeBit), vs.getScale(), vs.getDisp(), true);
	return makeAddress(indexOnly, true, vs.isYMM());
}
// ptr [xmm/ymm]: a bare vector register is treated as "register + 0" and
// handled by the Vsib overload.
Address operator[](const Xmm& x) const
{
	const Vsib vs = x + 0;
	return operator[](vs);
}
};
struct JmpLabel {
@ -975,7 +1072,7 @@ private:
if (p1->isMEM()) throw ERR_BAD_COMBINATION;
if (p2->isMEM()) {
const Address& addr = static_cast<const Address&>(*p2);
if (BIT == 64 && addr.is32bit_) db(0x67);
if (BIT == 64 && addr.is32bit()) db(0x67);
rex = addr.getRex() | static_cast<const Reg&>(*p1).getRex();
} else {
// ModRM(reg, base);
@ -1249,7 +1346,7 @@ private:
uint8 rex = addr.getRex();
x = (rex & 2) != 0;
b = (rex & 1) != 0;
if (BIT == 64 && addr.is32bit_) db(0x67);
if (BIT == 64 && addr.is32bit()) db(0x67);
if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0;
} else {
x = false;
@ -1314,6 +1411,14 @@ private:
if (is16bit) db(0x66);
db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
}
// Encode a gather instruction.
//   x1, x2 : register operands; both are re-typed as Ymm when the address's
//            index register is a ymm register
//   addr   : must be a VSIB address, otherwise ERR_BAD_VSIB_ADDRESSING is thrown
//   type, code, w : forwarded to opAVX_X_X_XM
// Address accessors throw while the VSIB flag is set, so the flag is cleared for the
// duration of the encoding call and then restored. It is also restored when the
// encoder throws; otherwise a reused Address object would stay marked as an
// ordinary address.
void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int w)
{
	if (!addr.isVsib()) throw ERR_BAD_VSIB_ADDRESSING;
	const bool isYMM = addr.isYMM();
	addr.setVsib(false);
	try {
		opAVX_X_X_XM(isYMM ? Ymm(x1.getIdx()) : x1, isYMM ? Ymm(x2.getIdx()) : x2, addr, type, code, true, w);
	} catch (...) {
		addr.setVsib(true);
		throw;
	}
	addr.setVsib(true);
}
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;

View file

@ -1402,3 +1402,11 @@ void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op,
// blsi / blsmsk / blsr: all three pass MM_0F38 and opcode 0xf3 to opGpr and differ only
// in the fixed register number (3, 2, 1) given as the first argument.
// NOTE(review): that number is presumably the ModRM.reg opcode extension (/3, /2, /1) - confirm against opGpr.
void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, MM_0F38, 0xf3, false); }
void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, MM_0F38, 0xf3, false); }
void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, MM_0F38, 0xf3, false); }
// Gather instructions. Each wrapper forwards (x1, addr, x2) to opGather with type
// MM_0F38 | PP_66, its opcode byte, and its w value (last argument).
// opGather throws ERR_BAD_VSIB_ADDRESSING when addr is not a VSIB address.
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 1); }
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 1); }
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 0); }
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 0); }
void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 0); }
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 0); }
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 1); }
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 1); }