diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp index 745a0f8..b205c90 100644 --- a/gen/avx_type.hpp +++ b/gen/avx_type.hpp @@ -24,6 +24,7 @@ enum { T_MUST_EVEX = 1 << 23, T_B32 = 1 << 24, // m32bcst T_B64 = 1 << 25, // m64bcst + T_M_K = 1 << 26, // mem{k} T_XXX }; @@ -124,5 +125,9 @@ std::string type2String(int type) if (!str.empty()) str += " | "; str += "T_B64"; } + if (type & T_M_K) { + if (!str.empty()) str += " | "; + str += "T_M_K"; + } return str; } \ No newline at end of file diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 46e4ed5..6370009 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1541,10 +1541,12 @@ void put() // vmovsd, vmovss for (int i = 0; i < 2; i++) { char c1 = i == 0 ? 'd' : 's'; - char c2 = i == 0 ? '2' : '3'; - printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F%c, 0x10); }\n", c1, c2); - printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F%c, 0x10); }\n", c1, c2); - printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F%c, 0x11); }\n", c1, c2); + int type = T_0F | T_EVEX; + type |= i == 0 ? 
T_F2 | T_EW1 : T_F3 | T_EW0; + std::string s = type2String(type); + printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str()); + printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", c1, s.c_str()); + printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s | T_M_K, 0x11); }\n", c1, s.c_str()); } } // cvt diff --git a/test/make_nm.cpp b/test/make_nm.cpp index a65030e..7babd15 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -97,12 +97,13 @@ const uint64 XMM_SAE = 1ULL << 51; #ifdef XBYAK64 const uint64 XMM_KZ = 1ULL << 52; const uint64 YMM_KZ = 1ULL << 53; -const uint64 ZMM_KZ = 1ULL << 54; // max value +const uint64 ZMM_KZ = 1ULL << 54; #else const uint64 XMM_KZ = 0; const uint64 YMM_KZ = 0; const uint64 ZMM_KZ = 0; #endif +const uint64 MEM_K = 1ULL << 55; // max value const uint64 NOPARA = 1ULL << (bitEnd - 1); @@ -388,6 +389,8 @@ class Test { return isXbyak_ ? "ymm2 |k3|T_z" : "ymm2{k3}{z}"; case ZMM_KZ: return isXbyak_ ? "zmm7|k1" : "zmm7{k1}"; + case MEM_K: + return isXbyak_ ? "ptr [rax] | k1" : "[rax]{k1}"; #else case XMM_SAE: return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}"; @@ -395,6 +398,8 @@ class Test { return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}"; case ZMM_ER: return isXbyak_ ? "zmm2 | T_rd_sae" : "zmm2, {rd-sae}"; + case MEM_K: + return isXbyak_ ? 
"ptr [eax] | k1" : "[eax]{k1}"; #endif } return 0; @@ -2616,6 +2621,13 @@ public: put("vmovntdq", MEM, _XMM3 | _YMM3 | ZMM); put("vmovntpd", MEM, _XMM3 | _YMM3 | ZMM); put("vmovntps", MEM, _XMM3 | _YMM3 | ZMM); + + put("vmovsd", XMM_KZ, _XMM3, _XMM3); + put("vmovsd", XMM_KZ, MEM); + put("vmovsd", MEM_K, XMM); + put("vmovss", XMM_KZ, _XMM3, _XMM3); + put("vmovss", XMM_KZ, MEM); + put("vmovss", MEM_K, XMM); { const char tbl[][16] = { "vmovhpd", diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 93173ab..4e545c5 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -172,6 +172,7 @@ enum { ERR_SAE_IS_INVALID, ERR_ER_IS_INVALID, ERR_INVALID_BROADCAST, + ERR_INVALID_OPMASK_WITH_MEMORY, ERR_INTERNAL }; @@ -229,6 +230,7 @@ public: "sae(suppress all exceptions) is invalid", "er(embedded rounding) is invalid", "invalid broadcast", + "invalid opmask with memory", "internal error", }; assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl)); @@ -546,7 +548,7 @@ struct Opmask : public Reg { template T operator|(const T& x, const Opmask& k) { - if (!x.is(Operand::XMM | Operand::YMM | Operand::ZMM | Operand::OPMASK)) throw Error(ERR_BAD_COMBINATION); + if (!x.is(Operand::XMM | Operand::YMM | Operand::ZMM | Operand::OPMASK | Operand::MEM)) throw Error(ERR_BAD_COMBINATION); T r(x); r.setOpmaskIdx(k.getIdx()); return r; @@ -955,7 +957,7 @@ public: } #ifdef XBYAK64 explicit Address(size_t disp) - : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false) { } + : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false){ } Address(uint32 sizeBit, bool broadcast, const RegRip& addr) : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(M_rip), permitVsib_(false), broadcast_(broadcast) { } #endif @@ -1364,6 +1366,7 @@ private: T_MUST_EVEX = 1 << 23, T_B32 = 1 << 24, // m32bcst T_B64 = 1 << 25, // m64bcst + T_M_K = 1 << 26, // mem{k} T_XXX }; void vex(const Reg& reg, const Reg& base, const 
Operand *v, int type, int code, bool x = false) @@ -1401,7 +1404,7 @@ private: T_RZ_SAE = 4, T_SAE = 5, }; - void evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false) + void evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0) { if (!(type & T_EVEX)) throw Error(ERR_EVEX_IS_INVALID); int w = (type & T_EW1) ? 1 : 0; @@ -1431,7 +1434,7 @@ private: } bool Vp = !(v ? v->isExtIdx2() : 0); bool z = reg.hasZero(); - int aaa = reg.getOpmaskIdx(); + if (aaa == 0) aaa = reg.getOpmaskIdx(); db(0x62); db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3)); db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3)); @@ -1756,7 +1759,9 @@ private: if (BIT == 64 && addr.is32bit()) db(0x67); int disp8N = 0; bool x = addr.getRegExp().getIndex().isExtIdx(); - if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast()) { + if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) { + int aaa = addr.getOpmaskIdx(); + if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY); bool b = false; if (addr.isBroadcast()) { if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST); @@ -1765,7 +1770,7 @@ private: } else { disp8N = 1; } - evex(r, base, p1, type, code, x, b); + evex(r, base, p1, type, code, x, b, aaa); } else { vex(r, base, p1, type, code, x); } diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 89d8715..51f04cf 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1467,12 +1467,12 @@ void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), a void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); } void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_YMM | T_EVEX | 
T_EW0, 0x2B); } void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); } -void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F2, 0x10); } -void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F2, 0x10); } -void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F2, 0x11); } -void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F3, 0x10); } -void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F3, 0x10); } -void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F3, 0x11); } +void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F2 | T_EW1 | T_EVEX, 0x10); } +void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F2 | T_EW1 | T_EVEX, 0x10); } +void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F2 | T_EW1 | T_EVEX | T_M_K, 0x11); } +void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_F3 | T_EW0 | T_EVEX, 0x10); } +void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F3 | T_EW0 | T_EVEX, 0x10); } +void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_F3 | T_EW0 | T_EVEX | T_M_K, 0x11); } void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0, 0x2D); } void vcvttss2si(const Reg32& r, const Operand& op) { 
opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0, 0x2C); } void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0, 0x2D); }