add vshuf*, vpternlog{d,q}
This commit is contained in:
parent
6c62620430
commit
5e77cfae66
3 changed files with 63 additions and 2 deletions
|
@ -271,6 +271,15 @@ void putX_X_XM_IMM()
|
|||
{ 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
|
||||
{ 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
|
||||
{ 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -444,6 +453,14 @@ void putGather()
|
|||
}
|
||||
}
|
||||
|
||||
void putShuff()
|
||||
{
|
||||
puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }");
|
||||
puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }");
|
||||
puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }");
|
||||
puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }");
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
puts("#ifndef XBYAK_DISABLE_AVX512");
|
||||
|
@ -460,5 +477,6 @@ int main()
|
|||
#endif
|
||||
putCvt();
|
||||
putGather();
|
||||
putShuff();
|
||||
puts("#endif");
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ const uint64 MEM32 = 1ULL << 17;
|
|||
const uint64 VM32Z = 1ULL << 19;
|
||||
const uint64 K_K = 1ULL << 20;
|
||||
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
||||
const uint64 NEG32 = 1ULL << 23;
|
||||
//const uint64 QQQ = 1ULL << 23;
|
||||
const uint64 _YMM = 1ULL << 24;
|
||||
const uint64 VM32X_32 = 1ULL << 39;
|
||||
const uint64 VM32X_64 = 1ULL << 40;
|
||||
|
@ -1714,6 +1714,11 @@ public:
|
|||
{ "vpermt2q", M_1to2 },
|
||||
{ "vpermt2ps", M_1to4 },
|
||||
{ "vpermt2pd", M_1to2 },
|
||||
|
||||
{ "vpermi2w", 0 },
|
||||
{ "vpermi2d", M_1to4 },
|
||||
{ "vpermi2q", M_1to2 },
|
||||
{ "vpermi2ps", M_1to4 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
|
@ -1723,10 +1728,33 @@ public:
|
|||
put(p.name, ZMM_KZ, _ZMM, _ZMM | _MEM | bTbl[2]);
|
||||
}
|
||||
}
|
||||
void putShuff()
|
||||
{
|
||||
put("vshuff32x4", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
|
||||
put("vshuff32x4", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
|
||||
|
||||
put("vshuff64x2", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
|
||||
put("vshuff64x2", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
|
||||
|
||||
put("vshufi32x4", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
|
||||
put("vshufi32x4", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
|
||||
|
||||
put("vshufi64x2", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
|
||||
put("vshufi64x2", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
|
||||
}
|
||||
void putMisc2()
|
||||
{
|
||||
put("vpternlogd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4, IMM8);
|
||||
put("vpternlogd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
|
||||
put("vpternlogd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
|
||||
|
||||
put("vpternlogq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2, IMM8);
|
||||
put("vpternlogq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
|
||||
put("vpternlogq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
|
||||
}
|
||||
void putMin()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
putPerm();
|
||||
#endif
|
||||
}
|
||||
void putAVX512()
|
||||
|
@ -1775,6 +1803,10 @@ public:
|
|||
putCompExp();
|
||||
separateFunc();
|
||||
putPerm();
|
||||
separateFunc();
|
||||
putShuff();
|
||||
separateFunc();
|
||||
putMisc2();
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
|
|
@ -156,6 +156,13 @@ void vpermt2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
|
|||
void vpermt2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7E); }
|
||||
void vpermt2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7F); }
|
||||
void vpermt2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7F); }
|
||||
void vpermi2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x75); }
|
||||
void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x76); }
|
||||
void vpermi2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x76); }
|
||||
void vpermi2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x77); }
|
||||
void vpermi2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x77); }
|
||||
void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); }
|
||||
void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); }
|
||||
void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
|
||||
void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
|
||||
void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
|
||||
|
@ -217,4 +224,8 @@ void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T
|
|||
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x92, 1); }
|
||||
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x93, 2); }
|
||||
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x93, 0); }
|
||||
void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
|
||||
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
|
||||
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
|
||||
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue