add vpcmp*, v(p)blend*
This commit is contained in:
parent
97743ee529
commit
c57b6026f9
3 changed files with 91 additions and 4 deletions
|
@ -115,6 +115,16 @@ void putVcmp()
|
||||||
{ 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
{ 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||||
{ 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
{ 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||||
{ 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
{ 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||||
|
|
||||||
|
{ 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
|
||||||
|
{ 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
|
||||||
|
|
||||||
|
{ 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
|
||||||
|
{ 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
|
||||||
|
{ 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
|
||||||
|
{ 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
|
||||||
|
{ 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
|
||||||
|
{ 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
@ -211,6 +221,13 @@ void putX_X_XM_IMM()
|
||||||
|
|
||||||
{ 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
|
{ 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
|
||||||
{ 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
{ 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||||
|
|
||||||
|
{ 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||||
|
{ 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||||
|
{ 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
|
||||||
|
{ 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
|
||||||
|
{ 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||||
|
{ 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
|
|
@ -28,7 +28,7 @@ const uint64 MEM8 = 1ULL << 15;
|
||||||
const uint64 MEM16 = 1ULL << 16;
|
const uint64 MEM16 = 1ULL << 16;
|
||||||
const uint64 MEM32 = 1ULL << 17;
|
const uint64 MEM32 = 1ULL << 17;
|
||||||
const uint64 VM32Z = 1ULL << 19;
|
const uint64 VM32Z = 1ULL << 19;
|
||||||
const uint64 CL = 1ULL << 20;
|
const uint64 K_K = 1ULL << 20;
|
||||||
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
||||||
const uint64 NEG32 = 1ULL << 23;
|
const uint64 NEG32 = 1ULL << 23;
|
||||||
const uint64 _YMM = 1ULL << 24;
|
const uint64 _YMM = 1ULL << 24;
|
||||||
|
@ -335,8 +335,8 @@ class Test {
|
||||||
return "ax";
|
return "ax";
|
||||||
case AL:
|
case AL:
|
||||||
return "al";
|
return "al";
|
||||||
case CL:
|
case K_K:
|
||||||
return "cl";
|
return isXbyak_ ? "k5 | k3" : "k5{k3}";
|
||||||
case IMM32:
|
case IMM32:
|
||||||
return isXbyak_ ? "12345678" : "dword 12345678";
|
return isXbyak_ ? "12345678" : "dword 12345678";
|
||||||
case IMM8:
|
case IMM8:
|
||||||
|
@ -1589,10 +1589,62 @@ public:
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
void putBlend()
|
||||||
|
{
|
||||||
|
put("vblendmpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
|
||||||
|
put("vblendmpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
|
||||||
|
put("vblendmpd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
|
||||||
|
|
||||||
|
put("vblendmps", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
|
||||||
|
put("vblendmps", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
|
||||||
|
put("vblendmps", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16);
|
||||||
|
|
||||||
|
put("vpblendmb", XMM_KZ, _XMM, _XMM | _MEM);
|
||||||
|
put("vpblendmb", YMM_KZ, _YMM, _YMM | _MEM);
|
||||||
|
put("vpblendmb", ZMM_KZ, _ZMM, _ZMM | _MEM);
|
||||||
|
|
||||||
|
put("vpblendmb", XMM_KZ, _XMM, _XMM | _MEM);
|
||||||
|
put("vpblendmb", YMM_KZ, _YMM, _YMM | _MEM);
|
||||||
|
put("vpblendmb", ZMM_KZ, _ZMM, _ZMM | _MEM);
|
||||||
|
|
||||||
|
put("vpblendmd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
|
||||||
|
put("vpblendmd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
|
||||||
|
put("vpblendmd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16);
|
||||||
|
|
||||||
|
put("vpblendmq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
|
||||||
|
put("vpblendmq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
|
||||||
|
put("vpblendmq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
|
||||||
|
}
|
||||||
|
void putVpcmp()
|
||||||
|
{
|
||||||
|
const uint64_t b0Tbl[] = { 0, 0, 0 };
|
||||||
|
const uint64_t b4Tbl[] = { M_1to4, M_1to8, M_1to16 };
|
||||||
|
const uint64_t b2Tbl[] = { M_1to2, M_1to4, M_1to8 };
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
uint64_t b;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "vpcmpb", 0 },
|
||||||
|
{ "vpcmpub", 0 },
|
||||||
|
{ "vpcmpw", 0 },
|
||||||
|
{ "vpcmpuw", 0 },
|
||||||
|
{ "vpcmpd", M_1to4 },
|
||||||
|
{ "vpcmpud", M_1to4 },
|
||||||
|
{ "vpcmpq", M_1to2 },
|
||||||
|
{ "vpcmpuq", M_1to2 },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl& p = tbl[i];
|
||||||
|
const uint64_t *bTbl = p.b == 0 ? b0Tbl : p.b == M_1to4 ? b4Tbl : b2Tbl;
|
||||||
|
put(p.name, K_K, _XMM, _XMM | _MEM | bTbl[0], IMM8);
|
||||||
|
put(p.name, K_K, _YMM, _YMM | _MEM | bTbl[1], IMM8);
|
||||||
|
put(p.name, K_K, _ZMM, _ZMM | _MEM | bTbl[2], IMM8);
|
||||||
|
}
|
||||||
|
}
|
||||||
void putMin()
|
void putMin()
|
||||||
{
|
{
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
putGather();
|
putVpcmp();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
void putAVX512()
|
void putAVX512()
|
||||||
|
@ -1631,6 +1683,10 @@ public:
|
||||||
putMisc1();
|
putMisc1();
|
||||||
separateFunc();
|
separateFunc();
|
||||||
putGather();
|
putGather();
|
||||||
|
separateFunc();
|
||||||
|
putBlend();
|
||||||
|
separateFunc();
|
||||||
|
putVpcmp();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -76,6 +76,14 @@ void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k
|
||||||
void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x65); }
|
void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x65); }
|
||||||
void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x66); }
|
void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x66); }
|
||||||
void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x37); }
|
void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x37); }
|
||||||
|
void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
|
||||||
|
void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
|
||||||
|
void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
|
||||||
|
void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
|
||||||
|
void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); }
|
||||||
|
void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); }
|
||||||
|
void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); }
|
||||||
|
void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); }
|
||||||
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
|
@ -120,6 +128,12 @@ void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1,
|
||||||
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
||||||
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8D); }
|
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8D); }
|
||||||
void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8D); }
|
void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8D); }
|
||||||
|
void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x65); }
|
||||||
|
void vblendmps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x65); }
|
||||||
|
void vpblendmb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x66); }
|
||||||
|
void vpblendmw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x66); }
|
||||||
|
void vpblendmd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x64); }
|
||||||
|
void vpblendmq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x64); }
|
||||||
void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
|
void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
|
||||||
void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
|
void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
|
||||||
void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
|
void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue