avx2 for mpsadbw, pblendw, pabsb, pabsw, pabsd, pmovsxbw, vmovntdqa
This commit is contained in:
parent
5e0b37f1d5
commit
4d0e540ea3
3 changed files with 47 additions and 22 deletions
|
@ -1034,8 +1034,8 @@ void put()
|
||||||
{ 0x0C, "blendps", MM_0F3A | PP_66, true, 0, true, true },
|
{ 0x0C, "blendps", MM_0F3A | PP_66, true, 0, true, true },
|
||||||
{ 0x41, "dppd", MM_0F3A | PP_66, false, 0, true, true },
|
{ 0x41, "dppd", MM_0F3A | PP_66, false, 0, true, true },
|
||||||
{ 0x40, "dpps", MM_0F3A | PP_66, true, 0, true, true },
|
{ 0x40, "dpps", MM_0F3A | PP_66, true, 0, true, true },
|
||||||
{ 0x42, "mpsadbw", MM_0F3A | PP_66, false, 0, true, true },
|
{ 0x42, "mpsadbw", MM_0F3A | PP_66, true, 0, true, true },
|
||||||
{ 0x0E, "pblendw", MM_0F3A | PP_66, false, 0, true, true },
|
{ 0x0E, "pblendw", MM_0F3A | PP_66, true, 0, true, true },
|
||||||
{ 0x0B, "roundsd", MM_0F3A | PP_66, false, 0, true, true },
|
{ 0x0B, "roundsd", MM_0F3A | PP_66, false, 0, true, true },
|
||||||
{ 0x0A, "roundss", MM_0F3A | PP_66, false, 0, true, true },
|
{ 0x0A, "roundss", MM_0F3A | PP_66, false, 0, true, true },
|
||||||
{ 0x44, "pclmulqdq", MM_0F3A | PP_66, false, 0, true, true },
|
{ 0x44, "pclmulqdq", MM_0F3A | PP_66, false, 0, true, true },
|
||||||
|
@ -1220,12 +1220,12 @@ void put()
|
||||||
{ 0x10, "movupd", MM_0F | PP_66, true, -1, false },
|
{ 0x10, "movupd", MM_0F | PP_66, true, -1, false },
|
||||||
{ 0x10, "movups", MM_0F, true, -1, false },
|
{ 0x10, "movups", MM_0F, true, -1, false },
|
||||||
|
|
||||||
{ 0x1C, "pabsb", MM_0F38 | PP_66, false, -1, false },
|
{ 0x1C, "pabsb", MM_0F38 | PP_66, true, -1, false },
|
||||||
{ 0x1D, "pabsw", MM_0F38 | PP_66, false, -1, false },
|
{ 0x1D, "pabsw", MM_0F38 | PP_66, true, -1, false },
|
||||||
{ 0x1E, "pabsd", MM_0F38 | PP_66, false, -1, false },
|
{ 0x1E, "pabsd", MM_0F38 | PP_66, true, -1, false },
|
||||||
{ 0x41, "phminposuw", MM_0F38 | PP_66, false, -1, false },
|
{ 0x41, "phminposuw", MM_0F38 | PP_66, false, -1, false },
|
||||||
|
|
||||||
{ 0x20, "pmovsxbw", MM_0F38 | PP_66, false, -1, false },
|
{ 0x20, "pmovsxbw", MM_0F38 | PP_66, true, -1, false },
|
||||||
{ 0x21, "pmovsxbd", MM_0F38 | PP_66, false, -1, false },
|
{ 0x21, "pmovsxbd", MM_0F38 | PP_66, false, -1, false },
|
||||||
{ 0x22, "pmovsxbq", MM_0F38 | PP_66, false, -1, false },
|
{ 0x22, "pmovsxbq", MM_0F38 | PP_66, false, -1, false },
|
||||||
{ 0x23, "pmovsxwd", MM_0F38 | PP_66, false, -1, false },
|
{ 0x23, "pmovsxwd", MM_0F38 | PP_66, false, -1, false },
|
||||||
|
@ -1492,7 +1492,7 @@ void put()
|
||||||
printf("void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }\n");
|
printf("void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }\n");
|
||||||
printf("void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }\n");
|
printf("void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }\n");
|
||||||
printf("void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }\n");
|
printf("void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }\n");
|
||||||
printf("void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); }\n");
|
printf("void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ymm0, addr, MM_0F38 | PP_66, 0x2A, true); }\n");
|
||||||
|
|
||||||
// vmovsd, vmovss
|
// vmovsd, vmovss
|
||||||
for (int i = 0; i < 2; i++) {
|
for (int i = 0; i < 2; i++) {
|
||||||
|
|
|
@ -1413,8 +1413,8 @@ class Test {
|
||||||
{ "vblendps", true },
|
{ "vblendps", true },
|
||||||
{ "vdppd", false },
|
{ "vdppd", false },
|
||||||
{ "vdpps", true },
|
{ "vdpps", true },
|
||||||
{ "vmpsadbw", false },
|
{ "vmpsadbw", true },
|
||||||
{ "vpblendw", false },
|
{ "vpblendw", true },
|
||||||
{ "vroundsd", false },
|
{ "vroundsd", false },
|
||||||
{ "vroundss", false },
|
{ "vroundss", false },
|
||||||
{ "vpclmulqdq", false },
|
{ "vpclmulqdq", false },
|
||||||
|
@ -1501,9 +1501,9 @@ class Test {
|
||||||
{ "vmovupd", true },
|
{ "vmovupd", true },
|
||||||
{ "vmovups", true },
|
{ "vmovups", true },
|
||||||
|
|
||||||
{ "vpabsb", false },
|
{ "vpabsb", true },
|
||||||
{ "vpabsw", false },
|
{ "vpabsw", true },
|
||||||
{ "vpabsd", false },
|
{ "vpabsd", true },
|
||||||
{ "vphminposuw", false },
|
{ "vphminposuw", false },
|
||||||
|
|
||||||
{ "vpmovsxbw", false },
|
{ "vpmovsxbw", false },
|
||||||
|
@ -1539,6 +1539,30 @@ class Test {
|
||||||
put(p->name, YMM, YMM | MEM);
|
put(p->name, YMM, YMM | MEM);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void putAVX_Y_XM()
|
||||||
|
{
|
||||||
|
const char *tbl[] = {
|
||||||
|
"vpmovsxbw",
|
||||||
|
#if 0
|
||||||
|
"vpmovsxbd",
|
||||||
|
"vpmovsxbq",
|
||||||
|
"vpmovsxwd",
|
||||||
|
"vpmovsxwq",
|
||||||
|
"vpmovsxdq",
|
||||||
|
|
||||||
|
"vpmovzxbw",
|
||||||
|
"vpmovzxbd",
|
||||||
|
"vpmovzxbq",
|
||||||
|
"vpmovzxwd",
|
||||||
|
"vpmovzxwq",
|
||||||
|
"vpmovzxdq",
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const char *name = tbl[i];
|
||||||
|
put(name, YMM, XMM);
|
||||||
|
}
|
||||||
|
}
|
||||||
void putAVX_M_X()
|
void putAVX_M_X()
|
||||||
{
|
{
|
||||||
const struct Tbl {
|
const struct Tbl {
|
||||||
|
@ -1659,7 +1683,7 @@ class Test {
|
||||||
put("vmovntdq", MEM, XMM | YMM);
|
put("vmovntdq", MEM, XMM | YMM);
|
||||||
put("vmovntpd", MEM, XMM | YMM);
|
put("vmovntpd", MEM, XMM | YMM);
|
||||||
put("vmovntps", MEM, XMM | YMM);
|
put("vmovntps", MEM, XMM | YMM);
|
||||||
put("vmovntdqa", XMM, MEM);
|
put("vmovntdqa", XMM | YMM, MEM);
|
||||||
|
|
||||||
{
|
{
|
||||||
const char tbl[][8] = { "vmovsd", "vmovss" };
|
const char tbl[][8] = { "vmovsd", "vmovss" };
|
||||||
|
@ -1918,6 +1942,7 @@ public:
|
||||||
putAVX_X_XM();
|
putAVX_X_XM();
|
||||||
putAVX_M_X();
|
putAVX_M_X();
|
||||||
putAVX_X_X_IMM_omit();
|
putAVX_X_X_IMM_omit();
|
||||||
|
putAVX_Y_XM();
|
||||||
putFMA();
|
putFMA();
|
||||||
#endif
|
#endif
|
||||||
#else // USE_AVX
|
#else // USE_AVX
|
||||||
|
|
|
@ -641,10 +641,10 @@ void vdppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX
|
||||||
void vdppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); }
|
void vdppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); }
|
||||||
void vdpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
|
void vdpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
|
||||||
void vdpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
|
void vdpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
|
||||||
void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); }
|
void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); }
|
||||||
void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); }
|
void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); }
|
||||||
void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); }
|
void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); }
|
||||||
void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); }
|
void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); }
|
||||||
void vroundsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
|
void vroundsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
|
||||||
void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
|
void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
|
||||||
void vroundss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); }
|
void vroundss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); }
|
||||||
|
@ -877,11 +877,11 @@ void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F
|
||||||
void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); }
|
void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); }
|
||||||
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); }
|
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); }
|
||||||
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); }
|
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); }
|
||||||
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, false, -1); }
|
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, true, -1); }
|
||||||
void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, false, -1); }
|
void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, true, -1); }
|
||||||
void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, false, -1); }
|
void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, true, -1); }
|
||||||
void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x41, false, -1); }
|
void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x41, false, -1); }
|
||||||
void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, false, -1); }
|
void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, true, -1); }
|
||||||
void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, false, -1); }
|
void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, false, -1); }
|
||||||
void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, false, -1); }
|
void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, false, -1); }
|
||||||
void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, false, -1); }
|
void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, false, -1); }
|
||||||
|
@ -1342,7 +1342,7 @@ void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw ERR_BAD_C
|
||||||
void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }
|
void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }
|
||||||
void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }
|
void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }
|
||||||
void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }
|
void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }
|
||||||
void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); }
|
void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ymm0, addr, MM_0F38 | PP_66, 0x2A, true); }
|
||||||
void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false); }
|
void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false); }
|
||||||
void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); }
|
void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); }
|
||||||
void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); }
|
void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue