avx2 for mpsadbw, pblendw, pabsb, pabsw, pabsd, pmovsxbw, vmovntdqa

This commit is contained in:
MITSUNARI Shigeo 2013-05-23 19:49:39 +09:00
parent 5e0b37f1d5
commit 4d0e540ea3
3 changed files with 47 additions and 22 deletions

View file

@ -1034,8 +1034,8 @@ void put()
{ 0x0C, "blendps", MM_0F3A | PP_66, true, 0, true, true },
{ 0x41, "dppd", MM_0F3A | PP_66, false, 0, true, true },
{ 0x40, "dpps", MM_0F3A | PP_66, true, 0, true, true },
{ 0x42, "mpsadbw", MM_0F3A | PP_66, false, 0, true, true },
{ 0x0E, "pblendw", MM_0F3A | PP_66, false, 0, true, true },
{ 0x42, "mpsadbw", MM_0F3A | PP_66, true, 0, true, true },
{ 0x0E, "pblendw", MM_0F3A | PP_66, true, 0, true, true },
{ 0x0B, "roundsd", MM_0F3A | PP_66, false, 0, true, true },
{ 0x0A, "roundss", MM_0F3A | PP_66, false, 0, true, true },
{ 0x44, "pclmulqdq", MM_0F3A | PP_66, false, 0, true, true },
@ -1220,12 +1220,12 @@ void put()
{ 0x10, "movupd", MM_0F | PP_66, true, -1, false },
{ 0x10, "movups", MM_0F, true, -1, false },
{ 0x1C, "pabsb", MM_0F38 | PP_66, false, -1, false },
{ 0x1D, "pabsw", MM_0F38 | PP_66, false, -1, false },
{ 0x1E, "pabsd", MM_0F38 | PP_66, false, -1, false },
{ 0x1C, "pabsb", MM_0F38 | PP_66, true, -1, false },
{ 0x1D, "pabsw", MM_0F38 | PP_66, true, -1, false },
{ 0x1E, "pabsd", MM_0F38 | PP_66, true, -1, false },
{ 0x41, "phminposuw", MM_0F38 | PP_66, false, -1, false },
{ 0x20, "pmovsxbw", MM_0F38 | PP_66, false, -1, false },
{ 0x20, "pmovsxbw", MM_0F38 | PP_66, true, -1, false },
{ 0x21, "pmovsxbd", MM_0F38 | PP_66, false, -1, false },
{ 0x22, "pmovsxbq", MM_0F38 | PP_66, false, -1, false },
{ 0x23, "pmovsxwd", MM_0F38 | PP_66, false, -1, false },
@ -1492,7 +1492,7 @@ void put()
printf("void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }\n");
printf("void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }\n");
printf("void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }\n");
printf("void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); }\n");
printf("void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ymm0, addr, MM_0F38 | PP_66, 0x2A, true); }\n");
// vmovsd, vmovss
for (int i = 0; i < 2; i++) {

View file

@ -1413,8 +1413,8 @@ class Test {
{ "vblendps", true },
{ "vdppd", false },
{ "vdpps", true },
{ "vmpsadbw", false },
{ "vpblendw", false },
{ "vmpsadbw", true },
{ "vpblendw", true },
{ "vroundsd", false },
{ "vroundss", false },
{ "vpclmulqdq", false },
@ -1501,9 +1501,9 @@ class Test {
{ "vmovupd", true },
{ "vmovups", true },
{ "vpabsb", false },
{ "vpabsw", false },
{ "vpabsd", false },
{ "vpabsb", true },
{ "vpabsw", true },
{ "vpabsd", true },
{ "vphminposuw", false },
{ "vpmovsxbw", false },
@ -1539,6 +1539,30 @@ class Test {
put(p->name, YMM, YMM | MEM);
}
}
/*
	Emit one test pattern per listed mnemonic, passing YMM as the first
	operand kind and XMM as the second operand kind to put().
*/
void putAVX_Y_XM()
{
	// Only vpmovsxbw is active; the remaining pmovsx/pmovzx variants are
	// compiled out (presumably until their YMM forms are enabled -- TODO confirm).
	const char *mnemonics[] = {
		"vpmovsxbw",
#if 0
		"vpmovsxbd",
		"vpmovsxbq",
		"vpmovsxwd",
		"vpmovsxwq",
		"vpmovsxdq",
		"vpmovzxbw",
		"vpmovzxbd",
		"vpmovzxbq",
		"vpmovzxwd",
		"vpmovzxwq",
		"vpmovzxdq",
#endif
	};
	const size_t count = NUM_OF_ARRAY(mnemonics);
	size_t idx = 0;
	while (idx < count) {
		put(mnemonics[idx], YMM, XMM);
		++idx;
	}
}
void putAVX_M_X()
{
const struct Tbl {
@ -1659,7 +1683,7 @@ class Test {
put("vmovntdq", MEM, XMM | YMM);
put("vmovntpd", MEM, XMM | YMM);
put("vmovntps", MEM, XMM | YMM);
put("vmovntdqa", XMM, MEM);
put("vmovntdqa", XMM | YMM, MEM);
{
const char tbl[][8] = { "vmovsd", "vmovss" };
@ -1918,6 +1942,7 @@ public:
putAVX_X_XM();
putAVX_M_X();
putAVX_X_X_IMM_omit();
putAVX_Y_XM();
putFMA();
#endif
#else // USE_AVX

View file

@ -641,10 +641,10 @@ void vdppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX
void vdppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); }
void vdpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
void vdpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); }
void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); }
void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); }
void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); }
void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); }
void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); }
void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); }
void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); }
void vroundsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
void vroundss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); }
@ -877,11 +877,11 @@ void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F
void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); }
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); }
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); }
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, false, -1); }
void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, false, -1); }
void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, false, -1); }
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, true, -1); }
void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, true, -1); }
void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, true, -1); }
void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x41, false, -1); }
void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, false, -1); }
void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, true, -1); }
void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, false, -1); }
void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, false, -1); }
void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, false, -1); }
@ -1342,7 +1342,7 @@ void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw ERR_BAD_C
void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }
void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }
void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }
void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); }
void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ymm0, addr, MM_0F38 | PP_66, 0x2A, true); }
void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false); }
void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); }
void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); }