Review notes (free text ahead of the first "diff --git" header; not part of any hunk).

What this patch changes, as far as the hunks below show:
  * gen/gen_code.cpp and xbyak/xbyak_mnemonic.h: the 4th field / 6th argument flips
    from false to true for mpsadbw, pblendw, pabsb, pabsw, pabsd and pmovsxbw.
    NOTE(review): presumably this is the "YMM form allowed" flag -- the table struct
    and opAVX_X_XM_IMM / opAVX_X_X_XM are not visible here, confirm before merging.
  * vmovntdqa: the second register argument now depends on x.isXMM() and the last
    argument becomes true. The printf in gen/gen_code.cpp emits the same text as the
    vmovntdqa line in xbyak/xbyak_mnemonic.h, so both lines are edited in step.
    The YMM case is spelled ym0 here so it reads like the vmovntdq / vmovntpd /
    vmovntps lines directly above it, which already use "xm0 : ym0".
  * test/make_nm.cpp: vmovntdqa is exercised with XMM | YMM, the vmpsadbw / vpblendw /
    vpabs* rows flip to true, and a new putAVX_Y_XM() calls put(name, YMM, XMM) for
    "vpmovsxbw". The remaining vpmovsx* / vpmovzx* names in that table are compiled
    out with #if 0.

diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index cc4e005..e8abcc5 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1034,8 +1034,8 @@ void put()
 			{ 0x0C, "blendps", MM_0F3A | PP_66, true, 0, true, true },
 			{ 0x41, "dppd", MM_0F3A | PP_66, false, 0, true, true },
 			{ 0x40, "dpps", MM_0F3A | PP_66, true, 0, true, true },
-			{ 0x42, "mpsadbw", MM_0F3A | PP_66, false, 0, true, true },
-			{ 0x0E, "pblendw", MM_0F3A | PP_66, false, 0, true, true },
+			{ 0x42, "mpsadbw", MM_0F3A | PP_66, true, 0, true, true },
+			{ 0x0E, "pblendw", MM_0F3A | PP_66, true, 0, true, true },
 			{ 0x0B, "roundsd", MM_0F3A | PP_66, false, 0, true, true },
 			{ 0x0A, "roundss", MM_0F3A | PP_66, false, 0, true, true },
 			{ 0x44, "pclmulqdq", MM_0F3A | PP_66, false, 0, true, true },
@@ -1220,12 +1220,12 @@ void put()
 			{ 0x10, "movupd", MM_0F | PP_66, true, -1, false },
 			{ 0x10, "movups", MM_0F, true, -1, false },
 
-			{ 0x1C, "pabsb", MM_0F38 | PP_66, false, -1, false },
-			{ 0x1D, "pabsw", MM_0F38 | PP_66, false, -1, false },
-			{ 0x1E, "pabsd", MM_0F38 | PP_66, false, -1, false },
+			{ 0x1C, "pabsb", MM_0F38 | PP_66, true, -1, false },
+			{ 0x1D, "pabsw", MM_0F38 | PP_66, true, -1, false },
+			{ 0x1E, "pabsd", MM_0F38 | PP_66, true, -1, false },
 			{ 0x41, "phminposuw", MM_0F38 | PP_66, false, -1, false },
 
-			{ 0x20, "pmovsxbw", MM_0F38 | PP_66, false, -1, false },
+			{ 0x20, "pmovsxbw", MM_0F38 | PP_66, true, -1, false },
 			{ 0x21, "pmovsxbd", MM_0F38 | PP_66, false, -1, false },
 			{ 0x22, "pmovsxbq", MM_0F38 | PP_66, false, -1, false },
 			{ 0x23, "pmovsxwd", MM_0F38 | PP_66, false, -1, false },
@@ -1492,7 +1492,7 @@ void put()
 	printf("void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }\n");
 	printf("void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }\n");
 	printf("void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }\n");
-	printf("void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); }\n");
+	printf("void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F38 | PP_66, 0x2A, true); }\n");
 
 	// vmovsd, vmovss
 	for (int i = 0; i < 2; i++) {
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index feaf901..30b8374 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -1413,8 +1413,8 @@ class Test {
 			{ "vblendps", true },
 			{ "vdppd", false },
 			{ "vdpps", true },
-			{ "vmpsadbw", false },
-			{ "vpblendw", false },
+			{ "vmpsadbw", true },
+			{ "vpblendw", true },
 			{ "vroundsd", false },
 			{ "vroundss", false },
 			{ "vpclmulqdq", false },
@@ -1501,9 +1501,9 @@ class Test {
 			{ "vmovupd", true },
 			{ "vmovups", true },
 
-			{ "vpabsb", false },
-			{ "vpabsw", false },
-			{ "vpabsd", false },
+			{ "vpabsb", true },
+			{ "vpabsw", true },
+			{ "vpabsd", true },
 			{ "vphminposuw", false },
 
 			{ "vpmovsxbw", false },
@@ -1539,6 +1539,30 @@ class Test {
 				put(p->name, YMM, YMM | MEM);
 			}
 		}
+	void putAVX_Y_XM()
+	{
+		const char *tbl[] = {
+			"vpmovsxbw",
+#if 0
+			"vpmovsxbd",
+			"vpmovsxbq",
+			"vpmovsxwd",
+			"vpmovsxwq",
+			"vpmovsxdq",
+
+			"vpmovzxbw",
+			"vpmovzxbd",
+			"vpmovzxbq",
+			"vpmovzxwd",
+			"vpmovzxwq",
+			"vpmovzxdq",
+#endif
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+			const char *name = tbl[i];
+			put(name, YMM, XMM);
+		}
+	}
 	void putAVX_M_X()
 	{
 		const struct Tbl {
@@ -1659,7 +1683,7 @@ class Test {
 		put("vmovntdq", MEM, XMM | YMM);
 		put("vmovntpd", MEM, XMM | YMM);
 		put("vmovntps", MEM, XMM | YMM);
-		put("vmovntdqa", XMM, MEM);
+		put("vmovntdqa", XMM | YMM, MEM);
 
 		{
 			const char tbl[][8] = { "vmovsd", "vmovss" };
@@ -1918,6 +1942,7 @@ public:
 		putAVX_X_XM();
 		putAVX_M_X();
 		putAVX_X_X_IMM_omit();
+		putAVX_Y_XM();
 		putFMA();
 #endif
 #else // USE_AVX
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 2898742..6226d47 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -641,10 +641,10 @@ void vdppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX
 void vdppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); }
 void vdpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
 void vdpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); }
-void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); }
-void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); }
-void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); }
-void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); }
+void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); }
+void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); }
+void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); }
+void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); }
 void vroundsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
 void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); }
 void vroundss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); }
@@ -877,11 +877,11 @@ void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F
 void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); }
 void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); }
 void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); }
-void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, false, -1); }
-void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, false, -1); }
-void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, false, -1); }
+void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, true, -1); }
+void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, true, -1); }
+void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, true, -1); }
 void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x41, false, -1); }
-void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, false, -1); }
+void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, true, -1); }
 void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, false, -1); }
 void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, false, -1); }
 void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, false, -1); }
@@ -1342,7 +1342,7 @@ void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw ERR_BAD_C
 void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); }
 void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); }
 void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); }
-void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); }
+void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F38 | PP_66, 0x2A, true); }
 void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false); }
 void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); }
 void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); }