diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 8929b7a..51446ae 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -772,10 +772,9 @@ void putFP16_FMA() bool isPH; } tbl[] = { { 0x06, "vfmaddsub", true }, + { 0x07, "vfmsubadd", true }, /* { 0x, "vfmadd", false }, - { 0x06, "vfmaddsub", true }, - { 0x07, "vfmsubadd", true }, { 0x0A, "vfmsub", true }, { 0x0B, "vfmsub", false }, { 0x0C, "vfnmadd", true }, diff --git a/test/misc.cpp b/test/misc.cpp index db21de1..27ecfef 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -904,6 +904,14 @@ CYBOZU_TEST_AUTO(vaddph) vfmaddsub213ph(zmm1, zmm2, ptr [rax+0x40]); vfmaddsub213ph(zmm1, zmm2, ptr_b [rax+0x40]); vfmaddsub213ph(zmm1|T_ru_sae, zmm2, zmm5); + + vfmsubadd132ph(xmm1, xmm2, ptr [rax+0x40]); + vfmsubadd132ph(xmm1, xmm2, ptr_b [rax+0x40]); + vfmsubadd132ph(ymm1, ymm2, ptr [rax+0x40]); + vfmsubadd132ph(ymm1, ymm2, ptr_b [rax+0x40]); + vfmsubadd132ph(zmm1, zmm2, ptr [rax+0x40]); + vfmsubadd132ph(zmm1, zmm2, ptr_b [rax+0x40]); + vfmsubadd132ph(zmm1|T_ru_sae, zmm2, zmm5); } } c; const uint8_t tbl[] = { @@ -946,6 +954,15 @@ CYBOZU_TEST_AUTO(vaddph) 0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x48, 0x01, 0x62, 0xf6, 0x6d, 0x58, 0xa6, 0x48, 0x20, 0x62, 0xf6, 0x6d, 0x58, 0xa6, 0xcd, + + // vfmsubadd132ph + 0x62, 0xf6, 0x6d, 0x08, 0x97, 0x48, 0x04, + 0x62, 0xf6, 0x6d, 0x18, 0x97, 0x48, 0x20, + 0x62, 0xf6, 0x6d, 0x28, 0x97, 0x48, 0x02, + 0x62, 0xf6, 0x6d, 0x38, 0x97, 0x48, 0x20, + 0x62, 0xf6, 0x6d, 0x48, 0x97, 0x48, 0x01, + 0x62, 0xf6, 0x6d, 0x58, 0x97, 0x48, 0x20, + 0x62, 0xf6, 0x6d, 0x58, 0x97, 0xcd, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index ceac1a1..7b45a56 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1945,6 +1945,9 @@ void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x96); } void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA6); } void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB6); } +void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x97); } +void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA7); } +void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB7); } void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); } void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); } void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }