diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index d25c9ec..c402352 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -815,6 +815,7 @@ void putFP16_FMA2() bool isPH; } tbl[] = { { 0x56, "maddc", true }, + { 0xD6, "mulc", true }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { for (int j = 0; j < 2; j++) { diff --git a/test/misc.cpp b/test/misc.cpp index d60bf17..eeca2da 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -956,6 +956,14 @@ CYBOZU_TEST_AUTO(vaddph) vfmaddcph(xm1, xm2, ptr[rax+0x40]); vfmaddcph(ym1|k1|T_z, ym2, ptr_b[rax+0x40]); vfmaddcph(zm1, zm2, ptr_b[rax+0x40]); + + vfcmulcph(xmm1, xmm2, ptr [rax+0x40]); + vfcmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]); + vfcmulcph(zmm1, zmm2, ptr_b [rax+0x40]); + + vfmulcph(xmm1, xmm2, ptr [rax+0x40]); + vfmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]); + vfmulcph(zmm1, zmm2, ptr_b [rax+0x40]); } } c; const uint8_t tbl[] = { @@ -1061,6 +1069,16 @@ CYBOZU_TEST_AUTO(vaddph) 0x62, 0xf6, 0x6e, 0x08, 0x56, 0x48, 0x04, 0x62, 0xf6, 0x6e, 0xb9, 0x56, 0x48, 0x10, 0x62, 0xf6, 0x6e, 0x58, 0x56, 0x48, 0x10, + + // vfcmulcph + 0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x48, 0x04, + 0x62, 0xf6, 0x6f, 0xb9, 0xd6, 0x48, 0x10, + 0x62, 0xf6, 0x6f, 0x58, 0xd6, 0x48, 0x10, + + // vfmulcph + 0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x48, 0x04, + 0x62, 0xf6, 0x6e, 0xb9, 0xd6, 0x48, 0x10, + 0x62, 0xf6, 0x6e, 0x58, 0xd6, 0x48, 0x10, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 28084f1..0eec8f4 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1939,6 +1939,7 @@ void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Op void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); } void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); } void vfcmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x56); } +void vfcmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0xD6); } void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); } void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); } void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); } @@ -1962,6 +1963,7 @@ void vfmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x97); } void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA7); } void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB7); } +void vfmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0xD6); } void vfnmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9C); } void vfnmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9D); } void vfnmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAC); }