diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 3adc032..9124388 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -564,6 +564,7 @@ void putCvt() { 0x5B, "vcvtdq2ph", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, { 0x1D, "vcvtps2phx", T_66 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, + { 0x7A, "vcvtudq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; diff --git a/test/misc.cpp b/test/misc.cpp index 87861b2..8224ed3 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -1187,6 +1187,15 @@ CYBOZU_TEST_AUTO(vaddph) vcvtps2phx(ymm1|k2|T_z|T_rd_sae, zmm5); vcvtps2phx(ymm1, ptr [rax+0x40]); vcvtps2phx(ymm1, ptr_b [rax+0x40]); + + vcvtudq2ph(xmm1, xmm5); + vcvtudq2ph(xmm1, xword [rax+0x40]); + vcvtudq2ph(xmm1, xword_b [rax+0x40]); + vcvtudq2ph(xmm1, yword [rax+0x40]); + vcvtudq2ph(xmm1, yword_b [rax+0x40]); + vcvtudq2ph(ymm1|k2|T_z|T_rd_sae, zmm5); + vcvtudq2ph(ymm1, ptr [rax+0x40]); + vcvtudq2ph(ymm1, ptr_b [rax+0x40]); } } c; const uint8_t tbl[] = { @@ -1562,6 +1571,16 @@ CYBOZU_TEST_AUTO(vaddph) 0x62, 0xf5, 0x7d, 0xba, 0x1d, 0xcd, 0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x48, 0x01, 0x62, 0xf5, 0x7d, 0x58, 0x1d, 0x48, 0x10, + + // vcvtudq2ph + 0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xcd, + 0x62, 0xf5, 0x7f, 0x08, 0x7a, 0x48, 0x04, + 0x62, 0xf5, 0x7f, 0x18, 0x7a, 0x48, 0x10, + 0x62, 0xf5, 0x7f, 0x28, 0x7a, 0x48, 0x02, + 0x62, 0xf5, 0x7f, 0x38, 0x7a, 0x48, 0x10, + 0x62, 0xf5, 0x7f, 0xba, 0x7a, 0xcd, + 0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x48, 0x01, + 0x62, 0xf5, 0x7f, 0x58, 0x7a, 0x48, 0x10, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index b1bf96e..40383d8 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1933,6 +1933,7 @@ void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); 
opVex(x, 0 // NOTE(review): tail of the vcvttps2uqq definition that opens on the previous line; the rest of this call is missing from the text here — confirm against xbyak_mnemonic.h.
// vcvttsd2usi: builds the encoding flags (T_N8 | T_F2 | T_0F | T_SAE_X | T_MUST_EVEX), adds T_EW1 when r is a 64-bit register and T_EW0 otherwise, then calls opVex(r, &xm0, op, type, 0x78).
void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_N8 | T_F2 | T_0F | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
// vcvttss2usi: same shape as vcvttsd2usi, but with T_N4 and T_F3 in place of T_N8 and T_F2; the T_EW1/T_EW0 choice again follows r.isREG(64).
void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_N4 | T_F3 | T_0F | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
// vcvtudq2pd: runs checkCvt1(x, op) first (helper defined elsewhere; presumably validates the operand pair — verify), then calls opVex with a null second operand, 32-bit broadcast flag T_B32 and code 0x7A.
void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_F3 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7A); }
// vcvtudq2ph: the method added by this patch (the leading '+' is the diff marker). Runs checkCvt4(x, op), then opCvt with code 0x7A. The flag set is the same one given for "vcvtudq2ph" in the generator table hunk of this patch (T_F2 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL).
+void vcvtudq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_F2 | T_MAP5 | T_EW0 | T_ER_Z | T_MUST_EVEX | T_B32, 0x7A); }
// vcvtudq2ps: forwards straight to opAVX_X_XM_IMM with T_F2 | T_0F, T_EW0, T_B32 and code 0x7A; no separate operand check is made in this method.
void vcvtudq2ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x7A); }
// vcvtuqq2pd: forwards to opAVX_X_XM_IMM like vcvtudq2ps, but with T_F3, T_EW1 and the 64-bit broadcast flag T_B64.
void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
// vcvtuqq2ps: forwards to opCvt2 (defined elsewhere) with T_F2 | T_0F, T_EW1, T_B64 and code 0x7A.
void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }