add tests of avx-vnni-int{8,16}
This commit is contained in:
parent
4a6132d660
commit
dd66cfb764
3 changed files with 78 additions and 0 deletions
|
@ -1817,6 +1817,7 @@ void put()
|
|||
}
|
||||
}
|
||||
// avx-vnni-int8
|
||||
// avx-vnni-int16
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
|
@ -1829,6 +1830,13 @@ void put()
|
|||
{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
|
||||
{ 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
|
||||
|
||||
{ 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
|
|
@ -2217,4 +2217,68 @@ CYBOZU_TEST_AUTO(crypto)
|
|||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
/*
	Encoding test for the avx-vnni-int8 (vpdpb*) and avx-vnni-int16 (vpdpw*)
	mnemonics.

	Each mnemonic is emitted twice, first as (xmm1, xmm2, xmm3) and then as
	(ymm1, ymm2, ptr [rax]). The generated code is compared with a fixed byte
	table: the total size must match and so must every byte.
*/
CYBOZU_TEST_AUTO(avx_vnni_int)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			// vpdpb* group; the emission order must match the rows of tbl below
			vpdpbssd(xmm1, xmm2, xmm3);
			vpdpbssd(ymm1, ymm2, ptr [rax]);
			vpdpbssds(xmm1, xmm2, xmm3);
			vpdpbssds(ymm1, ymm2, ptr [rax]);

			vpdpbsud(xmm1, xmm2, xmm3);
			vpdpbsud(ymm1, ymm2, ptr [rax]);
			vpdpbsuds(xmm1, xmm2, xmm3);
			vpdpbsuds(ymm1, ymm2, ptr [rax]);

			vpdpbuud(xmm1, xmm2, xmm3);
			vpdpbuud(ymm1, ymm2, ptr [rax]);
			vpdpbuuds(xmm1, xmm2, xmm3);
			vpdpbuuds(ymm1, ymm2, ptr [rax]);

			// vpdpw* group
			vpdpwsud(xmm1, xmm2, xmm3);
			vpdpwsud(ymm1, ymm2, ptr [rax]);
			vpdpwsuds(xmm1, xmm2, xmm3);
			vpdpwsuds(ymm1, ymm2, ptr [rax]);

			vpdpwusd(xmm1, xmm2, xmm3);
			vpdpwusd(ymm1, ymm2, ptr [rax]);
			vpdpwusds(xmm1, xmm2, xmm3);
			vpdpwusds(ymm1, ymm2, ptr [rax]);

			vpdpwuud(xmm1, xmm2, xmm3);
			vpdpwuud(ymm1, ymm2, ptr [rax]);
			vpdpwuuds(xmm1, xmm2, xmm3);
			vpdpwuuds(ymm1, ymm2, ptr [rax]);
		}
	} c;

	// Expected output: one five-byte row per emitted instruction, in emission order.
	const uint8_t tbl[] = {
		0xc4, 0xe2, 0x6b, 0x50, 0xcb, // vpdpbssd  xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6f, 0x50, 0x08, // vpdpbssd  ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x6b, 0x51, 0xcb, // vpdpbssds xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6f, 0x51, 0x08, // vpdpbssds ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x6a, 0x50, 0xcb, // vpdpbsud  xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6e, 0x50, 0x08, // vpdpbsud  ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x6a, 0x51, 0xcb, // vpdpbsuds xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6e, 0x51, 0x08, // vpdpbsuds ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x68, 0x50, 0xcb, // vpdpbuud  xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6c, 0x50, 0x08, // vpdpbuud  ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x68, 0x51, 0xcb, // vpdpbuuds xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6c, 0x51, 0x08, // vpdpbuuds ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x6a, 0xd2, 0xcb, // vpdpwsud  xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6e, 0xd2, 0x08, // vpdpwsud  ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x6a, 0xd3, 0xcb, // vpdpwsuds xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6e, 0xd3, 0x08, // vpdpwsuds ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x69, 0xd2, 0xcb, // vpdpwusd  xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6d, 0xd2, 0x08, // vpdpwusd  ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x69, 0xd3, 0xcb, // vpdpwusds xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6d, 0xd3, 0x08, // vpdpwusds ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x68, 0xd2, 0xcb, // vpdpwuud  xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6c, 0xd2, 0x08, // vpdpwuud  ymm1, ymm2, [rax]
		0xc4, 0xe2, 0x68, 0xd3, 0xcb, // vpdpwuuds xmm1, xmm2, xmm3
		0xc4, 0xe2, 0x6c, 0xd3, 0x08, // vpdpwuuds ymm1, ymm2, [rax]
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);

	// Compare the length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1219,6 +1219,12 @@ void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
|
|||
// vpdpbuuds x1, x2, op: forwards to opAVX_X_X_XM with no prefix flag
// (T_0F38 | T_W0 | T_YMM) and code byte 0x51.
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x51);
}
|
||||
// vpdpwssd x1, x2, op: forwards to opEncoding with code byte 0x52.
// `encoding` is passed through unchanged; it is DefaultEncoding when omitted.
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding)
{
	opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding);
}
|
||||
// vpdpwssds x1, x2, op: forwards to opEncoding with code byte 0x53.
// `encoding` is passed through unchanged; it is DefaultEncoding when omitted.
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding)
{
	opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding);
}
|
||||
// vpdpwsud x1, x2, op: forwards to opAVX_X_X_XM with the T_F3 prefix flag
// (T_F3 | T_0F38 | T_W0 | T_YMM) and code byte 0xD2.
void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0xD2);
}
|
||||
// vpdpwsuds x1, x2, op: forwards to opAVX_X_X_XM with the T_F3 prefix flag
// (T_F3 | T_0F38 | T_W0 | T_YMM) and code byte 0xD3.
void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0xD3);
}
|
||||
// vpdpwusd x1, x2, op: forwards to opAVX_X_X_XM with the T_66 prefix flag
// (T_66 | T_0F38 | T_W0 | T_YMM) and code byte 0xD2.
void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_YMM, 0xD2);
}
|
||||
// vpdpwusds x1, x2, op: forwards to opAVX_X_X_XM with the T_66 prefix flag
// (T_66 | T_0F38 | T_W0 | T_YMM) and code byte 0xD3.
void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_YMM, 0xD3);
}
|
||||
// vpdpwuud x1, x2, op: forwards to opAVX_X_X_XM with no prefix flag
// (T_0F38 | T_W0 | T_YMM) and code byte 0xD2.
void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0xD2);
}
|
||||
// vpdpwuuds x1, x2, op: forwards to opAVX_X_X_XM with no prefix flag
// (T_0F38 | T_W0 | T_YMM) and code byte 0xD3.
void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0xD3);
}
|
||||
// vperm2f128 y1, y2, op, imm: raises ERR_BAD_COMBINATION through XBYAK_THROW
// unless y1.isYMM(), y2.isYMM() and op.isYMEM() all hold; otherwise forwards
// to opVex with code byte 0x06 and the immediate.
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm)
{
	// XBYAK_THROW is used exactly as in the one-line form (no trailing semicolon).
	if (!y1.isYMM() || !y2.isYMM() || !op.isYMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
	opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm);
}
|
||||
// vperm2i128 y1, y2, op, imm: raises ERR_BAD_COMBINATION through XBYAK_THROW
// unless y1.isYMM(), y2.isYMM() and op.isYMEM() all hold; otherwise forwards
// to opVex with code byte 0x46 and the immediate.
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm)
{
	// XBYAK_THROW is used exactly as in the one-line form (no trailing semicolon).
	if (!y1.isYMM() || !y2.isYMM() || !op.isYMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
	opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm);
}
|
||||
// vpermd y1, y2, op: forwards to opAVX_X_X_XM with type flags
// T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32 and code byte 0x36.
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op)
{
	opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36);
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue