add tests of avx-vnni-int{8,16}
This commit is contained in:
parent
4a6132d660
commit
dd66cfb764
3 changed files with 78 additions and 0 deletions
|
@ -1817,6 +1817,7 @@ void put()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// avx-vnni-int8
|
// avx-vnni-int8
|
||||||
|
// avx-vnni-int16
|
||||||
{
|
{
|
||||||
const struct Tbl {
|
const struct Tbl {
|
||||||
uint8_t code;
|
uint8_t code;
|
||||||
|
@ -1829,6 +1830,13 @@ void put()
|
||||||
{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||||
{ 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
|
{ 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
|
||||||
{ 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
|
{ 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
|
||||||
|
|
||||||
|
{ 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
|
|
@ -2217,4 +2217,68 @@ CYBOZU_TEST_AUTO(crypto)
|
||||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(avx_vnni_int)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
vpdpbssd(xmm1, xmm2, xmm3);
|
||||||
|
vpdpbssd(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpbssds(xmm1, xmm2, xmm3);
|
||||||
|
vpdpbssds(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpbsud(xmm1, xmm2, xmm3);
|
||||||
|
vpdpbsud(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpbsuds(xmm1, xmm2, xmm3);
|
||||||
|
vpdpbsuds(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpbuud(xmm1, xmm2, xmm3);
|
||||||
|
vpdpbuud(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpbuuds(xmm1, xmm2, xmm3);
|
||||||
|
vpdpbuuds(ymm1, ymm2, ptr [rax]);
|
||||||
|
|
||||||
|
vpdpwsud(xmm1, xmm2, xmm3);
|
||||||
|
vpdpwsud(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpwsuds(xmm1, xmm2, xmm3);
|
||||||
|
vpdpwsuds(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpwusd(xmm1, xmm2, xmm3);
|
||||||
|
vpdpwusd(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpwusds(xmm1, xmm2, xmm3);
|
||||||
|
vpdpwusds(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpwuud(xmm1, xmm2, xmm3);
|
||||||
|
vpdpwuud(ymm1, ymm2, ptr [rax]);
|
||||||
|
vpdpwuuds(xmm1, xmm2, xmm3);
|
||||||
|
vpdpwuuds(ymm1, ymm2, ptr [rax]);
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
0xc4, 0xe2, 0x6b, 0x50, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6f, 0x50, 0x08,
|
||||||
|
0xc4, 0xe2, 0x6b, 0x51, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6f, 0x51, 0x08,
|
||||||
|
0xc4, 0xe2, 0x6a, 0x50, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6e, 0x50, 0x08,
|
||||||
|
0xc4, 0xe2, 0x6a, 0x51, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6e, 0x51, 0x08,
|
||||||
|
0xc4, 0xe2, 0x68, 0x50, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6c, 0x50, 0x08,
|
||||||
|
0xc4, 0xe2, 0x68, 0x51, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6c, 0x51, 0x08,
|
||||||
|
0xc4, 0xe2, 0x6a, 0xd2, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6e, 0xd2, 0x08,
|
||||||
|
0xc4, 0xe2, 0x6a, 0xd3, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6e, 0xd3, 0x08,
|
||||||
|
0xc4, 0xe2, 0x69, 0xd2, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6d, 0xd2, 0x08,
|
||||||
|
0xc4, 0xe2, 0x69, 0xd3, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6d, 0xd3, 0x08,
|
||||||
|
0xc4, 0xe2, 0x68, 0xd2, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6c, 0xd2, 0x08,
|
||||||
|
0xc4, 0xe2, 0x68, 0xd3, 0xcb,
|
||||||
|
0xc4, 0xe2, 0x6c, 0xd3, 0x08,
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1219,6 +1219,12 @@ void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
|
||||||
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x51); }
|
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||||
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
|
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
|
||||||
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
|
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
|
||||||
|
void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0xD2); }
|
||||||
|
void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0xD3); }
|
||||||
|
void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_YMM, 0xD2); }
|
||||||
|
void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_YMM, 0xD3); }
|
||||||
|
void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0xD2); }
|
||||||
|
void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0xD3); }
|
||||||
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
||||||
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
||||||
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
|
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue