vpclmulqdq supports AVX-512

This commit is contained in:
MITSUNARI Shigeo 2018-01-04 11:53:38 +09:00
parent 9e16b40b06
commit 5a402477f8
3 changed files with 29 additions and 2 deletions

View file

@ -60,7 +60,7 @@ void putX_X_XM(bool omitOnly)
{ 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 }, { 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 },
{ 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 }, { 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 },
{ 0x0A, "roundss", T_0F3A | T_66 | T_W0, true, true, 3 }, { 0x0A, "roundss", T_0F3A | T_66 | T_W0, true, true, 3 },
{ 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0, true, true, 3 }, { 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0 | T_YMM | T_EVEX, true, true, 3 },
{ 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 }, { 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
{ 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 }, { 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },

View file

@ -173,4 +173,31 @@ CYBOZU_TEST_AUTO(vaes)
CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
} }
// Encoding test for vpclmulqdq across 128/256/512-bit operands.
// Assembles six forms of the instruction with the same memory operand
// ([rax + 64]) and immediate (3), then compares the generated bytes with a
// hand-written expected table.
CYBOZU_TEST_AUTO(vpclmulqdq)
{
// Generator whose constructor emits the instructions under test; the order
// of the calls below must match the order of the rows in tbl.
struct Code : Xbyak::CodeGenerator {
Code()
{
// Destinations in the low register range (index 2).
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
// Destinations with index 20; registers 16-31 are only encodable with EVEX.
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
}
} c;
// Expected machine code, one row per instruction above.
// Rows starting with 0xc4 are 3-byte-VEX encoded (7 bytes each); rows
// starting with 0x62 are EVEX encoded (8 bytes each). Every row ends with
// opcode 0x44, a ModRM byte, an 8-bit displacement and the immediate 0x03.
const uint8_t tbl[] = {
// xmm2: VEX.128, displacement stored unscaled (0x40 == 64).
0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03,
// ymm2: VEX.256, differs from the row above only in the vector-length bit.
0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03,
// zmm2: EVEX.512; EVEX compresses disp8 by the vector size, 64 / 64 == 0x01.
0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03,
// xmm20: EVEX.128; compressed disp8 is 64 / 16 == 0x04.
0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03,
// ymm20: EVEX.256; compressed disp8 is 64 / 32 == 0x02.
0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03,
// zmm20: EVEX.512; compressed disp8 is 64 / 64 == 0x01.
0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03,
};
// Number of expected bytes (element count of tbl).
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// Check the total length first, then the bytes themselves.
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
#endif #endif

View file

@ -1100,7 +1100,7 @@ void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isME
void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); } void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); }
void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); } void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); }
void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); } void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); }
void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x44, imm); } void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); }
void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); } void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); }
void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); } void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); }
void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x29); } void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x29); }