add vpopcnt{b,w,d,q}
This commit is contained in:
parent
9acfc1323d
commit
0e1a11b486
3 changed files with 52 additions and 0 deletions
|
@ -642,6 +642,11 @@ void putX_XM_IMM()
|
||||||
|
|
||||||
{ 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
{ 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||||
{ 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
{ 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||||
|
|
||||||
|
{ 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false },
|
||||||
|
{ 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||||
|
{ 0x55, "vpopcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false },
|
||||||
|
{ 0x55, "vpopcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
|
|
@ -356,4 +356,47 @@ CYBOZU_TEST_AUTO(shrd)
|
||||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
}
|
}
|
||||||
|
// Verifies the exact bytes emitted for vpopcnt{b,w,d,q} when the destination
// is xmm5/ymm5/zmm5 masked by k3 with zeroing and the source is [rax + 0x40].
CYBOZU_TEST_AUTO(vpopcnt)
{
	struct PopcntCode : Xbyak::CodeGenerator {
		PopcntCode()
		{
			// Every instruction below reads from the same memory operand.
			const Xbyak::Address src = ptr[rax + 0x40];

			// byte elements
			vpopcntb(xmm5 | k3 | T_z, src);
			vpopcntb(ymm5 | k3 | T_z, src);
			vpopcntb(zmm5 | k3 | T_z, src);

			// word elements
			vpopcntw(xmm5 | k3 | T_z, src);
			vpopcntw(ymm5 | k3 | T_z, src);
			vpopcntw(zmm5 | k3 | T_z, src);

			// dword elements
			vpopcntd(xmm5 | k3 | T_z, src);
			vpopcntd(ymm5 | k3 | T_z, src);
			vpopcntd(zmm5 | k3 | T_z, src);

			// qword elements
			vpopcntq(xmm5 | k3 | T_z, src);
			vpopcntq(ymm5 | k3 | T_z, src);
			vpopcntq(zmm5 | k3 | T_z, src);
		}
	} c;

	// One 7-byte row per instruction, in emission order.
	// Within each group of three rows, the fourth byte steps 0x8b/0xab/0xcb for
	// xmm/ymm/zmm and the trailing displacement byte is 0x04/0x02/0x01, i.e.
	// 0x40 divided by the 16/32/64-byte operand width.
	const uint8_t tbl[] = {
		// vpopcntb
		0x62, 0xf2, 0x7d, 0x8b, 0x54, 0x68, 0x04,
		0x62, 0xf2, 0x7d, 0xab, 0x54, 0x68, 0x02,
		0x62, 0xf2, 0x7d, 0xcb, 0x54, 0x68, 0x01,

		// vpopcntw
		0x62, 0xf2, 0xfd, 0x8b, 0x54, 0x68, 0x04,
		0x62, 0xf2, 0xfd, 0xab, 0x54, 0x68, 0x02,
		0x62, 0xf2, 0xfd, 0xcb, 0x54, 0x68, 0x01,

		// vpopcntd
		0x62, 0xf2, 0x7d, 0x8b, 0x55, 0x68, 0x04,
		0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
		0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,

		// vpopcntq
		0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
		0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
		0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);

	// Compare the length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1843,6 +1843,10 @@ void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_
|
||||||
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x30, true); }
|
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x30, true); }
|
||||||
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
||||||
void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
|
void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
|
||||||
|
// vpopcntb x, op: emitted through opAVX_X_XM_IMM as an EVEX-required
// (T_MUST_EVEX) 66/0F38 instruction with W0 and opcode 0x54.
void vpopcntb(const Xmm& x, const Operand& op)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z, 0x54);
}
|
||||||
|
void vpopcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x55); }
|
||||||
|
void vpopcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x55); }
|
||||||
|
// vpopcntw x, op: emitted through opAVX_X_XM_IMM as an EVEX-required
// (T_MUST_EVEX) 66/0F38 instruction with W1 and opcode 0x54.
void vpopcntw(const Xmm& x, const Operand& op)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z, 0x54);
}
|
||||||
void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEB); }
|
void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEB); }
|
||||||
void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); }
|
void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); }
|
||||||
void vprold(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
|
void vprold(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue