add m16bcst
This commit is contained in:
parent
84053c8e72
commit
6333ec099f
5 changed files with 23 additions and 20 deletions
|
@ -35,6 +35,7 @@
|
|||
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
||||
T_B32 = 1 << 26, // m32bcst
|
||||
T_B64 = 1 << 27, // m64bcst
|
||||
T_B16 = T_B32 | T_B64, // m16bcst
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
|
@ -157,9 +158,12 @@ std::string type2String(int type)
|
|||
}
|
||||
if (type & T_B32) {
|
||||
if (!str.empty()) str += " | ";
|
||||
if (type & T_B64) {
|
||||
str += "T_B16"; // T_B16 = T_B32 | T_B64
|
||||
} else {
|
||||
str += "T_B32";
|
||||
}
|
||||
if (type & T_B64) {
|
||||
} else if (type & T_B64) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_B64";
|
||||
}
|
||||
|
|
|
@ -757,7 +757,7 @@ void putFP16_1()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
printf("void v%sph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code);
|
||||
printf("void v%sph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x%02X); }\n", p->name, p->code);
|
||||
printf("void v%ssh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_Z | T_N2, 0x%02X); }\n", p->name, p->code);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -866,7 +866,7 @@ CYBOZU_TEST_AUTO(vnni)
|
|||
CYBOZU_TEST_EXCEPTION(c.badVex(), std::exception);
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(fp16)
|
||||
CYBOZU_TEST_AUTO(vaddph)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
|
@ -874,22 +874,20 @@ CYBOZU_TEST_AUTO(fp16)
|
|||
vaddph(zmm0, zmm1, ptr[rax+64]);
|
||||
vaddph(ymm0, ymm1, ptr[rax+64]);
|
||||
vaddph(xmm0, xmm1, ptr[rax+64]);
|
||||
vaddsh(xmm0, xmm15, ptr[rax+64]);
|
||||
|
||||
vcmpph(k1, xm15, ptr[rax+64], 1);
|
||||
vcmpph(k2, ym15, ptr[rax+64], 2);
|
||||
vcmpph(k3, zm15, ptr[rax+64], 3);
|
||||
vaddph(zmm0, zmm1, ptr_b[rax+64]);
|
||||
vaddph(ymm0, ymm1, ptr_b[rax+64]);
|
||||
vaddph(xmm0, xmm1, ptr_b[rax+64]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xF5, 0x74, 0x48, 0x58, 0x40, 0x01,
|
||||
0x62, 0xF5, 0x74, 0x28, 0x58, 0x40, 0x02,
|
||||
0x62, 0xF5, 0x74, 0x08, 0x58, 0x40, 0x04,
|
||||
0x62, 0xF5, 0x06, 0x08, 0x58, 0x40, 0x20,
|
||||
|
||||
0x62, 0xF3, 0x04, 0x08, 0xC2, 0x48, 0x04, 0x01,
|
||||
0x62, 0xF3, 0x04, 0x28, 0xC2, 0x50, 0x02, 0x02,
|
||||
0x62, 0xF3, 0x04, 0x48, 0xC2, 0x58, 0x01, 0x03,
|
||||
0x62, 0xF5, 0x74, 0x58, 0x58, 0x40, 0x20,
|
||||
0x62, 0xF5, 0x74, 0x38, 0x58, 0x40, 0x20,
|
||||
0x62, 0xF5, 0x74, 0x18, 0x58, 0x40, 0x20
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
|
|
|
@ -1675,6 +1675,7 @@ private:
|
|||
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
||||
T_B32 = 1 << 26, // m32bcst
|
||||
T_B64 = 1 << 27, // m64bcst
|
||||
T_B16 = T_B32 | T_B64, // m16bcst (Be careful)
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
|
@ -1748,7 +1749,7 @@ private:
|
|||
VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
|
||||
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
|
||||
if (b) {
|
||||
disp8N = (type & T_B32) ? 4 : 8;
|
||||
disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8;
|
||||
} else if (type & T_DUP) {
|
||||
disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
|
||||
} else {
|
||||
|
|
|
@ -1741,7 +1741,7 @@ void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM
|
|||
void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }
|
||||
void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }
|
||||
void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }
|
||||
void vaddph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B32, 0x58); }
|
||||
void vaddph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x58); }
|
||||
void vaddsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_Z | T_N2, 0x58); }
|
||||
void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); }
|
||||
void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); }
|
||||
|
@ -1921,7 +1921,7 @@ void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T
|
|||
void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
||||
void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
||||
void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
|
||||
void vdivph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B32, 0x5E); }
|
||||
void vdivph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5E); }
|
||||
void vdivsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_Z | T_N2, 0x5E); }
|
||||
void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
|
||||
void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
|
||||
|
@ -1972,9 +1972,9 @@ void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm)
|
|||
void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
||||
void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
|
||||
void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
||||
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B32, 0x5F); }
|
||||
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5F); }
|
||||
void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_Z | T_N2, 0x5F); }
|
||||
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B32, 0x5D); }
|
||||
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5D); }
|
||||
void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_Z | T_N2, 0x5D); }
|
||||
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||
|
@ -1988,7 +1988,7 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3
|
|||
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B32, 0x59); }
|
||||
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
|
||||
void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_Z | T_N2, 0x59); }
|
||||
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
|
||||
void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
|
||||
|
@ -2183,7 +2183,7 @@ void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) {
|
|||
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
|
||||
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
|
||||
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
|
||||
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B32, 0x5C); }
|
||||
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); }
|
||||
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_Z | T_N2, 0x5C); }
|
||||
#ifdef XBYAK64
|
||||
void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue