accept k0 mask register (it means no mask)

This commit is contained in:
MITSUNARI Shigeo 2020-04-20 11:21:35 +09:00
parent 7e3167e4b0
commit 9607626565
7 changed files with 24 additions and 8 deletions

View file

@ -1,6 +1,6 @@
[![Build Status](https://travis-ci.org/herumi/xbyak.png)](https://travis-ci.org/herumi/xbyak)
# Xbyak 5.90 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
# Xbyak 5.91 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
## Abstract
@ -151,6 +151,8 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64],
```
### Remark
* `k1`, ..., `k7` are opmask registers.
- `k0` is treated as no mask.
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are the same as `vmovaps(zmm0, ptr[rax]);`.
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
* `k4 | k3` is different from `k3 | k4`.
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
@ -423,6 +425,7 @@ modified new BSD License
http://opensource.org/licenses/BSD-3-Clause
## History
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask)
* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register
* 2020/Feb/26 ver 5.891 fix typo of type
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd

View file

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.90
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.91
-----------------------------------------------------------------------------
◎概要
@ -371,6 +371,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
2020/04/20 ver 5.91 マスクレジスタk0を受け入れる(マスクをしない)
2020/04/09 ver 5.90 kmov{b,w,d,q}がサポートされないレジスタを受けると例外を投げる
2020/02/26 ver 5.891 zm0のtype修正
2020/01/03 ver 5.89 vfpclasspdの処理エラー修正

View file

@ -537,7 +537,7 @@ public:
printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
} else {
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
if (z) pz = "{z}";
if (z && kIdx) pz = "{z}";
printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
}
}

View file

@ -2628,7 +2628,7 @@ public:
printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
} else {
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
if (z) pz = "{z}";
if (z && kIdx) pz = "{z}";
printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
}
}

View file

@ -119,6 +119,18 @@ CYBOZU_TEST_AUTO(kmask)
CYBOZU_TEST_EXCEPTION(kmovd(k1, rax), std::exception);
CYBOZU_TEST_NO_EXCEPTION(kmovq(k1, rax));
#endif
CYBOZU_TEST_NO_EXCEPTION(vmovaps(xm0|k0, ptr[eax]));
checkT_z();
}
// Checks that T_z (zeroing) given without an opmask does not change the
// encoding: vmovaps(zm0|T_z, ...) must emit exactly the same bytes as
// vmovaps(zm0, ...).
void checkT_z()
{
// [p1, p2) brackets the bytes emitted for the plain form
// (assumes getCurr() returns the current emit position -- TODO confirm).
const uint8_t *p1 = getCurr();
vmovaps(zm0, ptr[eax]);
const uint8_t *p2 = getCurr();
// [p2, end) brackets the bytes emitted for the T_z form.
vmovaps(zm0|T_z, ptr[eax]);
const uint8_t *end = getCurr();
// Both encodings must have the same length ...
CYBOZU_TEST_EQUAL(p2 - p1, end - p2);
// ... and identical contents.
CYBOZU_TEST_EQUAL_ARRAY(p1, p2, end - p2);
}
} c;
}

View file

@ -115,7 +115,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x5900 /* 0xABCD = A.BC(D) */
VERSION = 0x5910 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@ -464,9 +464,8 @@ public:
}
// err if MMX/FPU/OPMASK/BNDREG
void setBit(int bit);
void setOpmaskIdx(int idx, bool ignore_idx0 = false)
void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
{
if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET);
mask_ = idx;
}
@ -1662,6 +1661,7 @@ private:
bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
if (aaa == 0) z = 0; // clear T_z if mask is not set
db(0x62);
db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3));
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));

View file

@ -1,4 +1,4 @@
const char *getVersionString() const { return "5.90"; }
const char *getVersionString() const { return "5.91"; }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }