accept k0 mask register (it means no mask)
This commit is contained in:
parent
7e3167e4b0
commit
9607626565
7 changed files with 24 additions and 8 deletions
|
@ -1,6 +1,6 @@
|
|||
[](https://travis-ci.org/herumi/xbyak)
|
||||
|
||||
# Xbyak 5.90 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
# Xbyak 5.91 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
|
||||
## Abstract
|
||||
|
||||
|
@ -151,6 +151,8 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64],
|
|||
```
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
- `k0` is treated as no mask.
|
||||
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are the same as `vmovaps(zmm0, ptr[rax]);`.
|
||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||
* `k4 | k3` is different from `k3 | k4`.
|
||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||
|
@ -423,6 +425,7 @@ modified new BSD License
|
|||
http://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
## History
|
||||
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask)
|
||||
* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register
|
||||
* 2020/Feb/26 ver 5.891 fix the type of `zm0`
|
||||
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.90
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.91
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -371,6 +371,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2020/04/20 ver 5.91 マスクレジスタk0を受け入れる(マスクをしない)
|
||||
2020/04/09 ver 5.90 kmov{b,w,d,q}がサポートされないレジスタを受けると例外を投げる
|
||||
2020/02/26 ver 5.891 zm0のtype修正
|
||||
2020/01/03 ver 5.89 vfpclasspdの処理エラー修正
|
||||
|
|
|
@ -537,7 +537,7 @@ public:
|
|||
printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
|
||||
} else {
|
||||
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
|
||||
if (z) pz = "{z}";
|
||||
if (z && kIdx) pz = "{z}";
|
||||
printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2628,7 +2628,7 @@ public:
|
|||
printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
|
||||
} else {
|
||||
if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
|
||||
if (z) pz = "{z}";
|
||||
if (z && kIdx) pz = "{z}";
|
||||
printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -119,6 +119,18 @@ CYBOZU_TEST_AUTO(kmask)
|
|||
CYBOZU_TEST_EXCEPTION(kmovd(k1, rax), std::exception);
|
||||
CYBOZU_TEST_NO_EXCEPTION(kmovq(k1, rax));
|
||||
#endif
|
||||
CYBOZU_TEST_NO_EXCEPTION(vmovaps(xm0|k0, ptr[eax]));
|
||||
checkT_z();
|
||||
}
|
||||
void checkT_z()
|
||||
{
|
||||
const uint8_t *p1 = getCurr();
|
||||
vmovaps(zm0, ptr[eax]);
|
||||
const uint8_t *p2 = getCurr();
|
||||
vmovaps(zm0|T_z, ptr[eax]);
|
||||
const uint8_t *end = getCurr();
|
||||
CYBOZU_TEST_EQUAL(p2 - p1, end - p2);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(p1, p2, end - p2);
|
||||
}
|
||||
} c;
|
||||
}
|
||||
|
|
|
@ -115,7 +115,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x5900 /* 0xABCD = A.BC(D) */
|
||||
VERSION = 0x5910 /* 0xABCD = A.BC(D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
@ -464,9 +464,8 @@ public:
|
|||
}
|
||||
// err if MMX/FPU/OPMASK/BNDREG
|
||||
void setBit(int bit);
|
||||
void setOpmaskIdx(int idx, bool ignore_idx0 = false)
|
||||
void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
|
||||
{
|
||||
if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
|
||||
if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET);
|
||||
mask_ = idx;
|
||||
}
|
||||
|
@ -1662,6 +1661,7 @@ private:
|
|||
bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
|
||||
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
|
||||
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
||||
if (aaa == 0) z = 0; // clear T_z if mask is not set
|
||||
db(0x62);
|
||||
db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3));
|
||||
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "5.90"; }
|
||||
const char *getVersionString() const { return "5.91"; }
|
||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue