fix vpextrw reg, xmm, imm
This commit is contained in:
parent
ccf165322d
commit
6a8267992f
8 changed files with 182 additions and 1284 deletions
|
@ -1315,7 +1315,8 @@ void put()
|
|||
printf("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }\n");
|
||||
|
||||
printf("void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }\n");
|
||||
printf("void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), MM_0F | PP_66, 0xC5, false); db(imm); }\n");
|
||||
// According to Intel's manual, VEX.W1 is ignored in 64-bit mode, so VEX.W = 0 would always be acceptable, but this follows the yasm encoding.
|
||||
printf("void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }\n");
|
||||
printf("void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }\n");
|
||||
printf("void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }\n");
|
||||
|
||||
|
|
13
readme.txt
13
readme.txt
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.99
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.991
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -61,6 +61,16 @@ test byte [esp], 4 --> test (byte [esp], 4);
|
|||
(注意) dword, word, byteはクラス変数です.従ってたとえばunsigned intの
|
||||
つもりでdwordをtypedefしないでください.
|
||||
|
||||
・AVX
|
||||
|
||||
大抵の3オペランド形式の命令はデスティネーションを省略した形で呼び出すことができます.
|
||||
FMAについては簡略表記を導入するか検討中です(アイデア募集中).
|
||||
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
|
||||
vfmadd231pd(xmm1, xmm2, xmm3); // xmm1 <- (xmm2 * xmm3) + xmm1
|
||||
|
||||
・ラベル
|
||||
|
||||
L(文字列);
|
||||
|
@ -199,6 +209,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2011/02/07 ver 2.991 beta fix vpextrw reg, xmm, imm
|
||||
2011/02/04 ver 2.99 beta support AVX
|
||||
2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||
2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||
|
|
14
readme_e.txt
14
readme_e.txt
|
@ -1,5 +1,5 @@
|
|||
|
||||
Xbyak 2.99 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
Xbyak 2.991 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
<Abstract>
|
||||
|
@ -57,6 +57,13 @@ test byte [esp], 4 --> test (byte [esp], 4);
|
|||
NB. dword, word and byte are class members, so do not typedef dword as
|
||||
unsigned int, for example.
|
||||
|
||||
AVX
|
||||
|
||||
You can omit the destination for almost all 3-operand mnemonics.
|
||||
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
|
||||
Label
|
||||
|
||||
L("L1");
|
||||
|
@ -148,6 +155,7 @@ http://www.opensource.org/licenses/bsd-license.php
|
|||
-----------------------------------------------------------------------------
|
||||
<History>
|
||||
|
||||
2011/Feb/07 ver 2.991 beta fix vpextrw reg, xmm, imm
|
||||
2011/Feb/04 ver 2.99 beta support AVX
|
||||
2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||
2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||
|
@ -190,5 +198,5 @@ http://www.opensource.org/licenses/bsd-license.php
|
|||
MITSUNARI Shigeo(herumi at nifty dot com)
|
||||
|
||||
---
|
||||
$Revision: 1.54 $
|
||||
$Date: 2011/02/04 03:27:59 $
|
||||
$Revision: 1.56 $
|
||||
$Date: 2011/02/07 06:18:26 $
|
||||
|
|
|
@ -1653,7 +1653,6 @@ class Test {
|
|||
}
|
||||
}
|
||||
put("vpextrb", REG32e|MEM, XMM, IMM);
|
||||
put("vpextrw", REG32e, XMM, IMM);
|
||||
put("vpextrd", REG32|MEM, XMM, IMM);
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
@ -1748,7 +1747,7 @@ class Test {
|
|||
put("vpcmpgtq", XMM, XMM | MEM);
|
||||
put("vpcmpgtq", XMM, XMM, XMM | MEM);
|
||||
|
||||
put("vpextrw", MEM, XMM, IMM); // nasm iw wrong?
|
||||
put("vpextrw", REG32e | MEM, XMM, IMM); // nasm iw wrong?
|
||||
#endif
|
||||
}
|
||||
public:
|
||||
|
|
1396
test/nm.cpp
1396
test/nm.cpp
File diff suppressed because it is too large
Load diff
|
@ -4,9 +4,9 @@
|
|||
@file xbyak.h
|
||||
@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
|
||||
@author herumi
|
||||
@version $Revision: 1.238 $
|
||||
@version $Revision: 1.239 $
|
||||
@url http://homepage1.nifty.com/herumi/soft/xbyak.html
|
||||
@date $Date: 2011/02/04 03:46:09 $
|
||||
@date $Date: 2011/02/07 06:09:35 $
|
||||
@note modified new BSD license
|
||||
http://www.opensource.org/licenses/bsd-license.php
|
||||
*/
|
||||
|
@ -56,7 +56,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x2990, /* 0xABCD = A.BC(D) */
|
||||
VERSION = 0x2991, /* 0xABCD = A.BC(D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "2.99"; }
|
||||
const char *getVersionString() const { return "2.991"; }
|
||||
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
|
||||
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
|
||||
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
|
||||
|
@ -943,7 +943,7 @@ void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, f
|
|||
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); }
|
||||
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }
|
||||
// vpextrb: the destination must satisfy isREG(i32e) or isMEM(), otherwise ERR_BAD_COMBINATION is thrown. The destination is passed through cvtReg(..., Operand::XMM) before being handed to opAVX_X_X_XM with MM_0F3A | PP_66 and opcode 0x14; imm is then emitted with db().
void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }
|
||||
void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), MM_0F | PP_66, 0xC5, false); db(imm); }
|
||||
// vpextrw (register destination): the general register is re-wrapped as Xmm(r.getIdx()) and passed as the first operand, with x as the third, using MM_0F | PP_66 and opcode 0xC5. The last argument is 1 for a 64-bit register and 0 otherwise (presumably the VEX.W bit - confirm against opAVX_X_X_XM). imm is then emitted with db().
void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }
|
||||
// vpextrw (memory destination): uses a different encoding from the register form, namely MM_0F3A | PP_66 with opcode 0x15, with x as the first operand and addr as the third; imm is then emitted with db().
void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }
|
||||
// vpextrd: the destination must satisfy isREG(32) or isMEM(), otherwise ERR_BAD_COMBINATION is thrown. It is encoded through opAVX_X_X_XM with MM_0F3A | PP_66, opcode 0x16 and an explicit final argument of 0; imm is then emitted with db().
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }
|
||||
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
|
||||
|
|
|
@ -44,6 +44,10 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
extern "C" unsigned __int64 __xgetbv(int);
|
||||
#endif
|
||||
|
||||
namespace Xbyak { namespace util {
|
||||
|
||||
/**
|
||||
|
@ -62,6 +66,16 @@ public:
|
|||
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
||||
#else
|
||||
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
||||
#endif
|
||||
}
|
||||
// Returns the 64-bit value produced by the xgetbv instruction when called with index 0.
// With MSVC the __xgetbv intrinsic is used; otherwise the instruction is issued through GCC-style inline assembly.
static inline uint64 getXfeature()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return __xgetbv(0);
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
// ECX is loaded with 0 as the input; the result is read back from EAX and EDX.
__asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
// EDX supplies the upper 32 bits and EAX the lower 32 bits of the result.
return ((uint64)edx << 32) | eax;
|
||||
#endif
|
||||
}
|
||||
enum Type {
|
||||
|
@ -121,10 +135,15 @@ public:
|
|||
if (data[2] & (1U << 25)) type_ |= tAESNI;
|
||||
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (data[2] & (1U << 27)) type_ |= tOSXSACE;
|
||||
// QQQ
|
||||
// should check XFEATURE_ENABLED_MASK[2:1] = '11b' by xgetbv
|
||||
|
||||
if (type_ & tOSXSACE) {
|
||||
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
|
||||
uint64 bv = getXfeature();
|
||||
if ((bv & 6) == 6) {
|
||||
if (data[2] & (1U << 28)) type_ |= tAVX;
|
||||
if (data[2] & (1U << 12)) type_ |= tFMA;
|
||||
}
|
||||
}
|
||||
|
||||
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1U << 23)) type_ |= tMMX;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue