fix vpextrw reg, xmm, imm

This commit is contained in:
MITSUNARI Shigeo 2011-02-08 07:15:26 +09:00
parent ccf165322d
commit 6a8267992f
8 changed files with 182 additions and 1284 deletions

View file

@ -1315,7 +1315,8 @@ void put()
printf("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }\n"); printf("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }\n");
printf("void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }\n"); printf("void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }\n");
printf("void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), MM_0F | PP_66, 0xC5, false); db(imm); }\n"); // according to Intel' manual, VEX.W1 is ignored in 64-bit mode, then always VEX.W = 0, but I follow yasm encoding.
printf("void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }\n");
printf("void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }\n"); printf("void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }\n");
printf("void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }\n"); printf("void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }\n");

View file

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.99 C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.991
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
◎概要 ◎概要
@ -61,6 +61,16 @@ test byte [esp], 4 --> test (byte [esp], 4);
(注意) dword, word, byteはクラス変数です従ってたとえばunsigned intの (注意) dword, word, byteはクラス変数です従ってたとえばunsigned intの
つもりでdwordをtypedefしないでください つもりでdwordをtypedefしないでください
・AVX
大抵の3オペランド形式の命令はデスティネーションを省略した形で呼び出すことができます.
FMAについては簡略表記を導入するか検討中です(アイデア募集中).
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
vfmadd231pd(xmm1, xmm2, xmm3); // xmm1 <- (xmm2 * xmm3) + xmm1
・ラベル ・ラベル
L(文字列); L(文字列);
@ -199,6 +209,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
◎履歴 ◎履歴
2011/02/07 ver 2.991 beta fix vpextrw reg, xmm, imm
2011/02/04 ver 2.99 beta support AVX 2011/02/04 ver 2.99 beta support AVX
2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp 2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist 2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist

View file

@ -1,5 +1,5 @@
Xbyak 2.99 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ Xbyak 2.991 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
<Abstract> <Abstract>
@ -57,6 +57,13 @@ test byte [esp], 4 --> test (byte [esp], 4);
NB. dword, word and byte are class members, so do not typedef dword as NB. dword, word and byte are class members, so do not typedef dword as
unsigned int, for example. unsigned int, for example.
AVX
You can omit the destination operand for almost all 3-op mnemonics.
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
Label Label
L("L1"); L("L1");
@ -148,6 +155,7 @@ http://www.opensource.org/licenses/bsd-license.php
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
<History> <History>
2011/Feb/07 ver 2.991 beta fix vpextrw reg, xmm, imm
2011/Feb/04 ver 2.99 beta support AVX 2011/Feb/04 ver 2.99 beta support AVX
2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
@ -190,5 +198,5 @@ http://www.opensource.org/licenses/bsd-license.php
MITSUNARI Shigeo(herumi at nifty dot com) MITSUNARI Shigeo(herumi at nifty dot com)
--- ---
$Revision: 1.54 $ $Revision: 1.56 $
$Date: 2011/02/04 03:27:59 $ $Date: 2011/02/07 06:18:26 $

View file

@ -1653,7 +1653,6 @@ class Test {
} }
} }
put("vpextrb", REG32e|MEM, XMM, IMM); put("vpextrb", REG32e|MEM, XMM, IMM);
put("vpextrw", REG32e, XMM, IMM);
put("vpextrd", REG32|MEM, XMM, IMM); put("vpextrd", REG32|MEM, XMM, IMM);
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
@ -1748,7 +1747,7 @@ class Test {
put("vpcmpgtq", XMM, XMM | MEM); put("vpcmpgtq", XMM, XMM | MEM);
put("vpcmpgtq", XMM, XMM, XMM | MEM); put("vpcmpgtq", XMM, XMM, XMM | MEM);
put("vpextrw", MEM, XMM, IMM); // nasm iw wrong? put("vpextrw", REG32e | MEM, XMM, IMM); // nasm iw wrong?
#endif #endif
} }
public: public:

File diff suppressed because it is too large Load diff

View file

@ -4,9 +4,9 @@
@file xbyak.h @file xbyak.h
@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++ @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
@author herumi @author herumi
@version $Revision: 1.238 $ @version $Revision: 1.239 $
@url http://homepage1.nifty.com/herumi/soft/xbyak.html @url http://homepage1.nifty.com/herumi/soft/xbyak.html
@date $Date: 2011/02/04 03:46:09 $ @date $Date: 2011/02/07 06:09:35 $
@note modified new BSD license @note modified new BSD license
http://www.opensource.org/licenses/bsd-license.php http://www.opensource.org/licenses/bsd-license.php
*/ */
@ -56,7 +56,7 @@ namespace Xbyak {
enum { enum {
DEFAULT_MAX_CODE_SIZE = 4096, DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x2990, /* 0xABCD = A.BC(D) */ VERSION = 0x2991, /* 0xABCD = A.BC(D) */
}; };
#ifndef MIE_INTEGER_TYPE_DEFINED #ifndef MIE_INTEGER_TYPE_DEFINED

View file

@ -1,4 +1,4 @@
const char *getVersionString() const { return "2.99"; } const char *getVersionString() const { return "2.991"; }
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); } void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); } void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); } void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
@ -943,7 +943,7 @@ void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, f
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); } void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); }
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); } void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }
void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); } void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }
void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), MM_0F | PP_66, 0xC5, false); db(imm); } void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }
void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); } void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); } void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); } void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }

View file

@ -44,6 +44,10 @@
#endif #endif
#endif #endif
#ifdef _MSC_VER
extern "C" unsigned __int64 __xgetbv(int);
#endif
namespace Xbyak { namespace util { namespace Xbyak { namespace util {
/** /**
@ -62,6 +66,16 @@ public:
__cpuid(reinterpret_cast<int*>(data), eaxIn); __cpuid(reinterpret_cast<int*>(data), eaxIn);
#else #else
__cpuid(eaxIn, data[0], data[1], data[2], data[3]); __cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
}
static inline uint64 getXfeature()
{
#ifdef _MSC_VER
return __xgetbv(0);
#else
unsigned int eax, edx;
__asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
return ((uint64)edx << 32) | eax;
#endif #endif
} }
enum Type { enum Type {
@ -121,10 +135,15 @@ public:
if (data[2] & (1U << 25)) type_ |= tAESNI; if (data[2] & (1U << 25)) type_ |= tAESNI;
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ; if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
if (data[2] & (1U << 27)) type_ |= tOSXSACE; if (data[2] & (1U << 27)) type_ |= tOSXSACE;
// QQQ
// should check XFEATURE_ENABLED_MASK[2:1] = '11b' by xgetvb if (type_ & tOSXSACE) {
if (data[2] & (1U << 28)) type_ |= tAVX; // check XFEATURE_ENABLED_MASK[2:1] = '11b'
if (data[2] & (1U << 12)) type_ |= tFMA; uint64 bv = getXfeature();
if ((bv & 6) == 6) {
if (data[2] & (1U << 28)) type_ |= tAVX;
if (data[2] & (1U << 12)) type_ |= tFMA;
}
}
if (data[3] & (1U << 15)) type_ |= tCMOV; if (data[3] & (1U << 15)) type_ |= tCMOV;
if (data[3] & (1U << 23)) type_ |= tMMX; if (data[3] & (1U << 23)) type_ |= tMMX;