fix vpextrw reg, xmm, imm
This commit is contained in:
parent
ccf165322d
commit
6a8267992f
8 changed files with 182 additions and 1284 deletions
|
@ -1315,7 +1315,8 @@ void put()
|
||||||
printf("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }\n");
|
printf("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }\n");
|
||||||
|
|
||||||
printf("void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }\n");
|
printf("void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }\n");
|
||||||
printf("void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), MM_0F | PP_66, 0xC5, false); db(imm); }\n");
|
// According to Intel's manual, VEX.W1 is ignored in 64-bit mode, so VEX.W = 0 would always be enough, but I follow yasm's encoding.
|
||||||
|
printf("void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }\n");
|
||||||
printf("void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }\n");
|
printf("void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }\n");
|
||||||
printf("void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }\n");
|
printf("void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }\n");
|
||||||
|
|
||||||
|
|
13
readme.txt
13
readme.txt
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.99
|
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.991
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎概要
|
◎概要
|
||||||
|
@ -61,6 +61,16 @@ test byte [esp], 4 --> test (byte [esp], 4);
|
||||||
(注意) dword, word, byteはクラス変数です.従ってたとえばunsigned intの
|
(注意) dword, word, byteはクラス変数です.従ってたとえばunsigned intの
|
||||||
つもりでdwordをtypedefしないでください.
|
つもりでdwordをtypedefしないでください.
|
||||||
|
|
||||||
|
・AVX
|
||||||
|
|
||||||
|
大抵の3オペランド形式の命令はデスティネーションを省略した形で呼び出すことができます.
|
||||||
|
FMAについては簡略表記を導入するか検討中です(アイデア募集中).
|
||||||
|
|
||||||
|
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||||
|
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||||
|
|
||||||
|
vfmadd231pd(xmm1, xmm2, xmm3); // xmm1 <- (xmm2 * xmm3) + xmm1
|
||||||
|
|
||||||
・ラベル
|
・ラベル
|
||||||
|
|
||||||
L(文字列);
|
L(文字列);
|
||||||
|
@ -199,6 +209,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎履歴
|
◎履歴
|
||||||
|
|
||||||
|
2011/02/07 ver 2.991 beta fix vpextrw reg, xmm, imm
|
||||||
2011/02/04 ver 2.99 beta support AVX
|
2011/02/04 ver 2.99 beta support AVX
|
||||||
2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||||
2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||||
|
|
14
readme_e.txt
14
readme_e.txt
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
Xbyak 2.99 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
Xbyak 2.991 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
<Abstract>
|
<Abstract>
|
||||||
|
@ -57,6 +57,13 @@ test byte [esp], 4 --> test (byte [esp], 4);
|
||||||
NB. dword, word and byte are class members, so don't typedef dword as
|
NB. dword, word and byte are class members, so don't typedef dword as
|
||||||
unsigned int, for example.
|
unsigned int, for example.
|
||||||
|
|
||||||
|
AVX
|
||||||
|
|
||||||
|
You can omit the destination operand for almost all 3-operand mnemonics.
|
||||||
|
|
||||||
|
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||||
|
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||||
|
|
||||||
Label
|
Label
|
||||||
|
|
||||||
L("L1");
|
L("L1");
|
||||||
|
@ -148,6 +155,7 @@ http://www.opensource.org/licenses/bsd-license.php
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
<History>
|
<History>
|
||||||
|
|
||||||
|
2011/Feb/07 ver 2.991 beta fix vpextrw reg, xmm, imm
|
||||||
2011/Feb/04 ver 2.99 beta support AVX
|
2011/Feb/04 ver 2.99 beta support AVX
|
||||||
2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||||
2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||||
|
@ -190,5 +198,5 @@ http://www.opensource.org/licenses/bsd-license.php
|
||||||
MITSUNARI Shigeo(herumi at nifty dot com)
|
MITSUNARI Shigeo(herumi at nifty dot com)
|
||||||
|
|
||||||
---
|
---
|
||||||
$Revision: 1.54 $
|
$Revision: 1.56 $
|
||||||
$Date: 2011/02/04 03:27:59 $
|
$Date: 2011/02/07 06:18:26 $
|
||||||
|
|
|
@ -1653,7 +1653,6 @@ class Test {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
put("vpextrb", REG32e|MEM, XMM, IMM);
|
put("vpextrb", REG32e|MEM, XMM, IMM);
|
||||||
put("vpextrw", REG32e, XMM, IMM);
|
|
||||||
put("vpextrd", REG32|MEM, XMM, IMM);
|
put("vpextrd", REG32|MEM, XMM, IMM);
|
||||||
|
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
|
@ -1748,7 +1747,7 @@ class Test {
|
||||||
put("vpcmpgtq", XMM, XMM | MEM);
|
put("vpcmpgtq", XMM, XMM | MEM);
|
||||||
put("vpcmpgtq", XMM, XMM, XMM | MEM);
|
put("vpcmpgtq", XMM, XMM, XMM | MEM);
|
||||||
|
|
||||||
put("vpextrw", MEM, XMM, IMM); // nasm iw wrong?
|
put("vpextrw", REG32e | MEM, XMM, IMM); // nasm iw wrong?
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
|
|
1396
test/nm.cpp
1396
test/nm.cpp
File diff suppressed because it is too large
Load diff
|
@ -4,9 +4,9 @@
|
||||||
@file xbyak.h
|
@file xbyak.h
|
||||||
@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
|
@brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
|
||||||
@author herumi
|
@author herumi
|
||||||
@version $Revision: 1.238 $
|
@version $Revision: 1.239 $
|
||||||
@url http://homepage1.nifty.com/herumi/soft/xbyak.html
|
@url http://homepage1.nifty.com/herumi/soft/xbyak.html
|
||||||
@date $Date: 2011/02/04 03:46:09 $
|
@date $Date: 2011/02/07 06:09:35 $
|
||||||
@note modified new BSD license
|
@note modified new BSD license
|
||||||
http://www.opensource.org/licenses/bsd-license.php
|
http://www.opensource.org/licenses/bsd-license.php
|
||||||
*/
|
*/
|
||||||
|
@ -56,7 +56,7 @@ namespace Xbyak {
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||||
VERSION = 0x2990, /* 0xABCD = A.BC(D) */
|
VERSION = 0x2991, /* 0xABCD = A.BC(D) */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
const char *getVersionString() const { return "2.99"; }
|
const char *getVersionString() const { return "2.991"; }
|
||||||
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
|
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
|
||||||
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
|
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
|
||||||
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
|
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
|
||||||
|
@ -943,7 +943,7 @@ void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, f
|
||||||
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); }
|
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); }
|
||||||
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }
|
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); }
|
||||||
void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }
|
void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); }
|
||||||
void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), MM_0F | PP_66, 0xC5, false); db(imm); }
|
void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); }
|
||||||
void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }
|
void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); }
|
||||||
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }
|
void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); }
|
||||||
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
|
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); }
|
||||||
|
|
|
@ -44,6 +44,10 @@
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
extern "C" unsigned __int64 __xgetbv(int);
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Xbyak { namespace util {
|
namespace Xbyak { namespace util {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -62,6 +66,16 @@ public:
|
||||||
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
||||||
#else
|
#else
|
||||||
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
static inline uint64 getXfeature()
|
||||||
|
{
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
return __xgetbv(0);
|
||||||
|
#else
|
||||||
|
unsigned int eax, edx;
|
||||||
|
__asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||||
|
return ((uint64)edx << 32) | eax;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
enum Type {
|
enum Type {
|
||||||
|
@ -121,10 +135,15 @@ public:
|
||||||
if (data[2] & (1U << 25)) type_ |= tAESNI;
|
if (data[2] & (1U << 25)) type_ |= tAESNI;
|
||||||
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
|
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||||
if (data[2] & (1U << 27)) type_ |= tOSXSACE;
|
if (data[2] & (1U << 27)) type_ |= tOSXSACE;
|
||||||
// QQQ
|
|
||||||
// should check XFEATURE_ENABLED_MASK[2:1] = '11b' by xgetbv
|
if (type_ & tOSXSACE) {
|
||||||
if (data[2] & (1U << 28)) type_ |= tAVX;
|
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
|
||||||
if (data[2] & (1U << 12)) type_ |= tFMA;
|
uint64 bv = getXfeature();
|
||||||
|
if ((bv & 6) == 6) {
|
||||||
|
if (data[2] & (1U << 28)) type_ |= tAVX;
|
||||||
|
if (data[2] & (1U << 12)) type_ |= tFMA;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||||
if (data[3] & (1U << 23)) type_ |= tMMX;
|
if (data[3] & (1U << 23)) type_ |= tMMX;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue