diff --git a/readme.md b/readme.md index fa5ae8b..d945615 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,5 @@ -Xbyak 5.33 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +Xbyak 5.34 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ============= Abstract @@ -323,6 +323,7 @@ The header files under xbyak/ are independent of cybozulib. History ------------- +* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso) * 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N * 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro) * 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW diff --git a/readme.txt b/readme.txt index c6e0b7b..c2f8f4c 100644 --- a/readme.txt +++ b/readme.txt @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.33 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.34 ----------------------------------------------------------------------------- ◎概要 @@ -333,6 +333,7 @@ cybozulibは単体テストでのみ利用されていて、xbyak/ディレク ----------------------------------------------------------------------------- ◎履歴 +2016/12/07 ver 5.34 disp8N時の負のオフセット処理の修正(thanks to rsdubtso) 2016/12/06 ver 5.33 disp8N時のvpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w}のバグ修正 2016/12/01 ver 5.32 clang for Visual Studioサポートのために__xgetbv()を_xgetbv()に変更(thanks to freiro) 2016/11/27 ver 5.31 AVX512_4VNNIをAVX512_4VNNIWに変更 diff --git a/test/make_512.cpp b/test/make_512.cpp index 60132a8..4efd69f 100644 --- a/test/make_512.cpp +++ b/test/make_512.cpp @@ -2104,7 +2104,21 @@ public: } } { + const int tbl[] = { + -1024, -512, -256, -128, -64, -32, -16, -8, -4, -2, -1, + 0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512 + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + char xs[128], ns[128]; + int v = tbl[i]; + CYBOZU_SNPRINTF(xs, sizeof(xs), "zmm0, zmm1, ptr_b[eax%+d]", v); + CYBOZU_SNPRINTF(ns, sizeof(ns), "zmm0, zmm1, [eax%+d]{1to16}", v); + 
put("vaddps", xs, ns); + } } +#ifdef XBYAK64 + put("vfmadd231ps", "zmm8, zmm31, ptr_b[r14+rbp-0x1e4]", "zmm8, zmm31, [r14+rbp-0x1e4]{1to16}"); +#endif } void putAVX512() { diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 26e0597..7b8da5a 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -105,7 +105,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x5330 /* 0xABCD = A.BC(D) */ + VERSION = 0x5340 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -1506,8 +1506,9 @@ private: mod = mod01; } } else { - uint32 t = disp / disp8N; - if (t * disp8N == disp && inner::IsInDisp8(t)) { + // disp must be casted to signed + uint32 t = static_cast<uint32>(static_cast<int>(disp) / disp8N); + if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) { disp = t; mod = mod01; } diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 053493d..a7c021e 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "5.33"; } +const char *getVersionString() const { return "5.34"; } void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }