change format and add getter for data_cache_size
This commit is contained in:
parent
80b3c7b933
commit
fd587b55ca
6 changed files with 59 additions and 44 deletions
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
Xbyak 5.61 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
Xbyak 5.62 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||||
=============
|
=============
|
||||||
|
|
||||||
Abstract
|
Abstract
|
||||||
|
@ -333,6 +333,7 @@ The header files under xbyak/ are independent of cybozulib.
|
||||||
|
|
||||||
History
|
History
|
||||||
-------------
|
-------------
|
||||||
|
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||||
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
|
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
|
||||||
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
||||||
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.610
|
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.62
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎概要
|
◎概要
|
||||||
|
@ -343,6 +343,7 @@ cybozulibは単体テストでのみ利用されていて、xbyak/ディレク
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎履歴
|
◎履歴
|
||||||
|
|
||||||
|
2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||||
2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)
|
2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)
|
||||||
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
|
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
|
||||||
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
|
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
|
||||||
|
|
|
@ -104,6 +104,9 @@ void putCPUinfo()
|
||||||
Core i7-3930K 6 2D
|
Core i7-3930K 6 2D
|
||||||
*/
|
*/
|
||||||
cpu.putFamily();
|
cpu.putFamily();
|
||||||
|
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
|
||||||
|
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
|
|
|
@ -105,7 +105,7 @@ namespace Xbyak {
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||||
VERSION = 0x5610 /* 0xABCD = A.BC(D) */
|
VERSION = 0x5620 /* 0xABCD = A.BC(D) */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
const char *getVersionString() const { return "5.61"; }
|
const char *getVersionString() const { return "5.62"; }
|
||||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||||
|
|
|
@ -84,52 +84,54 @@ class Cpu {
|
||||||
displayModel = model;
|
displayModel = model;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unsigned int value_from_bits(unsigned int val, unsigned int base, unsigned int end)
|
unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end)
|
||||||
{
|
{
|
||||||
unsigned int shift = sizeof(val) * 8 - end - 1;
|
return (val >> base) & ((1u << (end - base)) - 1);
|
||||||
return (val << shift) >> (shift + base);
|
|
||||||
}
|
}
|
||||||
void setCacheHierarchy()
|
void setCacheHierarchy()
|
||||||
{
|
{
|
||||||
unsigned int cache_type = 42;
|
if ((type_ & tINTEL) == 0) return;
|
||||||
|
const unsigned int NO_CACHE = 0;
|
||||||
|
const unsigned int DATA_CACHE = 1;
|
||||||
|
// const unsigned int INSTRUCTION_CACHE = 2;
|
||||||
|
const unsigned int UNIFIED_CACHE = 3;
|
||||||
unsigned int smt_width = 0;
|
unsigned int smt_width = 0;
|
||||||
unsigned int n_cores;
|
unsigned int n_cores = 0;
|
||||||
unsigned int data[4];
|
unsigned int data[4];
|
||||||
|
|
||||||
if ((type_ & tINTEL) == 0) {
|
/*
|
||||||
fprintf(stderr, "ERR cache hierarchy querying is not supported\n");
|
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
|
||||||
throw Error(ERR_INTERNAL);
|
If x2APIC is supported, these are the only correct numbers.
|
||||||
}
|
*/
|
||||||
|
|
||||||
// if leaf 11 exists, we use it to get the number of smt cores and cores on socket
|
|
||||||
// If x2APIC is supported, these are the only correct numbers.
|
|
||||||
getCpuidEx(0x0, 0, data);
|
getCpuidEx(0x0, 0, data);
|
||||||
if(data[0] >= 11){
|
if (data[0] >= 11) {
|
||||||
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
|
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
|
||||||
smt_width = (data[1] & 0x7FFF);
|
smt_width = data[1] & 0x7FFF;
|
||||||
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
|
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
|
||||||
n_cores = (data[1] & 0x7FFF);
|
n_cores = data[1] & 0x7FFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Assumptions:
|
/*
|
||||||
* - the first level of data cache is not shared (which is the
|
Assumptions:
|
||||||
* case for every existing architecture) and use this to
|
the first level of data cache is not shared (which is the
|
||||||
* determine the SMT width for arch not supporting leaf 11
|
case for every existing architecture) and use this to
|
||||||
* - when leaf 4 reports a number of core less than n_cores
|
determine the SMT width for architectures not supporting leaf 11.
|
||||||
* on socket reported by leaf 11, then it is a correct number
|
when leaf 4 reports a number of cores less than n_cores
|
||||||
* of cores not an upperbound */
|
on socket reported by leaf 11, then it is a correct number
|
||||||
for (int i = 0; ((cache_type != NO_CACHE) && (data_cache_levels < max_number_cache_levels)); i++) {
|
of cores, not an upper bound.
|
||||||
|
*/
|
||||||
|
for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
|
||||||
getCpuidEx(0x4, i, data);
|
getCpuidEx(0x4, i, data);
|
||||||
cache_type = value_from_bits(data[0], 0, 4);
|
unsigned int cacheType = extractBit(data[0], 0, 4);
|
||||||
if ((cache_type == DATA_CACHE) || (cache_type == UNIFIED_CACHE)) {
|
if (cacheType == NO_CACHE) break;
|
||||||
int nb_logical_cores = (std::min)(value_from_bits(data[0], 14, 25) + 1,
|
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||||
n_cores);
|
unsigned int nb_logical_cores = (std::min)(extractBit(data[0], 14, 25) + 1, n_cores);
|
||||||
data_cache_size[data_cache_levels] =
|
data_cache_size[data_cache_levels] =
|
||||||
(value_from_bits(data[1], 22, 31) + 1)
|
(extractBit(data[1], 22, 31) + 1)
|
||||||
* (value_from_bits(data[1], 12, 21) + 1)
|
* (extractBit(data[1], 12, 21) + 1)
|
||||||
* (value_from_bits(data[1], 0, 11) + 1)
|
* (extractBit(data[1], 0, 11) + 1)
|
||||||
* (data[2] + 1);
|
* (data[2] + 1);
|
||||||
if ((cache_type == DATA_CACHE) && (smt_width == 0)) smt_width = nb_logical_cores;
|
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
|
||||||
assert(smt_width != 0);
|
assert(smt_width != 0);
|
||||||
cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width;
|
cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width;
|
||||||
data_cache_levels++;
|
data_cache_levels++;
|
||||||
|
@ -146,11 +148,24 @@ public:
|
||||||
int displayFamily; // family + extFamily
|
int displayFamily; // family + extFamily
|
||||||
int displayModel; // model + extModel
|
int displayModel; // model + extModel
|
||||||
|
|
||||||
static const unsigned int max_number_cache_levels = 10;
|
// may I make these members private?
|
||||||
unsigned int data_cache_size[max_number_cache_levels];
|
static const unsigned int maxNumberCacheLevels = 10;
|
||||||
unsigned int cores_sharing_data_cache[max_number_cache_levels];
|
unsigned int data_cache_size[maxNumberCacheLevels];
|
||||||
|
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
|
||||||
unsigned int data_cache_levels;
|
unsigned int data_cache_levels;
|
||||||
|
|
||||||
|
unsigned int getDataCacheLevels() const { return data_cache_levels; }
|
||||||
|
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||||
|
{
|
||||||
|
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||||
|
return cores_sharing_data_cache[i];
|
||||||
|
}
|
||||||
|
unsigned int getDataCacheSize(unsigned int i) const
|
||||||
|
{
|
||||||
|
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||||
|
return data_cache_size[i];
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
data[] = { eax, ebx, ecx, edx }
|
data[] = { eax, ebx, ecx, edx }
|
||||||
*/
|
*/
|
||||||
|
@ -183,10 +198,6 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
typedef uint64 Type;
|
typedef uint64 Type;
|
||||||
static const Type NO_CACHE = 0;
|
|
||||||
static const Type DATA_CACHE = 1;
|
|
||||||
static const Type INSTRUCTION_CACHE = 2;
|
|
||||||
static const Type UNIFIED_CACHE = 3;
|
|
||||||
|
|
||||||
static const Type NONE = 0;
|
static const Type NONE = 0;
|
||||||
static const Type tMMX = 1 << 0;
|
static const Type tMMX = 1 << 0;
|
||||||
|
@ -346,8 +357,7 @@ public:
|
||||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||||
}
|
}
|
||||||
setFamily();
|
setFamily();
|
||||||
if ((type_ & tINTEL) == tINTEL)
|
setCacheHierarchy();
|
||||||
setCacheHierarchy();
|
|
||||||
}
|
}
|
||||||
void putFamily() const
|
void putFamily() const
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue