Skip to content

Commit

Permalink
detect AVX-512 FMA count (#125)
Browse files Browse the repository at this point in the history
* add Ice Lake Server and Sapphire Rapids models

The information contained in this commit was obtained from
"Intel® Architecture Instruction Set Extensions and Future Features Programming Reference" document 319433-040 from
https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Signed-off-by: Jeff Hammond <[email protected]>

* Tiger Lake; Ice Lake NNP-I; SPR string

Signed-off-by: Hammond, Jeff R <[email protected]>

* second FMA features - incomplete and wrong

Signed-off-by: Hammond, Jeff R <[email protected]>

* oops: use T/F not 2/1

Signed-off-by: Jeff Hammond <[email protected]>

* implement SKX lookup

Signed-off-by: Hammond, Jeff R <[email protected]>

* add Intel copyright

* cleanup AVX512 second FMA code

1) remove debug stuff
2) remove ICX - will add details when available

Signed-off-by: Hammond, Jeff R <[email protected]>

* fix CPX detection

Signed-off-by: Hammond, Jeff R <[email protected]>

* remove elses

Signed-off-by: Hammond, Jeff R <[email protected]>

* remove curly braces from single-line conditional bodies

Signed-off-by: Hammond, Jeff R <[email protected]>

* apply clang-format

Signed-off-by: Hammond, Jeff R <[email protected]>

Fixes #120
  • Loading branch information
Jeff Hammond authored Sep 22, 2020
1 parent 76dafc7 commit 17ffb65
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 14 deletions.
5 changes: 4 additions & 1 deletion include/cpuinfo_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,11 @@ typedef struct {
int avx512bitalg : 1;
int avx512vpopcntdq : 1;
int avx512_4vnniw : 1;
int avx512_4vbmi2 : 1;
int avx512_second_fma : 1;
int avx512_4fmaps : 1;
int avx512_bf16 : 1;
int avx512_vp2intersect : 1;

int amx_bf16 : 1;
int amx_tile : 1;
int amx_int8 : 1;
Expand Down Expand Up @@ -194,6 +195,8 @@ typedef enum {
X86_AVX512BITALG,
X86_AVX512VPOPCNTDQ,
X86_AVX512_4VNNIW,
X86_AVX512_4VBMI2,
X86_AVX512_SECOND_FMA,
X86_AVX512_4FMAPS,
X86_AVX512_BF16,
X86_AVX512_VP2INTERSECT,
Expand Down
72 changes: 59 additions & 13 deletions src/cpuinfo_x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@
// limitations under the License.

#include "cpuinfo_x86.h"
#include "internal/bit_utils.h"
#include "internal/cpuid_x86.h"

#include <stdbool.h>
#include <string.h>

#include "internal/bit_utils.h"
#include "internal/cpuid_x86.h"

#if !defined(CPU_FEATURES_ARCH_X86)
#error "Cannot compile cpuinfo_x86 on a non x86 platform."
#endif
Expand Down Expand Up @@ -125,6 +126,35 @@ static bool HasTmmOsXSave(uint32_t xcr0_eax) {
MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA);
}

// Reports whether a CPU with the given model number is treated as having a
// second AVX-512 FMA unit.
//
// Cannon Lake client (0x66) and Ice Lake client (0x7d, 0x7e) always report
// false. Skylake server (0x55) is decided from fixed character offsets in the
// brand string written by FillX86BrandString. Every other model reports true.
// NOTE(review): the offsets presumably assume a brand string shaped like
// "Intel(R) Xeon(R) <tier> <number> ..." - confirm against real CPUID output.
static bool HasSecondFMA(uint32_t model) {
  switch (model) {
    case 0x66:  // Cannon Lake client
    case 0x7d:  // Ice Lake client
    case 0x7e:  // Ice Lake client
      return false;
    case 0x55:  // Skylake server: decided from the brand string below
      break;
    default:
      // This is the right default...
      return true;
  }

  char brand[49] = {0};
  FillX86BrandString(brand);

  // Without 'X' at offset 9 (the Xeon check) the answer is true.
  if (brand[9] != 'X') return true;

  // Offset 17 holds the first letter of the product tier.
  switch (brand[17]) {
    case 'S':  // Silver
    case 'B':  // Bronze
      return false;
    case 'G':  // Gold
      // Only Gold parts with '5' at offset 22 can lack the second FMA.
      if (brand[22] != '5') return true;
      // Gold 5_20 and below lack it, except Gold 53__ and parts with '2','2'
      // at offsets 24 and 25.
      if (brand[23] == '3') return true;
      return brand[24] == '2' && brand[25] == '2';
    case 'W':  // Xeon W 210x
      return brand[21] != '0';
    case 'D':  // Xeon D 2xxx, matched on '2','1' at offsets 19 and 20
      return !(brand[19] == '2' && brand[20] == '1');
    default:
      return true;
  }
}

static void SetVendor(const Leaf leaf, char* const vendor) {
*(uint32_t*)(vendor) = leaf.ebx;
*(uint32_t*)(vendor + 4) = leaf.edx;
Expand Down Expand Up @@ -1059,7 +1089,8 @@ typedef struct {
} OsSupport;

// Reference https://en.wikipedia.org/wiki/CPUID.
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* os_support) {
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info,
OsSupport* os_support) {
const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1);
const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7);
const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1);
Expand Down Expand Up @@ -1141,6 +1172,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport*
features->avx512bitalg = IsBitSet(leaf_7.ecx, 12);
features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14);
features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2);
features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3);
features->avx512_second_fma = HasSecondFMA(info->model);
features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3);
features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5);
features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8);
Expand All @@ -1153,7 +1186,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport*
}
}

// Reference https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented.
// Reference
// https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented.
static void ParseExtraAMDCpuId(X86Info* info, OsSupport os_support) {
const Leaf leaf_80000000 = CpuId(0x80000000);
const uint32_t max_extended_cpuid_leaf = leaf_80000000.eax;
Expand Down Expand Up @@ -1265,11 +1299,11 @@ X86Microarchitecture GetX86Microarchitecture(const X86Info* info) {
case CPUID(0x06, 0x66):
// https://en.wikipedia.org/wiki/Cannon_Lake_(microarchitecture)
return INTEL_CNL;
case CPUID(0x06, 0x7D): // client
case CPUID(0x06, 0x7E): // client
case CPUID(0x06, 0x9D): // NNP-I
case CPUID(0x06, 0x6A): // server
case CPUID(0x06, 0x6C): // server
case CPUID(0x06, 0x7D): // client
case CPUID(0x06, 0x7E): // client
case CPUID(0x06, 0x9D): // NNP-I
case CPUID(0x06, 0x6A): // server
case CPUID(0x06, 0x6C): // server
// https://en.wikipedia.org/wiki/Ice_Lake_(microprocessor)
return INTEL_ICL;
case CPUID(0x06, 0x8C):
Expand All @@ -1281,10 +1315,14 @@ X86Microarchitecture GetX86Microarchitecture(const X86Info* info) {
return INTEL_SPR;
case CPUID(0x06, 0x8E):
switch (info->stepping) {
case 9: return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake
case 10: return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake
case 11: return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture)
default: return X86_UNKNOWN;
case 9:
return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake
case 10:
return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake
case 11:
return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture)
default:
return X86_UNKNOWN;
}
case CPUID(0x06, 0x9E):
if (info->stepping > 9) {
Expand Down Expand Up @@ -1427,6 +1465,10 @@ int GetX86FeaturesEnumValue(const X86Features* features,
return features->avx512vpopcntdq;
case X86_AVX512_4VNNIW:
return features->avx512_4vnniw;
case X86_AVX512_4VBMI2:
return features->avx512_4vbmi2;
case X86_AVX512_SECOND_FMA:
return features->avx512_second_fma;
case X86_AVX512_4FMAPS:
return features->avx512_4fmaps;
case X86_AVX512_BF16:
Expand Down Expand Up @@ -1551,6 +1593,10 @@ const char* GetX86FeaturesEnumName(X86FeaturesEnum value) {
return "avx512vpopcntdq";
case X86_AVX512_4VNNIW:
return "avx512_4vnniw";
case X86_AVX512_4VBMI2:
return "avx512_4vbmi2";
case X86_AVX512_SECOND_FMA:
return "avx512_second_fma";
case X86_AVX512_4FMAPS:
return "avx512_4fmaps";
case X86_AVX512_BF16:
Expand Down

0 comments on commit 17ffb65

Please sign in to comment.