Skip to content

Commit

Permalink
feat: add support for NCNN_ISA environment variable detection
Browse files Browse the repository at this point in the history
Changes:
- Implemented `get_isa_env` function to retrieve and parse ISA flags from the environment variable.

Signed-off-by: Kaiyao Duan <[email protected]>
  • Loading branch information
inspireMeNow committed Sep 9, 2024
1 parent 6211838 commit 54d7c94
Showing 1 changed file with 92 additions and 14 deletions.
106 changes: 92 additions & 14 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#endif // __wasi__
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#ifdef _OPENMP
#if NCNN_SIMPLEOMP
Expand Down Expand Up @@ -1859,6 +1860,55 @@ static int detect_cpu_is_arm_a53_a55()
#endif // __aarch64__
#endif // defined __ANDROID__ || defined __linux__

// Check whether an ISA feature has been switched off through the NCNN_ISA
// environment variable.
//
// NCNN_ISA is read as a list of tokens separated by spaces and/or commas,
// e.g. NCNN_ISA="-avx2,-fma". A token must equal isa_flags exactly; a token
// that merely starts with isa_flags (e.g. "-avx2" vs "-avx") does not match.
//
// isa_flags  the flag to look for, including its leading sign, e.g. "-avx2".
//            Only flags that start with '-' can ever be reported as disabled.
//
// Returns 1 if the flag starts with '-' and is listed in NCNN_ISA (a warning
// naming the feature without its '-' is written to stderr), 0 otherwise
// (variable unset, flag not listed, or flag is NULL / does not start with '-').
static int get_isa_env(const char* isa_flags)
{
    // Flags without a leading '-' (including "+xxx" and "") never disable anything.
    if (!isa_flags || isa_flags[0] != '-')
        return 0;

    const char* isa = getenv("NCNN_ISA");
    if (!isa)
        return 0;

    static const char delimiters[] = " ,";
    const size_t flag_len = strlen(isa_flags);

    // Scan the value in place: no copy to allocate (and leak) and no strtok,
    // whose hidden static state makes it unsafe to call from several threads.
    const char* cursor = isa;
    while (*cursor != '\0')
    {
        // skip any run of delimiters in front of the next token
        cursor += strspn(cursor, delimiters);

        const size_t token_len = strcspn(cursor, delimiters);
        if (token_len == flag_len && memcmp(cursor, isa_flags, flag_len) == 0)
        {
            // report the feature name without the leading '-'
            fprintf(stderr, "warning: %s disabled via environment variable!\n", isa_flags + 1);
            return 1;
        }

        cursor += token_len;
    }

    return 0;
}

#if (__aarch64__ || __arm__)
// Per-feature "disabled by the user" switches for ARM builds.
// Each flag is initialized with the result of get_isa_env() for the matching
// "-<feature>" token of the NCNN_ISA environment variable, so it is non-zero
// only when that token is listed there. get_isa_env() prints a warning to
// stderr for every token it finds, so the order of these initializers is also
// the order of those warnings.
// The cpu_support_arm_<feature>() query functions test the corresponding flag
// and return 0 when it is set.

// aarch64 features
static int is_cpu_arm_cpuid_disabled = get_isa_env("-cpuid");
static int is_cpu_arm_asimdhp_disabled = get_isa_env("-asimdhp");
static int is_cpu_arm_asimddp_disabled = get_isa_env("-asimddp");
static int is_cpu_arm_asimdfhm_disabled = get_isa_env("-asimdfhm");
static int is_cpu_arm_bf16_disabled = get_isa_env("-bf16");
static int is_cpu_arm_i8mm_disabled = get_isa_env("-i8mm");
static int is_cpu_arm_sve_disabled = get_isa_env("-sve");
static int is_cpu_arm_sve2_disabled = get_isa_env("-sve2");
static int is_cpu_arm_svebf16_disabled = get_isa_env("-svebf16");
static int is_cpu_arm_svei8mm_disabled = get_isa_env("-svei8mm");
static int is_cpu_arm_svef32mm_disabled = get_isa_env("-svef32mm");
// features that are only consulted on 32-bit arm builds
// (aarch64 builds report neon and vfpv4 as always available)
static int is_cpu_arm_edsp_disabled = get_isa_env("-edsp");
static int is_cpu_arm_vfpv4_disabled = get_isa_env("-vfpv4");
static int is_cpu_arm_neon_disabled = get_isa_env("-neon");
#endif

// the initialization
static void initialize_global_cpu_info()
{
Expand Down Expand Up @@ -1909,16 +1959,16 @@ static void initialize_global_cpu_info()
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
g_cpu_support_x86_avx = get_cpu_support_x86_avx();
g_cpu_support_x86_fma = get_cpu_support_x86_fma();
g_cpu_support_x86_xop = get_cpu_support_x86_xop();
g_cpu_support_x86_f16c = get_cpu_support_x86_f16c();
g_cpu_support_x86_avx2 = get_cpu_support_x86_avx2();
g_cpu_support_x86_avx_vnni = get_cpu_support_x86_avx_vnni();
g_cpu_support_x86_avx512 = get_cpu_support_x86_avx512();
g_cpu_support_x86_avx512_vnni = get_cpu_support_x86_avx512_vnni();
g_cpu_support_x86_avx512_bf16 = get_cpu_support_x86_avx512_bf16();
g_cpu_support_x86_avx512_fp16 = get_cpu_support_x86_avx512_fp16();
g_cpu_support_x86_avx = get_cpu_support_x86_avx() && !(get_isa_env("-avx"));
g_cpu_support_x86_fma = get_cpu_support_x86_fma() && !(get_isa_env("-fma"));
g_cpu_support_x86_xop = get_cpu_support_x86_xop() && !(get_isa_env("-xop"));
g_cpu_support_x86_f16c = get_cpu_support_x86_f16c() && !(get_isa_env("-f16c"));
g_cpu_support_x86_avx2 = get_cpu_support_x86_avx2() && !(get_isa_env("-avx2"));
g_cpu_support_x86_avx_vnni = get_cpu_support_x86_avx_vnni() && !(get_isa_env("-avx_vnni"));
g_cpu_support_x86_avx512 = get_cpu_support_x86_avx512() && !(get_isa_env("-avx512"));
g_cpu_support_x86_avx512_vnni = get_cpu_support_x86_avx512_vnni() && !(get_isa_env("-avx512_vnni"));
g_cpu_support_x86_avx512_bf16 = get_cpu_support_x86_avx512_bf16() && !(get_isa_env("-bf16"));
g_cpu_support_x86_avx512_fp16 = get_cpu_support_x86_avx512_fp16() && !(get_isa_env("-fp16"));
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)

g_cpu_level2_cachesize = get_cpu_level2_cachesize();
Expand Down Expand Up @@ -2087,6 +2137,8 @@ int cpu_support_arm_edsp()
{
try_initialize_global_cpu_info();
#if __arm__ && !__aarch64__
if (is_cpu_arm_edsp_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_edsp;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2107,6 +2159,8 @@ int cpu_support_arm_neon()
#if __aarch64__
return 1;
#elif __arm__
if (is_cpu_arm_neon_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_neon;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2127,6 +2181,8 @@ int cpu_support_arm_vfpv4()
#if __aarch64__
return 1;
#elif __arm__
if (is_cpu_arm_vfpv4_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_vfpv4;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2145,6 +2201,8 @@ int cpu_support_arm_asimdhp()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_asimdhp_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_asimdhp;
#elif defined __ANDROID__ || defined __linux__
Expand Down Expand Up @@ -2173,6 +2231,8 @@ int cpu_support_arm_cpuid()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_cpuid_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_cpuid;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2191,6 +2251,8 @@ int cpu_support_arm_asimddp()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_asimddp_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_asimddp;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2217,6 +2279,8 @@ int cpu_support_arm_asimdfhm()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_asimdfhm_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_asimdfhm;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2243,6 +2307,8 @@ int cpu_support_arm_bf16()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_bf16_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_bf16;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2267,6 +2333,8 @@ int cpu_support_arm_i8mm()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_i8mm_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_i8mm;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2291,6 +2359,8 @@ int cpu_support_arm_sve()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_sve_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_sve;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2309,6 +2379,8 @@ int cpu_support_arm_sve2()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_sve2_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_sve2;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2327,6 +2399,8 @@ int cpu_support_arm_svebf16()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_svebf16_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_svebf16;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2345,6 +2419,8 @@ int cpu_support_arm_svei8mm()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_svei8mm_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_svei8mm;
#elif defined __ANDROID__ || defined __linux__
Expand All @@ -2363,6 +2439,8 @@ int cpu_support_arm_svef32mm()
{
try_initialize_global_cpu_info();
#if __aarch64__
if (is_cpu_arm_svef32mm_disabled)
return 0;
#if defined _WIN32
return g_cpu_support_arm_svef32mm;
#elif defined __ANDROID__ || defined __linux__
Expand Down Expand Up @@ -2482,7 +2560,7 @@ int cpu_support_mips_msa()
try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__
#if __mips__
return g_hwcaps & HWCAP_MIPS_MSA;
return (g_hwcaps & HWCAP_MIPS_MSA) && !get_isa_env(("-msa"));
#else
return 0;
#endif
Expand Down Expand Up @@ -2538,7 +2616,7 @@ int cpu_support_riscv_v()
try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__
#if __riscv
return g_hwcaps & COMPAT_HWCAP_ISA_V;
return (g_hwcaps & COMPAT_HWCAP_ISA_V) && !get_isa_env(("-rvv"));
#else
return 0;
#endif
Expand All @@ -2554,7 +2632,7 @@ int cpu_support_riscv_zfh()
#if __riscv
// v + f does not imply zfh, but how to discover zfh properly ?
// upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
return (g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F) && !get_isa_env(("-rvzfh"));
#else
return 0;
#endif
Expand All @@ -2577,7 +2655,7 @@ int cpu_riscv_vlenb()
: "=r"(a)
:
: "memory", "a3");
return a;
return a && !get_isa_env(("-rvvlenb"));
#else
return 0;
#endif
Expand Down

0 comments on commit 54d7c94

Please sign in to comment.