diff --git a/3rdparty/cpuinfo/include/cpuinfo-mock.h b/3rdparty/cpuinfo/include/cpuinfo-mock.h index 5e129aa659904..7bb6d1eed082f 100644 --- a/3rdparty/cpuinfo/include/cpuinfo-mock.h +++ b/3rdparty/cpuinfo/include/cpuinfo-mock.h @@ -60,7 +60,7 @@ ssize_t CPUINFO_ABI cpuinfo_mock_read(int fd, void* buffer, size_t capacity); void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap); #endif #if CPUINFO_ARCH_ARM -void CPUINFO_ABI cpuinfo_set_hwcap2(uint32_t hwcap2); +void CPUINFO_ABI cpuinfo_set_hwcap2(uint64_t hwcap2); #endif #endif diff --git a/3rdparty/cpuinfo/include/cpuinfo.h b/3rdparty/cpuinfo/include/cpuinfo.h index 2d74b62fd6e6f..387611cc9bd3b 100644 --- a/3rdparty/cpuinfo/include/cpuinfo.h +++ b/3rdparty/cpuinfo/include/cpuinfo.h @@ -496,13 +496,19 @@ enum cpuinfo_uarch { cpuinfo_uarch_cortex_x2 = 0x00300502, /** ARM Cortex-X3. */ cpuinfo_uarch_cortex_x3 = 0x00300503, + /** ARM Cortex-X4. */ + cpuinfo_uarch_cortex_x4 = 0x00300504, /** ARM Cortex-A510. */ cpuinfo_uarch_cortex_a510 = 0x00300551, + /** ARM Cortex-A520. */ + cpuinfo_uarch_cortex_a520 = 0x00300552, /** ARM Cortex-A710. */ cpuinfo_uarch_cortex_a710 = 0x00300571, /** ARM Cortex-A715. */ cpuinfo_uarch_cortex_a715 = 0x00300572, + /** ARM Cortex-A720. */ + cpuinfo_uarch_cortex_a720 = 0x00300573, /** Qualcomm Scorpion. */ cpuinfo_uarch_scorpion = 0x00400100, @@ -1664,6 +1670,14 @@ struct cpuinfo_arm_isa { bool sve; bool sve2; bool i8mm; + bool sme; + bool sme2; + bool sme2p1; + bool sme_i16i32; + bool sme_bi32i32; + bool sme_b16b16; + bool sme_f16f16; + uint32_t svelen; #endif bool rdm; bool fp16arith; @@ -2036,6 +2050,71 @@ static inline bool cpuinfo_has_arm_sve2(void) { #endif } +// Function to get the max SVE vector length on ARM CPU's which support SVE. 
+static inline uint32_t cpuinfo_get_max_arm_sve_length(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.svelen * 8; // bytes * 8 = bit length(vector length) +#else + return 0; +#endif +} + +static inline bool cpuinfo_has_arm_sme(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme2(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme2p1(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme2p1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_i16i32(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_i16i32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_bi32i32(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_bi32i32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_b16b16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_b16b16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_f16f16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_f16f16; +#else + return false; +#endif +} + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 /* This structure is not a part of stable API. Use cpuinfo_has_riscv_* functions * instead. 
*/ diff --git a/3rdparty/cpuinfo/src/arm/api.h b/3rdparty/cpuinfo/src/arm/api.h index 9cfedf60dae18..ac735e3e780de 100644 --- a/3rdparty/cpuinfo/src/arm/api.h +++ b/3rdparty/cpuinfo/src/arm/api.h @@ -64,6 +64,7 @@ enum cpuinfo_arm_chipset_series { cpuinfo_arm_chipset_series_telechips_tcc, cpuinfo_arm_chipset_series_texas_instruments_omap, cpuinfo_arm_chipset_series_unisoc_t, + cpuinfo_arm_chipset_series_unisoc_ums, cpuinfo_arm_chipset_series_wondermedia_wm, cpuinfo_arm_chipset_series_max, }; diff --git a/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c b/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c index bd5020c7c2dd9..29663a70ccd96 100644 --- a/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c +++ b/3rdparty/cpuinfo/src/arm/linux/aarch32-isa.c @@ -24,7 +24,7 @@ void cpuinfo_set_wcid(uint32_t wcid) { void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( uint32_t features, - uint32_t features2, + uint64_t features2, uint32_t midr, uint32_t architecture_version, uint32_t architecture_flags, @@ -147,6 +147,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( "VDOT instructions disabled: cause occasional SIGILL on Spreadtrum SC9863A"); } else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_t && chipset->model == 310) { cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc T310"); + } else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_ums && chipset->model == 312) { + cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc UMS312"); } else { switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { case UINT32_C(0x4100D0B0): /* Cortex-A76 */ diff --git a/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c b/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c index db5349ecf8f99..bc2186f61c05a 100644 --- a/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c +++ b/3rdparty/cpuinfo/src/arm/linux/aarch64-isa.c @@ -3,9 +3,11 @@ #include #include +#include + void 
cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( uint32_t features, - uint32_t features2, + uint64_t features2, uint32_t midr, const struct cpuinfo_arm_chipset chipset[restrict static 1], struct cpuinfo_arm_isa isa[restrict static 1]) { @@ -142,6 +144,27 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) { isa->sve2 = true; } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME) { + isa->sme = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME2) { + isa->sme2 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME2P1) { + isa->sme2p1 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_I16I32) { + isa->sme_i16i32 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_BI32I32) { + isa->sme_bi32i32 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_B16B16) { + isa->sme_b16b16 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_F16F16) { + isa->sme_f16f16 = true; + } // SVEBF16 is set iff SVE and BF16 are both supported, but the SVEBF16 // feature flag was added in Linux kernel before the BF16 feature flag, // so we check for either. 
@@ -151,4 +174,21 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM) { isa->fhm = true; } + +#ifndef PR_SVE_GET_VL +#define PR_SVE_GET_VL 51 +#endif + +#ifndef PR_SVE_VL_LEN_MASK +#define PR_SVE_VL_LEN_MASK 0xffff +#endif + + int ret = prctl(PR_SVE_GET_VL); + if (ret < 0) { + cpuinfo_log_warning("No SVE support on this machine"); + isa->svelen = 0; // Assume no SVE support if the call fails + } else { + // Mask out the SVE vector length bits + isa->svelen = ret & PR_SVE_VL_LEN_MASK; + } } diff --git a/3rdparty/cpuinfo/src/arm/linux/api.h b/3rdparty/cpuinfo/src/arm/linux/api.h index 365fea6c7aaa5..14fed7ceb562d 100644 --- a/3rdparty/cpuinfo/src/arm/linux/api.h +++ b/3rdparty/cpuinfo/src/arm/linux/api.h @@ -137,6 +137,13 @@ struct cpuinfo_arm_linux_proc_cpuinfo_cache { #define CPUINFO_ARM_LINUX_FEATURE2_DGH UINT32_C(0x00008000) #define CPUINFO_ARM_LINUX_FEATURE2_RNG UINT32_C(0x00010000) #define CPUINFO_ARM_LINUX_FEATURE2_BTI UINT32_C(0x00020000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME UINT32_C(0x00800000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME2 UINT64_C(0x0000002000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME2P1 UINT64_C(0x0000004000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME_I16I32 UINT64_C(0x0000008000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME_BI32I32 UINT64_C(0x0000010000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME_B16B16 UINT64_C(0x0000020000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME_F16F16 UINT64_C(0x0000040000000000) #endif #define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000) @@ -172,7 +179,7 @@ struct cpuinfo_arm_linux_processor { struct cpuinfo_arm_linux_proc_cpuinfo_cache proc_cpuinfo_cache; #endif uint32_t features; - uint32_t features2; + uint64_t features2; /** * Main ID Register value. 
*/ @@ -295,14 +302,14 @@ CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo( #if CPUINFO_ARCH_ARM CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_getauxval( uint32_t hwcap[restrict static 1], - uint32_t hwcap2[restrict static 1]); + uint64_t hwcap2[restrict static 1]); CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_procfs( uint32_t hwcap[restrict static 1], - uint32_t hwcap2[restrict static 1]); + uint64_t hwcap2[restrict static 1]); CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( uint32_t features, - uint32_t features2, + uint64_t features2, uint32_t midr, uint32_t architecture_version, uint32_t architecture_flags, @@ -311,11 +318,11 @@ CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( #elif CPUINFO_ARCH_ARM64 CPUINFO_INTERNAL void cpuinfo_arm_linux_hwcap_from_getauxval( uint32_t hwcap[restrict static 1], - uint32_t hwcap2[restrict static 1]); + uint64_t hwcap2[restrict static 1]); CPUINFO_INTERNAL void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( uint32_t features, - uint32_t features2, + uint64_t features2, uint32_t midr, const struct cpuinfo_arm_chipset chipset[restrict static 1], struct cpuinfo_arm_isa isa[restrict static 1]); diff --git a/3rdparty/cpuinfo/src/arm/linux/chipset.c b/3rdparty/cpuinfo/src/arm/linux/chipset.c index 1f93351dddb0e..c4977c388880b 100644 --- a/3rdparty/cpuinfo/src/arm/linux/chipset.c +++ b/3rdparty/cpuinfo/src/arm/linux/chipset.c @@ -85,6 +85,7 @@ static enum cpuinfo_arm_chipset_vendor chipset_series_vendor[cpuinfo_arm_chipset [cpuinfo_arm_chipset_series_telechips_tcc] = cpuinfo_arm_chipset_vendor_telechips, [cpuinfo_arm_chipset_series_texas_instruments_omap] = cpuinfo_arm_chipset_vendor_texas_instruments, [cpuinfo_arm_chipset_series_unisoc_t] = cpuinfo_arm_chipset_vendor_unisoc, + [cpuinfo_arm_chipset_series_unisoc_ums] = cpuinfo_arm_chipset_vendor_unisoc, [cpuinfo_arm_chipset_series_wondermedia_wm] = cpuinfo_arm_chipset_vendor_wondermedia, }; @@ -959,6 +960,70 @@ static 
bool match_t(const char* start, const char* end, struct cpuinfo_arm_chips return true; } +/** + * Tries to match, case-sensitively, /Unisoc UMS\d{3,4}/ signature for Unisoc UMS + * chipset. If match successful, extracts model information into \p chipset + * argument. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware + * string, ro.product.board, ro.board.platform, or ro.chipname) to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardware string, + * ro.product.board, ro.board.platform, or ro.chipname) to match. + * @param[out] chipset - location where chipset information will be stored upon + * a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_ums(const char* start, const char* end, struct cpuinfo_arm_chipset chipset[restrict static 1]) { + /* Expect 13-14 symbols: "Unisoc UMS" (10 symbols) + 3-4-digit model number + */ + const size_t length = end - start; + switch (length) { + case 13: + case 14: + break; + default: + return false; + } + + /* Check that string starts with "Unisoc UMS". 
The first four characters + * are loaded as 32-bit little endian word */ + const uint32_t expected_unis = load_u32le(start); + if (expected_unis != UINT32_C(0x73696E55) /* "sinU" = reverse("Unis") */) { + return false; + } + + /* The next four characters are loaded as 32-bit little endian word */ + const uint32_t expected_oc_u = load_u32le(start + 4); + if (expected_oc_u != UINT32_C(0x5520636F) /* "U co" = reverse("oc U") */) { + return false; + } + + /* The next two characters are loaded as 16-bit little endian word */ + const uint16_t expected_ms = load_u16le(start + 8); + if (expected_ms != UINT16_C(0x534D) /* "SM" = reverse("MS") */) { + return false; + } + + /* Validate and parse 3-4 digit model number */ + uint32_t model = 0; + for (uint32_t i = 10; i < length; i++) { + const uint32_t digit = (uint32_t)(uint8_t)start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + *chipset = (struct cpuinfo_arm_chipset){ + .vendor = cpuinfo_arm_chipset_vendor_unisoc, + .series = cpuinfo_arm_chipset_series_unisoc_ums, + .model = model, + }; + return true; +} + /** * Tries to match /lc\d{4}[a-z]?$/ signature for Leadcore LC chipsets. * If match successful, extracts model information into \p chipset argument. 
@@ -2508,6 +2573,16 @@ struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_ha return chipset; } + /* Check Unisoc UMS signature */ + if (match_ums(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Unisoc UMS signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int)hardware_length, + hardware); + + return chipset; + } + #if CPUINFO_ARCH_ARM /* Check Marvell PXA signature */ if (match_pxa(hardware, hardware_end, &chipset)) { @@ -3726,6 +3801,7 @@ static const char* chipset_series_string[cpuinfo_arm_chipset_series_max] = { [cpuinfo_arm_chipset_series_telechips_tcc] = "TCC", [cpuinfo_arm_chipset_series_texas_instruments_omap] = "OMAP", [cpuinfo_arm_chipset_series_unisoc_t] = "T", + [cpuinfo_arm_chipset_series_unisoc_ums] = "UMS", [cpuinfo_arm_chipset_series_wondermedia_wm] = "WM", }; diff --git a/3rdparty/cpuinfo/src/arm/linux/hwcap.c b/3rdparty/cpuinfo/src/arm/linux/hwcap.c index e836548db190a..7f7b4dfddb46b 100644 --- a/3rdparty/cpuinfo/src/arm/linux/hwcap.c +++ b/3rdparty/cpuinfo/src/arm/linux/hwcap.c @@ -31,8 +31,8 @@ void cpuinfo_set_hwcap(uint32_t hwcap) { mock_hwcap = hwcap; } -static uint32_t mock_hwcap2 = 0; -void cpuinfo_set_hwcap2(uint32_t hwcap2) { +static uint64_t mock_hwcap2 = 0; +void cpuinfo_set_hwcap2(uint64_t hwcap2) { mock_hwcap2 = hwcap2; } #endif @@ -40,7 +40,7 @@ void cpuinfo_set_hwcap2(uint32_t hwcap2) { #if CPUINFO_ARCH_ARM typedef unsigned long (*getauxval_function_t)(unsigned long); -bool cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint32_t hwcap2[restrict static 1]) { +bool cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) { #if CPUINFO_MOCK *hwcap = mock_hwcap; *hwcap2 = mock_hwcap2; @@ -83,13 +83,13 @@ bool cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], u } #ifdef __ANDROID__ -bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint32_t hwcap2[restrict 
static 1]) { +bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) { #if CPUINFO_MOCK *hwcap = mock_hwcap; *hwcap2 = mock_hwcap2; return true; #else - uint32_t hwcaps[2] = {0, 0}; + uint64_t hwcaps[2] = {0, 0}; bool result = false; int file = -1; @@ -113,7 +113,7 @@ bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint hwcaps[0] = (uint32_t)elf_auxv.a_un.a_val; break; case AT_HWCAP2: - hwcaps[1] = (uint32_t)elf_auxv.a_un.a_val; + hwcaps[1] = (uint64_t)elf_auxv.a_un.a_val; break; } } else { @@ -141,13 +141,13 @@ bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint } #endif /* __ANDROID__ */ #elif CPUINFO_ARCH_ARM64 -void cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint32_t hwcap2[restrict static 1]) { +void cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) { #if CPUINFO_MOCK *hwcap = mock_hwcap; *hwcap2 = mock_hwcap2; #else *hwcap = (uint32_t)getauxval(AT_HWCAP); - *hwcap2 = (uint32_t)getauxval(AT_HWCAP2); + *hwcap2 = (uint64_t)getauxval(AT_HWCAP2); return; #endif } diff --git a/3rdparty/cpuinfo/src/arm/linux/init.c b/3rdparty/cpuinfo/src/arm/linux/init.c index 988f05aaa787d..1eab69d5fce92 100644 --- a/3rdparty/cpuinfo/src/arm/linux/init.c +++ b/3rdparty/cpuinfo/src/arm/linux/init.c @@ -247,7 +247,8 @@ void cpuinfo_arm_linux_init(void) { #endif #if CPUINFO_ARCH_ARM - uint32_t isa_features = 0, isa_features2 = 0; + uint32_t isa_features = 0; + uint64_t isa_features2 = 0; #ifdef __ANDROID__ /* * On Android before API 20, libc.so does not provide getauxval @@ -299,7 +300,8 @@ void cpuinfo_arm_linux_init(void) { &chipset, &cpuinfo_isa); #elif CPUINFO_ARCH_ARM64 - uint32_t isa_features = 0, isa_features2 = 0; + uint32_t isa_features = 0; + uint64_t isa_features2 = 0; /* getauxval is always available on ARM64 Android */ 
cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2); cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( @@ -333,18 +335,52 @@ void cpuinfo_arm_linux_init(void) { } /* Propagate topology group IDs among siblings */ + bool detected_core_siblings_list_node = false; + bool detected_cluster_cpus_list_node = false; for (uint32_t i = 0; i < arm_linux_processors_count; i++) { if (!bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { continue; } - if (arm_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) { + if (!bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_PACKAGE_ID)) { + continue; + } + + /* Use the cluster_cpus_list topology node if available. If not + * found, cache the result to avoid repeatedly attempting to + * read the non-existent paths. + * */ + if (!detected_core_siblings_list_node && !detected_cluster_cpus_list_node) { + if (cpuinfo_linux_detect_cluster_cpus( + arm_linux_processors_count, + i, + (cpuinfo_siblings_callback)cluster_siblings_parser, + arm_linux_processors)) { + detected_cluster_cpus_list_node = true; + continue; + } else { + detected_core_siblings_list_node = true; + } + } + + /* The cached result above will guarantee only one of the blocks + * below will execute, with a bias towards cluster_cpus_list. 
+ **/ + if (detected_core_siblings_list_node) { cpuinfo_linux_detect_core_siblings( arm_linux_processors_count, i, (cpuinfo_siblings_callback)cluster_siblings_parser, arm_linux_processors); } + + if (detected_cluster_cpus_list_node) { + cpuinfo_linux_detect_cluster_cpus( + arm_linux_processors_count, + i, + (cpuinfo_siblings_callback)cluster_siblings_parser, + arm_linux_processors); + } } /* Propagate all cluster IDs */ diff --git a/3rdparty/cpuinfo/src/arm/mach/init.c b/3rdparty/cpuinfo/src/arm/mach/init.c index 9d83c05a95742..3fb62414e0f4b 100644 --- a/3rdparty/cpuinfo/src/arm/mach/init.c +++ b/3rdparty/cpuinfo/src/arm/mach/init.c @@ -399,6 +399,16 @@ void cpuinfo_arm_mach_init(void) { cpuinfo_isa.i8mm = true; } + const uint32_t has_feat_sme = get_sys_info_by_name("hw.optional.arm.FEAT_SME"); + if (has_feat_sme != 0) { + cpuinfo_isa.sme = true; + } + + const uint32_t has_feat_sme2 = get_sys_info_by_name("hw.optional.arm.FEAT_SME2"); + if (has_feat_sme2 != 0) { + cpuinfo_isa.sme2 = true; + } + uint32_t num_clusters = 1; for (uint32_t i = 0; i < mach_topology.cores; i++) { cores[i] = (struct cpuinfo_core){ diff --git a/3rdparty/cpuinfo/src/freebsd/topology.c b/3rdparty/cpuinfo/src/freebsd/topology.c index da941e9cb243e..675a81f8bb467 100644 --- a/3rdparty/cpuinfo/src/freebsd/topology.c +++ b/3rdparty/cpuinfo/src/freebsd/topology.c @@ -24,8 +24,10 @@ static char* sysctl_str(const char* name) { size_t value_size = 0; if (sysctlbyname(name, NULL, &value_size, NULL, 0) != 0) { cpuinfo_log_error("sysctlbyname(\"%s\") failed: %s", name, strerror(errno)); + return NULL; } else if (value_size <= 0) { cpuinfo_log_error("sysctlbyname(\"%s\") returned invalid value size %zu", name, value_size); + return NULL; } value_size += 1; char* value = calloc(value_size, 1); @@ -52,29 +54,22 @@ struct cpuinfo_freebsd_topology cpuinfo_freebsd_detect_topology(void) { if (!topology_spec) { return topology; } - const char* group_tag = ""; - char* p = strstr(topology_spec, group_tag); - 
while (p) { - const char* cpu_tag = "cpu count=\""; - char* q = strstr(p, cpu_tag); - if (q) { - p = q + strlen(cpu_tag); - topology.packages += atoi(p); - } else { - break; - } - } - if (topology.packages == 0) { - const char* group_tag = "", " 0) { + break; + } } + if (topology.packages == 0) { - cpuinfo_log_error("failed to parse topology_spec:%s", topology_spec); + cpuinfo_log_error("failed to parse topology_spec: %s", topology_spec); free(topology_spec); goto fail; } @@ -84,6 +79,7 @@ struct cpuinfo_freebsd_topology cpuinfo_freebsd_detect_topology(void) { goto fail; } if (topology.cores < topology.packages) { + cpuinfo_log_error("invalid numbers of package and core: %d %d", topology.packages, topology.cores); goto fail; } topology.threads_per_core = sysctl_int("kern.smp.threads_per_core"); diff --git a/3rdparty/cpuinfo/src/x86/freebsd/init.c b/3rdparty/cpuinfo/src/x86/freebsd/init.c index c6c6d7533be9a..797fa24b95a55 100644 --- a/3rdparty/cpuinfo/src/x86/freebsd/init.c +++ b/3rdparty/cpuinfo/src/x86/freebsd/init.c @@ -135,6 +135,10 @@ void cpuinfo_x86_freebsd_init(void) { if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) { /* Assume that threads on the same core share L1 */ threads_per_l1 = freebsd_topology.threads / freebsd_topology.cores; + if (threads_per_l1 == 0) { + cpuinfo_log_error("failed to detect threads_per_l1"); + goto cleanup; + } cpuinfo_log_warning( "freebsd kernel did not report number of " "threads sharing L1 cache; assume %" PRIu32, @@ -154,6 +158,10 @@ void cpuinfo_x86_freebsd_init(void) { * the same package share L2 */ threads_per_l2 = freebsd_topology.threads / freebsd_topology.packages; } + if (threads_per_l2 == 0) { + cpuinfo_log_error("failed to detect threads_per_l2"); + goto cleanup; + } cpuinfo_log_warning( "freebsd kernel did not report number of " "threads sharing L2 cache; assume %" PRIu32, @@ -170,6 +178,10 @@ void cpuinfo_x86_freebsd_init(void) { * may be L4 cache as well) */ threads_per_l3 = 
freebsd_topology.threads / freebsd_topology.packages; + if (threads_per_l3 == 0) { + cpuinfo_log_error("failed to detect threads_per_l3"); + goto cleanup; + } cpuinfo_log_warning( "freebsd kernel did not report number of " "threads sharing L3 cache; assume %" PRIu32, @@ -187,6 +199,10 @@ void cpuinfo_x86_freebsd_init(void) { * shared L4 (like on IBM POWER8). */ threads_per_l4 = freebsd_topology.threads; + if (threads_per_l4 == 0) { + cpuinfo_log_error("failed to detect threads_per_l4"); + goto cleanup; + } cpuinfo_log_warning( "freebsd kernel did not report number of " "threads sharing L4 cache; assume %" PRIu32, @@ -203,7 +219,7 @@ void cpuinfo_x86_freebsd_init(void) { "%" PRIu32 " L1I caches", l1_count * sizeof(struct cpuinfo_cache), l1_count); - return; + goto cleanup; } for (uint32_t c = 0; c < l1_count; c++) { l1i[c] = (struct cpuinfo_cache){ @@ -230,7 +246,7 @@ void cpuinfo_x86_freebsd_init(void) { "%" PRIu32 " L1D caches", l1_count * sizeof(struct cpuinfo_cache), l1_count); - return; + goto cleanup; } for (uint32_t c = 0; c < l1_count; c++) { l1d[c] = (struct cpuinfo_cache){ @@ -257,7 +273,7 @@ void cpuinfo_x86_freebsd_init(void) { "%" PRIu32 " L2 caches", l2_count * sizeof(struct cpuinfo_cache), l2_count); - return; + goto cleanup; } for (uint32_t c = 0; c < l2_count; c++) { l2[c] = (struct cpuinfo_cache){ @@ -284,7 +300,7 @@ void cpuinfo_x86_freebsd_init(void) { "%" PRIu32 " L3 caches", l3_count * sizeof(struct cpuinfo_cache), l3_count); - return; + goto cleanup; } for (uint32_t c = 0; c < l3_count; c++) { l3[c] = (struct cpuinfo_cache){ @@ -311,7 +327,7 @@ void cpuinfo_x86_freebsd_init(void) { "%" PRIu32 " L4 caches", l4_count * sizeof(struct cpuinfo_cache), l4_count); - return; + goto cleanup; } for (uint32_t c = 0; c < l4_count; c++) { l4[c] = (struct cpuinfo_cache){