From 4adde197c817b44b02cd87b7c560c271f9c9068c Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Sun, 8 Dec 2024 15:26:27 +0100 Subject: [PATCH 1/7] runmodes: query the active runmode with a function call --- src/runmodes.c | 10 ++++++++++ src/runmodes.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/src/runmodes.c b/src/runmodes.c index b326a96e3a67..ea93ad62eec1 100644 --- a/src/runmodes.c +++ b/src/runmodes.c @@ -196,6 +196,16 @@ char *RunmodeGetActive(void) return active_runmode; } +bool RunmodeIsWorkers(void) +{ + return (strcmp(RunmodeGetActive(), "workers") == 0); +} + +bool RunmodeIsAutofp(void) +{ + return (strcmp(RunmodeGetActive(), "autofp") == 0); +} + /** * Return the running mode * diff --git a/src/runmodes.h b/src/runmodes.h index cce5fcbbaa42..56bbe76f7f3f 100644 --- a/src/runmodes.h +++ b/src/runmodes.h @@ -75,6 +75,8 @@ extern const char *thread_name_counter_stats; extern const char *thread_name_counter_wakeup; char *RunmodeGetActive(void); +bool RunmodeIsWorkers(void); +bool RunmodeIsAutofp(void); const char *RunModeGetMainMode(void); void RunModeListRunmodes(void); From 2a14c933647212a711bdb0a69960de018d2aa4f5 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Tue, 3 Sep 2024 13:23:44 +0200 Subject: [PATCH 2/7] github-ci: install hwloc as a mandatory dependency --- .github/workflows/builds.yml | 64 ++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index 866ff54b03a5..4d1e798c2b72 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -102,6 +102,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libtool \ @@ -258,6 +260,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libtool \ @@ -353,6 +357,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libtool \ @@ -504,6 +510,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ 
jansson-devel \ jq \ libtool \ @@ -601,6 +609,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libtool \ @@ -693,6 +703,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ hiredis-devel \ jansson-devel \ jq \ @@ -791,6 +803,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -889,6 +903,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -987,6 +1003,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -1080,6 +1098,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -1165,6 +1185,8 @@ jobs: gcc-c++ \ git \ hiredis-devel \ + hwloc \ + hwloc-devel \ jansson-devel \ jq \ libasan \ @@ -1261,6 +1283,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-devel \ libasan \ libtool \ libyaml-devel \ @@ -1382,6 +1406,8 @@ jobs: coccinelle \ dpdk-dev \ git \ + hwloc \ + hwloc-dev \ jq \ libcap-ng-dev \ libevent-dev \ @@ -1454,6 +1480,8 @@ jobs: clang-14 \ curl \ git \ + hwloc \ + hwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -1591,6 +1619,8 @@ jobs: llvm-14-dev \ clang-14 \ git \ + hwloc \ + hwloc-dev \ jq \ inetutils-ping \ libc++-dev \ @@ -1696,6 +1726,8 @@ jobs: llvm-14-dev \ clang-14 \ git \ + hwloc \ + hwloc-dev \ jq \ inetutils-ping \ libc++-dev \ @@ -1835,6 +1867,8 @@ jobs: cbindgen \ clang-18 \ git \ + hwloc \ + hwloc-dev \ jq \ libc++-dev \ libc++abi-dev \ @@ -1922,6 +1956,8 @@ jobs: cbindgen \ clang-18 \ git \ + hwloc \ + hwloc-dev \ jq \ inetutils-ping \ libc++-dev \ @@ -2023,6 +2059,8 @@ jobs: llvm-14-dev \ clang-14 \ git \ + hwloc \ + hwloc-dev \ jq \ libc++-dev \ libc++abi-dev \ @@ -2122,6 +2160,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + hwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -2205,6 +2245,8 @@ jobs: apt -y install \ build-essential \ curl \ + hwloc \ + hwloc-dev \ libtool \ libpcap-dev \ libnet1-dev \ @@ -2274,6 
+2316,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + hwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -2358,6 +2402,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + hwloc-dev \ libtool \ libpcap-dev \ libnet1-dev \ @@ -2423,6 +2469,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + hwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -2562,6 +2610,8 @@ jobs: automake \ cargo \ git \ + hwloc \ + hwloc-dev \ jq \ libtool \ libpcap-dev \ @@ -2660,6 +2710,8 @@ jobs: curl \ dpdk-dev \ git \ + hwloc \ + hwloc-dev \ jq \ make \ libpcre3 \ @@ -2763,6 +2815,8 @@ jobs: cmake \ curl \ git \ + hwloc \ + hwloc-dev \ jq \ make \ libpcre3 \ @@ -2844,6 +2898,8 @@ jobs: curl \ dpdk-dev \ git \ + hwloc \ + hwloc-dev \ jq \ make \ libpcre3 \ @@ -2928,6 +2984,8 @@ jobs: ccache \ curl \ git \ + hwloc \ + hwloc-dev \ jq \ libpcre2-dev \ libpcap-dev \ @@ -3001,6 +3059,8 @@ jobs: ccache \ curl \ git \ + hwloc \ + hwloc-dev \ jq \ libpcre2-dev \ libpcap-dev \ @@ -3065,6 +3125,8 @@ jobs: cbindgen \ curl \ hiredis \ + hwloc \ + hwloc-dev \ jansson \ jq \ libmagic \ @@ -3294,6 +3356,8 @@ jobs: gcc \ gcc-c++ \ git \ + hwloc \ + hwloc-dev \ jansson-devel \ libtool \ libyaml-devel \ From 6398f80e327ce75a1f9f4463cad1f80db3818ba4 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Sun, 2 Jun 2024 13:17:28 +0200 Subject: [PATCH 3/7] threading: set optimal threading settings related to NUMA This work lets you set threading settings either: - automatically - to let Suricata decide which cores from which NUMA node are best for the given interface - manually - you can configure threading settings per interface This requires hwloc-devel / hwloc-dev to be installed --- configure.ac | 23 ++ src/runmode-dpdk.c | 60 +++- src/suricata.c | 2 + src/threadvars.h | 3 + src/tm-threads.c | 13 +- src/util-affinity.c | 774 +++++++++++++++++++++++++++++++++++++------- src/util-affinity.h | 20 +- src/util-device.c | 15 + src/util-device.h | 1 + src/util-runmodes.c | 2 + suricata.yaml.in | 52 +-- 11 files changed, 807 insertions(+), 158 
deletions(-) diff --git a/configure.ac b/configure.ac index ca964d9039a0..46ba2b9a2986 100644 --- a/configure.ac +++ b/configure.ac @@ -741,6 +741,28 @@ exit 1 fi + LIBHWLOC="" + PKG_CHECK_MODULES([HWLOC], [hwloc >= 2.0.0], + [AC_DEFINE([HAVE_HWLOC], [1], [Define if hwloc library is present and meets version requirements])], + LIBHWLOC="no") + + if test "$LIBHWLOC" = "no"; then + echo + echo " ERROR! hwloc library version > 2.0.0 not found, go get it" + echo " from https://www.open-mpi.org/projects/hwloc/ " + echo " or your distribution:" + echo + echo " Ubuntu: apt-get install hwloc libhwloc-dev" + echo " Fedora: dnf install hwloc hwloc-devel" + echo " CentOS/RHEL: yum install hwloc hwloc-devel" + echo + exit 1 + else + CFLAGS="${CFLAGS} ${HWLOC_CFLAGS}" + LDFLAGS="${LDFLAGS} ${HWLOC_LIBS}" + enable_hwloc="yes" + fi + # libpthread AC_ARG_WITH(libpthread_includes, [ --with-libpthread-includes=DIR libpthread include directory], @@ -2561,6 +2583,7 @@ SURICATA_BUILD_CONF="Suricata Configuration: JA4 support: ${enable_ja4} Non-bundled htp: ${enable_non_bundled_htp} Hyperscan support: ${enable_hyperscan} + Hwloc support: ${enable_hwloc} Libnet support: ${enable_libnet} liblz4 support: ${enable_liblz4} Landlock support: ${enable_landlock} diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c index 5df81f685883..a086aafee0c1 100644 --- a/src/runmode-dpdk.c +++ b/src/runmode-dpdk.c @@ -368,12 +368,17 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) SCReturnInt(-EINVAL); } - ThreadsAffinityType *wtaf = GetAffinityTypeFromName("worker-cpu-set"); + bool wtaf_periface = true; + ThreadsAffinityType *wtaf = GetAffinityTypeForNameAndIface("worker-cpu-set", iconf->iface); if (wtaf == NULL) { - SCLogError("Specify worker-cpu-set list in the threading section"); - SCReturnInt(-EINVAL); + wtaf_periface = false; + wtaf = GetAffinityTypeForNameAndIface("worker-cpu-set", NULL); // mandatory + if (wtaf == NULL) { + SCLogError("Specify worker-cpu-set list in 
the threading section"); + SCReturnInt(-EINVAL); + } } - ThreadsAffinityType *mtaf = GetAffinityTypeFromName("management-cpu-set"); + ThreadsAffinityType *mtaf = GetAffinityTypeForNameAndIface("management-cpu-set", NULL); if (mtaf == NULL) { SCLogError("Specify management-cpu-set list in the threading section"); SCReturnInt(-EINVAL); @@ -406,7 +411,12 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) } if (strcmp(entry_str, "auto") == 0) { - iconf->threads = (uint16_t)sched_cpus / LiveGetDeviceCount(); + if (wtaf_periface) { + iconf->threads = (uint16_t)sched_cpus; + SCLogConfig("%s: auto-assigned %u threads", iconf->iface, iconf->threads); + SCReturnInt(0); + } + iconf->threads = (uint16_t)sched_cpus / LiveGetDeviceCountWithoutAssignedThreading(); if (iconf->threads == 0) { SCLogError("Not enough worker CPU cores with affinity were configured"); SCReturnInt(-ERANGE); @@ -416,7 +426,8 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str) iconf->threads++; remaining_auto_cpus--; } else if (remaining_auto_cpus == -1) { - remaining_auto_cpus = (int32_t)sched_cpus % LiveGetDeviceCount(); + remaining_auto_cpus = + (int32_t)sched_cpus % LiveGetDeviceCountWithoutAssignedThreading(); if (remaining_auto_cpus > 0) { iconf->threads++; remaining_auto_cpus--; @@ -844,20 +855,35 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface) SCReturnInt(0); } -static int32_t ConfigValidateThreads(uint16_t iface_threads) +static int32_t ConfigValidateThreads(uint16_t iface_threads, const char *iface) { static uint32_t total_cpus = 0; - total_cpus += iface_threads; - ThreadsAffinityType *wtaf = GetAffinityTypeFromName("worker-cpu-set"); + bool per_iface_set = true; + ThreadsAffinityType *wtaf = GetAffinityTypeForNameAndIface("worker-cpu-set", iface); if (wtaf == NULL) { - SCLogError("Specify worker-cpu-set list in the threading section"); - return -1; + per_iface_set = false; + wtaf = 
GetAffinityTypeForNameAndIface("worker-cpu-set", NULL); + if (wtaf == NULL) { + SCLogError("Specify worker-cpu-set list in the threading section"); + return -1; + } } - if (total_cpus > UtilAffinityGetAffinedCPUNum(wtaf)) { - SCLogError("Interfaces requested more cores than configured in the threading section " - "(requested %d configured %d", - total_cpus, UtilAffinityGetAffinedCPUNum(wtaf)); - return -1; + + if (!per_iface_set) { + total_cpus += iface_threads; + if (total_cpus > UtilAffinityGetAffinedCPUNum(wtaf)) { + SCLogError("Interfaces requested more cores than configured in the threading section " + "(requested %d configured %d", + total_cpus, UtilAffinityGetAffinedCPUNum(wtaf)); + return -1; + } + } else { + if (iface_threads > UtilAffinityGetAffinedCPUNum(wtaf)) { + SCLogError("Interface %s requested more cores than configured in the threading section " + "(requested %d configured %d", + iface, iface_threads, UtilAffinityGetAffinedCPUNum(wtaf)); + return -1; + } } return 0; @@ -873,7 +899,7 @@ static DPDKIfaceConfig *ConfigParse(const char *iface) ConfigInit(&iconf); retval = ConfigLoad(iconf, iface); - if (retval < 0 || ConfigValidateThreads(iconf->threads) != 0) { + if (retval < 0 || ConfigValidateThreads(iconf->threads, iface) != 0) { iconf->DerefFunc(iconf); SCReturnPtr(NULL, "void *"); } diff --git a/src/suricata.c b/src/suricata.c index ee9dfc0b5b69..bee07d3fd920 100644 --- a/src/suricata.c +++ b/src/suricata.c @@ -111,6 +111,7 @@ #include "tmqh-packetpool.h" #include "tm-queuehandlers.h" +#include "util-affinity.h" #include "util-byte.h" #include "util-conf.h" #include "util-coredump-config.h" @@ -2298,6 +2299,7 @@ void PostRunDeinit(const int runmode, struct timeval *start_time) StreamTcpFreeConfig(STREAM_VERBOSE); DefragDestroy(); HttpRangeContainersDestroy(); + TopologyDestroy(); TmqResetQueues(); #ifdef PROFILING diff --git a/src/threadvars.h b/src/threadvars.h index 6f339e9839d5..471714a254c4 100644 --- a/src/threadvars.h +++ 
b/src/threadvars.h @@ -136,6 +136,9 @@ typedef struct ThreadVars_ { struct FlowQueue_ *flow_queue; bool break_loop; + /** Interface-specific thread affinity */ + char *iface_name; + Storage storage[]; } ThreadVars; diff --git a/src/tm-threads.c b/src/tm-threads.c index 07f9a9390df0..a0fa8f095af9 100644 --- a/src/tm-threads.c +++ b/src/tm-threads.c @@ -865,8 +865,19 @@ TmEcode TmThreadSetupOptions(ThreadVars *tv) TmThreadSetPrio(tv); if (tv->thread_setup_flags & THREAD_SET_AFFTYPE) { ThreadsAffinityType *taf = &thread_affinity[tv->cpu_affinity]; + bool use_iface_affinity = + RunmodeIsAutofp() && + tv->cpu_affinity == RECEIVE_CPU_SET && + FindAffinityByInterface(taf, tv->iface_name) != NULL; + use_iface_affinity |= + RunmodeIsWorkers() && + tv->cpu_affinity == WORKER_CPU_SET && + FindAffinityByInterface(taf, tv->iface_name) != NULL; + + if (use_iface_affinity) + taf = FindAffinityByInterface(taf, tv->iface_name); if (taf->mode_flag == EXCLUSIVE_AFFINITY) { - uint16_t cpu = AffinityGetNextCPU(taf); + uint16_t cpu = AffinityGetNextCPU(tv, taf); SetCPUAffinity(cpu); /* If CPU is in a set overwrite the default thread prio */ if (CPU_ISSET(cpu, &taf->lowprio_cpu)) { diff --git a/src/util-affinity.c b/src/util-affinity.c index 8a224711e884..4e21e71e5688 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -30,37 +30,43 @@ #include "util-cpu.h" #include "util-byte.h" #include "util-debug.h" +#include "util-dpdk.h" +#include "suricata.h" ThreadsAffinityType thread_affinity[MAX_CPU_SET] = { { - .name = "receive-cpu-set", - .mode_flag = EXCLUSIVE_AFFINITY, - .prio = PRIO_MEDIUM, - .lcpu = 0, + .name = "receive-cpu-set", + .mode_flag = EXCLUSIVE_AFFINITY, + .prio = PRIO_MEDIUM, + .lcpu = { 0 }, }, { - .name = "worker-cpu-set", - .mode_flag = EXCLUSIVE_AFFINITY, - .prio = PRIO_MEDIUM, - .lcpu = 0, + .name = "worker-cpu-set", + .mode_flag = EXCLUSIVE_AFFINITY, + .prio = PRIO_MEDIUM, + .lcpu = { 0 }, }, { - .name = "verdict-cpu-set", - .mode_flag = BALANCED_AFFINITY, - 
.prio = PRIO_MEDIUM, - .lcpu = 0, + .name = "verdict-cpu-set", + .mode_flag = BALANCED_AFFINITY, + .prio = PRIO_MEDIUM, + .lcpu = { 0 }, }, { - .name = "management-cpu-set", - .mode_flag = BALANCED_AFFINITY, - .prio = PRIO_MEDIUM, - .lcpu = 0, + .name = "management-cpu-set", + .mode_flag = BALANCED_AFFINITY, + .prio = PRIO_MEDIUM, + .lcpu = { 0 }, }, }; int thread_affinity_init_done = 0; +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun +static hwloc_topology_t topology = NULL; +#endif /* OS_WIN32 and __OpenBSD__ */ + /** * \brief find affinity by its name * \retval a pointer to the affinity or NULL if not found @@ -76,6 +82,119 @@ ThreadsAffinityType * GetAffinityTypeFromName(const char *name) return NULL; } +static ThreadsAffinityType *AllocAndInitAffinityType( + const char *name, const char *interface_name, ThreadsAffinityType *parent) +{ + ThreadsAffinityType *new_affinity = SCCalloc(1, sizeof(ThreadsAffinityType)); + if (new_affinity == NULL) { + FatalError("Unable to allocate memory for new affinity type"); + } + + new_affinity->name = strdup(interface_name); + new_affinity->parent = parent; + new_affinity->mode_flag = EXCLUSIVE_AFFINITY; + new_affinity->prio = PRIO_MEDIUM; + for (int i = 0; i < MAX_NUMA_NODES; i++) { + new_affinity->lcpu[i] = 0; + } + + if (parent != NULL) { + if (parent->nb_children == parent->nb_children_capacity) { + if (parent->nb_children_capacity == 0) { + parent->nb_children_capacity = 2; + } else { + parent->nb_children_capacity *= 2; + } + parent->children = SCRealloc( + parent->children, parent->nb_children_capacity * sizeof(ThreadsAffinityType *)); + if (parent->children == NULL) { + FatalError("Unable to reallocate memory for children affinity types"); + } + } + parent->children[parent->nb_children++] = new_affinity; + } + + return new_affinity; +} + +ThreadsAffinityType *FindAffinityByInterface( + ThreadsAffinityType *parent, const char *interface_name) +{ + for (uint32_t i = 0; i < 
parent->nb_children; i++) { + if (interface_name && strcmp(parent->children[i]->name, interface_name) == 0) { + return parent->children[i]; + } + } + return NULL; +} + +/** + * \brief find affinity by its name and interface name, if children are not allowed, then those are + * alloced and initialized. \retval a pointer to the affinity or NULL if not found + */ +ThreadsAffinityType *GetAffinityTypeForNameAndIface(const char *name, const char *interface_name) +{ + int i; + ThreadsAffinityType *parent_affinity = NULL; + + for (i = 0; i < MAX_CPU_SET; i++) { + if (strcmp(thread_affinity[i].name, name) == 0) { + parent_affinity = &thread_affinity[i]; + break; + } + } + + if (parent_affinity == NULL) { + SCLogError("Affinity with name \"%s\" not found", name); + return NULL; + } + + if (interface_name != NULL) { + ThreadsAffinityType *child_affinity = + FindAffinityByInterface(parent_affinity, interface_name); + // found or not found, it is returned + return child_affinity; + } + + return parent_affinity; +} + +/** + * \brief find affinity by its name and interface name, if children are not allowed, then those are + * alloced and initialized. 
\retval a pointer to the affinity or NULL if not found + */ +ThreadsAffinityType *GetOrAllocAffinityTypeForIfaceOfName( + const char *name, const char *interface_name) +{ + int i; + ThreadsAffinityType *parent_affinity = NULL; + + for (i = 0; i < MAX_CPU_SET; i++) { + if (strcmp(thread_affinity[i].name, name) == 0) { + parent_affinity = &thread_affinity[i]; + break; + } + } + + if (parent_affinity == NULL) { + SCLogError("Affinity with name \"%s\" not found", name); + return NULL; + } + + if (interface_name != NULL) { + ThreadsAffinityType *child_affinity = + FindAffinityByInterface(parent_affinity, interface_name); + if (child_affinity != NULL) { + return child_affinity; + } + + // If not found, allocate and initialize a new child affinity + return AllocAndInitAffinityType(name, interface_name, parent_affinity); + } + + return parent_affinity; +} + #if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun static void AffinitySetupInit(void) { @@ -158,154 +277,571 @@ static void BuildCpuset(const char *name, ConfNode *node, cpu_set_t *cpu) #endif /* OS_WIN32 and __OpenBSD__ */ /** - * \brief Extract cpu affinity configuration from current config file + * \brief Get the appropriate set name for a given affinity value. + */ +static const char *GetAffinitySetName(const char *val) +{ + if (strcmp(val, "decode-cpu-set") == 0 || strcmp(val, "stream-cpu-set") == 0 || + strcmp(val, "reject-cpu-set") == 0 || strcmp(val, "output-cpu-set") == 0) { + return NULL; + } + + return (strcmp(val, "detect-cpu-set") == 0) ? "worker-cpu-set" : val; +} + +/** + * \brief Set up CPU sets for the given affinity type. 
*/ +static void SetupCpuSets(ThreadsAffinityType *taf, ConfNode *affinity, const char *setname) +{ + CPU_ZERO(&taf->cpu_set); + ConfNode *cpu_node = ConfNodeLookupChild(affinity->head.tqh_first, "cpu"); + if (cpu_node != NULL) { + BuildCpuset(setname, cpu_node, &taf->cpu_set); + } else { + SCLogInfo("Unable to find 'cpu' node for set %s", setname); + } +} + +/** + * \brief Build a priority CPU set for the given priority level. + */ +static void BuildPriorityCpuset(ThreadsAffinityType *taf, ConfNode *prio_node, const char *priority, + cpu_set_t *cpuset, const char *setname) +{ + ConfNode *node = ConfNodeLookupChild(prio_node, priority); + if (node != NULL) { + BuildCpuset(setname, node, cpuset); + } else { + SCLogDebug("Unable to find '%s' priority for set %s", priority, setname); + } +} + +/** + * \brief Set up the default priority for the given affinity type. + */ +static void SetupDefaultPriority(ThreadsAffinityType *taf, ConfNode *prio_node, const char *setname) +{ + ConfNode *default_node = ConfNodeLookupChild(prio_node, "default"); + if (default_node == NULL) + return; + + if (strcmp(default_node->val, "low") == 0) { + taf->prio = PRIO_LOW; + } else if (strcmp(default_node->val, "medium") == 0) { + taf->prio = PRIO_MEDIUM; + } else if (strcmp(default_node->val, "high") == 0) { + taf->prio = PRIO_HIGH; + } else { + FatalError("Unknown default CPU affinity priority: %s", default_node->val); + } + + SCLogConfig("Using default priority '%s' for set %s", default_node->val, setname); +} + +/** + * \brief Set up priority CPU sets for the given affinity type. 
+ */ +static void SetupAffinityPriority(ThreadsAffinityType *taf, ConfNode *affinity, const char *setname) +{ + CPU_ZERO(&taf->lowprio_cpu); + CPU_ZERO(&taf->medprio_cpu); + CPU_ZERO(&taf->hiprio_cpu); + + ConfNode *prio_node = ConfNodeLookupChild(affinity->head.tqh_first, "prio"); + if (prio_node == NULL) + return; + + BuildPriorityCpuset(taf, prio_node, "low", &taf->lowprio_cpu, setname); + BuildPriorityCpuset(taf, prio_node, "medium", &taf->medprio_cpu, setname); + BuildPriorityCpuset(taf, prio_node, "high", &taf->hiprio_cpu, setname); + + SetupDefaultPriority(taf, prio_node, setname); +} + +/** + * \brief Set up CPU affinity mode for the given affinity type. + */ +static void SetupAffinityMode(ThreadsAffinityType *taf, ConfNode *affinity) +{ + ConfNode *mode_node = ConfNodeLookupChild(affinity->head.tqh_first, "mode"); + if (mode_node == NULL) + return; + + if (strcmp(mode_node->val, "exclusive") == 0) { + taf->mode_flag = EXCLUSIVE_AFFINITY; + } else if (strcmp(mode_node->val, "balanced") == 0) { + taf->mode_flag = BALANCED_AFFINITY; + } else { + FatalError("Unknown CPU affinity mode: %s", mode_node->val); + } +} + +/** + * \brief Set up the number of threads for the given affinity type. + */ +static void SetupAffinityThreads(ThreadsAffinityType *taf, ConfNode *affinity) +{ + ConfNode *threads_node = ConfNodeLookupChild(affinity->head.tqh_first, "threads"); + if (threads_node == NULL) + return; + + if (StringParseUint32(&taf->nb_threads, 10, 0, threads_node->val) < 0 || taf->nb_threads == 0) { + FatalError("Invalid thread count: %s", threads_node->val); + } +} + +/** + * \brief Check if the set name corresponds to a worker CPU set. + */ +static bool IsWorkerCpuSet(const char *setname) +{ + return (strcmp(setname, "worker-cpu-set") == 0); +} + +/** + * \brief Check if the set name corresponds to a receive CPU set. 
+ */ +static bool IsReceiveCpuSet(const char *setname) +{ + return (strcmp(setname, "receive-cpu-set") == 0); +} + +/** + * \brief Set up affinity configuration for a single interface. + */ +static void SetupSingleIfaceAffinity(ThreadsAffinityType *taf, ConfNode *iface_node) +{ + // offload to Setup function + ConfNode *child_node; + const char *interface_name; + TAILQ_FOREACH (child_node, &iface_node->head, next) { + if (strcmp(child_node->name, "interface") == 0) { + interface_name = child_node->val; + break; + } + } + if (interface_name == NULL) + return; + + ThreadsAffinityType *iface_taf = + GetOrAllocAffinityTypeForIfaceOfName(taf->name, interface_name); + if (iface_taf == NULL) { + FatalError("Unknown CPU affinity type for interface: %s", interface_name); + } + + SetupCpuSets(iface_taf, iface_node, interface_name); + SetupAffinityPriority(iface_taf, iface_node, interface_name); + SetupAffinityMode(iface_taf, iface_node); + SetupAffinityThreads(iface_taf, iface_node); +} + +/** + * \brief Set up per-interface affinity configurations. 
+ */ +static void SetupPerIfaceAffinity(ThreadsAffinityType *taf, ConfNode *affinity) +{ + ConfNode *per_iface_node = ConfNodeLookupChild(affinity, "interface-specific-cpu-set"); + if (per_iface_node == NULL) + return; + + ConfNode *iface_node; + TAILQ_FOREACH (iface_node, &per_iface_node->head, next) { + if (strcmp(iface_node->val, "interface") == 0) { + SetupSingleIfaceAffinity(taf, iface_node); + } else { + SCLogWarning("Unknown node in interface-specific-cpu-set: %s", iface_node->name); + } + } +} + +/** + * \brief Extract CPU affinity configuration from current config file + */ void AffinitySetupLoadFromConfig(void) { #if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun - ConfNode *root = ConfGetNode("threading.cpu-affinity"); - ConfNode *affinity; - if (thread_affinity_init_done == 0) { AffinitySetupInit(); thread_affinity_init_done = 1; } - SCLogDebug("Load affinity from config\n"); + SCLogDebug("Loading CPU affinity from config...\n"); + + ConfNode *root = ConfGetNode("threading.cpu-affinity"); if (root == NULL) { - SCLogInfo("can't get cpu-affinity node"); + SCLogInfo("Cannot find cpu-affinity node in config"); return; } + ConfNode *affinity; TAILQ_FOREACH(affinity, &root->head, next) { - if (strcmp(affinity->val, "decode-cpu-set") == 0 || - strcmp(affinity->val, "stream-cpu-set") == 0 || - strcmp(affinity->val, "reject-cpu-set") == 0 || - strcmp(affinity->val, "output-cpu-set") == 0) { + const char *setname = GetAffinitySetName(affinity->val); + if (setname == NULL) continue; + + ThreadsAffinityType *taf = GetOrAllocAffinityTypeForIfaceOfName(setname, NULL); + if (taf == NULL) { + FatalError("Unknown CPU affinity type: %s", setname); } - const char *setname = affinity->val; - if (strcmp(affinity->val, "detect-cpu-set") == 0) - setname = "worker-cpu-set"; + SCLogConfig("Found affinity definition for \"%s\"", setname); - ThreadsAffinityType *taf = GetAffinityTypeFromName(setname); - ConfNode *node = NULL; - ConfNode *nprio = 
NULL; + SetupCpuSets(taf, affinity, setname); + SetupAffinityPriority(taf, affinity, setname); + SetupAffinityMode(taf, affinity); + SetupAffinityThreads(taf, affinity); - if (taf == NULL) { - FatalError("unknown cpu-affinity type"); - } else { - SCLogConfig("Found affinity definition for \"%s\"", setname); + if (IsWorkerCpuSet(setname) || IsReceiveCpuSet(setname)) { + SetupPerIfaceAffinity(taf, affinity); } + } +#endif /* OS_WIN32 and __OpenBSD__ */ +} - CPU_ZERO(&taf->cpu_set); - node = ConfNodeLookupChild(affinity->head.tqh_first, "cpu"); - if (node == NULL) { - SCLogInfo("unable to find 'cpu'"); - } else { - BuildCpuset(setname, node, &taf->cpu_set); - } +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun - CPU_ZERO(&taf->lowprio_cpu); - CPU_ZERO(&taf->medprio_cpu); - CPU_ZERO(&taf->hiprio_cpu); - nprio = ConfNodeLookupChild(affinity->head.tqh_first, "prio"); - if (nprio != NULL) { - node = ConfNodeLookupChild(nprio, "low"); - if (node == NULL) { - SCLogDebug("unable to find 'low' prio using default value"); - } else { - BuildCpuset(setname, node, &taf->lowprio_cpu); - } +static int HwLocDeviceNumaGet(hwloc_topology_t topology, hwloc_obj_t obj) +{ +#if HWLOC_VERSION_MAJOR >= 2 && HWLOC_VERSION_MINOR >= 5 + // TODO: test this block of code or remove it + hwloc_obj_t nodes[MAX_NUMA_NODES]; // Assuming a maximum of 16 NUMA nodes + unsigned num_nodes = MAX_NUMA_NODES; + struct hwloc_location location; + + location.type = HWLOC_LOCATION_TYPE_OBJECT; + location.location.object = obj; + + int result = hwloc_get_local_numanode_objs(topology, &location, &num_nodes, nodes, 0); + if (result == 0 && num_nodes > 0 && num_nodes <= MAX_NUMA_NODES) { + return nodes[0]->logical_index; + // printf("NUMA nodes for PCIe device:\n"); + // for (unsigned i = 0; i < num_nodes; i++) { + // printf("NUMA node %d\n", nodes[i]->logical_index); + // } + } + return -1; +#endif /* HWLOC_VERSION_MAJOR >= 2 && HWLOC_VERSION_MINOR >= 5 */ - node = 
ConfNodeLookupChild(nprio, "medium"); - if (node == NULL) { - SCLogDebug("unable to find 'medium' prio using default value"); - } else { - BuildCpuset(setname, node, &taf->medprio_cpu); - } + hwloc_obj_t non_io_ancestor = hwloc_get_non_io_ancestor_obj(topology, obj); + if (non_io_ancestor == NULL) { + return -1; + } - node = ConfNodeLookupChild(nprio, "high"); - if (node == NULL) { - SCLogDebug("unable to find 'high' prio using default value"); - } else { - BuildCpuset(setname, node, &taf->hiprio_cpu); - } - node = ConfNodeLookupChild(nprio, "default"); - if (node != NULL) { - if (!strcmp(node->val, "low")) { - taf->prio = PRIO_LOW; - } else if (!strcmp(node->val, "medium")) { - taf->prio = PRIO_MEDIUM; - } else if (!strcmp(node->val, "high")) { - taf->prio = PRIO_HIGH; - } else { - FatalError("unknown cpu_affinity prio"); + // Iterate over NUMA nodes and check their nodeset + hwloc_obj_t numa_node = NULL; + while ((numa_node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa_node)) != + NULL) { + if (hwloc_bitmap_isset(non_io_ancestor->nodeset, numa_node->os_index)) { + return numa_node->logical_index; + } + } + + return -1; +} + +static hwloc_obj_t HwLocDeviceGetByKernelName(hwloc_topology_t topology, const char *interface_name) +{ + hwloc_obj_t obj = NULL; + + while ((obj = hwloc_get_next_osdev(topology, obj)) != NULL) { + if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK && + strcmp(obj->name, interface_name) == 0) { + hwloc_obj_t parent = obj->parent; + while (parent) { + if (parent->type == HWLOC_OBJ_PCI_DEVICE) { + return parent; } - SCLogConfig("Using default prio '%s' for set '%s'", - node->val, setname); + parent = parent->parent; } } + } + return NULL; +} - node = ConfNodeLookupChild(affinity->head.tqh_first, "mode"); - if (node != NULL) { - if (!strcmp(node->val, "exclusive")) { - taf->mode_flag = EXCLUSIVE_AFFINITY; - } else if (!strcmp(node->val, "balanced")) { - taf->mode_flag = BALANCED_AFFINITY; - } else { - FatalError("unknown 
cpu_affinity node"); - } +// Static function to deparse PCIe interface string name to individual components +/** + * \brief Parse PCIe address string to individual components + * \param[in] pcie_address PCIe address string + * \param[out] domain Domain component + * \param[out] bus Bus component + * \param[out] device Device component + * \param[out] function Function component + */ +static int PcieAddressToComponents(const char *pcie_address, unsigned int *domain, + unsigned int *bus, unsigned int *device, unsigned int *function) +{ + // Handle both full and short PCIe address formats + if (sscanf(pcie_address, "%x:%x:%x.%x", domain, bus, device, function) != 4) { + if (sscanf(pcie_address, "%x:%x.%x", bus, device, function) != 3) { + return -1; } + *domain = 0; // Default domain to 0 if not provided + } + return 0; +} - node = ConfNodeLookupChild(affinity->head.tqh_first, "threads"); - if (node != NULL) { - if (StringParseUint32(&taf->nb_threads, 10, 0, (const char *)node->val) < 0) { - FatalError("invalid value for threads " - "count: '%s'", - node->val); - } - if (! 
taf->nb_threads) { - FatalError("bad value for threads count"); +// Function to convert PCIe address to hwloc object +static hwloc_obj_t HwLocDeviceGetByPcie(hwloc_topology_t topology, const char *pcie_address) +{ + hwloc_obj_t obj = NULL; + unsigned int domain, bus, device, function; + int r = PcieAddressToComponents(pcie_address, &domain, &bus, &device, &function); + if (r == 0) { + while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { + if (obj->attr->pcidev.domain == domain && obj->attr->pcidev.bus == bus && + obj->attr->pcidev.dev == device && obj->attr->pcidev.func == function) { + return obj; } } } -#endif /* OS_WIN32 and __OpenBSD__ */ + return NULL; } -/** - * \brief Return next cpu to use for a given thread family - * \retval the cpu to used given by its id - */ -uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) +static void HwlocObjectDump(hwloc_obj_t obj, const char *iface_name) { - uint16_t ncpu = 0; -#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun - int iter = 0; + if (!obj) { + SCLogDebug("No object found for the given PCIe address.\n"); + return; + } + + static char pcie_address[32]; + snprintf(pcie_address, sizeof(pcie_address), "%04x:%02x:%02x.%x", obj->attr->pcidev.domain, + obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func); + SCLogDebug("Interface (%s / %s) has NUMA ID %d", iface_name, pcie_address, + HwLocDeviceNumaGet(topology, obj)); + + SCLogDebug("Object type: %s\n", hwloc_obj_type_string(obj->type)); + SCLogDebug("Logical index: %u\n", obj->logical_index); + SCLogDebug("Depth: %u\n", obj->depth); + SCLogDebug("Attributes:\n"); + if (obj->type == HWLOC_OBJ_PCI_DEVICE) { + SCLogDebug(" Domain: %04x\n", obj->attr->pcidev.domain); + SCLogDebug(" Bus: %02x\n", obj->attr->pcidev.bus); + SCLogDebug(" Device: %02x\n", obj->attr->pcidev.dev); + SCLogDebug(" Function: %01x\n", obj->attr->pcidev.func); + SCLogDebug(" Class ID: %04x\n", obj->attr->pcidev.class_id); + SCLogDebug(" 
Vendor ID: %04x\n", obj->attr->pcidev.vendor_id); + SCLogDebug(" Device ID: %04x\n", obj->attr->pcidev.device_id); + SCLogDebug(" Subvendor ID: %04x\n", obj->attr->pcidev.subvendor_id); + SCLogDebug(" Subdevice ID: %04x\n", obj->attr->pcidev.subdevice_id); + SCLogDebug(" Revision: %02x\n", obj->attr->pcidev.revision); + SCLogDebug(" Link speed: %f GB/s\n", obj->attr->pcidev.linkspeed); + } else { + SCLogDebug(" No PCI device attributes available.\n"); + } +} + +static bool CPUIsFromNuma(uint16_t ncpu, uint16_t numa) +{ + int core_id = ncpu; + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t numa_node = NULL; + + while ((numa_node = hwloc_get_next_obj_by_depth(topology, depth, numa_node)) != NULL) { + hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_copy(cpuset, numa_node->cpuset); + + if (hwloc_bitmap_isset(cpuset, core_id)) { + SCLogDebug("Core %d - NUMA %d", core_id, numa_node->logical_index); + hwloc_bitmap_free(cpuset); + break; + } + hwloc_bitmap_free(cpuset); + } + + if (numa_node != NULL && numa == numa_node->logical_index) /* numa_node is NULL when no NUMA node contains the CPU */ + return true; + + return false; +} + +static bool TopologyShouldAutooptimize(ThreadVars *tv, ThreadsAffinityType *taf) +{ + bool cond; SCMutexLock(&taf->taf_mutex); - ncpu = taf->lcpu; - while (!CPU_ISSET(ncpu, &taf->cpu_set) && iter < 2) { - ncpu++; - if (ncpu >= UtilCpuGetNumProcessorsOnline()) { - ncpu = 0; - iter++; + cond = tv->type == TVT_PPT && tv->iface_name && + ( strcmp(tv->iface_name, taf->name) == 0 || + (strcmp("worker-cpu-set", taf->name) == 0 && RunmodeIsWorkers()) || + (strcmp("receive-cpu-set", taf->name) == 0 && RunmodeIsAutofp()) ); + SCMutexUnlock(&taf->taf_mutex); + return cond; +} + +static void TopologyInitialize(void) +{ + if (topology == NULL) { + if (hwloc_topology_init(&topology) == -1) { + FatalError("Failed to initialize topology"); + } + + if (hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) == -1 || + hwloc_topology_set_io_types_filter(topology, 
HWLOC_TYPE_FILTER_KEEP_ALL) == -1 || + hwloc_topology_load(topology) == -1) { + FatalError("Failed to set/load topology"); } } - if (iter == 2) { - SCLogError("cpu_set does not contain " - "available cpus, cpu affinity conf is invalid"); +} + +void TopologyDestroy(void) +{ + if (topology != NULL) { + hwloc_topology_destroy(topology); + topology = NULL; + } +} + +static int InterfaceGetNumaNode(ThreadVars *tv) +{ + hwloc_obj_t if_obj = HwLocDeviceGetByKernelName(topology, tv->iface_name); + if (if_obj == NULL) { + if_obj = HwLocDeviceGetByPcie(topology, tv->iface_name); + } + + if (if_obj != NULL) { + HwlocObjectDump(if_obj, tv->iface_name); + } + + int32_t numa_id = HwLocDeviceNumaGet(topology, if_obj); + if (numa_id < 0 && SCRunmodeGet() == RUNMODE_DPDK) { + // DPDK fallback for e.g. net_bonding (vdev) PMDs + int32_t r = DPDKDeviceNameSetSocketID(tv->iface_name, &numa_id); + if (r < 0) { + numa_id = -1; + } + } + + if (numa_id < 0) { + SCLogDebug("Unable to find NUMA node for interface %s", tv->iface_name); + } + + return numa_id; +} + +static int16_t FindCPUInNumaNode(int numa_node, ThreadsAffinityType *taf) +{ + if (numa_node < 0 || numa_node >= MAX_NUMA_NODES || taf->lcpu[numa_node] >= UtilCpuGetNumProcessorsOnline()) { + return -1; + } + + uint16_t cpu = taf->lcpu[numa_node]; + while (cpu < UtilCpuGetNumProcessorsOnline() && + (!CPU_ISSET(cpu, &taf->cpu_set) || !CPUIsFromNuma(cpu, numa_node))) { + cpu++; + } + + taf->lcpu[numa_node] = (CPU_ISSET(cpu, &taf->cpu_set) && CPUIsFromNuma(cpu, numa_node)) + ? cpu + 1 + : UtilCpuGetNumProcessorsOnline(); + return (CPU_ISSET(cpu, &taf->cpu_set) && CPUIsFromNuma(cpu, numa_node)) ? 
cpu : -1; +} + +static bool AllCPUsUsed(ThreadsAffinityType *taf) +{ + for (int i = 0; i < MAX_NUMA_NODES; i++) { + if (taf->lcpu[i] < UtilCpuGetNumProcessorsOnline()) { + return false; + } + } + return true; +} + +static void ResetCPUs(ThreadsAffinityType *taf) +{ + for (int i = 0; i < MAX_NUMA_NODES; i++) { + taf->lcpu[i] = 0; } - taf->lcpu = ncpu + 1; - if (taf->lcpu >= UtilCpuGetNumProcessorsOnline()) - taf->lcpu = 0; +} + +static int16_t CPUSelectFromNuma(int iface_numa, ThreadsAffinityType *taf) +{ + if (iface_numa != -1) { + return FindCPUInNumaNode(iface_numa, taf); + } + return -1; +} + +static int16_t CPUSelectAlternative(ThreadsAffinityType *taf) +{ + for (int nid = 0; nid < MAX_NUMA_NODES; nid++) { + int cpu = FindCPUInNumaNode(nid, taf); + if (cpu != -1) { + return cpu; + } + } + return -1; +} + +static uint16_t CPUSelectDefault(ThreadsAffinityType *taf) +{ + uint16_t cpu = taf->lcpu[0]; + int attempts = 0; + + while (!CPU_ISSET(cpu, &taf->cpu_set) && attempts < 2) { + cpu = (cpu + 1) % UtilCpuGetNumProcessorsOnline(); + if (cpu == 0) + attempts++; + } + + taf->lcpu[0] = cpu + 1; + + if (attempts == 2) { + SCLogError( + "cpu_set does not contain available CPUs, CPU affinity configuration is invalid"); + } + + return cpu; +} + +static uint16_t GetNextAvailableCPU(int iface_numa, ThreadsAffinityType *taf) +{ + int16_t cpu = CPUSelectFromNuma(iface_numa, taf); + if (iface_numa == -1 || cpu == -1) { + cpu = CPUSelectAlternative(taf); + if (cpu == -1) { + ResetCPUs(taf); + } + } + + if (cpu != -1) + return cpu; + + cpu = CPUSelectDefault(taf); + + return cpu; +} + +#endif /* OS_WIN32 and __OpenBSD__ */ + +uint16_t AffinityGetNextCPU(ThreadVars *tv, ThreadsAffinityType *taf) +{ + uint16_t next_cpu = 0; +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + int iface_numa = -1; + if (TopologyShouldAutooptimize(tv, taf)) { + TopologyInitialize(); + iface_numa = InterfaceGetNumaNode(tv); + } + + SCMutexLock(&taf->taf_mutex); + 
next_cpu = GetNextAvailableCPU(iface_numa, taf); + + if (AllCPUsUsed(taf)) { + ResetCPUs(taf); + } + + SCLogDebug("Setting affinity on CPU %d", next_cpu); SCMutexUnlock(&taf->taf_mutex); - SCLogDebug("Setting affinity on CPU %d", ncpu); #endif /* OS_WIN32 and __OpenBSD__ */ - return ncpu; + + return next_cpu; } +/** + * \brief Return the total number of CPUs in a given affinity + * \retval the number of affined CPUs + */ uint16_t UtilAffinityGetAffinedCPUNum(ThreadsAffinityType *taf) { uint16_t ncpu = 0; diff --git a/src/util-affinity.h b/src/util-affinity.h index 2fa4509ffa2c..ee00183df64f 100644 --- a/src/util-affinity.h +++ b/src/util-affinity.h @@ -26,6 +26,9 @@ #include "suricata-common.h" #include "conf.h" #include "threads.h" +#include "threadvars.h" + +#include <hwloc.h> #if defined OS_FREEBSD #include @@ -62,10 +65,13 @@ enum { MAX_AFFINITY }; +#define MAX_NUMA_NODES 16 + typedef struct ThreadsAffinityType_ { const char *name; uint8_t mode_flag; - uint16_t lcpu; /* use by exclusive mode */ + uint16_t lcpu[MAX_NUMA_NODES]; /* use by exclusive mode */ + // uint16_t lcpu; /* use by exclusive mode */ int prio; uint32_t nb_threads; SCMutex taf_mutex; @@ -76,6 +82,10 @@ typedef struct ThreadsAffinityType_ { cpu_set_t medprio_cpu; cpu_set_t hiprio_cpu; #endif + struct ThreadsAffinityType_ **children; + uint32_t nb_children; + uint32_t nb_children_capacity; + struct ThreadsAffinityType_ *parent; } ThreadsAffinityType; /** store thread affinity mode for all type of threads */ @@ -85,8 +95,14 @@ extern ThreadsAffinityType thread_affinity[MAX_CPU_SET]; void AffinitySetupLoadFromConfig(void); ThreadsAffinityType * GetAffinityTypeFromName(const char *name); +ThreadsAffinityType *GetOrAllocAffinityTypeForIfaceOfName( + const char *name, const char *interface_name); +ThreadsAffinityType *GetAffinityTypeForNameAndIface(const char *name, const char *interface_name); +ThreadsAffinityType *FindAffinityByInterface( + ThreadsAffinityType *parent, const char *interface_name); -uint16_t 
AffinityGetNextCPU(ThreadsAffinityType *taf); +void TopologyDestroy(void); +uint16_t AffinityGetNextCPU(ThreadVars *tv, ThreadsAffinityType *taf); uint16_t UtilAffinityGetAffinedCPUNum(ThreadsAffinityType *taf); #ifdef HAVE_DPDK uint16_t UtilAffinityCpusOverlap(ThreadsAffinityType *taf1, ThreadsAffinityType *taf2); diff --git a/src/util-device.c b/src/util-device.c index fd4cf5685f0b..ec1e91b41374 100644 --- a/src/util-device.c +++ b/src/util-device.c @@ -24,6 +24,7 @@ #include "device-storage.h" #include "util-debug.h" +#include "util-affinity.h" #define MAX_DEVNAME 10 @@ -173,6 +174,20 @@ int LiveGetDeviceCount(void) return i; } +int LiveGetDeviceCountWithoutAssignedThreading(void) +{ + int i = 0; + LiveDevice *pd; + + TAILQ_FOREACH (pd, &live_devices, next) { + if (GetAffinityTypeForNameAndIface("worker-cpu-set", pd->dev) == NULL) { + i++; + } + } + + return i; +} + /** * \brief Get a pointer to the device name at idx * diff --git a/src/util-device.h b/src/util-device.h index 0774825385a3..075c21567c81 100644 --- a/src/util-device.h +++ b/src/util-device.h @@ -85,6 +85,7 @@ void LiveDevAddBypassStats(LiveDevice *dev, uint64_t cnt, int family); void LiveDevSubBypassStats(LiveDevice *dev, uint64_t cnt, int family); void LiveDevAddBypassFail(LiveDevice *dev, uint64_t cnt, int family); void LiveDevAddBypassSuccess(LiveDevice *dev, uint64_t cnt, int family); +int LiveGetDeviceCountWithoutAssignedThreading(void); int LiveGetDeviceCount(void); const char *LiveGetDeviceName(int number); LiveDevice *LiveGetDevice(const char *dev); diff --git a/src/util-runmodes.c b/src/util-runmodes.c index f78e857abfc6..078deeb82a98 100644 --- a/src/util-runmodes.c +++ b/src/util-runmodes.c @@ -175,6 +175,7 @@ int RunModeSetLiveCaptureAutoFp(ConfigIfaceParserFunc ConfigParser, FatalError("TmThreadsCreate failed"); } tv_receive->printable_name = printable_threadname; + tv_receive->iface_name = dev ? 
strdup(dev) : NULL; TmModule *tm_module = TmModuleGetByName(recv_mod_name); if (tm_module == NULL) { FatalError("TmModuleGetByName failed for %s", recv_mod_name); @@ -283,6 +284,7 @@ static int RunModeSetLiveCaptureWorkersForDevice(ConfigIfaceThreadsCountFunc Mod FatalError("TmThreadsCreate failed"); } tv->printable_name = printable_threadname; + tv->iface_name = live_dev ? strdup(live_dev) : NULL; tm_module = TmModuleGetByName(recv_mod_name); if (tm_module == NULL) { diff --git a/suricata.yaml.in b/suricata.yaml.in index 82a72bad0f3a..9e724b09578a 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -1781,25 +1781,39 @@ threading: # verdict-cpu-set is used for IPS verdict threads # cpu-affinity: - - management-cpu-set: - cpu: [ 0 ] # include only these CPUs in affinity settings - - receive-cpu-set: - cpu: [ 0 ] # include only these CPUs in affinity settings - - worker-cpu-set: - cpu: [ "all" ] - mode: "exclusive" - # Use explicitly 3 threads and don't compute number by using - # detect-thread-ratio variable: - # threads: 3 - prio: - low: [ 0 ] - medium: [ "1-2" ] - high: [ 3 ] - default: "medium" - #- verdict-cpu-set: - # cpu: [ 0 ] - # prio: - # default: "high" + management-cpu-set: + cpu: [ 0 ] # include only these CPUs in affinity settings + receive-cpu-set: + cpu: [ 0 ] # include only these CPUs in affinity settings + # interface-specific-cpu-set: + # - interface: "enp4s0f0" + # cpu: [ 1,3,5,7,9 ] + # mode: "exclusive" + # prio: + # high: [ "all" ] + # default: "medium" + worker-cpu-set: + cpu: [ "all" ] + mode: "exclusive" + # Use explicitly 3 threads and don't compute number by using + # detect-thread-ratio variable: + # threads: 3 + prio: + low: [ 0 ] + medium: [ "1-2" ] + high: [ 3 ] + default: "medium" + interface-specific-cpu-set: + - interface: "enp4s0f0" # 0000:3b:00.0 # net_bonding0 # ens1f0 + cpu: [ 1,3,5,7,9 ] + mode: "exclusive" + prio: + high: [ "all" ] + default: "medium" + #verdict-cpu-set: + # cpu: [ 0 ] + # prio: + # default: "high" # # By 
default Suricata creates one "detect" thread per available CPU/CPU core. # This setting allows controlling this behaviour. A ratio setting of 2 will From c5d852cac1c73ac5c35357f53ef5f584568af9e7 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 6 Dec 2024 13:31:13 +0100 Subject: [PATCH 4/7] dpdk: move DPDK socket retrieval to utils --- src/runmode-dpdk.c | 25 ++----------------------- src/util-dpdk.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/util-dpdk.h | 2 ++ 3 files changed, 48 insertions(+), 23 deletions(-) diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c index a086aafee0c1..bc36f90e3896 100644 --- a/src/runmode-dpdk.c +++ b/src/runmode-dpdk.c @@ -1147,27 +1147,6 @@ static void DeviceSetMTU(struct rte_eth_conf *port_conf, uint16_t mtu) #endif } -/** - * \param port_id - queried port - * \param socket_id - socket ID of the queried port - * \return non-negative number on success, negative on failure (errno) - */ -static int32_t DeviceSetSocketID(uint16_t port_id, int32_t *socket_id) -{ - rte_errno = 0; - int retval = rte_eth_dev_socket_id(port_id); - *socket_id = retval; - -#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0) // DPDK API changed since 22.11 - retval = -rte_errno; -#else - if (retval == SOCKET_ID_ANY) - retval = 0; // DPDK couldn't determine socket ID of a port -#endif - - return retval; -} - static void PortConfSetInterruptMode(const DPDKIfaceConfig *iconf, struct rte_eth_conf *port_conf) { SCLogConfig("%s: interrupt mode is %s", iconf->iface, @@ -1409,7 +1388,7 @@ static int DeviceConfigureIPS(DPDKIfaceConfig *iconf) SCReturnInt(-ENODEV); } int32_t out_port_socket_id; - int retval = DeviceSetSocketID(iconf->out_port_id, &out_port_socket_id); + int retval = DPDKDeviceSetSocketID(iconf->out_port_id, &out_port_socket_id); if (retval < 0) { SCLogError("%s: invalid socket id: %s", iconf->out_iface, rte_strerror(-retval)); SCReturnInt(retval); @@ -1488,7 +1467,7 @@ static int DeviceConfigure(DPDKIfaceConfig *iconf) 
SCReturnInt(-ENODEV); } - int32_t retval = DeviceSetSocketID(iconf->port_id, &iconf->socket_id); + int32_t retval = DPDKDeviceSetSocketID(iconf->port_id, &iconf->socket_id); if (retval < 0) { SCLogError("%s: invalid socket id: %s", iconf->iface, rte_strerror(-retval)); SCReturnInt(retval); diff --git a/src/util-dpdk.c b/src/util-dpdk.c index b5f46a30a5d8..1266a5c91b2f 100644 --- a/src/util-dpdk.c +++ b/src/util-dpdk.c @@ -65,6 +65,50 @@ void DPDKFreeDevice(LiveDevice *ldev) #endif } +/** + * \param port_id - queried port + * \param socket_id - socket ID of the queried port + * \return non-negative number on success, negative on failure (errno) + */ +int32_t DPDKDeviceSetSocketID(uint16_t port_id, int32_t *socket_id) +{ +#ifdef HAVE_DPDK + rte_errno = 0; + int retval = rte_eth_dev_socket_id(port_id); + *socket_id = retval; + +#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0) // DPDK API changed since 22.11 + retval = -rte_errno; +#else + if (retval == SOCKET_ID_ANY) + retval = 0; // DPDK couldn't determine socket ID of a port +#endif + + return retval; +#endif /* HAVE_DPDK */ + return -ENOTSUP; +} + +/** + * \param iface_name - name of the queried interface + * \param socket_id - socket ID of the queried port + * \return non-negative number on success, negative on failure (errno) + */ +int32_t DPDKDeviceNameSetSocketID(char *iface_name, int32_t *socket_id) +{ +#ifdef HAVE_DPDK + uint16_t port_id = 0; + int r = rte_eth_dev_get_port_by_name(iface_name, &port_id); + if (r < 0) { + SCLogError("%s: interface not found: %s", iface_name, rte_strerror(-r)); + SCReturnInt(r); + } + return DPDKDeviceSetSocketID(port_id, socket_id); +#endif /* HAVE_DPDK */ + return -ENOTSUP; +} + + #ifdef HAVE_DPDK /** * Retrieves name of the port from port id diff --git a/src/util-dpdk.h b/src/util-dpdk.h index 1fb3532f5d4d..0c72dfc269d6 100644 --- a/src/util-dpdk.h +++ b/src/util-dpdk.h @@ -121,6 +121,8 @@ void DPDKCleanupEAL(void); void DPDKCloseDevice(LiveDevice *ldev); void 
DPDKFreeDevice(LiveDevice *ldev); +int32_t DPDKDeviceSetSocketID(uint16_t port_id, int32_t *socket_id); +int32_t DPDKDeviceNameSetSocketID(char *iface_name, int32_t *socket_id); #ifdef HAVE_DPDK const char *DPDKGetPortNameByPortID(uint16_t pid); From 5ff2f9f0b945c19fa0751c708f54f4d9bfada636 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 6 Dec 2024 13:42:21 +0100 Subject: [PATCH 5/7] util-affinity: move properties of *-cpu-set node one layer up in YAML --- src/util-affinity.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/util-affinity.c b/src/util-affinity.c index 4e21e71e5688..0546ffeaa5c1 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -295,8 +295,7 @@ static const char *GetAffinitySetName(const char *val) static void SetupCpuSets(ThreadsAffinityType *taf, ConfNode *affinity, const char *setname) { CPU_ZERO(&taf->cpu_set); - - ConfNode *cpu_node = ConfNodeLookupChild(affinity->head.tqh_first, "cpu"); + ConfNode *cpu_node = ConfNodeLookupChild(affinity, "cpu"); if (cpu_node != NULL) { BuildCpuset(setname, cpu_node, &taf->cpu_set); } else { @@ -348,15 +347,13 @@ static void SetupAffinityPriority(ThreadsAffinityType *taf, ConfNode *affinity, CPU_ZERO(&taf->lowprio_cpu); CPU_ZERO(&taf->medprio_cpu); CPU_ZERO(&taf->hiprio_cpu); - - ConfNode *prio_node = ConfNodeLookupChild(affinity->head.tqh_first, "prio"); + ConfNode *prio_node = ConfNodeLookupChild(affinity, "prio"); if (prio_node == NULL) return; BuildPriorityCpuset(taf, prio_node, "low", &taf->lowprio_cpu, setname); BuildPriorityCpuset(taf, prio_node, "medium", &taf->medprio_cpu, setname); BuildPriorityCpuset(taf, prio_node, "high", &taf->hiprio_cpu, setname); - SetupDefaultPriority(taf, prio_node, setname); } @@ -365,7 +362,7 @@ static void SetupAffinityPriority(ThreadsAffinityType *taf, ConfNode *affinity, */ static void SetupAffinityMode(ThreadsAffinityType *taf, ConfNode *affinity) { - ConfNode *mode_node = ConfNodeLookupChild(affinity->head.tqh_first, 
"mode"); + ConfNode *mode_node = ConfNodeLookupChild(affinity, "mode"); if (mode_node == NULL) return; @@ -383,7 +380,7 @@ static void SetupAffinityMode(ThreadsAffinityType *taf, ConfNode *affinity) */ static void SetupAffinityThreads(ThreadsAffinityType *taf, ConfNode *affinity) { - ConfNode *threads_node = ConfNodeLookupChild(affinity->head.tqh_first, "threads"); + ConfNode *threads_node = ConfNodeLookupChild(affinity, "threads"); if (threads_node == NULL) return; From 2b51c4366463f526f2c951f94f53e7d22dd461a5 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 6 Dec 2024 13:47:43 +0100 Subject: [PATCH 6/7] threading: transform *-cpu-set nodes from list items to nodes Part of Ticket 2321 work to remove unnecessary lists from the config file. Ticket: 2321 --- src/util-affinity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util-affinity.c b/src/util-affinity.c index 0546ffeaa5c1..0b87059c3dab 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -474,7 +474,7 @@ void AffinitySetupLoadFromConfig(void) ConfNode *affinity; TAILQ_FOREACH(affinity, &root->head, next) { - const char *setname = GetAffinitySetName(affinity->val); + const char *setname = GetAffinitySetName(affinity->name); if (setname == NULL) continue; From a7c081329b7db4e5bd126f1909dca4e37f7ca06a Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Fri, 6 Dec 2024 13:49:58 +0100 Subject: [PATCH 7/7] dpdk: ice card seems to have RSS key length 52 from 22.11 - todo verify 22.11.x subversions --- src/util-dpdk-ice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util-dpdk-ice.c b/src/util-dpdk-ice.c index 4b714d88c353..0dd783c94b64 100644 --- a/src/util-dpdk-ice.c +++ b/src/util-dpdk-ice.c @@ -49,7 +49,7 @@ static void iceDeviceSetRSSHashFunction(uint64_t *rss_hf) void iceDeviceSetRSSConf(struct rte_eth_rss_conf *rss_conf) { iceDeviceSetRSSHashFunction(&rss_conf->rss_hf); -#if RTE_VERSION < RTE_VERSION_NUM(23, 11, 0, 0) +#if RTE_VERSION < 
RTE_VERSION_NUM(22, 11, 0, 0) rss_conf->rss_key_len = 40; #else rss_conf->rss_key_len = 52;