From 517b45ea2d5537c88ea2a081a620adb6621896c5 Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Wed, 29 Jun 2016 17:55:54 +0200 Subject: [PATCH] netmap: switch to nm_* API Process multiple packets at nm_dispatch. Use zero copy for workers recv mode. Add configure check netmap check for API 11+ and find netmap api version. Add netmap guide to the userguide. --- configure.ac | 63 +- doc/userguide/capture-hardware/index.rst | 1 + doc/userguide/capture-hardware/netmap.rst | 223 +++++++ src/runmode-netmap.c | 97 ++- src/source-netmap.c | 700 +++++++--------------- src/source-netmap.h | 19 +- src/suricata.c | 6 +- suricata.yaml.in | 6 +- 8 files changed, 590 insertions(+), 525 deletions(-) create mode 100644 doc/userguide/capture-hardware/netmap.rst diff --git a/configure.ac b/configure.ac index 6515b3f26407..b4873214c9fb 100644 --- a/configure.ac +++ b/configure.ac @@ -1445,6 +1445,67 @@ fi AC_CHECK_HEADER(net/netmap_user.h,,[AC_ERROR(net/netmap_user.h not found ...)],) + + have_recent_netmap="no" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + #include + ],[ + #ifndef NETMAP_API + #error "outdated netmap, need one with NETMAP_API" + #endif + #if NETMAP_API < 11 + #error "outdated netmap, need at least api version 11" + #endif + ])], [have_recent_netmap="yes"]) + if test "x$have_recent_netmap" != "xyes"; then + echo "ERROR: outdated netmap" + exit 1 + fi + have_netmap_version="unknown" + have_v11_netmap="no" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + #include + ],[ + #if NETMAP_API != 11 + #error "not 11" + #endif + ])], [have_v11_netmap="yes"]) + if test "x$have_v11_netmap" = "xyes"; then + have_netmap_version="v11" + fi + have_v12_netmap="no" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + #include + ],[ + #if NETMAP_API != 12 + #error "not 12" + #endif + ])], [have_v12_netmap="yes"]) + if test "x$have_v12_netmap" = "xyes"; then + have_netmap_version="v12" + fi + have_v13_netmap="no" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + #include + ],[ + #if NETMAP_API != 13 + #error "not 13" + 
#endif + ])], [have_v13_netmap="yes"]) + if test "x$have_v13_netmap" = "xyes"; then + have_netmap_version="v13" + fi + have_gtv13_netmap="no" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + #include + ],[ + #if NETMAP_API <= 13 + #error "not gt 13" + #endif + ])], [have_gtv13_netmap="yes"]) + if test "x$have_gtv13_netmap" = "xyes"; then + have_netmap_version="> v13" + fi ]) # Suricata-Update. @@ -2459,7 +2520,7 @@ SURICATA_BUILD_CONF="Suricata Configuration: NFQueue support: ${enable_nfqueue} NFLOG support: ${enable_nflog} IPFW support: ${enable_ipfw} - Netmap support: ${enable_netmap} + Netmap support: ${enable_netmap} ${have_netmap_version} DAG enabled: ${enable_dag} Napatech enabled: ${enable_napatech} WinDivert enabled: ${enable_windivert} diff --git a/doc/userguide/capture-hardware/index.rst b/doc/userguide/capture-hardware/index.rst index 309e664fd85b..e74c730fbf22 100644 --- a/doc/userguide/capture-hardware/index.rst +++ b/doc/userguide/capture-hardware/index.rst @@ -7,3 +7,4 @@ Using Capture Hardware napatech myricom ebpf-xdp + netmap diff --git a/doc/userguide/capture-hardware/netmap.rst b/doc/userguide/capture-hardware/netmap.rst new file mode 100644 index 000000000000..3f0b24bf044c --- /dev/null +++ b/doc/userguide/capture-hardware/netmap.rst @@ -0,0 +1,223 @@ +Netmap +====== + +Netmap is a high speed capture framework for Linux and FreeBSD. In Linux it +is available as an external module, while in FreeBSD 11+ it is available by +default. + + +Compiling Suricata +------------------ + +FreeBSD +~~~~~~~ + +On FreeBSD 11 and up, NETMAP is included and enabled by default in the kernel. + +To build Suricata with NETMAP, add ``--enable-netmap`` to the configure line. +The location of the NETMAP includes (/usr/src/sys/net/) does not have to be +specified. + +Linux +~~~~~ + +On Linux, NETMAP is not included by default. It can be pulled from github. +Follow the instructions on installation included in the NETMAP repository. 
+ +When NETMAP is installed, add ``--enable-netmap`` to the configure line. +If the includes are not added to a standard location, the location can +be specified on the Suricata configure commandline. + +Example:: + + ./configure --enable-netmap --with-netmap-includes=/usr/local/include/netmap/ + +Starting Suricata +----------------- + +When opening an interface, netmap can take various special characters as +options in the interface string. + +.. warning:: the interface that netmap reads from will become unavailable + for normal network operations. You can lock yourself out of + your system. + +IDS +~~~ + +Suricata can be started in 2 ways to use netmap: + +:: + + suricata --netmap=<interface> + suricata --netmap=igb0 + +In the above example Suricata will start reading from igb0. The number of +threads created depends on the number of RSS queues available on the NIC. + +:: + + suricata --netmap + +In the above example Suricata will take the ``netmap`` block from the yaml +and open each of the interfaces listed. + +:: + + netmap: + - interface: igb0 + threads: 2 + - interface: igb1 + threads: 4 + +For the above configuration, both igb0 and igb1 would be opened. With 2 +threads for igb0 and 4 capture threads for igb1. + +.. warning:: This multi threaded setup only works correctly if the NIC + has symmetric RSS hashing. If this is not the case, consider + using the 'lb' method below. + +IPS +~~~ + +Suricata's Netmap based IPS mode is based on the concept of creating +a layer 2 software bridge between 2 interfaces. Suricata reads packets on +one interface and transmits them on another. + +Packets that are blocked by the IPS policy are simply not transmitted. + +:: + + netmap: + - interface: igb0 + copy-mode: ips + copy-iface: igb1 + - interface: igb1 + copy-mode: ips + copy-iface: igb0 + +Advanced setups +--------------- + +lb (load balance) +----------------- + +"lb" is a tool written by Seth Hall to allow for load balancing for single +or multiple tools. 
One common use case is being able to run Suricata and +Zeek together on the same traffic. + +starting lb:: + + lb -i eth0 -p suricata:6 -p zeek:6 + +.. note:: On FreeBSD 11, the named prefix doesn't work. + +yaml:: + + netmap: + - interface: suricata + threads: 6 + +startup:: + + suricata --netmap=netmap:suricata + +The interface name as passed to Suricata includes a 'netmap:' prefix. This +tells Suricata that it's going to read from netmap pipes instead of a real +interface. + +Then Zeek (formerly Bro) can be configured to load 6 instances. Both will +get a copy of the same traffic. The number of netmap pipes does not have +to be equal for both tools. + +FreeBSD 11 +~~~~~~~~~~ + +On FreeBSD 11 the named pipe is not available. + +starting lb:: + + lb -i eth0 -p 6 + +yaml:: + + netmap: + - interface: netmap:eth0 + threads: 6 + +startup:: + + suricata --netmap + + +.. note:: "lb" is bundled with netmap. + +Single NIC +~~~~~~~~~~ + +When an interface enters NETMAP mode, it is no longer available to +the OS for other operations. This can be undesirable in certain +cases, but there is a workaround. + +By running Suricata in a special inline mode, the interface will +show its traffic to the OS. + +:: + + netmap: + - interface: igb0 + copy-mode: tap + copy-iface: igb0^ + - interface: igb0^ + copy-mode: tap + copy-iface: igb0 + +The copy-mode can be either 'tap' or 'ips', where the former never +drops packets based on the policies in use, and the latter may drop +packets. + +.. warning:: Misconfiguration can lead to connectivity loss. Use + with care. + +.. note:: This setup can also be used to mix NETMAP with firewall + setups like pf or ipfw. + +VALE switches +~~~~~~~~~~~~~ + +VALE is a virtual switch that can be used to create an all virtual +network or a mix of virtual and real nics. + +A simple all virtual setup:: + + vale-ctl -n vi0 + vale-ctl -a vale0:vi0 + vale-ctl -n vi1 + vale-ctl -a vale0:vi1 + +We now have a virtual switch "vale0" with 2 ports "vi0" and "vi1". 
+ +We can start Suricata to listen on one of the ports:: + + suricata --netmap=vale0:vi1 + +Then we can + +Inline IDS +---------- + +The inline IDS is almost the same as the IPS setup above, but it will not +enforce ``drop`` policies. + +:: + + netmap: + - interface: igb0 + copy-mode: tap + copy-iface: igb1 + - interface: igb1 + copy-mode: tap + copy-iface: igb0 + +The only difference with the IPS mode is that the ``copy-mode`` setting is +set to ``tap``. diff --git a/src/runmode-netmap.c b/src/runmode-netmap.c index 3a525e76f4e4..0faae741db0f 100644 --- a/src/runmode-netmap.c +++ b/src/runmode-netmap.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2014 Open Information Security Foundation +/* Copyright (C) 2014-2018 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -57,11 +57,9 @@ extern int max_pending_packets; -static const char *default_mode_workers = NULL; - const char *RunModeNetmapGetDefaultMode(void) { - return default_mode_workers; + return "workers"; } void RunModeIdsNetmapRegister(void) @@ -71,13 +69,12 @@ void RunModeIdsNetmapRegister(void) RunModeIdsNetmapSingle); RunModeRegisterNewRunMode(RUNMODE_NETMAP, "workers", "Workers netmap mode, each thread does all" - " tasks from acquisition to logging", + " tasks from acquisition to logging", RunModeIdsNetmapWorkers); - default_mode_workers = "workers"; RunModeRegisterNewRunMode(RUNMODE_NETMAP, "autofp", "Multi threaded netmap mode. 
Packets from " - "each flow are assigned to a single detect " - "thread.", + "each flow are assigned to a single detect " + "thread.", RunModeIdsNetmapAutoFp); return; } @@ -97,19 +94,31 @@ static int ParseNetmapSettings(NetmapIfaceSettings *ns, const char *iface, ConfNode *if_root, ConfNode *if_default) { ns->threads = 0; - ns->promisc = 1; + ns->promisc = true; ns->checksum_mode = CHECKSUM_VALIDATION_AUTO; ns->copy_mode = NETMAP_COPY_MODE_NONE; - strlcpy(ns->iface, iface, sizeof(ns->iface)); + if (ns->iface[0]) { size_t len = strlen(ns->iface); if (ns->iface[len-1] == '+') { - ns->iface[len-1] = '\0'; - ns->sw_ring = 1; + SCLogWarning(SC_WARN_OPTION_OBSOLETE, + "netmap interface %s uses obsolete '+' notation. " + "Using '^' instead.", ns->iface); + ns->iface[len-1] = '^'; + ns->sw_ring = true; + } else if (ns->iface[len-1] == '^') { + ns->sw_ring = true; } } + /* prefixed with netmap or vale means it's not a real interface + * and we don't check offloading. */ + if (strncmp(ns->iface, "netmap:", 7) != 0 && + strncmp(ns->iface, "vale", 4) != 0) { + ns->real = true; + } + const char *bpf_filter = NULL; if (ConfGet("bpf-filter", &bpf_filter) == 1) { if (strlen(bpf_filter) > 0) { @@ -134,9 +143,11 @@ static int ParseNetmapSettings(NetmapIfaceSettings *ns, const char *iface, const char *threadsstr = NULL; if (ConfGetChildValueWithDefault(if_root, if_default, "threads", &threadsstr) != 1) { ns->threads = 0; + ns->threads_auto = true; } else { if (strcmp(threadsstr, "auto") == 0) { ns->threads = 0; + ns->threads_auto = true; } else { ns->threads = atoi(threadsstr); } @@ -157,7 +168,7 @@ static int ParseNetmapSettings(NetmapIfaceSettings *ns, const char *iface, (void)ConfGetChildValueBoolWithDefault(if_root, if_default, "disable-promisc", (int *)&boolval); if (boolval) { SCLogInfo("Disabling promiscuous mode on iface %s", ns->iface); - ns->promisc = 0; + ns->promisc = false; } const char *tmpctype; @@ -192,28 +203,23 @@ static int ParseNetmapSettings(NetmapIfaceSettings 
*ns, const char *iface, finalize: + ns->ips = (ns->copy_mode != NETMAP_COPY_MODE_NONE); + if (ns->sw_ring) { /* just one thread per interface supported */ ns->threads = 1; - } else if (ns->threads == 0) { - /* As NetmapGetRSSCount is broken on Linux, first run - * GetIfaceRSSQueuesNum. If that fails, run NetmapGetRSSCount */ - ns->threads = GetIfaceRSSQueuesNum(ns->iface); + } else if (ns->threads_auto) { + /* As NetmapGetRSSCount used to be broken on Linux, + * fall back to GetIfaceRSSQueuesNum if needed. */ + ns->threads = NetmapGetRSSCount(ns->iface); if (ns->threads == 0) { - ns->threads = NetmapGetRSSCount(ns->iface); + ns->threads = GetIfaceRSSQueuesNum(ns->iface); } } if (ns->threads <= 0) { ns->threads = 1; } - /* netmap needs all offloading to be disabled */ - if (LiveGetOffload() == 0) { - (void)GetIfaceOffloading(ns->iface, 1, 1); - } else { - DisableIfaceOffloading(LiveGetDevice(ns->iface), 1, 1); - } - return 0; } @@ -231,7 +237,6 @@ static void *ParseNetmapConfig(const char *iface_name) { ConfNode *if_root = NULL; ConfNode *if_default = NULL; - ConfNode *netmap_node; const char *out_iface = NULL; if (iface_name == NULL) { @@ -242,15 +247,15 @@ static void *ParseNetmapConfig(const char *iface_name) if (unlikely(aconf == NULL)) { return NULL; } - memset(aconf, 0, sizeof(*aconf)); + aconf->DerefFunc = NetmapDerefConfig; strlcpy(aconf->iface_name, iface_name, sizeof(aconf->iface_name)); SC_ATOMIC_INIT(aconf->ref); (void) SC_ATOMIC_ADD(aconf->ref, 1); /* Find initial node */ - netmap_node = ConfGetNode("netmap"); + ConfNode *netmap_node = ConfGetNode("netmap"); if (netmap_node == NULL) { SCLogInfo("Unable to find netmap config using default value"); } else { @@ -262,15 +267,41 @@ static void *ParseNetmapConfig(const char *iface_name) ParseNetmapSettings(&aconf->in, aconf->iface_name, if_root, if_default); /* if we have a copy iface, parse that as well */ - if (netmap_node != NULL) { - if (ConfGetChildValueWithDefault(if_root, if_default, "copy-iface", 
&out_iface) == 1) { - if (strlen(out_iface) > 0) { - if_root = ConfFindDeviceConfig(netmap_node, out_iface); - ParseNetmapSettings(&aconf->out, out_iface, if_root, if_default); + if (netmap_node != NULL && + ConfGetChildValueWithDefault(if_root, if_default, "copy-iface", &out_iface) == 1) + { + if (strlen(out_iface) > 0) { + if_root = ConfFindDeviceConfig(netmap_node, out_iface); + ParseNetmapSettings(&aconf->out, out_iface, if_root, if_default); + + /* if one side of the IPS peering uses a sw_ring, we will default + * to using a single ring/thread on the other side as well. Only + * if thread variable is set to 'auto'. So the user can override + * this. */ + if (aconf->out.sw_ring && aconf->in.threads_auto) { + aconf->out.threads = aconf->in.threads = 1; + } else if (aconf->in.sw_ring && aconf->out.threads_auto) { + aconf->out.threads = aconf->in.threads = 1; } } } + /* netmap needs all offloading to be disabled */ + if (aconf->in.real) { + char base_name[sizeof(aconf->in.iface)]; + strlcpy(base_name, aconf->in.iface, sizeof(base_name)); + /* for a sw_ring enabled device name, strip the trailing char */ + if (aconf->in.sw_ring) { + base_name[strlen(base_name) - 1] = '\0'; + } + + if (LiveGetOffload() == 0) { + (void)GetIfaceOffloading(base_name, 1, 1); + } else { + DisableIfaceOffloading(LiveGetDevice(base_name), 1, 1); + } + } + SC_ATOMIC_RESET(aconf->ref); (void) SC_ATOMIC_ADD(aconf->ref, aconf->in.threads); SCLogPerf("Using %d threads for interface %s", aconf->in.threads, diff --git a/src/source-netmap.c b/src/source-netmap.c index d0340b7b9d26..75e23be7fd85 100644 --- a/src/source-netmap.c +++ b/src/source-netmap.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2014 Open Information Security Foundation +/* Copyright (C) 2011-2018 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -25,23 +25,21 @@ * \file * * \author Aleksey Katargin +* 
\author Victor Julien * * Netmap socket acquisition support * +* Many thanks to Luigi Rizzo for guidance and support. +* */ #include "suricata-common.h" -#include "config.h" #include "suricata.h" #include "decode.h" -#include "packet-queue.h" #include "threads.h" #include "threadvars.h" -#include "tm-queuehandlers.h" -#include "tm-modules.h" #include "tm-threads.h" -#include "tm-threads-common.h" #include "conf.h" #include "util-bpf.h" #include "util-debug.h" @@ -50,8 +48,8 @@ #include "util-privs.h" #include "util-optimize.h" #include "util-checksum.h" -#include "util-ioctl.h" -#include "util-host-info.h" +#include "util-validate.h" + #include "tmqh-packetpool.h" #include "source-netmap.h" #include "runmodes.h" @@ -66,61 +64,48 @@ #include #endif +#define NETMAP_WITH_LIBS +#ifdef DEBUG +#define DEBUG_NETMAP_USER +#endif #include #endif /* HAVE_NETMAP */ #include "util-ioctl.h" -extern intmax_t max_pending_packets; - #ifndef HAVE_NETMAP -TmEcode NoNetmapSupportExit(ThreadVars *, const void *, void **); +/** +* \brief this function prints an error message and exits. +*/ +static TmEcode NoNetmapSupportExit(ThreadVars *tv, const void *initdata, void **data) +{ + SCLogError(SC_ERR_NO_NETMAP,"Error creating thread %s: you do not have " + "support for netmap enabled, please recompile " + "with --enable-netmap", tv->name); + exit(EXIT_FAILURE); +} void TmModuleReceiveNetmapRegister (void) { tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap"; tmm_modules[TMM_RECEIVENETMAP].ThreadInit = NoNetmapSupportExit; - tmm_modules[TMM_RECEIVENETMAP].Func = NULL; - tmm_modules[TMM_RECEIVENETMAP].ThreadExitPrintStats = NULL; - tmm_modules[TMM_RECEIVENETMAP].ThreadDeinit = NULL; - tmm_modules[TMM_RECEIVENETMAP].RegisterTests = NULL; - tmm_modules[TMM_RECEIVENETMAP].cap_flags = 0; tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM; } /** * \brief Registration Function for DecodeNetmap. -* \todo Unit tests are needed for this module. 
*/ void TmModuleDecodeNetmapRegister (void) { tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap"; tmm_modules[TMM_DECODENETMAP].ThreadInit = NoNetmapSupportExit; - tmm_modules[TMM_DECODENETMAP].Func = NULL; - tmm_modules[TMM_DECODENETMAP].ThreadExitPrintStats = NULL; - tmm_modules[TMM_DECODENETMAP].ThreadDeinit = NULL; - tmm_modules[TMM_DECODENETMAP].RegisterTests = NULL; - tmm_modules[TMM_DECODENETMAP].cap_flags = 0; tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM; } -/** -* \brief this function prints an error message and exits. -*/ -TmEcode NoNetmapSupportExit(ThreadVars *tv, const void *initdata, void **data) -{ - SCLogError(SC_ERR_NO_NETMAP,"Error creating thread %s: you do not have " - "support for netmap enabled, please recompile " - "with --enable-netmap", tv->name); - exit(EXIT_FAILURE); -} - #else /* We have NETMAP support */ -#define max(a, b) (((a) > (b)) ? (a) : (b)) - #define POLL_TIMEOUT 100 #if defined(__linux__) @@ -144,36 +129,20 @@ enum { }; /** - * \brief Netmap ring isntance. - */ -typedef struct NetmapRing -{ - int fd; - struct netmap_ring *rx; - struct netmap_ring *tx; - int dst_ring_from; - int dst_ring_to; - int dst_next_ring; - SCSpinlock tx_lock; -} NetmapRing; - -/** - * \brief Netmap device instance. + * \brief Netmap device instance. Each ring for each device gets its own + * device. */ typedef struct NetmapDevice_ { - char ifname[IFNAMSIZ]; - void *mem; - size_t memsize; - struct netmap_if *nif; - int rings_cnt; - int rx_rings_cnt; - int tx_rings_cnt; - /* hw rings + sw ring */ - NetmapRing *rings; + struct nm_desc *nmd; unsigned int ref; SC_ATOMIC_DECLARE(unsigned int, threads_run); TAILQ_ENTRY(NetmapDevice_) next; + // actual ifname can only be 16, but we store a bit more, + // like the options string and a 'netmap:' prefix. 
+ char ifname[32]; + int ring; + int direction; // 0 rx, 1 tx } NetmapDevice; /** @@ -186,13 +155,10 @@ typedef struct NetmapThreadVars_ /* dst interface for IPS mode */ NetmapDevice *ifdst; - int src_ring_from; - int src_ring_to; - int thread_idx; int flags; struct bpf_program bpf_prog; - /* internal shit */ + /* suricata internals */ TmSlot *slot; ThreadVars *tv; LiveDevice *livedev; @@ -207,8 +173,6 @@ typedef struct NetmapThreadVars_ uint64_t drops; uint16_t capture_kernel_packets; uint16_t capture_kernel_drops; - - } NetmapThreadVars; typedef TAILQ_HEAD(NetmapDeviceList_, NetmapDevice_) NetmapDeviceList; @@ -217,12 +181,12 @@ static NetmapDeviceList netmap_devlist = TAILQ_HEAD_INITIALIZER(netmap_devlist); static SCMutex netmap_devlist_lock = SCMUTEX_INITIALIZER; /** \brief get RSS RX-queue count - * \retval rx_rings RSS RX queue count or 1 on error + * \retval rx_rings RSS RX queue count or 0 on error */ int NetmapGetRSSCount(const char *ifname) { struct nmreq nm_req; - int rx_rings = 1; + int rx_rings = 0; SCMutexLock(&netmap_devlist_lock); @@ -264,165 +228,138 @@ int NetmapGetRSSCount(const char *ifname) * \param verbose Verbose error logging. * \return Zero on success. 
*/ -static int NetmapOpen(char *ifname, int promisc, NetmapDevice **pdevice, int verbose) +static int NetmapOpen(NetmapIfaceSettings *ns, + NetmapDevice **pdevice, int verbose, int read, bool zerocopy) { - NetmapDevice *pdev = NULL; - struct nmreq nm_req; - - *pdevice = NULL; - - SCMutexLock(&netmap_devlist_lock); + SCEnter(); + SCLogDebug("ifname %s", ns->iface); + + char base_name[IFNAMSIZ]; + strlcpy(base_name, ns->iface, sizeof(base_name)); + if (strlen(base_name) > 0 && + (base_name[strlen(base_name)-1] == '^' || + base_name[strlen(base_name)-1] == '*')) + { + base_name[strlen(base_name)-1] = '\0'; + } + + if (ns->real) { + /* check interface is up */ + int if_flags = GetIfaceFlags(base_name); + if (if_flags == -1) { + if (verbose) { + SCLogError(SC_ERR_NETMAP_CREATE, + "Can not access to interface '%s' (%s)", + base_name, ns->iface); + } + goto error; + } - /* search interface in our already opened list */ - TAILQ_FOREACH(pdev, &netmap_devlist, next) { - if (strcmp(ifname, pdev->ifname) == 0) { - *pdevice = pdev; - pdev->ref++; - SCMutexUnlock(&netmap_devlist_lock); - return 0; + /* bring iface up if it is down */ + if ((if_flags & IFF_UP) == 0) { + SCLogError(SC_ERR_NETMAP_CREATE, "interface '%s' (%s) is down", base_name, ns->iface); + goto error; + } + /* if needed, try to set iface in promisc mode */ + if (ns->promisc && (if_flags & (IFF_PROMISC|IFF_PPROMISC)) == 0) { + if_flags |= IFF_PPROMISC; + SetIfaceFlags(base_name, if_flags); // TODO reset at exit + // TODO move to parse config? 
} } - - /* not found, create new record */ + NetmapDevice *pdev = NULL, *spdev = NULL; pdev = SCMalloc(sizeof(*pdev)); if (unlikely(pdev == NULL)) { SCLogError(SC_ERR_MEM_ALLOC, "Memory allocation failed"); goto error; } - memset(pdev, 0, sizeof(*pdev)); SC_ATOMIC_INIT(pdev->threads_run); - strlcpy(pdev->ifname, ifname, sizeof(pdev->ifname)); - - /* open netmap */ - int fd = open("/dev/netmap", O_RDWR); - if (fd == -1) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Couldn't open netmap device, error %s", - strerror(errno)); - goto error_pdev; - } - - /* check interface is up */ - int if_flags = GetIfaceFlags(ifname); - if (if_flags == -1) { - if (verbose) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Can not access to interface '%s'", - ifname); - } - goto error_fd; - } - if ((if_flags & IFF_UP) == 0) { - SCLogWarning(SC_ERR_NETMAP_CREATE, "Interface '%s' is down", ifname); - goto error_fd; - } - /* if needed, try to set iface in promisc mode */ - if (promisc && (if_flags & (IFF_PROMISC|IFF_PPROMISC)) == 0) { - if_flags |= IFF_PPROMISC; - SetIfaceFlags(ifname, if_flags); - } - /* query netmap info */ - memset(&nm_req, 0, sizeof(nm_req)); - strlcpy(nm_req.nr_name, ifname, sizeof(nm_req.nr_name)); - nm_req.nr_version = NETMAP_API; + SCMutexLock(&netmap_devlist_lock); - if (ioctl(fd, NIOCGINFO, &nm_req) != 0) { - if (verbose) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Couldn't query netmap for %s, error %s", - ifname, strerror(errno)); + const int direction = (read != 1); + int ring = 0; + /* search interface in our already opened list */ + TAILQ_FOREACH(spdev, &netmap_devlist, next) { + SCLogDebug("spdev %s", spdev->ifname); + if (direction == spdev->direction && strcmp(ns->iface, spdev->ifname) == 0) { + ring = spdev->ring + 1; } - goto error_fd; - }; - - pdev->memsize = nm_req.nr_memsize; - pdev->rx_rings_cnt = nm_req.nr_rx_rings; - pdev->tx_rings_cnt = nm_req.nr_tx_rings; - pdev->rings_cnt = max(pdev->rx_rings_cnt, pdev->tx_rings_cnt); - - /* hw rings + sw ring */ - pdev->rings = 
SCMalloc(sizeof(*pdev->rings) * (pdev->rings_cnt + 1)); - if (unlikely(pdev->rings == NULL)) { - SCLogError(SC_ERR_MEM_ALLOC, "Memory allocation failed"); - goto error_fd; } - memset(pdev->rings, 0, sizeof(*pdev->rings) * (pdev->rings_cnt + 1)); - - /* open individual instance for each ring */ - int success_cnt = 0; - for (int i = 0; i <= pdev->rings_cnt; i++) { - NetmapRing *pring = &pdev->rings[i]; - pring->fd = open("/dev/netmap", O_RDWR); - if (pring->fd == -1) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Couldn't open netmap device: %s", - strerror(errno)); - break; - } - - if (i < pdev->rings_cnt) { - nm_req.nr_flags = NR_REG_ONE_NIC; - nm_req.nr_ringid = i | NETMAP_NO_TX_POLL; - } else { - nm_req.nr_flags = NR_REG_SW; - nm_req.nr_ringid = NETMAP_NO_TX_POLL; - } - if (ioctl(pring->fd, NIOCREGIF, &nm_req) != 0) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Couldn't register %s with netmap: %s", - ifname, strerror(errno)); - break; - } + SCLogDebug("netmap/%s: using ring %d", ns->iface, ring); - if (pdev->mem == NULL) { - pdev->mem = mmap(0, pdev->memsize, PROT_WRITE | PROT_READ, - MAP_SHARED, pring->fd, 0); - if (pdev->mem == MAP_FAILED) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Couldn't mmap netmap device: %s", - strerror(errno)); - break; - } - pdev->nif = NETMAP_IF(pdev->mem, nm_req.nr_offset); - } + const char *opt_R = "R"; + const char *opt_T = "T"; + const char *opt_x = "x"; // not for IPS + const char *opt_z = "z"; // zero copy, not for IPS - if ((i < pdev->rx_rings_cnt) || (i == pdev->rings_cnt)) { - pring->rx = NETMAP_RXRING(pdev->nif, i); - } - if ((i < pdev->tx_rings_cnt) || (i == pdev->rings_cnt)) { - pring->tx = NETMAP_TXRING(pdev->nif, i); + // FreeBSD 11 doesn't have R and T. 
+#if NETMAP_API<=11 + opt_R = ""; + opt_T = ""; +#endif + /* assemble options string */ + char optstr[16]; + if (ns->ips) + opt_x = ""; +// z seems to not play well with multiple opens of a real dev on linux +// if (!zerocopy || ips) + opt_z = ""; + + // loop to retry opening if unsupported options are used +retry: + snprintf(optstr, sizeof(optstr), "%s%s%s", opt_z, opt_x, direction == 0 ? opt_R : opt_T); + + char devname[64]; + if (strncmp(ns->iface, "netmap:", 7) == 0) { + snprintf(devname, sizeof(devname), "%s}%d%s%s", + ns->iface, ring, strlen(optstr) ? "/" : "", optstr); + } else if (strlen(ns->iface) > 5 && strncmp(ns->iface, "vale", 4) == 0 && isdigit(ns->iface[4])) { + snprintf(devname, sizeof(devname), "%s", ns->iface); + } else if (ns->iface[strlen(ns->iface)-1] == '*' || + ns->iface[strlen(ns->iface)-1] == '^') { + SCLogDebug("device with SW-ring enabled (ns->iface): %s",ns->iface); + snprintf(devname, sizeof(devname), "netmap:%s", ns->iface); + SCLogDebug("device with SW-ring enabled (devname): %s",devname); + /* just a single ring, so don't use ring param */ + } else if (ring == 0 && ns->threads == 1) { + snprintf(devname, sizeof(devname), "netmap:%s%s%s", + ns->iface, strlen(optstr) ? "/" : "", optstr); + } else { + snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", + ns->iface, ring, strlen(optstr) ? 
"/" : "", optstr); + } + strlcpy(pdev->ifname, ns->iface, sizeof(pdev->ifname)); + + pdev->nmd = nm_open(devname, NULL, 0, NULL); + if (pdev->nmd == NULL) { + if (errno == EINVAL && opt_z[0] == 'z') { + SCLogNotice("got '%s' EINVAL: going to retry without 'z'", devname); + opt_z = ""; + goto retry; + } else if (errno == EINVAL && opt_x[0] == 'x') { + SCLogNotice("dev '%s' got EINVAL: going to retry without 'x'", devname); + opt_x = ""; + goto retry; } - SCSpinInit(&pring->tx_lock, 0); - success_cnt++; - } - if (success_cnt != (pdev->rings_cnt + 1)) { - for(int i = 0; i < success_cnt; i++) { - close(pdev->rings[i].fd); - } - if (pdev->mem) { - munmap(pdev->mem, pdev->memsize); - } - SCFree(pdev->rings); - goto error_fd; + SCLogError(SC_ERR_NETMAP_CREATE, "opening devname %s failed: %s", + devname, strerror(errno)); + exit(EXIT_FAILURE); } + SCLogDebug("devname %s %s opened", devname, ns->iface); - close(fd); - *pdevice = pdev; - + pdev->direction = direction; + pdev->ring = ring; TAILQ_INSERT_TAIL(&netmap_devlist, pdev, next); + + SCLogNotice("opened %s from %s: %p", devname, ns->iface, pdev->nmd); SCMutexUnlock(&netmap_devlist_lock); + *pdevice = pdev; return 0; - -error_fd: - close(fd); -error_pdev: - SCFree(pdev); error: - SCMutexUnlock(&netmap_devlist_lock); return -1; } @@ -441,15 +378,7 @@ static int NetmapClose(NetmapDevice *dev) if (pdev == dev) { pdev->ref--; if (!pdev->ref) { - munmap(pdev->mem, pdev->memsize); - // First close SW ring (https://github.com/luigirizzo/netmap/issues/144) - for (int i = pdev->rings_cnt; i >= 0; i--) { - NetmapRing *pring = &pdev->rings[i]; - close(pring->fd); - SCSpinDestroy(&pring->tx_lock); - } - SCFree(pdev->rings); - TAILQ_REMOVE(&netmap_devlist, pdev, next); + nm_close(pdev->nmd); SCFree(pdev); } SCMutexUnlock(&netmap_devlist_lock); @@ -508,15 +437,16 @@ static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, voi goto error_ntv; } - if (NetmapOpen(aconf->in.iface, aconf->in.promisc, &ntv->ifsrc, 1) != 
0) { - goto error_ntv; + /* enable zero-copy mode for workers runmode */ + char const *active_runmode = RunmodeGetActive(); + if (strcmp("workers", active_runmode) == 0) { + ntv->flags |= NETMAP_FLAG_ZERO_COPY; + SCLogDebug("Enabling zero copy mode for %s", aconf->in.iface); } - if (unlikely(!aconf->in.sw_ring && !ntv->ifsrc->rx_rings_cnt)) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Input interface '%s' does not have Rx rings", - aconf->iface_name); - goto error_src; + if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, + (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0) != 0) { + goto error_ntv; } if (unlikely(aconf->in.sw_ring && aconf->in.threads > 1)) { @@ -525,70 +455,14 @@ static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, voi "Thread count can't be greater than 1 for SW ring.", aconf->iface_name); goto error_src; - } else if (unlikely(aconf->in.threads > ntv->ifsrc->rx_rings_cnt)) { - SCLogError(SC_ERR_INVALID_VALUE, - "Thread count can't be greater than Rx ring count. " - "Configured %d threads for interface '%s' with %d Rx rings.", - aconf->in.threads, aconf->iface_name, ntv->ifsrc->rx_rings_cnt); - goto error_src; - } - - if (aconf->in.sw_ring) { - ntv->thread_idx = 0; - } else { - do { - ntv->thread_idx = SC_ATOMIC_GET(ntv->ifsrc->threads_run); - } while (SC_ATOMIC_CAS(&ntv->ifsrc->threads_run, ntv->thread_idx, ntv->thread_idx + 1) == 0); - } - - /* calculate thread rings binding */ - if (aconf->in.sw_ring) { - ntv->src_ring_from = ntv->src_ring_to = ntv->ifsrc->rings_cnt; - } else { - int tmp = (ntv->ifsrc->rx_rings_cnt + 1) / aconf->in.threads; - ntv->src_ring_from = ntv->thread_idx * tmp; - ntv->src_ring_to = ntv->src_ring_from + tmp - 1; - if (ntv->thread_idx == (aconf->in.threads - 1)) { - ntv->src_ring_to = ntv->ifsrc->rx_rings_cnt - 1; - } } - SCLogDebug("netmap: %s thread:%d rings:%d-%d", aconf->iface_name, - ntv->thread_idx, ntv->src_ring_from, ntv->src_ring_to); if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) { - if 
(NetmapOpen(aconf->out.iface, aconf->out.promisc, &ntv->ifdst, 1) != 0) { + SCLogDebug("IPS: opening out iface %s", aconf->out.iface); + if (NetmapOpen(&aconf->out, &ntv->ifdst, + 1, 0, false) != 0) { goto error_src; } - - if (unlikely(!aconf->out.sw_ring && !ntv->ifdst->tx_rings_cnt)) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Output interface '%s' does not have Tx rings", - aconf->out.iface); - goto error_dst; - } - - /* calculate dst rings bindings */ - for (int i = ntv->src_ring_from; i <= ntv->src_ring_to; i++) { - NetmapRing *ring = &ntv->ifsrc->rings[i]; - if (aconf->out.sw_ring) { - ring->dst_ring_from = ring->dst_ring_to = ntv->ifdst->rings_cnt; - } else if (ntv->ifdst->tx_rings_cnt > ntv->ifsrc->rx_rings_cnt) { - int tmp = (ntv->ifdst->tx_rings_cnt + 1) / ntv->ifsrc->rx_rings_cnt; - ring->dst_ring_from = i * tmp; - ring->dst_ring_to = ring->dst_ring_from + tmp - 1; - if (i == (ntv->src_ring_to - 1)) { - ring->dst_ring_to = ntv->ifdst->tx_rings_cnt - 1; - } - } else { - ring->dst_ring_from = ring->dst_ring_to = - i % ntv->ifdst->tx_rings_cnt; - } - ring->dst_next_ring = ring->dst_ring_from; - - SCLogDebug("netmap: %s(%d)->%s(%d-%d)", - aconf->in.iface, i, aconf->out.iface, - ring->dst_ring_from, ring->dst_ring_to); - } } /* basic counters */ @@ -597,24 +471,6 @@ static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, voi ntv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops", ntv->tv); - /* enable zero-copy mode for workers runmode */ - char const *active_runmode = RunmodeGetActive(); - if ((aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) && active_runmode && - strcmp("workers", active_runmode) == 0) { - ntv->flags |= NETMAP_FLAG_ZERO_COPY; - SCLogPerf("Enabling zero copy mode for %s->%s", - aconf->in.iface, aconf->out.iface); - } else { - uint16_t ring_size = ntv->ifsrc->rings[0].rx->num_slots; - if (ring_size > max_pending_packets) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Packet pool size (%" PRIuMAX ") must be greater or 
equal than %s ring size (%" PRIu16 "). " - "Increase max_pending_packets option.", - max_pending_packets, aconf->iface_name, ring_size); - goto error_dst; - } - } - if (aconf->in.bpf_filter) { SCLogConfig("Using BPF '%s' on iface '%s'", aconf->in.bpf_filter, ntv->ifsrc->ifname); @@ -638,7 +494,6 @@ static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, voi *data = (void *)ntv; aconf->DerefFunc(aconf); SCReturnInt(TM_ECODE_OK); - error_dst: if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) { NetmapClose(ntv->ifdst); @@ -664,48 +519,18 @@ static TmEcode NetmapWritePacket(NetmapThreadVars *ntv, Packet *p) return TM_ECODE_OK; } } + DEBUG_VALIDATE_BUG_ON(ntv->ifdst == NULL); - /* map src ring_id to dst ring_id */ - NetmapRing *rxring = &ntv->ifsrc->rings[p->netmap_v.ring_id]; - NetmapRing *txring = &ntv->ifdst->rings[p->netmap_v.dst_ring_id]; - - SCSpinLock(&txring->tx_lock); - - if (!nm_ring_space(txring->tx)) { + if (nm_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) { + SCLogDebug("failed to send %s -> %s", + ntv->ifsrc->ifname, ntv->ifdst->ifname); ntv->drops++; - SCSpinUnlock(&txring->tx_lock); - return TM_ECODE_FAILED; - } - - struct netmap_slot *ts = &txring->tx->slot[txring->tx->cur]; - - if (ntv->flags & NETMAP_FLAG_ZERO_COPY) { - struct netmap_slot *rs = &rxring->rx->slot[p->netmap_v.slot_id]; - - /* swap slot buffers */ - uint32_t tmp_idx; - tmp_idx = ts->buf_idx; - ts->buf_idx = rs->buf_idx; - rs->buf_idx = tmp_idx; - - ts->len = rs->len; - - ts->flags |= NS_BUF_CHANGED; - rs->flags |= NS_BUF_CHANGED; - } else { - unsigned char *slot_data = (unsigned char *)NETMAP_BUF(txring->tx, ts->buf_idx); - memcpy(slot_data, GET_PKT_DATA(p), GET_PKT_LEN(p)); - ts->len = GET_PKT_LEN(p); - ts->flags |= NS_BUF_CHANGED; - } - - txring->tx->head = txring->tx->cur = nm_ring_next(txring->tx, txring->tx->cur); - if ((ntv->flags & NETMAP_FLAG_ZERO_COPY) == 0) { - ioctl(txring->fd, NIOCTXSYNC, 0); } + SCLogDebug("sent succesfully: 
%s(%d)->%s(%d) (%u)", + ntv->ifsrc->ifname, ntv->ifsrc->ring, + ntv->ifdst->ifname, ntv->ifdst->ring, GET_PKT_LEN(p)); - SCSpinUnlock(&txring->tx_lock); - + ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0); return TM_ECODE_OK; } @@ -717,8 +542,6 @@ static void NetmapReleasePacket(Packet *p) { NetmapThreadVars *ntv = (NetmapThreadVars *)p->netmap_v.ntv; - /* Need to be in copy mode and need to detect early release - where Ethernet header could not be set (and pseudo packet) */ if ((ntv->copy_mode != NETMAP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) { NetmapWritePacket(ntv, p); } @@ -726,101 +549,52 @@ static void NetmapReleasePacket(Packet *p) PacketFreeOrRelease(p); } -/** - * \brief Read packets from ring and pass them further. - * \param ntv Thread local variables. - * \param ring_id Ring id to read. - */ -static int NetmapRingRead(NetmapThreadVars *ntv, int ring_id) +static void NetmapCallback(u_char *user, const struct nm_pkthdr *ph, const u_char *d) { - SCEnter(); + NetmapThreadVars *ntv = (NetmapThreadVars *)user; - NetmapRing *ring = &ntv->ifsrc->rings[ring_id]; - struct netmap_ring *rx = ring->rx; - uint32_t avail = nm_ring_space(rx); - uint32_t cur = rx->cur; - - if (!(ntv->flags & NETMAP_FLAG_ZERO_COPY)) { - PacketPoolWaitForN(avail); - } - - while (likely(avail-- > 0)) { - struct netmap_slot *slot = &rx->slot[cur]; - unsigned char *slot_data = (unsigned char *)NETMAP_BUF(rx, slot->buf_idx); - - if (ntv->bpf_prog.bf_len) { - struct pcap_pkthdr pkthdr = { {0, 0}, slot->len, slot->len }; - if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, slot_data) == 0) { - /* rejected by bpf */ - cur = nm_ring_next(rx, cur); - continue; - } + if (ntv->bpf_prog.bf_len) { + struct pcap_pkthdr pkthdr = { {0, 0}, ph->len, ph->len }; + if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, d) == 0) { + return; } + } - Packet *p = PacketPoolGetPacket(); - if (unlikely(p == NULL)) { - SCReturnInt(NETMAP_FAILURE); - } + Packet *p = PacketPoolGetPacket(); + if (unlikely(p == NULL)) { + return; + 
} - PKT_SET_SRC(p, PKT_SRC_WIRE); - p->livedev = ntv->livedev; - p->datalink = LINKTYPE_ETHERNET; - p->ts = rx->ts; - ntv->pkts++; - ntv->bytes += slot->len; - - /* checksum validation */ - if (ntv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) { - p->flags |= PKT_IGNORE_CHECKSUM; - } else if (ntv->checksum_mode == CHECKSUM_VALIDATION_AUTO) { - if (ntv->livedev->ignore_checksum) { - p->flags |= PKT_IGNORE_CHECKSUM; - } else if (ChecksumAutoModeCheck(ntv->pkts, - SC_ATOMIC_GET(ntv->livedev->pkts), - SC_ATOMIC_GET(ntv->livedev->invalid_checksums))) { - ntv->livedev->ignore_checksum = 1; - p->flags |= PKT_IGNORE_CHECKSUM; - } - } + PKT_SET_SRC(p, PKT_SRC_WIRE); + p->livedev = ntv->livedev; + p->datalink = LINKTYPE_ETHERNET; + p->ts = ph->ts; + ntv->pkts++; + ntv->bytes += ph->len; - if (ntv->flags & NETMAP_FLAG_ZERO_COPY) { - if (PacketSetData(p, slot_data, slot->len) == -1) { - TmqhOutputPacketpool(ntv->tv, p); - SCReturnInt(NETMAP_FAILURE); - } - } else { - if (PacketCopyData(p, slot_data, slot->len) == -1) { - TmqhOutputPacketpool(ntv->tv, p); - SCReturnInt(NETMAP_FAILURE); - } + if (ntv->flags & NETMAP_FLAG_ZERO_COPY) { + if (PacketSetData(p, (uint8_t *)d, ph->len) == -1) { + TmqhOutputPacketpool(ntv->tv, p); + return; } - - p->ReleasePacket = NetmapReleasePacket; - p->netmap_v.ring_id = ring_id; - p->netmap_v.slot_id = cur; - p->netmap_v.dst_ring_id = ring->dst_next_ring; - p->netmap_v.ntv = ntv; - - if (ring->dst_ring_from != ring->dst_ring_to) { - ring->dst_next_ring++; - if (ring->dst_next_ring == ring->dst_ring_to) { - ring->dst_next_ring = ring->dst_ring_from; - } + } else { + if (PacketCopyData(p, (uint8_t *)d, ph->len) == -1) { + TmqhOutputPacketpool(ntv->tv, p); + return; } + } - SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)", - GET_PKT_LEN(p), p, GET_PKT_DATA(p)); + p->ReleasePacket = NetmapReleasePacket; + p->netmap_v.ntv = ntv; - if (TmThreadsSlotProcessPkt(ntv->tv, ntv->slot, p) != TM_ECODE_OK) { - TmqhOutputPacketpool(ntv->tv, p); - 
SCReturnInt(NETMAP_FAILURE); - } + SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)", + GET_PKT_LEN(p), p, GET_PKT_DATA(p)); - cur = nm_ring_next(rx, cur); + if (TmThreadsSlotProcessPkt(ntv->tv, ntv->slot, p) != TM_ECODE_OK) { + TmqhOutputPacketpool(ntv->tv, p); + return; } - rx->head = rx->cur = cur; - - SCReturnInt(NETMAP_OK); + return; } /** @@ -832,24 +606,14 @@ static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) TmSlot *s = (TmSlot *)slot; NetmapThreadVars *ntv = (NetmapThreadVars *)data; - struct pollfd *fds; - int rings_count = ntv->src_ring_to - ntv->src_ring_from + 1; + struct pollfd fds; ntv->slot = s->slot_next; - - fds = SCMalloc(sizeof(*fds) * rings_count); - if (unlikely(fds == NULL)) { - SCLogError(SC_ERR_MEM_ALLOC, "Memory allocation failed"); - SCReturnInt(TM_ECODE_FAILED); - } - - for (int i = 0; i < rings_count; i++) { - fds[i].fd = ntv->ifsrc->rings[ntv->src_ring_from + i].fd; - fds[i].events = POLLIN; - } + fds.fd = ntv->ifsrc->nmd->fd; + fds.events = POLLIN; for(;;) { - if (suricata_ctl_flags != 0) { + if (unlikely(suricata_ctl_flags != 0)) { break; } @@ -857,65 +621,50 @@ static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) * to prevent us from alloc'ing packets at line rate */ PacketPoolWait(); - int r = poll(fds, rings_count, POLL_TIMEOUT); - + int r = poll(&fds, 1, POLL_TIMEOUT); if (r < 0) { /* error */ - if(errno != EINTR) + if (errno != EINTR) SCLogError(SC_ERR_NETMAP_READ, "Error polling netmap from iface '%s': (%d" PRIu32 ") %s", ntv->ifsrc->ifname, errno, strerror(errno)); continue; + } else if (r == 0) { /* no events, timeout */ - SCLogDebug("(%s:%d-%d) Poll timeout", ntv->ifsrc->ifname, - ntv->src_ring_from, ntv->src_ring_to); + //SCLogDebug("(%s:%d-%d) Poll timeout", ntv->ifsrc->ifname, + // ntv->src_ring_from, ntv->src_ring_to); + + /* sync counters */ + NetmapDumpCounters(ntv); + StatsSyncCountersIfSignalled(tv); /* poll timed out, lets see if we need to inject a fake packet */ 
TmThreadsCaptureInjectPacket(tv, ntv->slot, NULL); continue; } - for (int i = 0; i < rings_count; i++) { - if (fds[i].revents & POLL_EVENTS) { - if (fds[i].revents & POLLERR) { - SCLogError(SC_ERR_NETMAP_READ, - "Error reading data from iface '%s': (%d" PRIu32 ") %s", - ntv->ifsrc->ifname, errno, strerror(errno)); - } else if (fds[i].revents & POLLNVAL) { - SCLogError(SC_ERR_NETMAP_READ, - "Invalid polling request"); - } - continue; + if (unlikely(fds.revents & POLL_EVENTS)) { + if (fds.revents & POLLERR) { + //SCLogError(SC_ERR_NETMAP_READ, + // "Error reading data from iface '%s': (%d" PRIu32 ") %s", + // ntv->ifsrc->ifname, errno, strerror(errno)); + } else if (fds.revents & POLLNVAL) { + SCLogError(SC_ERR_NETMAP_READ, + "Invalid polling request"); } + continue; + } - if (likely(fds[i].revents & POLLIN)) { - int src_ring_id = ntv->src_ring_from + i; - NetmapRingRead(ntv, src_ring_id); - - if ((ntv->copy_mode != NETMAP_COPY_MODE_NONE) && - (ntv->flags & NETMAP_FLAG_ZERO_COPY)) { - - NetmapRing *src_ring = &ntv->ifsrc->rings[src_ring_id]; - - /* sync dst tx rings */ - for (int j = src_ring->dst_ring_from; j <= src_ring->dst_ring_to; j++) { - NetmapRing *dst_ring = &ntv->ifdst->rings[j]; - /* if locked, another loop already do sync */ - if (SCSpinTrylock(&dst_ring->tx_lock) == 0) { - ioctl(dst_ring->fd, NIOCTXSYNC, 0); - SCSpinUnlock(&dst_ring->tx_lock); - } - } - } - } + if (likely(fds.revents & POLLIN)) { + nm_dispatch(ntv->ifsrc->nmd, -1, NetmapCallback, (void *)ntv); } NetmapDumpCounters(ntv); StatsSyncCountersIfSignalled(tv); } - SCFree(fds); + NetmapDumpCounters(ntv); StatsSyncCountersIfSignalled(tv); SCReturnInt(TM_ECODE_OK); } @@ -961,6 +710,8 @@ static TmEcode ReceiveNetmapThreadDeinit(ThreadVars *tv, void *data) SCBPFFree(&ntv->bpf_prog); } + SCFree(ntv); + SCReturnInt(TM_ECODE_OK); } @@ -973,10 +724,8 @@ static TmEcode ReceiveNetmapThreadDeinit(ThreadVars *tv, void *data) static TmEcode DecodeNetmapThreadInit(ThreadVars *tv, const void *initdata, void 
**data) { SCEnter(); - DecodeThreadVars *dtv = NULL; - - dtv = DecodeThreadVarsAlloc(tv); + DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv); if (dtv == NULL) SCReturnInt(TM_ECODE_FAILED); @@ -1036,18 +785,15 @@ static TmEcode DecodeNetmapThreadDeinit(ThreadVars *tv, void *data) } /** - * \brief Registration Function for RecieveNetmap. + * \brief Registration Function for ReceiveNetmap. */ void TmModuleReceiveNetmapRegister(void) { tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap"; tmm_modules[TMM_RECEIVENETMAP].ThreadInit = ReceiveNetmapThreadInit; - tmm_modules[TMM_RECEIVENETMAP].Func = NULL; tmm_modules[TMM_RECEIVENETMAP].PktAcqLoop = ReceiveNetmapLoop; - tmm_modules[TMM_RECEIVENETMAP].PktAcqBreakLoop = NULL; tmm_modules[TMM_RECEIVENETMAP].ThreadExitPrintStats = ReceiveNetmapThreadExitStats; tmm_modules[TMM_RECEIVENETMAP].ThreadDeinit = ReceiveNetmapThreadDeinit; - tmm_modules[TMM_RECEIVENETMAP].RegisterTests = NULL; tmm_modules[TMM_RECEIVENETMAP].cap_flags = SC_CAP_NET_RAW; tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM; } @@ -1060,15 +806,13 @@ void TmModuleDecodeNetmapRegister(void) tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap"; tmm_modules[TMM_DECODENETMAP].ThreadInit = DecodeNetmapThreadInit; tmm_modules[TMM_DECODENETMAP].Func = DecodeNetmap; - tmm_modules[TMM_DECODENETMAP].ThreadExitPrintStats = NULL; tmm_modules[TMM_DECODENETMAP].ThreadDeinit = DecodeNetmapThreadDeinit; - tmm_modules[TMM_DECODENETMAP].RegisterTests = NULL; tmm_modules[TMM_DECODENETMAP].cap_flags = 0; tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM; } #endif /* HAVE_NETMAP */ -/* eof */ + /** * @} */ diff --git a/src/source-netmap.h b/src/source-netmap.h index caa59eefd043..3755cd3cf8e3 100644 --- a/src/source-netmap.h +++ b/src/source-netmap.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2014 Open Information Security Foundation +/* Copyright (C) 2014-2018 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the 
GNU General Public License version 2 as published by the Free @@ -19,6 +19,7 @@ * \file * * \author Aleksey Katargin +* \author Victor Julien */ #ifndef __SOURCE_NETMAP_H__ @@ -40,10 +41,14 @@ typedef struct NetmapIfaceSettings_ /* real inner interface name */ char iface[NETMAP_IFACE_NAME_LENGTH]; - int threads; /* sw ring flag for out_iface */ - int sw_ring; - int promisc; + bool sw_ring; + bool promisc; + bool real; /**< real iface or not. Not in case of vale, pipe */ + bool ips; /**< set to true if copy_mode != NETMAP_COPY_MODE_NONE */ + bool threads_auto; + + int threads; int copy_mode; ChecksumValidationMode checksum_mode; const char *bpf_filter; @@ -57,9 +62,6 @@ typedef struct NetmapIfaceConfig_ /* settings for out capture device*/ NetmapIfaceSettings in; - /* semantic interface name */ - char *out_iface_name; - /* settings for outgoing iface for IPS/TAP */ NetmapIfaceSettings out; @@ -69,9 +71,6 @@ typedef struct NetmapIfaceConfig_ typedef struct NetmapPacketVars_ { - int ring_id; - int slot_id; - int dst_ring_id; /* NetmapThreadVars */ void *ntv; } NetmapPacketVars; diff --git a/src/suricata.c b/src/suricata.c index 7935c45ff5db..b0d98b152831 100644 --- a/src/suricata.c +++ b/src/suricata.c @@ -2504,7 +2504,11 @@ static int ConfigGetCaptureValue(SCInstance *suri) if (strip_trailing_plus) { size_t len = strlen(dev); - if (len && dev[len-1] == '+') { + if (len && + (dev[len-1] == '+' || + dev[len-1] == '^' || + dev[len-1] == '*')) + { dev[len-1] = '\0'; } } diff --git a/suricata.yaml.in b/suricata.yaml.in index 483050b420ec..957e193a864e 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -1680,7 +1680,7 @@ capture: # Netmap support # -# Netmap operates with NIC directly in driver, so you need FreeBSD which have +# Netmap operates with NIC directly in driver, so you need FreeBSD 11+ which has # built-in netmap support or compile and install netmap module and appropriate # NIC driver on your Linux system.
# To reach maximum throughput disable all receive-, segmentation-, @@ -1692,7 +1692,9 @@ capture: netmap: # To specify OS endpoint add plus sign at the end (e.g. "eth0+") - interface: eth2 - # Number of receive threads. "auto" uses number of RSS queues on interface. + # Number of capture threads. "auto" uses number of RSS queues on interface. + # Warning: unless the RSS hashing is symmetrical, this will lead to + # accuracy issues. #threads: auto # You can use the following variables to activate netmap tap or IPS mode. # If copy-mode is set to ips or tap, the traffic coming to the current