netmap: Make memory pools NUMA-aware

Each netmap adapter associated with a physical adapter is attached to a
netmap memory pool.  contigmalloc() is used to allocate physically
contiguous memory for the pool, but ideally we would ensure that all
such memory is allocated from the NUMA domain local to the adapter.

Augment netmap's memory pools with a NUMA domain ID, similar to how
IOMMU groups are handled in the Linux port.  That is, when attaching to
a physical adapter, ensure that the associated memory pools are local to
the adapter's associated memory domain, creating new pools as needed.

Some types of ifnets do not have any defined NUMA affinity; in this case
the domain ID in question is the sentinel value -1.

Add a sysctl, dev.netmap.port_numa_affinity, which can be used to enable
the new behaviour.  Keep it disabled for now to avoid surprises in case
netmap applications are relying on zero-copy optimizations to forward
packets between ports belonging to different NUMA domains.

Reviewed by:	vmaffione
MFC after:	2 weeks
Sponsored by:	Klara, Inc.
Differential Revision:	https://reviews.freebsd.org/D46666
This commit is contained in:
Mark Johnston 2024-10-14 13:30:09 +00:00
parent 4e15366c6a
commit 1bae9dc584
5 changed files with 74 additions and 25 deletions

View file

@ -25,7 +25,7 @@
.\" This document is derived in part from the enet man page (enet.4)
.\" distributed with 4.3BSD Unix.
.\"
.Dd March 6, 2022
.Dd October 10, 2024
.Dt NETMAP 4
.Os
.Sh NAME
@ -938,6 +938,16 @@ switches that can be created. This tunable can be specified
at loader time.
.It Va dev.netmap.ptnet_vnet_hdr: 1
Allow ptnet devices to use virtio-net headers
.It Va dev.netmap.port_numa_affinity: 0
On
.Xr numa 4
systems, allocate memory for netmap ports from the local NUMA domain when
possible.
This can improve performance by reducing the number of remote memory accesses.
However, when forwarding packets between ports attached to different NUMA
domains, this will prevent zero-copy forwarding optimizations and thus may hurt
performance.
Note that this setting must be specified as a loader tunable at boot time.
.El
.Sh SYSTEM CALLS
.Nm

View file

@ -4010,8 +4010,8 @@ netmap_attach_common(struct netmap_adapter *na)
na->active_fds = 0;
if (na->nm_mem == NULL) {
/* use iommu or global allocator */
na->nm_mem = netmap_mem_get_iommu(na);
/* select an allocator based on IOMMU and NUMA affinity */
na->nm_mem = netmap_mem_get_allocator(na);
}
if (na->nm_bdg_attach == NULL)
/* no special nm_bdg_attach callback. On VALE

View file

@ -81,6 +81,7 @@
#if defined(__FreeBSD__)
#include <sys/selinfo.h>
#include <vm/vm.h>
#define likely(x) __builtin_expect((long)!!(x), 1L)
#define unlikely(x) __builtin_expect((long)!!(x), 0L)
@ -1727,10 +1728,30 @@ extern int netmap_generic_txqdisc;
#define NM_IS_NATIVE(ifp) (NM_NA_VALID(ifp) && NA(ifp)->nm_dtor == netmap_hw_dtor)
#if defined(__FreeBSD__)
extern int netmap_port_numa_affinity;
/* Assigns the device IOMMU domain to an allocator.
* Returns -ENOMEM in case the domain is different */
#define nm_iommu_group_id(dev) (-1)
/*
 * IOMMU group lookup for the FreeBSD build.
 *
 * Always returns -1, regardless of the adapter passed in.
 */
static inline int
nm_iommu_group_id(struct netmap_adapter *na)
{
	/* The adapter is not consulted. */
	(void)na;

	return (-1);
}
/*
 * Return the NUMA domain to use for the memory pools of adapter 'na',
 * or -1 when no specific domain applies.
 *
 * -1 is returned when the system has a single NUMA domain, when the
 * netmap_port_numa_affinity knob is 0, or when the interface reports
 * IF_NODOM.
 */
static inline int
nm_numa_domain(struct netmap_adapter *na)
{
	int dom;

	/*
	 * If the system has only one NUMA domain, don't bother distinguishing
	 * between IF_NODOM and domain 0.
	 */
	if (vm_ndomains == 1)
		return (-1);

	/* NUMA-aware pool selection is switched off. */
	if (netmap_port_numa_affinity == 0)
		return (-1);

	dom = if_getnumadomain(na->ifp);

	/* Map the "no domain" marker onto the -1 sentinel. */
	return (dom == IF_NODOM ? -1 : dom);
}
/* Callback invoked by the dma machinery after a successful dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,

View file

@ -37,8 +37,8 @@
#endif /* __APPLE__ */
#ifdef __FreeBSD__
#include <sys/cdefs.h> /* prerequisite */
#include <sys/types.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/kernel.h> /* MALLOC_DEFINE */
#include <sys/proc.h>
@ -174,7 +174,8 @@ struct netmap_mem_d {
struct netmap_obj_pool pools[NETMAP_POOLS_NR];
nm_memid_t nm_id; /* allocator identifier */
int nm_grp; /* iommu group id */
int nm_grp; /* iommu group id */
int nm_numa_domain; /* local NUMA domain */
/* list of all existing allocators, sorted by nm_id */
struct netmap_mem_d *prev, *next;
@ -310,7 +311,7 @@ netmap_mem_rings_delete(struct netmap_adapter *na)
static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
static int nm_mem_check_group(struct netmap_mem_d *, bus_dma_tag_t);
static int nm_mem_check_group(struct netmap_mem_d *, void *);
static void nm_mem_release_id(struct netmap_mem_d *);
nm_memid_t
@ -576,6 +577,7 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
.nm_id = 1,
.nm_grp = -1,
.nm_numa_domain = -1,
.prev = &nm_mem,
.next = &nm_mem,
@ -615,6 +617,7 @@ static const struct netmap_mem_d nm_blueprint = {
},
.nm_grp = -1,
.nm_numa_domain = -1,
.flags = NETMAP_MEM_PRIVATE,
@ -625,7 +628,6 @@ static const struct netmap_mem_d nm_blueprint = {
#define STRINGIFY(x) #x
#define DECLARE_SYSCTLS(id, name) \
SYSBEGIN(mem2_ ## name); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
@ -649,9 +651,14 @@ DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
/*
 * Knob behind dev.netmap.port_numa_affinity; defaults to 0 (disabled).
 * nm_numa_domain() returns -1 for every adapter while it is 0.
 * Registered with CTLFLAG_RDTUN, so it has to be set as a loader tunable.
 */
int netmap_port_numa_affinity = 0;
SYSCTL_INT(_dev_netmap, OID_AUTO, port_numa_affinity,
CTLFLAG_RDTUN, &netmap_port_numa_affinity, 0,
"Use NUMA-local memory for memory pools when possible");
/* call with nm_mem_list_lock held */
static int
nm_mem_assign_id_locked(struct netmap_mem_d *nmd, int grp_id)
nm_mem_assign_id_locked(struct netmap_mem_d *nmd, int grp_id, int domain)
{
nm_memid_t id;
struct netmap_mem_d *scan = netmap_last_mem_d;
@ -666,6 +673,7 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd, int grp_id)
if (id != scan->nm_id) {
nmd->nm_id = id;
nmd->nm_grp = grp_id;
nmd->nm_numa_domain = domain;
nmd->prev = scan->prev;
nmd->next = scan;
scan->prev->next = nmd;
@ -688,7 +696,7 @@ nm_mem_assign_id(struct netmap_mem_d *nmd, int grp_id)
int ret;
NM_MTX_LOCK(nm_mem_list_lock);
ret = nm_mem_assign_id_locked(nmd, grp_id);
ret = nm_mem_assign_id_locked(nmd, grp_id, -1);
NM_MTX_UNLOCK(nm_mem_list_lock);
return ret;
@ -728,7 +736,7 @@ netmap_mem_find(nm_memid_t id)
}
static int
nm_mem_check_group(struct netmap_mem_d *nmd, bus_dma_tag_t dev)
nm_mem_check_group(struct netmap_mem_d *nmd, void *dev)
{
int err = 0, id;
@ -1399,7 +1407,7 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
/* call with NMA_LOCK held */
static int
netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
netmap_finalize_obj_allocator(struct netmap_mem_d *nmd, struct netmap_obj_pool *p)
{
int i; /* must be signed */
@ -1440,8 +1448,16 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
* can live with standard malloc, because the hardware will not
* access the pages directly.
*/
clust = contigmalloc(p->_clustsize, M_NETMAP, M_NOWAIT | M_ZERO,
(size_t)0, -1UL, PAGE_SIZE, 0);
if (nmd->nm_numa_domain == -1) {
clust = contigmalloc(p->_clustsize, M_NETMAP,
M_NOWAIT | M_ZERO, (size_t)0, -1UL, PAGE_SIZE, 0);
} else {
struct domainset *ds;
ds = DOMAINSET_PREF(nmd->nm_numa_domain);
clust = contigmalloc_domainset(p->_clustsize, M_NETMAP,
ds, M_NOWAIT | M_ZERO, (size_t)0, -1UL, PAGE_SIZE, 0);
}
if (clust == NULL) {
/*
* If we get here, there is a severe memory shortage,
@ -1634,7 +1650,7 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
nmd->lasterr = 0;
nmd->nm_totalsize = 0;
for (i = 0; i < NETMAP_POOLS_NR; i++) {
nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]);
nmd->lasterr = netmap_finalize_obj_allocator(nmd, &nmd->pools[i]);
if (nmd->lasterr)
goto error;
nmd->nm_totalsize += nmd->pools[i].memtotal;
@ -1802,24 +1818,26 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
return d;
}
/* Reference iommu allocator - find existing or create new,
* for not hw addapeters fallback to global allocator.
/* Reference the IOMMU- and NUMA-local allocator: find an existing one or
* create a new one.  For non-hw adapters, fall back to the global allocator.
*/
struct netmap_mem_d *
netmap_mem_get_iommu(struct netmap_adapter *na)
netmap_mem_get_allocator(struct netmap_adapter *na)
{
int i, err, grp_id;
int i, domain, err, grp_id;
struct netmap_mem_d *nmd;
if (na == NULL || na->pdev == NULL)
return netmap_mem_get(&nm_mem);
domain = nm_numa_domain(na->pdev);
grp_id = nm_iommu_group_id(na->pdev);
NM_MTX_LOCK(nm_mem_list_lock);
nmd = netmap_last_mem_d;
do {
if (!(nmd->flags & NETMAP_MEM_HIDDEN) && nmd->nm_grp == grp_id) {
if (!(nmd->flags & NETMAP_MEM_HIDDEN) &&
nmd->nm_grp == grp_id && nmd->nm_numa_domain == domain) {
nmd->refcount++;
NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
NM_MTX_UNLOCK(nm_mem_list_lock);
@ -1834,7 +1852,7 @@ netmap_mem_get_iommu(struct netmap_adapter *na)
*nmd = nm_mem_blueprint;
err = nm_mem_assign_id_locked(nmd, grp_id);
err = nm_mem_assign_id_locked(nmd, grp_id, domain);
if (err)
goto error_free;
@ -2878,7 +2896,7 @@ netmap_mem_pt_guest_create(nm_memid_t mem_id)
ptnmd->pt_ifs = NULL;
/* Assign new id in the guest (We have the lock) */
err = nm_mem_assign_id_locked(&ptnmd->up, -1);
err = nm_mem_assign_id_locked(&ptnmd->up, -1, -1);
if (err)
goto error;

View file

@ -146,7 +146,7 @@ struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_
#define netmap_mem_get(d) __netmap_mem_get(d, __FUNCTION__, __LINE__)
#define netmap_mem_put(d) __netmap_mem_put(d, __FUNCTION__, __LINE__)
struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int);
struct netmap_mem_d* netmap_mem_get_iommu(struct netmap_adapter *);
struct netmap_mem_d* netmap_mem_get_allocator(struct netmap_adapter *);
void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
struct netmap_mem_d* netmap_mem_find(nm_memid_t);
unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd);