From 657dc81d90d6bb35fc3a06958151182a3501e929 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 19 Sep 2019 22:15:57 +0000 Subject: [PATCH] Improve ioat(4) NUMA-awareness. Allocate ioat->ring memory from the device domain. Schedule ioat->poll_timer to the first CPU of the device domain. According to pcm-numa tool from intel-pcm port, this reduces number of remote DRAM accesses while copying data by 75%. And unless it is a noise, I've noticed some speed improvement when copying data to other domain. MFC after: 1 week Sponsored by: iXsystems, Inc. --- sys/dev/ioat/ioat.c | 17 ++++++++++++----- sys/dev/ioat/ioat_internal.h | 2 ++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/sys/dev/ioat/ioat.c b/sys/dev/ioat/ioat.c index b13d9da9be4..59840932f7a 100644 --- a/sys/dev/ioat/ioat.c +++ b/sys/dev/ioat/ioat.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -44,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -266,6 +268,11 @@ ioat_attach(device_t device) ioat = DEVICE2SOFTC(device); ioat->device = device; + if (bus_get_domain(device, &ioat->domain) != 0) + ioat->domain = 0; + ioat->cpu = CPU_FFS(&cpuset_domain[ioat->domain]) - 1; + if (ioat->cpu < 0) + ioat->cpu = CPU_FIRST(); error = ioat_map_pci_bar(ioat); if (error != 0) @@ -600,8 +607,8 @@ ioat3_attach(device_t device) __func__, error); return (error); } - ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT, - M_ZERO | M_WAITOK); + ioat->ring = malloc_domainset(num_descriptors * sizeof(*ring), M_IOAT, + DOMAINSET_PREF(ioat->domain), M_ZERO | M_WAITOK); ring = ioat->ring; for (i = 0; i < num_descriptors; i++) { @@ -1107,8 +1114,8 @@ ioat_release(bus_dmaengine_t dmaengine) (uint16_t)ioat->head); if (!callout_pending(&ioat->poll_timer)) { - callout_reset(&ioat->poll_timer, 1, - ioat_poll_timer_callback, ioat); + callout_reset_on(&ioat->poll_timer, 1, + ioat_poll_timer_callback, ioat, ioat->cpu); } } mtx_unlock(&ioat->submit_lock); @@ -1644,7 +1651,7 @@ ioat_free_ring(struct ioat_softc *ioat, uint32_t size, struct ioat_descriptor *ring) { - free(ring, M_IOAT); + free_domain(ring, M_IOAT); } static struct ioat_descriptor * diff --git a/sys/dev/ioat/ioat_internal.h b/sys/dev/ioat/ioat_internal.h index 919e9218316..23730581181 100644 --- a/sys/dev/ioat/ioat_internal.h +++ b/sys/dev/ioat/ioat_internal.h @@ -442,6 +442,8 @@ struct ioat_softc { }) device_t device; + int domain; + int cpu; int version; unsigned chan_idx;