rss: add sysctl enable toggle

This commit also includes the original refactoring changes

This change allows the kernel to operate with the default netisr CPU-affinity settings while RSS is compiled in. Normally, RSS changes quite a bit of the behaviour of the kernel dispatch service; this change reduces the impact on incompatible hardware while preserving the option to boost throughput based on packet-flow CPU affinity.

Make sure to compile the following options in the kernel:

    options  RSS

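In addition, set the following sysctls (net.inet.rss.enabled is declared CTLFLAG_RDTUN, i.e. it is read-only at runtime and has to be set as a boot-time tunable):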

    net.inet.rss.enabled: 1
    net.isr.bindthreads: 1
    net.isr.maxthreads: -1 (automatically sets it to the number of CPUs)

Optionally, to force a 1:1 mapping between CPUs and buckets, also set:

    net.inet.rss.bits: 3 (for 8 CPUs)
    net.inet.rss.bits: 2 (for 4 CPUs)

etc.

Set pin_default_swi to 0 by default in the RSS case.
This commit is contained in:
Stephan de Wit 2021-10-29 09:14:23 +02:00 committed by Franco Fichtner
parent d7d6d360f4
commit 5e72057985
17 changed files with 131 additions and 10 deletions

View file

@ -113,7 +113,7 @@ SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout
"Number of entries in callwheel and size of timeout() preallocation");
#ifdef RSS
static int pin_default_swi = 1;
static int pin_default_swi = 0;
static int pin_pcpu_swi = 1;
#else
static int pin_default_swi = 0;

View file

@ -766,6 +766,12 @@ static void
ether_init(__unused void *arg)
{

#ifdef RSS
	/*
	 * RSS is compiled in but may be switched off: in that case clear
	 * the handler's nh_m2cpuid callback and select
	 * NETISR_POLICY_SOURCE before the handler is registered.
	 */
	if (rss_get_enabled() == 0) {
		ether_nh.nh_policy = NETISR_POLICY_SOURCE;
		ether_nh.nh_m2cpuid = NULL;
	}
#endif
	/* Register the Ethernet netisr handler. */
	netisr_register(&ether_nh);
}
SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);

View file

@ -73,6 +73,7 @@
#include <netinet/ip_var.h>
#ifdef RSS
#include <netinet/in_rss.h>
#include <net/rss_config.h>
#endif
#endif
@ -651,9 +652,11 @@ gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
#ifdef INET
case AF_INET:
#ifdef RSS
flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
mtod(m, struct ip *)->ip_dst);
break;
if (rss_get_enabled()) {
flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
mtod(m, struct ip *)->ip_dst);
break;
}
#endif
flowid = mtod(m, struct ip *)->ip_src.s_addr ^
mtod(m, struct ip *)->ip_dst.s_addr;
@ -662,10 +665,12 @@ gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
#ifdef INET6
case AF_INET6:
#ifdef RSS
flowid = rss_hash_ip6_2tuple(
&mtod(m, struct ip6_hdr *)->ip6_src,
&mtod(m, struct ip6_hdr *)->ip6_dst);
break;
if (rss_get_enabled()) {
flowid = rss_hash_ip6_2tuple(
&mtod(m, struct ip6_hdr *)->ip6_src,
&mtod(m, struct ip6_hdr *)->ip6_dst);
break;
}
#endif
flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^
mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3];

View file

@ -6645,7 +6645,7 @@ iflib_msix_init(if_ctx_t ctx)
queuemsgs = msgs - admincnt;
#endif
#ifdef RSS
queues = imin(queuemsgs, rss_getnumbuckets());
queues = imin(queuemsgs, rss_get_enabled() ? rss_getnumbuckets() : queuemsgs);
#else
queues = queuemsgs;
#endif

View file

@ -141,6 +141,15 @@ int rss_debug = 0;
SYSCTL_INT(_net_inet_rss, OID_AUTO, debug, CTLFLAG_RWTUN, &rss_debug, 0,
"RSS debug level");
/*
 * RSS enable toggle, exported as net.inet.rss.enabled.
 *	0	 - disabled (default)
 *	non-zero - enabled
 *
 * The OID is CTLFLAG_RDTUN: read-only at runtime, with the value taken
 * from a boot-time tunable.  The backing variable is a u_int, so it is
 * exported with SYSCTL_UINT to keep the OID type in step with the C type.
 */
static u_int rss_enabled = 0;
SYSCTL_UINT(_net_inet_rss, OID_AUTO, enabled, CTLFLAG_RDTUN, &rss_enabled, 0,
    "RSS enabled");
/*
* RSS secret key, intended to prevent attacks on load-balancing. Its
* effectiveness may be limited by algorithm choice and available entropy
@ -210,8 +219,20 @@ rss_init(__unused void *arg)
* much point in having buckets to rearrange for load-balancing!
*/
if (rss_ncpus > 1) {
if (rss_bits == 0)
if (rss_bits == 0) {
rss_bits = fls(rss_ncpus - 1) + 1;
if (!rss_enabled) {
/*
* In order to prevent every driver from
* having to check if RSS is enabled in the kernel,
* the default round-robin (1:1 mapping between
* buckets -> cpus) is set here, allowing
* drivers to keep distributing packets over
* multiple CPUs while RSS is disabled in the kernel.
*/
rss_bits = rss_bits - 1;
}
}
/*
* Microsoft limits RSS table entries to 128, so apply that
@ -258,6 +279,12 @@ rss_init(__unused void *arg)
}
SYSINIT(rss_init, SI_SUB_SOFTINTR, SI_ORDER_SECOND, rss_init, NULL);
u_int
rss_get_enabled(void)
{
return (rss_enabled);
}
static uint32_t
rss_naive_hash(u_int keylen, const uint8_t *key, u_int datalen,
const uint8_t *data)
@ -426,6 +453,11 @@ void
rss_getkey(uint8_t *key)
{

	/*
	 * Write sizeof(rss_key) bytes into the caller's buffer.  With RSS
	 * enabled this is a copy of the stored key; with RSS disabled the
	 * buffer is filled by arc4rand() and the stored key is not handed
	 * out.
	 */
	if (rss_enabled != 0) {
		bcopy(rss_key, key, sizeof(rss_key));
	} else {
		arc4rand(key, sizeof(rss_key), 0);
	}
}
@ -472,6 +504,10 @@ rss_gethashconfig(void)
* as 2-tuple.
* So for now disable UDP 4-tuple hashing until all of the other
* pieces are in place.
*
* XXX: The configuration is shared here regardless of RSS being
* enabled via sysctl, since drivers may still want to enable
* RSS in the hardware even if there is no support for it in the kernel.
*/
return (
RSS_HASHTYPE_RSS_IPV4

View file

@ -108,6 +108,7 @@ extern int rss_debug;
* Device driver interfaces to query RSS properties that must be programmed
* into hardware.
*/
u_int rss_get_enabled(void);
u_int rss_getbits(void);
u_int rss_getbucket(u_int hash);
u_int rss_get_indirection_to_bucket(u_int index);

View file

@ -342,9 +342,18 @@ ip_vnet_init(void *arg __unused)
printf("%s: WARNING: unable to register output helper hook\n",
__func__);
#ifdef RSS
if (!rss_get_enabled()) {
ip_nh.nh_m2cpuid = NULL;
ip_nh.nh_policy = NETISR_POLICY_FLOW;
ip_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT;
}
#endif
#ifdef VIMAGE
netisr_register_vnet(&ip_nh);
#ifdef RSS
if (rss_get_enabled())
netisr_register_vnet(&ip_direct_nh);
#endif
#endif
@ -375,8 +384,17 @@ ip_init(const void *unused __unused)
IPPROTO_REGISTER(IPPROTO_SCTP, sctp_input, sctp_ctlinput);
#endif
#ifdef RSS
if (!rss_get_enabled()) {
ip_nh.nh_m2cpuid = NULL;
ip_nh.nh_policy = NETISR_POLICY_FLOW;
ip_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT;
}
#endif
netisr_register(&ip_nh);
#ifdef RSS
if (rss_get_enabled())
netisr_register(&ip_direct_nh);
#endif
}
@ -389,6 +407,7 @@ ip_destroy(void *unused __unused)
int error;
#ifdef RSS
if (rss_get_enabled())
netisr_unregister_vnet(&ip_direct_nh);
#endif
netisr_unregister_vnet(&ip_nh);

View file

@ -1241,6 +1241,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
break;
#ifdef RSS
case IP_RECVRSSBUCKETID:
if (!rss_get_enabled())
break;
OPTSET2(INP_RECVRSSBUCKETID, optval);
break;
#endif
@ -1458,6 +1460,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
break;
#ifdef RSS
case IP_RSSBUCKETID:
if (!rss_get_enabled())
break;
retval = rss_hash2bucket(inp->inp_flowid,
inp->inp_flowtype,
&rss_bucket);

View file

@ -536,6 +536,7 @@ ip_reass(struct mbuf *m)
IPQ_UNLOCK(hash);
#ifdef RSS
if (rss_get_enabled()) {
/*
* Query the RSS layer for the flowid / flowtype for the
* mbuf payload.
@ -564,6 +565,7 @@ ip_reass(struct mbuf *m)
*/
netisr_dispatch(NETISR_IP_DIRECT, m);
return (NULL);
}
#endif
/* Handle in-line */

View file

@ -1014,11 +1014,13 @@ hpts_cpuid(struct tcpcb *tp, int *failed)
}
/* If one is set the other must be the same */
#ifdef RSS
if (rss_get_enabled()) {
cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
if (cpuid == NETISR_CPUID_NONE)
return (hpts_random_cpu());
else
return (cpuid);
}
#endif
/*
* We don't have a flowid -> cpuid mapping, so cheat and just map

View file

@ -234,11 +234,13 @@ inp_to_cpuid(struct inpcb *inp)
if (per_cpu_timers) {
#ifdef RSS
if (rss_get_enabled()) {
cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
if (cpuid == NETISR_CPUID_NONE)
return (curcpu); /* XXX */
else
return (cpuid);
}
#endif
/*
* We don't have a flowid -> cpuid mapping, so cheat and

View file

@ -1424,7 +1424,11 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
M_HASHTYPE_SET(m, flowtype);
}
#if defined(ROUTE_MPATH) || defined(RSS)
#ifdef RSS
else if (rss_get_enabled() || CALC_FLOWID_OUTBOUND_SENDTO) {
#else
else if (CALC_FLOWID_OUTBOUND_SENDTO) {
#endif
uint32_t hash_val, hash_type;
hash_val = fib4_calc_packet_hash(laddr, faddr,

View file

@ -55,6 +55,9 @@
#include <net/netisr.h>
#include <net/route.h>
#include <net/vnet.h>
#ifdef RSS
#include <net/rss_config.h>
#endif
#include <netinet/in.h>
#include <netinet/in_var.h>
@ -885,6 +888,7 @@ postinsert:
}
#ifdef RSS
if (rss_get_enabled()) {
mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc),
M_NOWAIT);
if (mtag == NULL)
@ -895,6 +899,7 @@ postinsert:
ip6dc->ip6dc_off = offset;
m_tag_prepend(m, mtag);
}
#endif
IP6QB_UNLOCK(bucket);
@ -903,9 +908,11 @@ postinsert:
#ifdef RSS
/* Queue/dispatch for reprocessing. */
if (rss_get_enabled()) {
netisr_dispatch(NETISR_IPV6_DIRECT, m);
*mp = NULL;
return (IPPROTO_DONE);
}
#endif
/* Tell launch routine the next header. */

View file

@ -100,6 +100,9 @@
#include <net/if_types.h>
#include <net/route.h>
#include <net/route/nhop.h>
#ifdef RSS
#include <net/rss_config.h>
#endif
#include <net/vnet.h>
#include <netinet/in.h>

View file

@ -266,10 +266,19 @@ ip6_vnet_init(void *arg __unused)
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
#ifdef RSS
if (!rss_get_enabled()) {
ip6_nh.nh_m2cpuid = NULL;
ip6_nh.nh_policy = NETISR_POLICY_FLOW;
ip6_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT;
}
#endif
/* Skip global initialization stuff for non-default instances. */
#ifdef VIMAGE
netisr_register_vnet(&ip6_nh);
#ifdef RSS
if (rss_get_enabled())
netisr_register_vnet(&ip6_direct_nh);
#endif
#endif
@ -302,8 +311,17 @@ ip6_init(void *arg __unused)
EVENTHANDLER_REGISTER(mbuf_lowmem, frag6_drain, NULL,
LOWMEM_PRI_DEFAULT);
#ifdef RSS
if (!rss_get_enabled()) {
ip6_nh.nh_m2cpuid = NULL;
ip6_nh.nh_policy = NETISR_POLICY_FLOW;
ip6_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT;
}
#endif
netisr_register(&ip6_nh);
#ifdef RSS
if (rss_get_enabled())
netisr_register(&ip6_direct_nh);
#endif
}
@ -347,6 +365,7 @@ ip6_destroy(void *unused __unused)
int error;
#ifdef RSS
if (rss_get_enabled())
netisr_unregister_vnet(&ip6_direct_nh);
#endif
netisr_unregister_vnet(&ip6_nh);
@ -1534,6 +1553,7 @@ ip6_savecontrol(struct inpcb *inp, struct mbuf *m, struct mbuf **mp)
}
#ifdef RSS
if (rss_get_enabled())
if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
uint32_t flowid, flow_type;
uint32_t rss_bucketid;

View file

@ -1868,6 +1868,8 @@ do { \
#ifdef RSS
case IPV6_RECVRSSBUCKETID:
if (!rss_get_enabled())
break;
OPTSET2(INP_RECVRSSBUCKETID, optval);
break;
#endif
@ -2230,6 +2232,8 @@ do { \
break;
#ifdef RSS
case IPV6_RSSBUCKETID:
if (!rss_get_enabled())
break;
retval =
rss_hash2bucket(inp->inp_flowid,
inp->inp_flowtype,
@ -2241,6 +2245,8 @@ do { \
break;
case IPV6_RECVRSSBUCKETID:
if (!rss_get_enabled())
break;
optval = OPTBIT2(INP_RECVRSSBUCKETID);
break;
#endif

View file

@ -908,7 +908,11 @@ udp6_send(struct socket *so, int flags_arg, struct mbuf *m,
flags = 0;
#if defined(ROUTE_MPATH) || defined(RSS)
#ifdef RSS
if (rss_get_enabled() || CALC_FLOWID_OUTBOUND_SENDTO) {
#else
if (CALC_FLOWID_OUTBOUND_SENDTO) {
#endif
uint32_t hash_type, hash_val;
uint8_t pr;