From 5e72057985b64ffeeb61b3b543fd237df8a2cb6b Mon Sep 17 00:00:00 2001
From: Stephan de Wit <33954429+swhite2@users.noreply.github.com>
Date: Fri, 29 Oct 2021 09:14:23 +0200
Subject: [PATCH] rss: add sysctl enable toggle

This commit also includes the original refactoring changes.

This change allows the kernel to operate with the default netisr
CPU-affinity settings while having RSS compiled in. Normally, RSS changes
quite a bit of the behaviour of the kernel dispatch service - this change
allows for reducing impact on incompatible hardware while preserving the
option to boost throughput speeds based on packet flow CPU affinity.

Make sure to compile the following options in the kernel:

options RSS

As well as setting the following sysctls:

net.inet.rss.enabled: 1
net.isr.bindthreads: 1
net.isr.maxthreads: -1 (automatically sets it to the number of CPUs)

And optionally (to force a 1:1 mapping between CPUs and buckets):

net.inet.rss.bits: 3 (for 8 CPUs)
net.inet.rss.bits: 2 (for 4 CPUs)
etc.

Set pin_default_swi to 0 by default in the RSS case.
--- sys/kern/kern_timeout.c | 2 +- sys/net/if_ethersubr.c | 6 ++++++ sys/net/if_gre.c | 19 ++++++++++++------- sys/net/iflib.c | 2 +- sys/net/rss_config.c | 38 +++++++++++++++++++++++++++++++++++++- sys/net/rss_config.h | 1 + sys/netinet/ip_input.c | 19 +++++++++++++++++++ sys/netinet/ip_output.c | 4 ++++ sys/netinet/ip_reass.c | 2 ++ sys/netinet/tcp_hpts.c | 2 ++ sys/netinet/tcp_timer.c | 2 ++ sys/netinet/udp_usrreq.c | 4 ++++ sys/netinet6/frag6.c | 7 +++++++ sys/netinet6/in6_pcb.c | 3 +++ sys/netinet6/ip6_input.c | 20 ++++++++++++++++++++ sys/netinet6/ip6_output.c | 6 ++++++ sys/netinet6/udp6_usrreq.c | 4 ++++ 17 files changed, 131 insertions(+), 10 deletions(-) diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index c9a0fa3e5ba..36795c61a42 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -113,7 +113,7 @@ SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout "Number of entries in callwheel and size of timeout() preallocation"); #ifdef RSS -static int pin_default_swi = 1; +static int pin_default_swi = 0; static int pin_pcpu_swi = 1; #else static int pin_default_swi = 0; diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 4d82bb0cfdc..43b42408b3f 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -766,6 +766,12 @@ static void ether_init(__unused void *arg) { +#ifdef RSS + if (!rss_get_enabled()) { + ether_nh.nh_m2cpuid = NULL; + ether_nh.nh_policy = NETISR_POLICY_SOURCE; + } +#endif netisr_register(&ether_nh); } SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index ca9c4835daf..39e4ed1143a 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -73,6 +73,7 @@ #include #ifdef RSS #include +#include #endif #endif @@ -651,9 +652,11 @@ gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af) #ifdef INET case AF_INET: #ifdef RSS - flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src, - mtod(m, struct ip 
*)->ip_dst); - break; + if (rss_get_enabled()) { + flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src, + mtod(m, struct ip *)->ip_dst); + break; + } #endif flowid = mtod(m, struct ip *)->ip_src.s_addr ^ mtod(m, struct ip *)->ip_dst.s_addr; @@ -662,10 +665,12 @@ gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af) #ifdef INET6 case AF_INET6: #ifdef RSS - flowid = rss_hash_ip6_2tuple( - &mtod(m, struct ip6_hdr *)->ip6_src, - &mtod(m, struct ip6_hdr *)->ip6_dst); - break; + if (rss_get_enabled()) { + flowid = rss_hash_ip6_2tuple( + &mtod(m, struct ip6_hdr *)->ip6_src, + &mtod(m, struct ip6_hdr *)->ip6_dst); + break; + } #endif flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^ mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3]; diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 997da7ca5b0..456c7dafd84 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -6645,7 +6645,7 @@ iflib_msix_init(if_ctx_t ctx) queuemsgs = msgs - admincnt; #endif #ifdef RSS - queues = imin(queuemsgs, rss_getnumbuckets()); + queues = imin(queuemsgs, rss_get_enabled() ? rss_getnumbuckets() : queuemsgs); #else queues = queuemsgs; #endif diff --git a/sys/net/rss_config.c b/sys/net/rss_config.c index 266ea57b2dc..7a0fdc2a052 100644 --- a/sys/net/rss_config.c +++ b/sys/net/rss_config.c @@ -141,6 +141,15 @@ int rss_debug = 0; SYSCTL_INT(_net_inet_rss, OID_AUTO, debug, CTLFLAG_RWTUN, &rss_debug, 0, "RSS debug level"); +/* + * RSS enable toggle + * 0 - disable + * non-zero - enabled + */ +static u_int rss_enabled = 0; +SYSCTL_INT(_net_inet_rss, OID_AUTO, enabled, CTLFLAG_RDTUN, &rss_enabled, 0, + "RSS enabled"); + /* * RSS secret key, intended to prevent attacks on load-balancing. Its * effectiveness may be limited by algorithm choice and available entropy @@ -210,8 +219,20 @@ rss_init(__unused void *arg) * much point in having buckets to rearrange for load-balancing! 
*/ if (rss_ncpus > 1) { - if (rss_bits == 0) + if (rss_bits == 0) { rss_bits = fls(rss_ncpus - 1) + 1; + if (!rss_enabled) { + /* + * In order to prevent every driver from + * having to check if RSS is enabled in the kernel, + * the default round-robin (1:1 mapping between + * buckets -> cpus) is set here, allowing + * drivers to keep distributing packets over + * multiple CPUs while RSS is disabled in the kernel. + */ + rss_bits = rss_bits - 1; + } + } /* * Microsoft limits RSS table entries to 128, so apply that @@ -258,6 +279,12 @@ rss_init(__unused void *arg) } SYSINIT(rss_init, SI_SUB_SOFTINTR, SI_ORDER_SECOND, rss_init, NULL); +u_int +rss_get_enabled(void) +{ + return (rss_enabled); +} + static uint32_t rss_naive_hash(u_int keylen, const uint8_t *key, u_int datalen, const uint8_t *data) @@ -426,6 +453,11 @@ void rss_getkey(uint8_t *key) { + if (!rss_enabled) { + arc4rand(key, sizeof(rss_key), 0); + return; + } + bcopy(rss_key, key, sizeof(rss_key)); } @@ -472,6 +504,10 @@ rss_gethashconfig(void) * as 2-tuple. * So for now disable UDP 4-tuple hashing until all of the other * pieces are in place. + * + * XXX: The configuration is shared here regardless of RSS being + * enabled via sysctl, since drivers may still want to enable + * RSS in the hardware even if there is no support for it in the kernel. */ return ( RSS_HASHTYPE_RSS_IPV4 diff --git a/sys/net/rss_config.h b/sys/net/rss_config.h index 07c2d09b44c..68c13a96198 100644 --- a/sys/net/rss_config.h +++ b/sys/net/rss_config.h @@ -108,6 +108,7 @@ extern int rss_debug; * Device driver interfaces to query RSS properties that must be programmed * into hardware. 
*/ +u_int rss_get_enabled(void); u_int rss_getbits(void); u_int rss_getbucket(u_int hash); u_int rss_get_indirection_to_bucket(u_int index); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 5524f23f780..268c1b55965 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -342,9 +342,18 @@ ip_vnet_init(void *arg __unused) printf("%s: WARNING: unable to register output helper hook\n", __func__); +#ifdef RSS + if (!rss_get_enabled()) { + ip_nh.nh_m2cpuid = NULL; + ip_nh.nh_policy = NETISR_POLICY_FLOW; + ip_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT; + } +#endif + #ifdef VIMAGE netisr_register_vnet(&ip_nh); #ifdef RSS + if (rss_get_enabled()) netisr_register_vnet(&ip_direct_nh); #endif #endif @@ -375,8 +384,17 @@ ip_init(const void *unused __unused) IPPROTO_REGISTER(IPPROTO_SCTP, sctp_input, sctp_ctlinput); #endif +#ifdef RSS + if (!rss_get_enabled()) { + ip_nh.nh_m2cpuid = NULL; + ip_nh.nh_policy = NETISR_POLICY_FLOW; + ip_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT; + } +#endif + netisr_register(&ip_nh); #ifdef RSS + if (rss_get_enabled()) netisr_register(&ip_direct_nh); #endif } @@ -389,6 +407,7 @@ ip_destroy(void *unused __unused) int error; #ifdef RSS + if (rss_get_enabled()) netisr_unregister_vnet(&ip_direct_nh); #endif netisr_unregister_vnet(&ip_nh); diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index bcd4ed4c94c..3a99d40d670 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -1241,6 +1241,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) break; #ifdef RSS case IP_RECVRSSBUCKETID: + if (!rss_get_enabled()) + break; OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif @@ -1458,6 +1460,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) break; #ifdef RSS case IP_RSSBUCKETID: + if (!rss_get_enabled()) + break; retval = rss_hash2bucket(inp->inp_flowid, inp->inp_flowtype, &rss_bucket); diff --git a/sys/netinet/ip_reass.c b/sys/netinet/ip_reass.c index 2876e5bfdcd..d4fb66bfa27 100644 --- 
a/sys/netinet/ip_reass.c +++ b/sys/netinet/ip_reass.c @@ -536,6 +536,7 @@ ip_reass(struct mbuf *m) IPQ_UNLOCK(hash); #ifdef RSS + if (rss_get_enabled()) { /* * Query the RSS layer for the flowid / flowtype for the * mbuf payload. @@ -564,6 +565,7 @@ ip_reass(struct mbuf *m) */ netisr_dispatch(NETISR_IP_DIRECT, m); return (NULL); + } #endif /* Handle in-line */ diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c index 85341cab075..18452795d33 100644 --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -1014,11 +1014,13 @@ hpts_cpuid(struct tcpcb *tp, int *failed) } /* If one is set the other must be the same */ #ifdef RSS + if (rss_get_enabled()) { cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); if (cpuid == NETISR_CPUID_NONE) return (hpts_random_cpu()); else return (cpuid); + } #endif /* * We don't have a flowid -> cpuid mapping, so cheat and just map diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index ad407d5c111..143fa17f702 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -234,11 +234,13 @@ inp_to_cpuid(struct inpcb *inp) if (per_cpu_timers) { #ifdef RSS + if (rss_get_enabled()) { cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); if (cpuid == NETISR_CPUID_NONE) return (curcpu); /* XXX */ else return (cpuid); + } #endif /* * We don't have a flowid -> cpuid mapping, so cheat and diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 7329600ecc7..95a9d77ba37 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1424,7 +1424,11 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, M_HASHTYPE_SET(m, flowtype); } #if defined(ROUTE_MPATH) || defined(RSS) +#ifdef RSS + else if (rss_get_enabled() || CALC_FLOWID_OUTBOUND_SENDTO) { +#else else if (CALC_FLOWID_OUTBOUND_SENDTO) { +#endif uint32_t hash_val, hash_type; hash_val = fib4_calc_packet_hash(laddr, faddr, diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c index 
e976298bf98..be9a427b054 100644 --- a/sys/netinet6/frag6.c +++ b/sys/netinet6/frag6.c @@ -55,6 +55,9 @@ #include #include #include +#ifdef RSS +#include +#endif #include #include @@ -885,6 +888,7 @@ postinsert: } #ifdef RSS + if (rss_get_enabled()) { mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc), M_NOWAIT); if (mtag == NULL) @@ -895,6 +899,7 @@ postinsert: ip6dc->ip6dc_off = offset; m_tag_prepend(m, mtag); + } #endif IP6QB_UNLOCK(bucket); @@ -903,9 +908,11 @@ postinsert: #ifdef RSS /* Queue/dispatch for reprocessing. */ + if (rss_get_enabled()) { netisr_dispatch(NETISR_IPV6_DIRECT, m); *mp = NULL; return (IPPROTO_DONE); + } #endif /* Tell launch routine the next header. */ diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index f7f2ea0b869..b3a3ce140de 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -100,6 +100,9 @@ #include #include #include +#ifdef RSS +#include +#endif #include #include diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 5e0005bdef3..ca6192c8ed6 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -266,10 +266,19 @@ ip6_vnet_init(void *arg __unused) V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; +#ifdef RSS + if (!rss_get_enabled()) { + ip6_nh.nh_m2cpuid = NULL; + ip6_nh.nh_policy = NETISR_POLICY_FLOW; + ip6_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT; + } +#endif + /* Skip global initialization stuff for non-default instances. 
*/ #ifdef VIMAGE netisr_register_vnet(&ip6_nh); #ifdef RSS + if (rss_get_enabled()) netisr_register_vnet(&ip6_direct_nh); #endif #endif @@ -302,8 +311,17 @@ ip6_init(void *arg __unused) EVENTHANDLER_REGISTER(mbuf_lowmem, frag6_drain, NULL, LOWMEM_PRI_DEFAULT); +#ifdef RSS + if (!rss_get_enabled()) { + ip6_nh.nh_m2cpuid = NULL; + ip6_nh.nh_policy = NETISR_POLICY_FLOW; + ip6_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT; + } +#endif + netisr_register(&ip6_nh); #ifdef RSS + if (rss_get_enabled()) netisr_register(&ip6_direct_nh); #endif } @@ -347,6 +365,7 @@ ip6_destroy(void *unused __unused) int error; #ifdef RSS + if (rss_get_enabled()) netisr_unregister_vnet(&ip6_direct_nh); #endif netisr_unregister_vnet(&ip6_nh); @@ -1534,6 +1553,7 @@ ip6_savecontrol(struct inpcb *inp, struct mbuf *m, struct mbuf **mp) } #ifdef RSS + if (rss_get_enabled()) if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 3c0e7f37b74..4955b22f3a1 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1868,6 +1868,8 @@ do { \ #ifdef RSS case IPV6_RECVRSSBUCKETID: + if (!rss_get_enabled()) + break; OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif @@ -2230,6 +2232,8 @@ do { \ break; #ifdef RSS case IPV6_RSSBUCKETID: + if (!rss_get_enabled()) + break; retval = rss_hash2bucket(inp->inp_flowid, inp->inp_flowtype, @@ -2241,6 +2245,8 @@ do { \ break; case IPV6_RECVRSSBUCKETID: + if (!rss_get_enabled()) + break; optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index c8b38c24d19..35473d60649 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -908,7 +908,11 @@ udp6_send(struct socket *so, int flags_arg, struct mbuf *m, flags = 0; #if defined(ROUTE_MPATH) || defined(RSS) +#ifdef RSS + if (rss_get_enabled() || CALC_FLOWID_OUTBOUND_SENDTO) { +#else if (CALC_FLOWID_OUTBOUND_SENDTO) 
{ +#endif uint32_t hash_type, hash_val; uint8_t pr;