diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index c9a0fa3e5ba..36795c61a42 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -113,7 +113,7 @@ SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout "Number of entries in callwheel and size of timeout() preallocation"); #ifdef RSS -static int pin_default_swi = 1; +static int pin_default_swi = 0; static int pin_pcpu_swi = 1; #else static int pin_default_swi = 0; diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 4d82bb0cfdc..43b42408b3f 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -766,6 +766,12 @@ static void ether_init(__unused void *arg) { +#ifdef RSS + if (!rss_get_enabled()) { + ether_nh.nh_m2cpuid = NULL; + ether_nh.nh_policy = NETISR_POLICY_SOURCE; + } +#endif netisr_register(&ether_nh); } SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index ca9c4835daf..39e4ed1143a 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -73,6 +73,7 @@ #include #ifdef RSS #include +#include #endif #endif @@ -651,9 +652,11 @@ gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af) #ifdef INET case AF_INET: #ifdef RSS - flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src, - mtod(m, struct ip *)->ip_dst); - break; + if (rss_get_enabled()) { + flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src, + mtod(m, struct ip *)->ip_dst); + break; + } #endif flowid = mtod(m, struct ip *)->ip_src.s_addr ^ mtod(m, struct ip *)->ip_dst.s_addr; @@ -662,10 +665,12 @@ gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af) #ifdef INET6 case AF_INET6: #ifdef RSS - flowid = rss_hash_ip6_2tuple( - &mtod(m, struct ip6_hdr *)->ip6_src, - &mtod(m, struct ip6_hdr *)->ip6_dst); - break; + if (rss_get_enabled()) { + flowid = rss_hash_ip6_2tuple( + &mtod(m, struct ip6_hdr *)->ip6_src, + &mtod(m, struct ip6_hdr *)->ip6_dst); + break; + } #endif flowid = mtod(m, struct 
ip6_hdr *)->ip6_src.s6_addr32[3] ^ mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3]; diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 997da7ca5b0..456c7dafd84 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -6645,7 +6645,7 @@ iflib_msix_init(if_ctx_t ctx) queuemsgs = msgs - admincnt; #endif #ifdef RSS - queues = imin(queuemsgs, rss_getnumbuckets()); + queues = imin(queuemsgs, rss_get_enabled() ? rss_getnumbuckets() : queuemsgs); #else queues = queuemsgs; #endif diff --git a/sys/net/rss_config.c b/sys/net/rss_config.c index 266ea57b2dc..7a0fdc2a052 100644 --- a/sys/net/rss_config.c +++ b/sys/net/rss_config.c @@ -141,6 +141,15 @@ int rss_debug = 0; SYSCTL_INT(_net_inet_rss, OID_AUTO, debug, CTLFLAG_RWTUN, &rss_debug, 0, "RSS debug level"); +/* + * RSS enable toggle + * 0 - disable + * non-zero - enabled + */ +static u_int rss_enabled = 0; +SYSCTL_INT(_net_inet_rss, OID_AUTO, enabled, CTLFLAG_RDTUN, &rss_enabled, 0, + "RSS enabled"); + /* * RSS secret key, intended to prevent attacks on load-balancing. Its * effectiveness may be limited by algorithm choice and available entropy @@ -210,8 +219,20 @@ rss_init(__unused void *arg) * much point in having buckets to rearrange for load-balancing! */ if (rss_ncpus > 1) { - if (rss_bits == 0) + if (rss_bits == 0) { rss_bits = fls(rss_ncpus - 1) + 1; + if (!rss_enabled) { + /* + * In order to prevent every driver from + * having to check if RSS is enabled in the kernel, + * the default round-robin (1:1 mapping between + * buckets -> cpus) is set here, allowing + * drivers to keep distributing packets over + * multiple CPUs while RSS is disabled in the kernel. 
+ */ + rss_bits = rss_bits - 1; + } + } /* * Microsoft limits RSS table entries to 128, so apply that @@ -258,6 +279,12 @@ rss_init(__unused void *arg) } SYSINIT(rss_init, SI_SUB_SOFTINTR, SI_ORDER_SECOND, rss_init, NULL); +u_int +rss_get_enabled(void) +{ + return (rss_enabled); +} + static uint32_t rss_naive_hash(u_int keylen, const uint8_t *key, u_int datalen, const uint8_t *data) @@ -426,6 +453,11 @@ void rss_getkey(uint8_t *key) { + if (!rss_enabled) { + arc4rand(key, sizeof(rss_key), 0); + return; + } + bcopy(rss_key, key, sizeof(rss_key)); } @@ -472,6 +504,10 @@ rss_gethashconfig(void) * as 2-tuple. * So for now disable UDP 4-tuple hashing until all of the other * pieces are in place. + * + * XXX: The configuration is shared here regardless of RSS being + * enabled via sysctl, since drivers may still want to enable + * RSS in the hardware even if there is no support for it in the kernel. */ return ( RSS_HASHTYPE_RSS_IPV4 diff --git a/sys/net/rss_config.h b/sys/net/rss_config.h index 07c2d09b44c..68c13a96198 100644 --- a/sys/net/rss_config.h +++ b/sys/net/rss_config.h @@ -108,6 +108,7 @@ extern int rss_debug; * Device driver interfaces to query RSS properties that must be programmed * into hardware. 
*/ +u_int rss_get_enabled(void); u_int rss_getbits(void); u_int rss_getbucket(u_int hash); u_int rss_get_indirection_to_bucket(u_int index); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 5524f23f780..268c1b55965 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -342,9 +342,18 @@ ip_vnet_init(void *arg __unused) printf("%s: WARNING: unable to register output helper hook\n", __func__); +#ifdef RSS + if (!rss_get_enabled()) { + ip_nh.nh_m2cpuid = NULL; + ip_nh.nh_policy = NETISR_POLICY_FLOW; + ip_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT; + } +#endif + #ifdef VIMAGE netisr_register_vnet(&ip_nh); #ifdef RSS + if (rss_get_enabled()) netisr_register_vnet(&ip_direct_nh); #endif #endif @@ -375,8 +384,17 @@ ip_init(const void *unused __unused) IPPROTO_REGISTER(IPPROTO_SCTP, sctp_input, sctp_ctlinput); #endif +#ifdef RSS + if (!rss_get_enabled()) { + ip_nh.nh_m2cpuid = NULL; + ip_nh.nh_policy = NETISR_POLICY_FLOW; + ip_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT; + } +#endif + netisr_register(&ip_nh); #ifdef RSS + if (rss_get_enabled()) netisr_register(&ip_direct_nh); #endif } @@ -389,6 +407,7 @@ ip_destroy(void *unused __unused) int error; #ifdef RSS + if (rss_get_enabled()) netisr_unregister_vnet(&ip_direct_nh); #endif netisr_unregister_vnet(&ip_nh); diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index bcd4ed4c94c..3a99d40d670 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -1241,6 +1241,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) break; #ifdef RSS case IP_RECVRSSBUCKETID: + if (!rss_get_enabled()) + break; OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif @@ -1458,6 +1460,8 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) break; #ifdef RSS case IP_RSSBUCKETID: + if (!rss_get_enabled()) + break; retval = rss_hash2bucket(inp->inp_flowid, inp->inp_flowtype, &rss_bucket); diff --git a/sys/netinet/ip_reass.c b/sys/netinet/ip_reass.c index 2876e5bfdcd..d4fb66bfa27 100644 --- 
a/sys/netinet/ip_reass.c +++ b/sys/netinet/ip_reass.c @@ -536,6 +536,7 @@ ip_reass(struct mbuf *m) IPQ_UNLOCK(hash); #ifdef RSS + if (rss_get_enabled()) { /* * Query the RSS layer for the flowid / flowtype for the * mbuf payload. @@ -564,6 +565,7 @@ ip_reass(struct mbuf *m) */ netisr_dispatch(NETISR_IP_DIRECT, m); return (NULL); + } #endif /* Handle in-line */ diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c index 85341cab075..18452795d33 100644 --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -1014,11 +1014,13 @@ hpts_cpuid(struct tcpcb *tp, int *failed) } /* If one is set the other must be the same */ #ifdef RSS + if (rss_get_enabled()) { cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); if (cpuid == NETISR_CPUID_NONE) return (hpts_random_cpu()); else return (cpuid); + } #endif /* * We don't have a flowid -> cpuid mapping, so cheat and just map diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index ad407d5c111..143fa17f702 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -234,11 +234,13 @@ inp_to_cpuid(struct inpcb *inp) if (per_cpu_timers) { #ifdef RSS + if (rss_get_enabled()) { cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); if (cpuid == NETISR_CPUID_NONE) return (curcpu); /* XXX */ else return (cpuid); + } #endif /* * We don't have a flowid -> cpuid mapping, so cheat and diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 7329600ecc7..95a9d77ba37 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1424,7 +1424,11 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, M_HASHTYPE_SET(m, flowtype); } #if defined(ROUTE_MPATH) || defined(RSS) +#ifdef RSS + else if (rss_get_enabled() || CALC_FLOWID_OUTBOUND_SENDTO) { +#else else if (CALC_FLOWID_OUTBOUND_SENDTO) { +#endif uint32_t hash_val, hash_type; hash_val = fib4_calc_packet_hash(laddr, faddr, diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c index 
e976298bf98..be9a427b054 100644 --- a/sys/netinet6/frag6.c +++ b/sys/netinet6/frag6.c @@ -55,6 +55,9 @@ #include #include #include +#ifdef RSS +#include +#endif #include #include @@ -885,6 +888,7 @@ postinsert: } #ifdef RSS + if (rss_get_enabled()) { mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc), M_NOWAIT); if (mtag == NULL) @@ -895,6 +899,7 @@ postinsert: ip6dc->ip6dc_off = offset; m_tag_prepend(m, mtag); + } #endif IP6QB_UNLOCK(bucket); @@ -903,9 +908,11 @@ postinsert: #ifdef RSS /* Queue/dispatch for reprocessing. */ + if (rss_get_enabled()) { netisr_dispatch(NETISR_IPV6_DIRECT, m); *mp = NULL; return (IPPROTO_DONE); + } #endif /* Tell launch routine the next header. */ diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index f7f2ea0b869..b3a3ce140de 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -100,6 +100,9 @@ #include #include #include +#ifdef RSS +#include +#endif #include #include diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 5e0005bdef3..ca6192c8ed6 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -266,10 +266,19 @@ ip6_vnet_init(void *arg __unused) V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; +#ifdef RSS + if (!rss_get_enabled()) { + ip6_nh.nh_m2cpuid = NULL; + ip6_nh.nh_policy = NETISR_POLICY_FLOW; + ip6_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT; + } +#endif + /* Skip global initialization stuff for non-default instances. 
*/ #ifdef VIMAGE netisr_register_vnet(&ip6_nh); #ifdef RSS + if (rss_get_enabled()) netisr_register_vnet(&ip6_direct_nh); #endif #endif @@ -302,8 +311,17 @@ ip6_init(void *arg __unused) EVENTHANDLER_REGISTER(mbuf_lowmem, frag6_drain, NULL, LOWMEM_PRI_DEFAULT); +#ifdef RSS + if (!rss_get_enabled()) { + ip6_nh.nh_m2cpuid = NULL; + ip6_nh.nh_policy = NETISR_POLICY_FLOW; + ip6_nh.nh_dispatch = NETISR_DISPATCH_DEFAULT; + } +#endif + netisr_register(&ip6_nh); #ifdef RSS + if (rss_get_enabled()) netisr_register(&ip6_direct_nh); #endif } @@ -347,6 +365,7 @@ ip6_destroy(void *unused __unused) int error; #ifdef RSS + if (rss_get_enabled()) netisr_unregister_vnet(&ip6_direct_nh); #endif netisr_unregister_vnet(&ip6_nh); @@ -1534,6 +1553,7 @@ ip6_savecontrol(struct inpcb *inp, struct mbuf *m, struct mbuf **mp) } #ifdef RSS + if (rss_get_enabled()) if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 3c0e7f37b74..4955b22f3a1 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1868,6 +1868,8 @@ do { \ #ifdef RSS case IPV6_RECVRSSBUCKETID: + if (!rss_get_enabled()) + break; OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif @@ -2230,6 +2232,8 @@ do { \ break; #ifdef RSS case IPV6_RSSBUCKETID: + if (!rss_get_enabled()) + break; retval = rss_hash2bucket(inp->inp_flowid, inp->inp_flowtype, @@ -2241,6 +2245,8 @@ do { \ break; case IPV6_RECVRSSBUCKETID: + if (!rss_get_enabled()) + break; optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index c8b38c24d19..35473d60649 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -908,7 +908,11 @@ udp6_send(struct socket *so, int flags_arg, struct mbuf *m, flags = 0; #if defined(ROUTE_MPATH) || defined(RSS) +#ifdef RSS + if (rss_get_enabled() || CALC_FLOWID_OUTBOUND_SENDTO) { +#else if (CALC_FLOWID_OUTBOUND_SENDTO) 
{ +#endif uint32_t hash_type, hash_val; uint8_t pr;