From 20efcfc602b7a57979da99f0a1f917d9fb4a30e1 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sat, 16 Jun 2018 08:26:23 +0000 Subject: [PATCH] Switch RIB and RADIX_NODE_HEAD lock from rwlock(9) to rmlock(9). Using rwlock with multiqueue NICs for IP forwarding at high pps produces high lock contention and is inefficient. Rmlock fits better for such workloads. Reviewed by: melifaro, olivier Obtained from: Yandex LLC Sponsored by: Yandex LLC Differential Revision: https://reviews.freebsd.org/D15789 --- sys/kern/subr_witness.c | 2 +- sys/kern/vfs_export.c | 3 ++- sys/net/radix.c | 2 +- sys/net/radix.h | 26 +++++++++++++------------- sys/net/radix_mpath.c | 3 +++ sys/net/route.c | 5 +++++ sys/net/route_var.h | 19 ++++++++++--------- sys/net/rtsock.c | 3 +++ sys/netinet/in_fib.c | 4 +++- sys/netinet6/in6_fib.c | 4 +++- sys/netpfil/ipfw/ip_fw_table_algo.c | 1 + sys/nfs/bootp_subr.c | 1 + 12 files changed, 46 insertions(+), 27 deletions(-) diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 9edec343e39..34bec2b43f6 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -524,7 +524,7 @@ static struct witness_order_list_entry order_lists[] = { * Routing */ { "so_rcv", &lock_class_mtx_sleep }, - { "radix node head", &lock_class_rw }, + { "radix node head", &lock_class_rm }, { "rtentry", &lock_class_mtx_sleep }, { "ifaddr", &lock_class_mtx_sleep }, { NULL, NULL }, diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index 3ce2ea4ea0e..231b77e0ac2 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -51,7 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include #include #include @@ -449,6 +449,7 @@ vfs_setpublicfs(struct mount *mp, struct netexport *nep, static struct netcred * vfs_export_lookup(struct mount *mp, struct sockaddr *nam) { + RADIX_NODE_HEAD_RLOCK_TRACKER; struct netexport *nep; struct netcred *np = NULL; struct radix_node_head *rnh; diff --git a/sys/net/radix.c 
b/sys/net/radix.c index c434df6cc5e..f6c8dbe496f 100644 --- a/sys/net/radix.c +++ b/sys/net/radix.c @@ -39,7 +39,7 @@ #ifdef _KERNEL #include #include -#include +#include #include #include #include diff --git a/sys/net/radix.h b/sys/net/radix.h index 05f0f490018..9ff51c788d7 100644 --- a/sys/net/radix.h +++ b/sys/net/radix.h @@ -38,7 +38,7 @@ #ifdef _KERNEL #include #include -#include +#include #endif #ifdef MALLOC_DECLARE @@ -138,7 +138,7 @@ struct radix_node_head { rn_close_t *rnh_close; /*do something when the last ref drops*/ struct radix_node rnh_nodes[3]; /* empty tree for common case */ #ifdef _KERNEL - struct rwlock rnh_lock; /* locks entire radix tree */ + struct rmlock rnh_lock; /* locks entire radix tree */ #endif }; @@ -159,18 +159,18 @@ void rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes, #define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO)) #define R_Free(p) free((caddr_t)p, M_RTABLE); +#define RADIX_NODE_HEAD_RLOCK_TRACKER struct rm_priotracker _rhn_tracker #define RADIX_NODE_HEAD_LOCK_INIT(rnh) \ - rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0) -#define RADIX_NODE_HEAD_LOCK(rnh) rw_wlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_UNLOCK(rnh) rw_wunlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_RLOCK(rnh) rw_rlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_RUNLOCK(rnh) rw_runlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_LOCK_TRY_UPGRADE(rnh) rw_try_upgrade(&(rnh)->rnh_lock) - - -#define RADIX_NODE_HEAD_DESTROY(rnh) rw_destroy(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_LOCKED) -#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED) + rm_init(&(rnh)->rnh_lock, "radix node head") +#define RADIX_NODE_HEAD_LOCK(rnh) rm_wlock(&(rnh)->rnh_lock) +#define RADIX_NODE_HEAD_UNLOCK(rnh) rm_wunlock(&(rnh)->rnh_lock) +#define RADIX_NODE_HEAD_RLOCK(rnh) rm_rlock(&(rnh)->rnh_lock,\ + &_rhn_tracker) +#define 
RADIX_NODE_HEAD_RUNLOCK(rnh) rm_runlock(&(rnh)->rnh_lock,\ + &_rhn_tracker) +#define RADIX_NODE_HEAD_DESTROY(rnh) rm_destroy(&(rnh)->rnh_lock) +#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rm_assert(&(rnh)->rnh_lock, RA_LOCKED) +#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rm_assert(&(rnh)->rnh_lock, RA_WLOCKED) #endif /* _KERNEL */ int rn_inithead(void **, int); diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index e5dc2992898..b7a2ebe9676 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -43,12 +43,15 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include +#include #include #include #include #include #include +#include #include #include #include diff --git a/sys/net/route.c b/sys/net/route.c index 06eaba334f3..797f1e4d6ee 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -54,6 +54,8 @@ #include #include #include +#include +#include #include #include @@ -440,6 +442,7 @@ struct rtentry * rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *newrt; @@ -923,6 +926,7 @@ int rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, uint32_t flowid, struct rt_addrinfo *info) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; @@ -1944,6 +1948,7 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netma static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { + RIB_RLOCK_TRACKER; struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; diff --git a/sys/net/route_var.h b/sys/net/route_var.h index f32dbc2137b..9d0d1931c46 100644 --- a/sys/net/route_var.h +++ b/sys/net/route_var.h @@ -44,18 +44,19 @@ struct rib_head { rt_gen_t rnh_gen; /* generation counter */ int rnh_multipath; /* multipath capable ? 
*/ struct radix_node rnh_nodes[3]; /* empty tree for common case */ - struct rwlock rib_lock; /* config/data path lock */ + struct rmlock rib_lock; /* config/data path lock */ struct radix_mask_head rmhead; /* masks radix head */ }; -#define RIB_LOCK_INIT(rh) rw_init(&(rh)->rib_lock, "rib head lock") -#define RIB_LOCK_DESTROY(rh) rw_destroy(&(rh)->rib_lock) -#define RIB_RLOCK(rh) rw_rlock(&(rh)->rib_lock) -#define RIB_RUNLOCK(rh) rw_runlock(&(rh)->rib_lock) -#define RIB_WLOCK(rh) rw_wlock(&(rh)->rib_lock) -#define RIB_WUNLOCK(rh) rw_wunlock(&(rh)->rib_lock) -#define RIB_LOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_LOCKED) -#define RIB_WLOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_WLOCKED) +#define RIB_RLOCK_TRACKER struct rm_priotracker _rib_tracker +#define RIB_LOCK_INIT(rh) rm_init(&(rh)->rib_lock, "rib head lock") +#define RIB_LOCK_DESTROY(rh) rm_destroy(&(rh)->rib_lock) +#define RIB_RLOCK(rh) rm_rlock(&(rh)->rib_lock, &_rib_tracker) +#define RIB_RUNLOCK(rh) rm_runlock(&(rh)->rib_lock, &_rib_tracker) +#define RIB_WLOCK(rh) rm_wlock(&(rh)->rib_lock) +#define RIB_WUNLOCK(rh) rm_wunlock(&(rh)->rib_lock) +#define RIB_LOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_LOCKED) +#define RIB_WLOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_WLOCKED) struct rib_head *rt_tables_get_rnh(int fib, int family); diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 6fb2c701385..2daa8c33790 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -542,6 +543,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, static int route_output(struct mbuf *m, struct socket *so, ...) 
{ + RIB_RLOCK_TRACKER; struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct rib_head *rnh; @@ -1850,6 +1852,7 @@ sysctl_ifmalist(int af, struct walkarg *w) static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { + RIB_RLOCK_TRACKER; int *name = (int *)arg1; u_int namelen = arg2; struct rib_head *rnh = NULL; /* silence compiler. */ diff --git a/sys/netinet/in_fib.c b/sys/netinet/in_fib.c index 6b941eb1c46..5d97cc50620 100644 --- a/sys/netinet/in_fib.c +++ b/sys/netinet/in_fib.c @@ -37,7 +37,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include #include #include @@ -134,6 +134,7 @@ int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_basic *pnh4) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in sin; @@ -182,6 +183,7 @@ int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_extended *pnh4) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in sin; diff --git a/sys/netinet6/in6_fib.c b/sys/netinet6/in6_fib.c index 35719310ee8..ecaa6e2b955 100644 --- a/sys/netinet6/in6_fib.c +++ b/sys/netinet6/in6_fib.c @@ -38,7 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include #include #include @@ -171,6 +171,7 @@ int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; @@ -220,6 +221,7 @@ int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; diff --git a/sys/netpfil/ipfw/ip_fw_table_algo.c b/sys/netpfil/ipfw/ip_fw_table_algo.c index 405be0be828..aacc9431f57 100644 --- 
a/sys/netpfil/ipfw/ip_fw_table_algo.c +++ b/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -4047,6 +4047,7 @@ static void ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { + RIB_RLOCK_TRACKER; struct rib_head *rh; int error; diff --git a/sys/nfs/bootp_subr.c b/sys/nfs/bootp_subr.c index d7657155f55..07418aff1f3 100644 --- a/sys/nfs/bootp_subr.c +++ b/sys/nfs/bootp_subr.c @@ -376,6 +376,7 @@ bootpboot_p_tree(struct radix_node *rn) void bootpboot_p_rtlist(void) { + RIB_RLOCK_TRACKER; struct rib_head *rnh; printf("Routing table:\n");