From c2c2a7c11ee7cdc9d33dc3cdabff4a25dbcbca13 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Mon, 1 Jun 2009 15:49:42 +0000 Subject: [PATCH] Convert the two-dimensional array to be malloced and introduce an accessor function to get the correct rnh pointer back. Update netstat to get the correct pointer using kvm_read() as well. This not only fixes the ABI problem depending on the kernel option but also permits the tunable to override the kernel option at boot time up to MAXFIBS, enlarging the number of FIBs without having to recompile. So people could just use GENERIC now. Reviewed by: julian, rwatson, zec X-MFC: not possible --- UPDATING | 6 +++ sys/net/if.c | 3 +- sys/net/route.c | 75 +++++++++++++++++++++++++------------ sys/net/route.h | 3 +- sys/net/rtsock.c | 10 ++--- sys/net/vnet.h | 2 +- sys/netinet/in_rmx.c | 10 ++--- sys/netinet6/in6_ifattach.c | 9 +++-- sys/netinet6/in6_rmx.c | 26 ++++++++----- sys/netinet6/nd6_rtr.c | 11 ++++-- sys/nfsclient/bootp_subr.c | 10 +++-- sys/sys/param.h | 2 +- usr.bin/netstat/route.c | 25 +++++++------ 13 files changed, 123 insertions(+), 69 deletions(-) diff --git a/UPDATING b/UPDATING index 95869d563c3..2560bf92fa5 100644 --- a/UPDATING +++ b/UPDATING @@ -22,6 +22,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW: to maximize performance. (To disable malloc debugging, run ln -s aj /etc/malloc.conf.) +20090601: + The way we are storing and accessing `routeing table' entries + has changed. Programs reading the FIB, like netstat, need to + be re-compiled. + Bump __FreeBSD_version to 800097. + 20090530: Remove the tunable/sysctl debug.mpsafevfs as its initial purpose is no more valid. 
diff --git a/sys/net/if.c b/sys/net/if.c index 4d4befc2735..9a3c1fdedf3 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1001,7 +1001,8 @@ if_detach_internal(struct ifnet *ifp, int vmove) */ for (i = 1; i <= AF_MAX; i++) { for (j = 0; j < rt_numfibs; j++) { - if ((rnh = V_rt_tables[j][i]) == NULL) + rnh = rt_tables_get_rnh(j, i); + if (rnh == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); diff --git a/sys/net/route.c b/sys/net/route.c index 195145ca88f..8705a541f3e 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -91,15 +91,7 @@ TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs); #ifdef VIMAGE_GLOBALS static struct rtstat rtstat; - -/* by default only the first 'row' of tables will be accessed. */ -/* - * XXXMRT When we fix netstat, and do this differnetly, - * we can allocate this dynamically. As long as we are keeping - * things backwards compaitble we need to allocate this - * statically. - */ -struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1]; +struct radix_node_head *rt_tables; static int rttrash; /* routes not in table but not freed */ #endif @@ -158,6 +150,32 @@ sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); +static __inline struct radix_node_head ** +rt_tables_get_rnh_ptr(int table, int fam) +{ + INIT_VNET_NET(curvnet); + struct radix_node_head **rnh; + + KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", + __func__)); + KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", + __func__)); + + /* rnh is [fib=0][af=0]. */ + rnh = (struct radix_node_head **)V_rt_tables; + /* Get the offset to the requested table and fam. 
*/ + rnh += table * (AF_MAX+1) + fam; + + return (rnh); +} + +struct radix_node_head * +rt_tables_get_rnh(int table, int fam) +{ + + return (*rt_tables_get_rnh_ptr(table, fam)); +} + static void route_init(void) { @@ -179,10 +197,14 @@ route_init(void) static int vnet_route_iattach(const void *unused __unused) { INIT_VNET_NET(curvnet); - int table; struct domain *dom; + struct radix_node_head **rnh; + int table; int fam; + V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * + sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); + V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { @@ -198,8 +220,10 @@ static int vnet_route_iattach(const void *unused __unused) * (only for AF_INET and AF_INET6 * which don't need it anyhow) */ - dom->dom_rtattach( - (void **)&V_rt_tables[table][fam], + rnh = rt_tables_get_rnh_ptr(table, fam); + if (rnh == NULL) + panic("%s: rnh NULL", __func__); + dom->dom_rtattach((void **)rnh, dom->dom_rtoffset); } else { break; @@ -300,7 +324,7 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ fibnum = 0; - rnh = V_rt_tables[fibnum][dst->sa_family]; + rnh = rt_tables_get_rnh(fibnum, dst->sa_family); newrt = NULL; /* * Look up the address in the table for that Address Family @@ -362,7 +386,7 @@ rtfree(struct rtentry *rt) struct radix_node_head *rnh; KASSERT(rt != NULL,("%s: NULL rt", __func__)); - rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; + rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); RT_LOCK_ASSERT(rt); @@ -463,8 +487,13 @@ rtredirect_fib(struct sockaddr *dst, short *stat = NULL; struct rt_addrinfo info; struct ifaddr *ifa; - struct radix_node_head *rnh = - V_rt_tables[fibnum][dst->sa_family]; + struct radix_node_head *rnh; 
+ + rnh = rt_tables_get_rnh(fibnum, dst->sa_family); + if (rnh == NULL) { + error = EAFNOSUPPORT; + goto out; + } /* verify the gateway is directly reachable */ if ((ifa = ifa_ifwithnet(gateway)) == NULL) { @@ -774,7 +803,7 @@ rtexpunge(struct rtentry *rt) /* * Find the correct routing tree to use for this Address Family */ - rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; + rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); RT_LOCK_ASSERT(rt); if (rnh == NULL) return (EAFNOSUPPORT); @@ -942,7 +971,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, /* * Find the correct routing tree to use for this Address Family */ - rnh = V_rt_tables[fibnum][dst->sa_family]; + rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) return (EAFNOSUPPORT); needlock = ((flags & RTF_RNH_LOCKED) == 0); @@ -1134,9 +1163,9 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) /* XXX dst may be overwritten, can we move this to below */ int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); #ifdef INVARIANTS - INIT_VNET_NET(curvnet); - struct radix_node_head *rnh = - V_rt_tables[rt->rt_fibnum][dst->sa_family]; + struct radix_node_head *rnh; + + rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); #endif RT_LOCK_ASSERT(rt); @@ -1203,7 +1232,6 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netma static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { - INIT_VNET_NET(curvnet); struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; @@ -1273,7 +1301,8 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) * Look up an rtentry that is in the routing tree and * contains the correct info. 
*/ - if ((rnh = V_rt_tables[fibnum][dst->sa_family]) == NULL) + rnh = rt_tables_get_rnh(fibnum, dst->sa_family); + if (rnh == NULL) /* this table doesn't exist but others might */ continue; RADIX_NODE_HEAD_LOCK(rnh); diff --git a/sys/net/route.h b/sys/net/route.h index 731db4b2013..ca774837a87 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -373,7 +373,8 @@ struct rt_addrinfo { } \ } while (0) -extern struct radix_node_head *rt_tables[][AF_MAX+1]; +extern struct radix_node_head *rt_tables; +struct radix_node_head *rt_tables_get_rnh(int, int); struct ifmultiaddr; diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index c9f76af53c6..751e0ae8d71 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -460,7 +460,6 @@ static int route_output(struct mbuf *m, struct socket *so) { #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) - INIT_VNET_NET(so->so_vnet); struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct radix_node_head *rnh; @@ -561,7 +560,8 @@ route_output(struct mbuf *m, struct socket *so) case RTM_GET: case RTM_CHANGE: case RTM_LOCK: - rnh = V_rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family]; + rnh = rt_tables_get_rnh(so->so_fibnum, + info.rti_info[RTAX_DST]->sa_family); if (rnh == NULL) senderr(EAFNOSUPPORT); RADIX_NODE_HEAD_RLOCK(rnh); @@ -1418,10 +1418,9 @@ done: static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { - INIT_VNET_NET(curvnet); int *name = (int *)arg1; u_int namelen = arg2; - struct radix_node_head *rnh; + struct radix_node_head *rnh = NULL; /* silence compiler. 
*/ int i, lim, error = EINVAL; u_char af; struct walkarg w; @@ -1469,7 +1468,8 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS) * take care of routing entries */ for (error = 0; error == 0 && i <= lim; i++) - if ((rnh = V_rt_tables[req->td->td_proc->p_fibnum][i]) != NULL) { + rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i); + if (rnh != NULL) { RADIX_NODE_HEAD_LOCK(rnh); error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w); diff --git a/sys/net/vnet.h b/sys/net/vnet.h index bdc466b55a4..d36c30373b2 100644 --- a/sys/net/vnet.h +++ b/sys/net/vnet.h @@ -45,7 +45,7 @@ struct vnet_net { struct knlist _ifklist; struct rtstat _rtstat; - struct radix_node_head *_rt_tables[RT_MAXFIBS][AF_MAX+1]; + struct radix_node_head *_rt_tables; int _rttrash; uma_zone_t _rtzone; diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index 19cd5fb6d4d..8fc60ae886a 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -251,14 +251,14 @@ static void in_rtqtimo(void *rock) { CURVNET_SET((struct vnet *) rock); - INIT_VNET_NET(curvnet); INIT_VNET_INET(curvnet); int fibnum; void *newrock; struct timeval atv; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { - if ((newrock = V_rt_tables[fibnum][AF_INET]) != NULL) + newrock = rt_tables_get_rnh(fibnum, AF_INET); + if (newrock != NULL) in_rtqtimo_one(newrock); } atv.tv_usec = 0; @@ -324,10 +324,9 @@ in_rtqdrain(void) VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - INIT_VNET_NET(vnet_iter); for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { - rnh = V_rt_tables[fibnum][AF_INET]; + rnh = rt_tables_get_rnh(fibnum, AF_INET); arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = 0; @@ -423,7 +422,6 @@ in_ifadownkill(struct radix_node *rn, void *xap) int in_ifadown(struct ifaddr *ifa, int delete) { - INIT_VNET_NET(curvnet); struct in_ifadown_arg arg; struct radix_node_head *rnh; int fibnum; @@ -432,7 +430,7 @@ in_ifadown(struct ifaddr *ifa, int delete) return 1; for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { - rnh = 
V_rt_tables[fibnum][AF_INET]; + rnh = rt_tables_get_rnh(fibnum, AF_INET); arg.ifa = ifa; arg.del = delete; RADIX_NODE_HEAD_LOCK(rnh); diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index 1137ad7532f..1666becb4a9 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -777,11 +777,11 @@ statinit: void in6_ifdetach(struct ifnet *ifp) { - INIT_VNET_NET(ifp->if_vnet); INIT_VNET_INET(ifp->if_vnet); INIT_VNET_INET6(ifp->if_vnet); struct in6_ifaddr *ia, *oia; struct ifaddr *ifa, *next; + struct radix_node_head *rnh; struct rtentry *rt; short rtflags; struct sockaddr_in6 sin6; @@ -874,15 +874,16 @@ in6_ifdetach(struct ifnet *ifp) /* XXX: should not fail */ return; /* XXX grab lock first to avoid LOR */ - if (V_rt_tables[0][AF_INET6] != NULL) { - RADIX_NODE_HEAD_LOCK(V_rt_tables[0][AF_INET6]); + rnh = rt_tables_get_rnh(0, AF_INET6); + if (rnh != NULL) { + RADIX_NODE_HEAD_LOCK(rnh); rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED); if (rt) { if (rt->rt_ifp == ifp) rtexpunge(rt); RTFREE_LOCKED(rt); } - RADIX_NODE_HEAD_UNLOCK(V_rt_tables[0][AF_INET6]); + RADIX_NODE_HEAD_UNLOCK(rnh); } } diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index 70909b1fb0d..3a423ed0ff2 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -289,13 +289,17 @@ static void in6_rtqtimo(void *rock) { CURVNET_SET_QUIET((struct vnet *) rock); - INIT_VNET_NET(curvnet); INIT_VNET_INET6(curvnet); - struct radix_node_head *rnh = V_rt_tables[0][AF_INET6]; + struct radix_node_head *rnh; struct rtqk_arg arg; struct timeval atv; static time_t last_adjusted_timeout = 0; + rnh = rt_tables_get_rnh(0, AF_INET6); + if (rnh == NULL) { + CURVNET_RESTORE(); + return; + } arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = time_uptime + V_rtq_timeout6; @@ -377,12 +381,16 @@ static void in6_mtutimo(void *rock) { CURVNET_SET_QUIET((struct vnet *) rock); - INIT_VNET_NET(curvnet); INIT_VNET_INET6(curvnet); - struct radix_node_head *rnh = 
V_rt_tables[0][AF_INET6]; + struct radix_node_head *rnh; struct mtuex_arg arg; struct timeval atv; + rnh = rt_tables_get_rnh(0, AF_INET6); + if (rnh == NULL) { + CURVNET_RESTORE(); + return; + } arg.rnh = rnh; arg.nextstop = time_uptime + MTUTIMO_DEFAULT; RADIX_NODE_HEAD_LOCK(rnh); @@ -405,9 +413,12 @@ void in6_rtqdrain(void) { INIT_VNET_NET(curvnet); - struct radix_node_head *rnh = V_rt_tables[0][AF_INET6]; + struct radix_node_head *rnh; struct rtqk_arg arg; + rnh = rt_tables_get_rnh(0, AF_INET6); + if (rnh == NULL) + panic("%s: rnh == NULL", __func__); arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = 0; @@ -429,9 +440,6 @@ in6_rtqdrain(void) int in6_inithead(void **head, int off) { -#ifdef INVARIANTS - INIT_VNET_NET(curvnet); -#endif INIT_VNET_INET6(curvnet); struct radix_node_head *rnh; @@ -447,7 +455,7 @@ in6_inithead(void **head, int off) V_rtq_timeout6 = RTQ_TIMEOUT; rnh = *head; - KASSERT(rnh == V_rt_tables[0][AF_INET6], ("rnh?")); + KASSERT(rnh == rt_tables_get_rnh(0, AF_INET6), ("rnh?")); rnh->rnh_addaddr = in6_addroute; rnh->rnh_matchaddr = in6_matroute; callout_init(&V_rtq_timer6, CALLOUT_MPSAFE); diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index 41a100dd93e..eab8951ef56 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -1549,7 +1549,6 @@ pfxlist_onlink_check() int nd6_prefix_onlink(struct nd_prefix *pr) { - INIT_VNET_NET(curvnet); INIT_VNET_INET6(curvnet); struct ifaddr *ifa; struct ifnet *ifp = pr->ndpr_ifp; @@ -1632,7 +1631,8 @@ nd6_prefix_onlink(struct nd_prefix *pr) ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt); if (error == 0) { if (rt != NULL) /* this should be non NULL, though */ { - rnh = V_rt_tables[rt->rt_fibnum][AF_INET6]; + rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6); + /* XXX what if rnh == NULL? 
*/ RADIX_NODE_HEAD_LOCK(rnh); RT_LOCK(rt); if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) { @@ -2058,8 +2058,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) void rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) { - INIT_VNET_NET(curvnet); - struct radix_node_head *rnh = V_rt_tables[0][AF_INET6]; + struct radix_node_head *rnh; int s = splnet(); /* We'll care only link-local addresses */ @@ -2068,6 +2067,10 @@ rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) return; } + rnh = rt_tables_get_rnh(0, AF_INET6); + if (rnh == NULL) + return; + RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); RADIX_NODE_HEAD_UNLOCK(rnh); diff --git a/sys/nfsclient/bootp_subr.c b/sys/nfsclient/bootp_subr.c index d3151c9c233..6f61abd67f0 100644 --- a/sys/nfsclient/bootp_subr.c +++ b/sys/nfsclient/bootp_subr.c @@ -361,11 +361,15 @@ void bootpboot_p_rtlist(void) { INIT_VNET_NET(curvnet); + struct radix_node_head *rnh; printf("Routing table:\n"); - RADIX_NODE_HEAD_RLOCK(V_rt_tables[0][AF_INET]); /* could sleep XXX */ - bootpboot_p_tree(V_rt_tables[0][AF_INET]->rnh_treetop); - RADIX_NODE_HEAD_RUNLOCK(V_rt_tables[0][AF_INET]); + rnh = rt_tables_get_rnh(0, AF_INET); + if (rnh == NULL) + return; + RADIX_NODE_HEAD_RLOCK(rnh); /* could sleep XXX */ + bootpboot_p_tree(rnh->rnh_treetop); + RADIX_NODE_HEAD_RUNLOCK(rnh); } void diff --git a/sys/sys/param.h b/sys/sys/param.h index bf3892d1f4b..3b3f7c4b9dd 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -57,7 +57,7 @@ * is created, otherwise 1. 
*/ #undef __FreeBSD_version -#define __FreeBSD_version 800096 /* Master, propagated to newvers */ +#define __FreeBSD_version 800097 /* Master, propagated to newvers */ #ifndef LOCORE #include <sys/types.h> diff --git a/usr.bin/netstat/route.c b/usr.bin/netstat/route.c index 596fcd5eccc..735612b867a 100644 --- a/usr.bin/netstat/route.c +++ b/usr.bin/netstat/route.c @@ -122,12 +122,7 @@ int do_rtent = 0; struct rtentry rtentry; struct radix_node rnode; struct radix_mask rmask; -struct rtline { - struct radix_node_head *tables[AF_MAX+1]; /*xxx*/ -}; -struct rtline *rt_tables; - -struct radix_node_head *rt_tables_line[1][AF_MAX+1]; /*xxx*/ +struct radix_node_head **rt_tables; int NewTree = 0; @@ -155,7 +150,7 @@ static void domask(char *, in_addr_t, u_long); void routepr(u_long rtree) { - struct radix_node_head *rnh, head; + struct radix_node_head **rnhp, *rnh, head; size_t intsize; int i; int numfibs; @@ -165,7 +160,8 @@ routepr(u_long rtree) fibnum = 0; if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1) numfibs = 1; - rt_tables = calloc(numfibs, sizeof(struct rtline)); + rt_tables = calloc(numfibs * (AF_MAX+1), + sizeof(struct radix_node_head *)); if (rt_tables == NULL) err(EX_OSERR, "memory allocation failed"); /* @@ -186,8 +182,8 @@ routepr(u_long rtree) return; } - if (kread((u_long)(rtree), (char *)(rt_tables), - (numfibs * sizeof(struct rtline))) != 0) + if (kread((u_long)(rtree), (char *)(rt_tables), (numfibs * + (AF_MAX+1) * sizeof(struct radix_node_head *))) != 0) return; for (i = 0; i <= AF_MAX; i++) { int tmpfib; @@ -195,8 +191,15 @@ routepr(u_long rtree) tmpfib = 0; else tmpfib = fibnum; - if ((rnh = rt_tables[tmpfib].tables[i]) == 0) + rnhp = (struct radix_node_head **)*rt_tables; + /* Calculate the in-kernel address. */ + rnhp += tmpfib * (AF_MAX+1) + i; + /* Read the in-kernel rnh pointer. */ + if (kget(rnhp, rnh) != 0) continue; + if (rnh == NULL) + continue; + /* Read the rnh data. */ if (kget(rnh, head) != 0) continue; if (i == AF_UNSPEC) {