From 5490110cf5ab9ba51b15bc26434ad14fbfacd619 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 08:50:19 +0000 Subject: [PATCH 01/18] In preparation for multi-FIB IPv6 support, factor the code for joining and leaving multicast groups out from in6_update_ifa() and in6_purgeaddr(). Sponsored by: Cisco Systems, Inc. --- sys/netinet6/in6.c | 581 +++++++++++++++++++++++---------------------- 1 file changed, 299 insertions(+), 282 deletions(-) diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index cf5eb35fa46..4bbc4ee4e5b 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -820,6 +820,169 @@ out: return (error); } +/* + * Join necessary multicast groups. Factored out from in6_update_ifa(). + * This entire work should only be done once, for the default FIB. + */ +static int +in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, + struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol) +{ + char ip6buf[INET6_ADDRSTRLEN]; + struct sockaddr_in6 mltaddr, mltmask; + struct in6_addr llsol; + struct in6_multi_mship *imm; + struct rtentry *rt; + int delay, error; + + KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__)); + + /* Join solicited multicast addr for new host id. */ + bzero(&llsol, sizeof(struct in6_addr)); + llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; + llsol.s6_addr32[1] = 0; + llsol.s6_addr32[2] = htonl(1); + llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; + llsol.s6_addr8[12] = 0xff; + if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { + /* XXX: should not happen */ + log(LOG_ERR, "%s: in6_setscope failed\n", __func__); + goto cleanup; + } + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * We need a random delay for DAD on the address being + * configured. It also means delaying transmission of the + * corresponding MLD report to avoid report collision. + * [RFC 4861, Section 6.3.7] + */ + delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); + } + imm = in6_joingroup(ifp, &llsol, &error, delay); + if (imm == NULL) { + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &llsol), + if_name(ifp), error)); + goto cleanup; + } + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + *in6m_sol = imm->i6mm_maddr; + + bzero(&mltmask, sizeof(mltmask)); + mltmask.sin6_len = sizeof(struct sockaddr_in6); + mltmask.sin6_family = AF_INET6; + mltmask.sin6_addr = in6mask32; +#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ + + /* + * Join link-local all-nodes address. + */ + bzero(&mltaddr, sizeof(mltaddr)); + mltaddr.sin6_len = sizeof(struct sockaddr_in6); + mltaddr.sin6_family = AF_INET6; + mltaddr.sin6_addr = in6addr_linklocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + goto cleanup; /* XXX: should not fail */ + + /* + * XXX: do we really need this automatic routes? We should probably + * reconsider this stuff. Most applications actually do not need the + * routes, since they usually specify the outgoing interface. + */ + rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + if (rt != NULL) { + /* XXX: only works in !SCOPEDROUTING case. */ + if (memcmp(&mltaddr.sin6_addr, + &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, + MLTMASK_LEN)) { + RTFREE_LOCKED(rt); + rt = NULL; + } + } + if (rt == NULL) { + error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0); + if (error) + goto cleanup; + } else + RTFREE_LOCKED(rt); + + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + if (imm == NULL) { + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, + &mltaddr.sin6_addr), if_name(ifp), error)); + goto cleanup; + } + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + + /* + * Join node information group address. + */ + delay = 0; + if ((flags & IN6_IFAUPDATE_DADDELAY)) { + /* + * The spec does not say anything about delay for this group, + * but the same logic should apply. + */ + delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); + } + if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { + /* XXX jinmei */ + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay); + if (imm == NULL) + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, + &mltaddr.sin6_addr), if_name(ifp), error)); + /* XXX not very fatal, go on... */ + else + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); + } + + /* + * Join interface-local all-nodes address. + * (ff01::1%ifN, and ff01::%ifN/32) + */ + mltaddr.sin6_addr = in6addr_nodelocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + goto cleanup; /* XXX: should not fail */ + /* XXX: again, do we really need the route? */ + rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + if (rt != NULL) { + if (memcmp(&mltaddr.sin6_addr, + &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, + MLTMASK_LEN)) { + RTFREE_LOCKED(rt); + rt = NULL; + } + } + if (rt == NULL) { + error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0); + if (error) + goto cleanup; + } else + RTFREE_LOCKED(rt); + + imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + if (imm == NULL) { + nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, + &mltaddr.sin6_addr), if_name(ifp), error)); + goto cleanup; + } + LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); +#undef MLTMASK_LEN + +cleanup: + return (error); +} + /* * Update parameters of an IPv6 interface address. * If necessary, a new entry is created and linked into address chains. @@ -833,9 +996,7 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int error = 0, hostIsNew = 0, plen = -1; struct sockaddr_in6 dst6; struct in6_addrlifetime *lt; - struct in6_multi_mship *imm; struct in6_multi *in6m_sol; - struct rtentry *rt; int delay; char ip6buf[INET6_ADDRSTRLEN]; @@ -1083,172 +1244,12 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, * not just go to unlink. */ - /* Join necessary multicast groups */ + /* Join necessary multicast groups. */ in6m_sol = NULL; if ((ifp->if_flags & IFF_MULTICAST) != 0) { - struct sockaddr_in6 mltaddr, mltmask; - struct in6_addr llsol; - - /* join solicited multicast addr for new host id */ - bzero(&llsol, sizeof(struct in6_addr)); - llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; - llsol.s6_addr32[1] = 0; - llsol.s6_addr32[2] = htonl(1); - llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; - llsol.s6_addr8[12] = 0xff; - if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { - /* XXX: should not happen */ - log(LOG_ERR, "in6_update_ifa: " - "in6_setscope failed\n"); + error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol); + if (error) goto cleanup; - } - delay = 0; - if ((flags & IN6_IFAUPDATE_DADDELAY)) { - /* - * We need a random delay for DAD on the address - * being configured. It also means delaying - * transmission of the corresponding MLD report to - * avoid report collision. - * [RFC 4861, Section 6.3.7] - */ - delay = arc4random() % - (MAX_RTR_SOLICITATION_DELAY * hz); - } - imm = in6_joingroup(ifp, &llsol, &error, delay); - if (imm == NULL) { - nd6log((LOG_WARNING, - "in6_update_ifa: addmulti failed for " - "%s on %s (errno=%d)\n", - ip6_sprintf(ip6buf, &llsol), if_name(ifp), - error)); - goto cleanup; - } - LIST_INSERT_HEAD(&ia->ia6_memberships, - imm, i6mm_chain); - in6m_sol = imm->i6mm_maddr; - - bzero(&mltmask, sizeof(mltmask)); - mltmask.sin6_len = sizeof(struct sockaddr_in6); - mltmask.sin6_family = AF_INET6; - mltmask.sin6_addr = in6mask32; -#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ - - /* - * join link-local all-nodes address - */ - bzero(&mltaddr, sizeof(mltaddr)); - mltaddr.sin6_len = sizeof(struct sockaddr_in6); - mltaddr.sin6_family = AF_INET6; - mltaddr.sin6_addr = in6addr_linklocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != - 0) - goto cleanup; /* XXX: should not fail */ - - /* - * XXX: do we really need this automatic routes? - * We should probably reconsider this stuff. Most applications - * actually do not need the routes, since they usually specify - * the outgoing interface. - */ - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); - if (rt) { - /* XXX: only works in !SCOPEDROUTING case. */ - if (memcmp(&mltaddr.sin6_addr, - &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, - MLTMASK_LEN)) { - RTFREE_LOCKED(rt); - rt = NULL; - } - } - if (!rt) { - error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); - if (error) - goto cleanup; - } else { - RTFREE_LOCKED(rt); - } - - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); - if (!imm) { - nd6log((LOG_WARNING, - "in6_update_ifa: addmulti failed for " - "%s on %s (errno=%d)\n", - ip6_sprintf(ip6buf, &mltaddr.sin6_addr), - if_name(ifp), error)); - goto cleanup; - } - LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); - - /* - * join node information group address - */ - delay = 0; - if ((flags & IN6_IFAUPDATE_DADDELAY)) { - /* - * The spec doesn't say anything about delay for this - * group, but the same logic should apply. - */ - delay = arc4random() % - (MAX_RTR_SOLICITATION_DELAY * hz); - } - if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, - delay); /* XXX jinmei */ - if (!imm) { - nd6log((LOG_WARNING, "in6_update_ifa: " - "addmulti failed for %s on %s " - "(errno=%d)\n", - ip6_sprintf(ip6buf, &mltaddr.sin6_addr), - if_name(ifp), error)); - /* XXX not very fatal, go on... */ - } else { - LIST_INSERT_HEAD(&ia->ia6_memberships, - imm, i6mm_chain); - } - } - - /* - * join interface-local all-nodes address. - * (ff01::1%ifN, and ff01::%ifN/32) - */ - mltaddr.sin6_addr = in6addr_nodelocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) - != 0) - goto cleanup; /* XXX: should not fail */ - /* XXX: again, do we really need the route? */ - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); - if (rt) { - if (memcmp(&mltaddr.sin6_addr, - &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, - MLTMASK_LEN)) { - RTFREE_LOCKED(rt); - rt = NULL; - } - } - if (!rt) { - error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); - if (error) - goto cleanup; - } else - RTFREE_LOCKED(rt); - - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); - if (!imm) { - nd6log((LOG_WARNING, "in6_update_ifa: " - "addmulti failed for %s on %s " - "(errno=%d)\n", - ip6_sprintf(ip6buf, &mltaddr.sin6_addr), - if_name(ifp), error)); - goto cleanup; - } - LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); -#undef MLTMASK_LEN } /* @@ -1312,15 +1313,143 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, return error; } +/* + * Leave multicast groups. Factored out from in6_purgeaddr(). + * This entire work should only be done once, for the default FIB. + */ +static int +in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) +{ + struct sockaddr_in6 mltaddr, mltmask; + struct in6_multi_mship *imm; + struct rtentry *rt; + int error; + + /* + * Leave from multicast groups we have joined for the interface. + */ + while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) { + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } + + /* + * Remove the link-local all-nodes address. + */ + bzero(&mltmask, sizeof(mltmask)); + mltmask.sin6_len = sizeof(struct sockaddr_in6); + mltmask.sin6_family = AF_INET6; + mltmask.sin6_addr = in6mask32; + + bzero(&mltaddr, sizeof(mltaddr)); + mltaddr.sin6_len = sizeof(struct sockaddr_in6); + mltaddr.sin6_family = AF_INET6; + mltaddr.sin6_addr = in6addr_linklocal_allnodes; + + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + return (error); + + rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + if (rt != NULL && rt->rt_gateway != NULL && + (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, + &ia->ia_addr.sin6_addr, + sizeof(ia->ia_addr.sin6_addr)) == 0)) { + /* + * If no more IPv6 address exists on this interface then + * remove the multicast address route. + */ + if (ifa0 == NULL) { + memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, + sizeof(mltaddr.sin6_addr)); + RTFREE_LOCKED(rt); + error = rtrequest(RTM_DELETE, + (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0); + if (error) + log(LOG_INFO, "%s: link-local all-nodes " + "multicast address deletion error\n", + __func__); + } else { + /* + * Replace the gateway of the route. + */ + struct sockaddr_in6 sa; + + bzero(&sa, sizeof(sa)); + sa.sin6_len = sizeof(struct sockaddr_in6); + sa.sin6_family = AF_INET6; + memcpy(&sa.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr, + sizeof(sa.sin6_addr)); + in6_setscope(&sa.sin6_addr, ifa0->ifa_ifp, NULL); + memcpy(rt->rt_gateway, &sa, sizeof(sa)); + RTFREE_LOCKED(rt); + } + } else { + if (rt != NULL) + RTFREE_LOCKED(rt); + } + + /* + * Remove the node-local all-nodes address. + */ + mltaddr.sin6_addr = in6addr_nodelocal_allnodes; + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + return (error); + + rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + if (rt != NULL && rt->rt_gateway != NULL && + (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, + &ia->ia_addr.sin6_addr, + sizeof(ia->ia_addr.sin6_addr)) == 0)) { + /* + * If no more IPv6 address exists on this interface then + * remove the multicast address route. + */ + if (ifa0 == NULL) { + memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, + sizeof(mltaddr.sin6_addr)); + + RTFREE_LOCKED(rt); + error = rtrequest(RTM_DELETE, + (struct sockaddr *)&mltaddr, + (struct sockaddr *)&ia->ia_addr, + (struct sockaddr *)&mltmask, RTF_UP, + (struct rtentry **)0); + if (error) + log(LOG_INFO, "%s: node-local all-nodes" + "multicast address deletion error\n", + __func__); + } else { + /* + * Replace the gateway of the route. + */ + struct sockaddr_in6 sa; + + bzero(&sa, sizeof(sa)); + sa.sin6_len = sizeof(struct sockaddr_in6); + sa.sin6_family = AF_INET6; + memcpy(&sa.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr, + sizeof(sa.sin6_addr)); + in6_setscope(&sa.sin6_addr, ifa0->ifa_ifp, NULL); + memcpy(rt->rt_gateway, &sa, sizeof(sa)); + RTFREE_LOCKED(rt); + } + } else { + if (rt != NULL) + RTFREE_LOCKED(rt); + } + + return (0); +} + void in6_purgeaddr(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; - struct in6_multi_mship *imm; - struct sockaddr_in6 mltaddr, mltmask; int plen, error; - struct rtentry *rt; struct ifaddr *ifa0; if (ifa->ifa_carp) @@ -1362,121 +1491,9 @@ in6_purgeaddr(struct ifaddr *ifa) in6_ifremloop(ifa); - /* - * leave from multicast groups we have joined for the interface - */ - while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) { - LIST_REMOVE(imm, i6mm_chain); - in6_leavegroup(imm); - } + /* Leave multicast groups. */ + error = in6_purgeaddr_mc(ifp, ia, ifa0); - /* - * remove the link-local all-nodes address - */ - bzero(&mltmask, sizeof(mltmask)); - mltmask.sin6_len = sizeof(struct sockaddr_in6); - mltmask.sin6_family = AF_INET6; - mltmask.sin6_addr = in6mask32; - - bzero(&mltaddr, sizeof(mltaddr)); - mltaddr.sin6_len = sizeof(struct sockaddr_in6); - mltaddr.sin6_family = AF_INET6; - mltaddr.sin6_addr = in6addr_linklocal_allnodes; - - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) - goto cleanup; - - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); - if (rt != NULL && rt->rt_gateway != NULL && - (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, - &ia->ia_addr.sin6_addr, - sizeof(ia->ia_addr.sin6_addr)) == 0)) { - /* - * if no more IPv6 address exists on this interface - * then remove the multicast address route - */ - if (ifa0 == NULL) { - memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, - sizeof(mltaddr.sin6_addr)); - RTFREE_LOCKED(rt); - error = rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); - if (error) - log(LOG_INFO, "in6_purgeaddr: link-local all-nodes" - "multicast address deletion error\n"); - } else { - /* - * replace the gateway of the route - */ - struct sockaddr_in6 sa; - - bzero(&sa, sizeof(sa)); - sa.sin6_len = sizeof(struct sockaddr_in6); - sa.sin6_family = AF_INET6; - memcpy(&sa.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr, - sizeof(sa.sin6_addr)); - in6_setscope(&sa.sin6_addr, ifa0->ifa_ifp, NULL); - memcpy(rt->rt_gateway, &sa, sizeof(sa)); - RTFREE_LOCKED(rt); - } - } else { - if (rt != NULL) - RTFREE_LOCKED(rt); - } - - /* - * remove the node-local all-nodes address - */ - mltaddr.sin6_addr = in6addr_nodelocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != - 0) - goto cleanup; - - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); - if (rt != NULL && rt->rt_gateway != NULL && - (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, - &ia->ia_addr.sin6_addr, - sizeof(ia->ia_addr.sin6_addr)) == 0)) { - /* - * if no more IPv6 address exists on this interface - * then remove the multicast address route - */ - if (ifa0 == NULL) { - memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, - sizeof(mltaddr.sin6_addr)); - - RTFREE_LOCKED(rt); - error = rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); - - if (error) - log(LOG_INFO, "in6_purgeaddr: node-local all-nodes" - "multicast address deletion error\n"); - } else { - /* - * replace the gateway of the route - */ - struct sockaddr_in6 sa; - - bzero(&sa, sizeof(sa)); - sa.sin6_len = sizeof(struct sockaddr_in6); - sa.sin6_family = AF_INET6; - memcpy(&sa.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr, - sizeof(sa.sin6_addr)); - in6_setscope(&sa.sin6_addr, ifa0->ifa_ifp, NULL); - memcpy(rt->rt_gateway, &sa, sizeof(sa)); - RTFREE_LOCKED(rt); - } - } else { - if (rt != NULL) - RTFREE_LOCKED(rt); - } - -cleanup: if (ifa0 != NULL) ifa_free(ifa0); From 556d81ddd77c5c5c9deb2da53d3a3f982fad1d9a Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 09:06:24 +0000 Subject: [PATCH 02/18] Rather than putting magic 0s as FIB argument into the rt* calls, provide a macro RT_DEFAULT_FIB defined to 0 to more easily identify the cases tied to the default FIB. Sponsored by: Cisco Systems, Inc. --- sys/net/route.h | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/net/route.h b/sys/net/route.h index 7e045b79305..c7e96d51576 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -111,6 +111,7 @@ struct rt_metrics { #endif #endif +#define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */ extern u_int rt_numfibs; /* number fo usable routing tables */ /* * XXX kernel function pointer `rt_output' is visible to applications. From b680a383a8f779396e1773f49260b4754a53b8e5 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 09:23:55 +0000 Subject: [PATCH 03/18] Allow for IPv6 to allocate (and in the VIMAGE case free) as many routing tables (FIBs) as IPv4. Prepare various general rt* functions for multi-FIB IPv6 handling in addition to already existing multi-FIB IPv4 cases. Sponsored by: Cisco Systems, Inc. --- sys/net/route.c | 106 +++++++++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 42 deletions(-) diff --git a/sys/net/route.c b/sys/net/route.c index 296914b7f5d..ea7e46907f9 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -196,27 +196,23 @@ vnet_route_init(const void *unused __unused) V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { - if (dom->dom_rtattach) { - for (table = 0; table < rt_numfibs; table++) { - if ( (fam = dom->dom_family) == AF_INET || - table == 0) { - /* for now only AF_INET has > 1 table */ - /* XXX MRT - * rtattach will be also called - * from vfs_export.c but the - * offset will be 0 - * (only for AF_INET and AF_INET6 - * which don't need it anyhow) - */ - rnh = rt_tables_get_rnh_ptr(table, fam); - if (rnh == NULL) - panic("%s: rnh NULL", __func__); - dom->dom_rtattach((void **)rnh, - dom->dom_rtoffset); - } else { - break; - } - } + if (dom->dom_rtattach == NULL) + continue; + + for (table = 0; table < rt_numfibs; table++) { + fam = dom->dom_family; + if (table != 0 && fam != AF_INET6 && fam != AF_INET) + break; + + /* + * XXX MRT rtattach will be also called from + * vfs_export.c but the offset will be 0 (only for + * AF_INET and AF_INET6 which don't need it anyhow). + */ + rnh = rt_tables_get_rnh_ptr(table, fam); + if (rnh == NULL) + panic("%s: rnh NULL", __func__); + dom->dom_rtattach((void **)rnh, dom->dom_rtoffset); } } } @@ -233,20 +229,19 @@ vnet_route_uninit(const void *unused __unused) struct radix_node_head **rnh; for (dom = domains; dom; dom = dom->dom_next) { - if (dom->dom_rtdetach) { - for (table = 0; table < rt_numfibs; table++) { - if ( (fam = dom->dom_family) == AF_INET || - table == 0) { - /* For now only AF_INET has > 1 tbl. */ - rnh = rt_tables_get_rnh_ptr(table, fam); - if (rnh == NULL) - panic("%s: rnh NULL", __func__); - dom->dom_rtdetach((void **)rnh, - dom->dom_rtoffset); - } else { - break; - } - } + if (dom->dom_rtdetach == NULL) + continue; + + for (table = 0; table < rt_numfibs; table++) { + fam = dom->dom_family; + + if (table != 0 && fam != AF_INET6 && fam != AF_INET) + break; + + rnh = rt_tables_get_rnh_ptr(table, fam); + if (rnh == NULL) + panic("%s: rnh NULL", __func__); + dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset); } } } @@ -339,8 +334,15 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, int needlock; KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); - if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ - fibnum = 0; + switch (dst->sa_family) { + case AF_INET6: + case AF_INET: + /* We support multiple FIBs. */ + break; + default: + fibnum = RT_DEFAULT_FIB; + break; + } rnh = rt_tables_get_rnh(fibnum, dst->sa_family); newrt = NULL; if (rnh == NULL) @@ -1029,8 +1031,16 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, #define senderr(x) { error = x ; goto bad; } KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); - if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ - fibnum = 0; + switch (dst->sa_family) { + case AF_INET6: + case AF_INET: + /* We support multiple FIBs. */ + break; + default: + fibnum = RT_DEFAULT_FIB; + break; + } + /* * Find the correct routing tree to use for this Address Family */ @@ -1383,8 +1393,15 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } - if ( dst->sa_family != AF_INET) - fibnum = 0; + switch (dst->sa_family) { + case AF_INET6: + case AF_INET: + /* We support multiple FIBs. */ + break; + default: + fibnum = RT_DEFAULT_FIB; + break; + } if (fibnum == -1) { if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { startfib = endfib = curthread->td_proc->p_fibnum; @@ -1587,7 +1604,12 @@ rtinit(struct ifaddr *ifa, int cmd, int flags) dst = ifa->ifa_addr; } - if (dst->sa_family == AF_INET) + switch (dst->sa_family) { + case AF_INET6: + case AF_INET: + /* We do support multiple FIBs. */ fib = -1; + break; + } return (rtinit1(ifa, cmd, flags, fib)); } From db566a23b6ca177bcc5cbb705a1e4d846f13c60d Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 09:33:58 +0000 Subject: [PATCH 04/18] Provide the IPv6 counterpart to the extended IPv4 rtalloc(9) KPI. Sponsored by: Cisco Systems, Inc. --- sys/netinet6/in6_rmx.c | 40 ++++++++++++++++++++++++++++++++++++++++ sys/netinet6/in6_var.h | 11 +++++++++++ 2 files changed, 51 insertions(+) diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index 2a1364673b3..16091ed2872 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -445,3 +445,43 @@ in6_detachhead(void **head, int off) return (1); } #endif + +/* + * Extended API for IPv6 FIB support. + */ +void +in6_rtredirect(struct sockaddr *dst, struct sockaddr *gw, struct sockaddr *nm, + int flags, struct sockaddr *src, u_int fibnum) +{ + + rtredirect_fib(dst, gw, nm, flags, src, fibnum); +} + +int +in6_rtrequest(int req, struct sockaddr *dst, struct sockaddr *gw, + struct sockaddr *mask, int flags, struct rtentry **ret_nrt, u_int fibnum) +{ + + return (rtrequest_fib(req, dst, gw, mask, flags, ret_nrt, fibnum)); +} + +void +in6_rtalloc(struct route_in6 *ro, u_int fibnum) +{ + + rtalloc_ign_fib((struct route *)ro, 0ul, fibnum); +} + +void +in6_rtalloc_ign(struct route_in6 *ro, u_long ignflags, u_int fibnum) +{ + + rtalloc_ign_fib((struct route *)ro, ignflags, fibnum); +} + +struct rtentry * +in6_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) +{ + + return (rtalloc1_fib(dst, report, ignflags, fibnum)); +} diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index c49164963c0..8ea5e048ddc 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -792,6 +792,17 @@ void in6_ifaddloop(struct ifaddr *); int in6_is_addr_deprecated __P((struct sockaddr_in6 *)); int in6_src_ioctl __P((u_long, caddr_t)); + +/* + * Extended API for IPv6 FIB support. + */ +void in6_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, + int, struct sockaddr *, u_int); +int in6_rtrequest(int, struct sockaddr *, struct sockaddr *, + struct sockaddr *, int, struct rtentry **, u_int); +void in6_rtalloc(struct route_in6 *, u_int); +void in6_rtalloc_ign(struct route_in6 *, u_long, u_int); +struct rtentry *in6_rtalloc1(struct sockaddr *, int, u_long, u_int); #endif /* _KERNEL */ #endif /* _NETINET6_IN6_VAR_H_ */ From 096f27864f5d3059ffd82c3b1c5c4ef56cb627f6 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 10:17:34 +0000 Subject: [PATCH 05/18] Fix FLOWTABLE IPv6 handling in route.c missed in r205066. While doing so, for consistency with the rtalloc_ign_fib(9) interface called, remove the "in_" prefix from rtalloc_ign_wrapper() no longer indicating that it would only handle the INET case. Sponsored by: Cisco Systems, Inc. --- sys/net/flowtable.c | 4 ++-- sys/net/route.c | 31 ++++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/sys/net/flowtable.c b/sys/net/flowtable.c index fac0f593505..8501b1875d8 100644 --- a/sys/net/flowtable.c +++ b/sys/net/flowtable.c @@ -374,7 +374,7 @@ SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD #ifndef RADIX_MPATH static void -in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum) +rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum) { rtalloc_ign_fib(ro, 0, fibnum); @@ -1315,7 +1315,7 @@ flowtable_alloc(char *name, int nentry, int flags) #ifdef RADIX_MPATH ft->ft_rtalloc = rtalloc_mpath_fib; #else - ft->ft_rtalloc = in_rtalloc_ign_wrapper; + ft->ft_rtalloc = rtalloc_ign_wrapper; #endif if (flags & FL_PCPU) { ft->ft_lock = flowtable_pcpu_lock; diff --git a/sys/net/route.c b/sys/net/route.c index ea7e46907f9..3e3e7a5c721 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -35,6 +35,7 @@ ***********************************************************************/ #include "opt_inet.h" +#include "opt_inet6.h" #include "opt_route.h" #include "opt_mrouting.h" #include "opt_mpath.h" @@ -1192,12 +1193,15 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, #ifdef FLOWTABLE rt0 = NULL; - /* XXX - * "flow-table" only support IPv4 at the moment. - * XXX-BZ as of r205066 it would support IPv6. - */ + /* "flow-table" only supports IPv6 and IPv4 at the moment. */ + switch (dst->sa_family) { +#ifdef INET6 + case AF_INET6: +#endif #ifdef INET - if (dst->sa_family == AF_INET) { + case AF_INET: +#endif +#if defined(INET6) || defined(INET) rn = rnh->rnh_matchaddr(dst, rnh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { struct sockaddr *mask; @@ -1236,9 +1240,9 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, } } } +#endif/* INET6 || INET */ } -#endif -#endif +#endif /* FLOWTABLE */ /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); @@ -1259,9 +1263,18 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, } #ifdef FLOWTABLE else if (rt0 != NULL) { -#ifdef INET - flowtable_route_flush(V_ip_ft, rt0); + switch (dst->sa_family) { +#ifdef INET6 + case AF_INET6: + flowtable_route_flush(V_ip6_ft, rt0); + break; #endif +#ifdef INET + case AF_INET: + flowtable_route_flush(V_ip_ft, rt0); + break; +#endif + } RTFREE(rt0); } #endif From ee799639e8ba568f998f38c34eaa1859639db341 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 11:00:53 +0000 Subject: [PATCH 06/18] Add SO_SETFIB option support on PF_INET6 sockets and allow inheriting the FIB number from the process, as set by setfib(2), on socket creation. Sponsored by: Cisco Systems, Inc. --- sys/kern/uipc_socket.c | 4 +++- sys/netinet6/ip6_output.c | 6 ++++++ sys/netinet6/raw_ip6.c | 12 +++++++++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 2a1bf7fe62a..91309deeee5 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -392,6 +392,7 @@ socreate(int dom, struct socket **aso, int type, int proto, so->so_type = type; so->so_cred = crhold(cred); if ((prp->pr_domain->dom_family == PF_INET) || + (prp->pr_domain->dom_family == PF_INET6) || (prp->pr_domain->dom_family == PF_ROUTE)) so->so_fibnum = td->td_proc->p_fibnum; else @@ -2498,12 +2499,13 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); - if (optval < 0 || optval > rt_numfibs) { + if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } if (so->so_proto != NULL && ((so->so_proto->pr_domain->dom_family == PF_INET) || + (so->so_proto->pr_domain->dom_family == PF_INET6) || (so->so_proto->pr_domain->dom_family == PF_ROUTE))) { so->so_fibnum = optval; /* Note: ignore error */ diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 53724381026..5563d2b8f87 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1448,6 +1448,12 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) INP_WUNLOCK(in6p); error = 0; break; + case SO_SETFIB: + INP_WLOCK(in6p); + in6p->inp_inc.inc_fibnum = so->so_fibnum; + INP_WUNLOCK(in6p); + error = 0; + break; default: break; } diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 6c10fc5479b..d78bf8ef1af 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -582,6 +582,7 @@ rip6_output(m, va_alist) int rip6_ctloutput(struct socket *so, struct sockopt *sopt) { + struct inpcb *inp; int error; if (sopt->sopt_level == IPPROTO_ICMPV6) @@ -590,8 +591,17 @@ rip6_ctloutput(struct socket *so, struct sockopt *sopt) * from protosw? */ return (icmp6_ctloutput(so, sopt)); - else if (sopt->sopt_level != IPPROTO_IPV6) + else if (sopt->sopt_level != IPPROTO_IPV6) { + if (sopt->sopt_level == SOL_SOCKET && + sopt->sopt_name == SO_SETFIB) { + inp = sotoinpcb(so); + INP_WLOCK(inp); + inp->inp_inc.inc_fibnum = so->so_fibnum; + INP_WUNLOCK(inp); + return (0); + } return (EINVAL); + } error = 0; From b3dd0771528e50960d52bf283a5fcc31cb820a1e Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 11:20:11 +0000 Subject: [PATCH 07/18] Minor optimization doing input validation with a possible early return before doing further work. Sponsored by: Cisco Systems, Inc. --- sys/net/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/net/route.c b/sys/net/route.c index 3e3e7a5c721..b4cf6b18aff 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -1406,6 +1406,8 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } + if (dst->sa_len == 0) + return(EINVAL); switch (dst->sa_family) { case AF_INET6: case AF_INET: @@ -1427,8 +1429,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) startfib = fibnum; endfib = fibnum; } - if (dst->sa_len == 0) - return(EINVAL); /* * If it's a delete, check that if it exists, From a84986256fd3db9fcf8b9e443846603efb59ba5b Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 12:25:14 +0000 Subject: [PATCH 08/18] Move a comment from rtinit1() to the top of the file where dealing with the (maximum) number of FIBs trying to clarify that evetually FIBs should probably attached to domain(9) specific storage. [1] Add a comment on a limitimation on the rt_add_addr_allfibs option. Use RT_DEFAULT_FIB instead of 0 where applicable. Add empty line to functions without local variables per style. Put public yet unused in-tree function rtinit_fib() under BURN_BRIDGES to indicate that it might go away in the future. No functional change. Discussed with: julian [1] (clarification on what the original one meant) Sponsored by: Cisco Systems, Inc. --- sys/net/route.c | 44 +++++++++++++++++++++++++++++--------------- sys/net/route.h | 2 ++ 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/sys/net/route.c b/sys/net/route.c index b4cf6b18aff..c48a4ac94f0 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -73,7 +73,11 @@ SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); /* * Allow the boot code to allow LESS than RT_MAXFIBS to be used. * We can't do more because storage is statically allocated for now. - * (for compatibility reasons.. this will change). + * (for compatibility reasons.. this will change. When this changes, code should + * be refactored to protocol independent parts and protocol dependent parts, + * probably hanging of domain(9) specific storage to not need the full + * fib * af RNH allocation etc. but allow tuning the number of tables per + * address family). */ TUNABLE_INT("net.fibs", &rt_numfibs); @@ -83,6 +87,9 @@ TUNABLE_INT("net.fibs", &rt_numfibs); * changes for the FIB of the caller when adding a new set of addresses * to an interface. XXX this is a shotgun aproach to a problem that needs * a more fine grained solution.. that will come. + * XXX also has the problems getting the FIB from curthread which will not + * always work given the fib can be overridden and prefixes can be added + * from the network stack context. */ u_int rt_add_addr_allfibs = 1; SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, @@ -270,7 +277,8 @@ sys_setfib(struct thread *td, struct setfib_args *uap) void rtalloc(struct route *ro) { - rtalloc_ign_fib(ro, 0UL, 0); + + rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); } void @@ -290,7 +298,7 @@ rtalloc_ign(struct route *ro, u_long ignore) RTFREE(rt); ro->ro_rt = NULL; } - ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0); + ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } @@ -320,7 +328,8 @@ rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) struct rtentry * rtalloc1(struct sockaddr *dst, int report, u_long ignflags) { - return (rtalloc1_fib(dst, report, ignflags, 0)); + + return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); } struct rtentry * @@ -489,7 +498,8 @@ rtredirect(struct sockaddr *dst, int flags, struct sockaddr *src) { - rtredirect_fib(dst, gateway, netmask, flags, src, 0); + + rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); } void @@ -620,7 +630,8 @@ out: int rtioctl(u_long req, caddr_t data) { - return (rtioctl_fib(req, data, 0)); + + return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); } /* @@ -650,7 +661,8 @@ rtioctl_fib(u_long req, caddr_t data, u_int fibnum) struct ifaddr * ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) { - return (ifa_ifwithroute_fib(flags, dst, gateway, 0)); + + return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB)); } struct ifaddr * @@ -735,7 +747,9 @@ rtrequest(int req, int flags, struct rtentry **ret_nrt) { - return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0)); + + return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, + RT_DEFAULT_FIB)); } int @@ -774,7 +788,8 @@ rtrequest_fib(int req, int rt_getifa(struct rt_addrinfo *info) { - return (rt_getifa_fib(info, 0)); + + return (rt_getifa_fib(info, RT_DEFAULT_FIB)); } /* @@ -1146,8 +1161,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, rt->rt_flags = RTF_UP | flags; rt->rt_fibnum = fibnum; /* - * Add the gateway. Possibly re-malloc-ing the storage for it - * + * Add the gateway. Possibly re-malloc-ing the storage for it. */ RT_LOCK(rt); if ((error = rt_setgate(rt, dst, gateway)) != 0) { @@ -1452,9 +1466,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) * Now go through all the requested tables (fibs) and do the * requested action. Realistically, this will either be fib 0 * for protocols that don't do multiple tables or all the - * tables for those that do. XXX For this version only AF_INET. - * When that changes code should be refactored to protocol - * independent parts and protocol dependent parts. + * tables for those that do. */ for ( fibnum = startfib; fibnum <= endfib; fibnum++) { if (cmd == RTM_DELETE) { @@ -1594,12 +1606,14 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) return (error); } +#ifndef BURN_BRIDGES /* special one for inet internal use. may not use. */ int rtinit_fib(struct ifaddr *ifa, int cmd, int flags) { return (rtinit1(ifa, cmd, flags, -1)); } +#endif /* * Set up a routing table entry, normally @@ -1609,7 +1623,7 @@ int rtinit(struct ifaddr *ifa, int cmd, int flags) { struct sockaddr *dst; - int fib = 0; + int fib = RT_DEFAULT_FIB; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; diff --git a/sys/net/route.h b/sys/net/route.h index c7e96d51576..a400debc435 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -406,8 +406,10 @@ void rtredirect(struct sockaddr *, struct sockaddr *, int rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **); +#ifndef BURN_BRIDGES /* defaults to "all" FIBs */ int rtinit_fib(struct ifaddr *, int, int); +#endif /* XXX MRT NEW VERSIONS THAT USE FIBs * For now the protocol indepedent versions are the same as the AF_INET ones From 81d5d46b3c4e0cf91258e86120d42e344c127769 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 13:08:44 +0000 Subject: [PATCH 09/18] Add multi-FIB IPv6 support to the core network stack supplementing the original IPv4 implementation from r178888: - Use RT_DEFAULT_FIB in the IPv4 implementation where noticed. - Use rt*fib() KPI with explicit RT_DEFAULT_FIB where applicable in the NFS code. - Use the new in6_rt* KPI in TCP, gif(4), and the IPv6 network stack where applicable. - Split in6_rtqtimo() and in6_mtutimo() as done in IPv4 and equally prevent multiple initializations of callouts in in6_inithead(). - Use wrapper functions where needed to preserve the current KPI to ease MFCs. Use BURN_BRIDGES to indicate expected future cleanup. - Fix (related) comments (both technical or style). - Convert to rtinit() where applicable and only use custom loops where currently not possible otherwise. - Multicast group, most neighbor discovery address actions and faith(4) are locked to the default FIB. Individual IPv6 addresses will only appear in the default FIB, however redirect information and prefixes of connected subnets are automatically propagated to all FIBs by default (mimicking IPv4 behavior as closely as possible). Sponsored by: Cisco Systems, Inc. --- sys/fs/nfsclient/nfs_clport.c | 6 +- sys/fs/nfsclient/nfs_clvfsops.c | 4 +- sys/net/if_faith.c | 2 +- sys/netinet/in.c | 2 +- sys/netinet/tcp_subr.c | 2 +- sys/netinet6/icmp6.c | 18 +-- sys/netinet6/in6.c | 76 ++++++------- sys/netinet6/in6_gif.c | 7 +- sys/netinet6/in6_ifattach.c | 24 ++-- sys/netinet6/in6_mcast.c | 9 +- sys/netinet6/in6_rmx.c | 84 ++++++++------ sys/netinet6/in6_src.c | 52 ++++++--- sys/netinet6/ip6_forward.c | 2 +- sys/netinet6/ip6_input.c | 2 +- sys/netinet6/ip6_output.c | 20 ++-- sys/netinet6/ip6_var.h | 3 + sys/netinet6/nd6.c | 5 +- sys/netinet6/nd6_nbr.c | 7 +- sys/netinet6/nd6_rtr.c | 188 ++++++++++++++++++++------------ sys/netipsec/ipsec_output.c | 2 +- sys/nfs/bootp_subr.c | 7 +- sys/nfsclient/nfs_vfsops.c | 4 +- 22 files changed, 307 insertions(+), 219 deletions(-) diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index 53bf8b342fc..c8c5bf54b4c 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -976,7 +976,8 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) sad.sin_len = sizeof (struct sockaddr_in); sad.sin_addr.s_addr = sin->sin_addr.s_addr; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); - rt = rtalloc1((struct sockaddr *)&sad, 0, 0UL); + rt = rtalloc1_fib((struct sockaddr *)&sad, 0, 0UL, + curthread->td_proc->p_fibnum); if (rt != NULL) { if (rt->rt_ifp != NULL && rt->rt_ifa != NULL && @@ -1001,7 +1002,8 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) sad6.sin6_len = sizeof (struct sockaddr_in6); sad6.sin6_addr = sin6->sin6_addr; CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); - rt = rtalloc1((struct sockaddr *)&sad6, 0, 0UL); + rt = rtalloc1_fib((struct sockaddr *)&sad6, 0, 0UL, + curthread->td_proc->p_fibnum); if (rt != NULL) { if (rt->rt_ifp != NULL && rt->rt_ifa != NULL && diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index ce128afc6a6..04d8e674f85 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -459,10 +459,10 @@ nfs_mountroot(struct mount *mp) sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ CURVNET_SET(TD_TO_VNET(td)); - error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, + error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, - RTF_UP | RTF_GATEWAY, NULL); + RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); diff --git a/sys/net/if_faith.c b/sys/net/if_faith.c index ca8088362a9..db4c1d99e8d 100644 --- a/sys/net/if_faith.c +++ b/sys/net/if_faith.c @@ -338,7 +338,7 @@ faithprefix(in6) sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *in6; - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB); if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH && (rt->rt_ifp->if_flags & IFF_UP) != 0) ret = 1; diff --git a/sys/netinet/in.c b/sys/netinet/in.c index ee9259c7431..58cd062cede 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -879,7 +879,7 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, bzero(&ia_ro, sizeof(ia_ro)); *((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr; - rtalloc_ign_fib(&ia_ro, 0, 0); + rtalloc_ign_fib(&ia_ro, 0, RT_DEFAULT_FIB); if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) && (ia_ro.ro_rt->rt_ifp == V_loif)) { RT_LOCK(ia_ro.ro_rt); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 46b4022cd29..1270f1a5ef9 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1750,7 +1750,7 @@ tcp_maxmtu6(struct in_conninfo *inc, int *flags) sro6.ro_dst.sin6_family = AF_INET6; sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); sro6.ro_dst.sin6_addr = inc->inc6_faddr; - rtalloc_ign((struct route *)&sro6, 0); + in6_rtalloc_ign(&sro6, 0, inc->inc_fibnum); } if (sro6.ro_rt != NULL) { ifp = sro6.ro_rt->rt_ifp; diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 7d7467a5d67..967d01a43a5 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -360,7 +360,7 @@ icmp6_error(struct mbuf *m, int type, int code, int param) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); - M_PREPEND(m, preplen, M_DONTWAIT); + M_PREPEND(m, preplen, M_DONTWAIT); /* FIB is also copied over. */ if (m && m->m_len < preplen) m = m_pullup(m, preplen); if (m == NULL) { @@ -584,7 +584,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) MGETHDR(n, M_DONTWAIT, n0->m_type); n0len = n0->m_pkthdr.len; /* save for use below */ if (n) - M_MOVE_PKTHDR(n, n0); + M_MOVE_PKTHDR(n, n0); /* FIB copied. */ if (n && maxlen >= MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { @@ -1502,7 +1502,7 @@ ni6_input(struct mbuf *m, int off) m_freem(m); return (NULL); } - M_MOVE_PKTHDR(n, m); /* just for recvif */ + M_MOVE_PKTHDR(n, m); /* just for recvif and FIB */ if (replylen > MHLEN) { if (replylen > MCLBYTES) { /* @@ -2414,7 +2414,7 @@ icmp6_redirect_input(struct mbuf *m, int off) sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6)); - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB); if (rt) { if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) { @@ -2501,6 +2501,7 @@ icmp6_redirect_input(struct mbuf *m, int off) struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; + u_int fibnum; bzero(&sdst, sizeof(sdst)); bzero(&sgw, sizeof(sgw)); @@ -2511,9 +2512,11 @@ icmp6_redirect_input(struct mbuf *m, int off) bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); - rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw, - (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST, - (struct sockaddr *)&ssrc); + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) + in6_rtredirect((struct sockaddr *)&sdst, + (struct sockaddr *)&sgw, (struct sockaddr *)NULL, + RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&ssrc, + fibnum); } /* finally update cached route in each socket via pfctlinput */ { @@ -2598,6 +2601,7 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) MCLGET(m, M_DONTWAIT); if (!m) goto fail; + M_SETFIB(m, rt->rt_fibnum); m->m_pkthdr.rcvif = NULL; m->m_len = 0; maxlen = M_TRAILINGSPACE(m); diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 4bbc4ee4e5b..0b22a1c4625 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -180,6 +180,7 @@ in6_ifaddloop(struct ifaddr *ifa) rt_mask(&rt) = (struct sockaddr *)&mask; rt_key(&rt) = (struct sockaddr *)&addr; rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC; + /* Announce arrival of local address to all FIBs. */ rt_newaddrmsg(RTM_ADD, ifa, 0, &rt); } @@ -214,6 +215,7 @@ in6_ifremloop(struct ifaddr *ifa) rt_mask(&rt0) = (struct sockaddr *)&mask; rt_key(&rt0) = (struct sockaddr *)&addr; rt0.rt_flags = RTF_HOST | RTF_STATIC; + /* Announce removal of local address to all FIBs. */ rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0); } @@ -282,6 +284,11 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, switch (cmd) { case SIOCGETSGCNT_IN6: case SIOCGETMIFCNT_IN6: + /* + * XXX mrt_ioctl has a 3rd, unused, FIB argument in route.c. + * We cannot see how that would be needed, so do not adjust the + * KPI blindly; more likely should clean up the IPv4 variant. + */ return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP); } @@ -820,6 +827,7 @@ out: return (error); } + /* * Join necessary multicast groups. Factored out from in6_update_ifa(). * This entire work should only be done once, for the default FIB. @@ -890,7 +898,7 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, * reconsider this stuff. Most applications actually do not need the * routes, since they usually specify the outgoing interface. */ - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL) { /* XXX: only works in !SCOPEDROUTING case. */ if (memcmp(&mltaddr.sin6_addr, @@ -901,10 +909,10 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, } } if (rt == NULL) { - error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); + (struct rtentry **)0, RT_DEFAULT_FIB); if (error) goto cleanup; } else @@ -950,7 +958,7 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) goto cleanup; /* XXX: should not fail */ /* XXX: again, do we really need the route? */ - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL) { if (memcmp(&mltaddr.sin6_addr, &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, @@ -960,10 +968,10 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, } } if (rt == NULL) { - error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, + error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); + (struct rtentry **)0, RT_DEFAULT_FIB); if (error) goto cleanup; } else @@ -1254,8 +1262,7 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, /* * Perform DAD, if needed. - * XXX It may be of use, if we can administratively - * disable DAD. + * XXX It may be of use, if we can administratively disable DAD. */ if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && (ia->ia6_flags & IN6_IFF_TENTATIVE)) @@ -1349,7 +1356,7 @@ in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) return (error); - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL && rt->rt_gateway != NULL && (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, &ia->ia_addr.sin6_addr, @@ -1362,11 +1369,11 @@ in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, sizeof(mltaddr.sin6_addr)); RTFREE_LOCKED(rt); - error = rtrequest(RTM_DELETE, + error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); + (struct rtentry **)0, RT_DEFAULT_FIB); if (error) log(LOG_INFO, "%s: link-local all-nodes " "multicast address deletion error\n", @@ -1398,7 +1405,7 @@ in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) return (error); - rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL && rt->rt_gateway != NULL && (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, &ia->ia_addr.sin6_addr, @@ -1412,11 +1419,11 @@ in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) sizeof(mltaddr.sin6_addr)); RTFREE_LOCKED(rt); - error = rtrequest(RTM_DELETE, + error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0); + (struct rtentry **)0, RT_DEFAULT_FIB); if (error) log(LOG_INFO, "%s: node-local all-nodes" "multicast address deletion error\n", @@ -1489,6 +1496,7 @@ in6_purgeaddr(struct ifaddr *ifa) /* stop DAD processing */ nd6_dad_stop(ifa); + /* Remove local address entry from lltable. */ in6_ifremloop(ifa); /* Leave multicast groups. */ @@ -1499,25 +1507,11 @@ in6_purgeaddr(struct ifaddr *ifa) plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if ((ia->ia_flags & IFA_ROUTE) && plen == 128) { - int error; - struct sockaddr *dstaddr; - - /* - * use the interface address if configuring an - * interface address with a /128 prefix len - */ - if (ia->ia_dstaddr.sin6_family == AF_INET6) - dstaddr = (struct sockaddr *)&ia->ia_dstaddr; - else - dstaddr = (struct sockaddr *)&ia->ia_addr; - - error = rtrequest(RTM_DELETE, - (struct sockaddr *)dstaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&ia->ia_prefixmask, - ia->ia_flags | RTF_HOST, NULL); + error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags | + (ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0); if (error != 0) - return; + log(LOG_INFO, "%s: err=%d, destination address delete " + "failed\n", __func__, error); ia->ia_flags &= ~IFA_ROUTE; } @@ -1849,8 +1843,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, } /* - * Initialize an interface's intetnet6 address - * and routing table entry. + * Initialize an interface's IPv6 address and routing table entry. */ static int in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, @@ -1900,13 +1893,8 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { int rtflags = RTF_UP | RTF_HOST; - - error = rtrequest(RTM_ADD, - (struct sockaddr *)&ia->ia_dstaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&ia->ia_prefixmask, - ia->ia_flags | rtflags, NULL); - if (error != 0) + error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags); + if (error) return (error); ia->ia_flags |= IFA_ROUTE; /* @@ -1926,7 +1914,7 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, ia->ia_flags |= IFA_RTSELF; } - /* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */ + /* Add local address to lltable, if necessary (ex. on p2p link). */ if (newhost) in6_ifaddloop(&(ia->ia_ifa)); @@ -2529,8 +2517,10 @@ in6_lltable_rtcheck(struct ifnet *ifp, KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); + /* Our local addresses are always only installed on the default FIB. */ /* XXX rtalloc1 should take a const param */ - rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0); + rt = in6_rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0, + RT_DEFAULT_FIB); if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { struct ifaddr *ifa; /* diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index 961fc77fb58..c329e11dd16 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -228,6 +228,8 @@ in6_gif_output(struct ifnet *ifp, ip6->ip6_flow &= ~htonl(0xff << 20); ip6->ip6_flow |= htonl((u_int32_t)otos << 20); + M_SETFIB(m, sc->gif_fibnum); + if (dst->sin6_family != sin6_dst->sin6_family || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) { /* cache route doesn't match */ @@ -245,7 +247,7 @@ in6_gif_output(struct ifnet *ifp, } if (sc->gif_ro6.ro_rt == NULL) { - rtalloc((struct route *)&sc->gif_ro6); + in6_rtalloc(&sc->gif_ro6, sc->gif_fibnum); if (sc->gif_ro6.ro_rt == NULL) { m_freem(m); return ENETUNREACH; @@ -404,7 +406,8 @@ gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc, sin6.sin6_addr = ip6->ip6_src; sin6.sin6_scope_id = 0; /* XXX */ - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, + sc->gif_fibnum); if (!rt || rt->rt_ifp != ifp) { #if 0 char ip6buf[INET6_ADDRSTRLEN]; diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index f8134caea34..28ec920de81 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -790,7 +790,6 @@ in6_ifdetach(struct ifnet *ifp) struct ifaddr *ifa, *next; struct radix_node_head *rnh; struct rtentry *rt; - short rtflags; struct sockaddr_in6 sin6; struct in6_multi_mship *imm; @@ -821,16 +820,9 @@ in6_ifdetach(struct ifnet *ifp) in6_leavegroup(imm); } - /* remove from the routing table */ - if ((ia->ia_flags & IFA_ROUTE) && - (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) { - rtflags = rt->rt_flags; - RTFREE_LOCKED(rt); - rtrequest(RTM_DELETE, (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&ia->ia_prefixmask, - rtflags, (struct rtentry **)0); - } + /* Remove link-local from the routing table. */ + if (ia->ia_flags & IFA_ROUTE) + (void)rtinit(&ia->ia_ifa, RTM_DELETE, ia->ia_flags); /* remove from the linked list */ IF_ADDR_WLOCK(ifp); @@ -859,7 +851,10 @@ in6_ifdetach(struct ifnet *ifp) */ nd6_purge(ifp); - /* remove route to link-local allnodes multicast (ff02::1) */ + /* + * Remove route to link-local allnodes multicast (ff02::1). + * These only get automatically installed for the default FIB. + */ bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; @@ -868,10 +863,11 @@ in6_ifdetach(struct ifnet *ifp) /* XXX: should not fail */ return; /* XXX grab lock first to avoid LOR */ - rnh = rt_tables_get_rnh(0, AF_INET6); + rnh = rt_tables_get_rnh(RT_DEFAULT_FIB, AF_INET6); if (rnh != NULL) { RADIX_NODE_HEAD_LOCK(rnh); - rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED); + rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED, + RT_DEFAULT_FIB); if (rt) { if (rt->rt_ifp == ifp) rtexpunge(rt); diff --git a/sys/netinet6/in6_mcast.c b/sys/netinet6/in6_mcast.c index e53597bacb0..ce23aa8c34c 100644 --- a/sys/netinet6/in6_mcast.c +++ b/sys/netinet6/in6_mcast.c @@ -1764,7 +1764,7 @@ ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt) * Returns NULL if no ifp could be found. */ static struct ifnet * -in6p_lookup_mcast_ifp(const struct inpcb *in6p __unused, +in6p_lookup_mcast_ifp(const struct inpcb *in6p, const struct sockaddr_in6 *gsin6) { struct route_in6 ro6; @@ -1780,11 +1780,8 @@ in6p_lookup_mcast_ifp(const struct inpcb *in6p __unused, ifp = NULL; memset(&ro6, 0, sizeof(struct route_in6)); memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6)); -#ifdef notyet - rtalloc_ign_fib(&ro6, 0, inp ? inp->inp_inc.inc_fibnum : 0); -#else - rtalloc_ign((struct route *)&ro6, 0); -#endif + rtalloc_ign_fib((struct route *)&ro6, 0, + in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB); if (ro6.ro_rt != NULL) { ifp = ro6.ro_rt->rt_ifp; KASSERT(ifp != NULL, ("%s: null ifp", __func__)); diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index 16091ed2872..b5260308d60 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -168,7 +168,8 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * net route entry, 3ffe:0501:: -> if0. * This case should not raise an error. */ - rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED); + rt2 = in6_rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED, + rt->rt_fibnum); if (rt2) { if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0) && rt2->rt_gateway @@ -255,10 +256,11 @@ in6_rtqkill(struct radix_node *rn, void *rock) if (rt->rt_refcnt > 0) panic("rtqkill route really not free"); - err = rtrequest(RTM_DELETE, + err = in6_rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), - rt->rt_flags|RTF_RNH_LOCKED, 0); + rt->rt_flags|RTF_RNH_LOCKED, 0, + rt->rt_fibnum); if (err) { log(LOG_WARNING, "in6_rtqkill: error %d", err); } else { @@ -287,19 +289,11 @@ static VNET_DEFINE(struct callout, rtq_timer6); #define V_rtq_timer6 VNET(rtq_timer6) static void -in6_rtqtimo(void *rock) +in6_rtqtimo_one(struct radix_node_head *rnh) { - CURVNET_SET_QUIET((struct vnet *) rock); - struct radix_node_head *rnh; struct rtqk_arg arg; - struct timeval atv; static time_t last_adjusted_timeout = 0; - rnh = rt_tables_get_rnh(0, AF_INET6); - if (rnh == NULL) { - CURVNET_RESTORE(); - return; - } arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = time_uptime + V_rtq_timeout6; @@ -335,9 +329,24 @@ in6_rtqtimo(void *rock) rnh->rnh_walktree(rnh, in6_rtqkill, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); } +} + +static void +in6_rtqtimo(void *rock) +{ + CURVNET_SET_QUIET((struct vnet *) rock); + struct radix_node_head *rnh; + struct timeval atv; + u_int fibnum; + + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rnh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rnh != NULL) + in6_rtqtimo_one(rnh); + } atv.tv_usec = 0; - atv.tv_sec = arg.nextstop - time_uptime; + atv.tv_sec = V_rtq_timeout6; callout_reset(&V_rtq_timer6, tvtohz(&atv), in6_rtqtimo, rock); CURVNET_RESTORE(); } @@ -377,31 +386,33 @@ in6_mtuexpire(struct radix_node *rn, void *rock) #define MTUTIMO_DEFAULT (60*1) static void -in6_mtutimo(void *rock) +in6_mtutimo_one(struct radix_node_head *rnh) { - CURVNET_SET_QUIET((struct vnet *) rock); - struct radix_node_head *rnh; struct mtuex_arg arg; - struct timeval atv; - rnh = rt_tables_get_rnh(0, AF_INET6); - if (rnh == NULL) { - CURVNET_RESTORE(); - return; - } arg.rnh = rnh; arg.nextstop = time_uptime + MTUTIMO_DEFAULT; RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, in6_mtuexpire, &arg); RADIX_NODE_HEAD_UNLOCK(rnh); +} - atv.tv_usec = 0; - atv.tv_sec = arg.nextstop - time_uptime; - if (atv.tv_sec < 0) { - printf("invalid mtu expiration time on routing table\n"); - arg.nextstop = time_uptime + 30; /* last resort */ - atv.tv_sec = 30; +static void +in6_mtutimo(void *rock) +{ + CURVNET_SET_QUIET((struct vnet *) rock); + struct radix_node_head *rnh; + struct timeval atv; + u_int fibnum; + + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rnh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rnh != NULL) + in6_mtutimo_one(rnh); } + + atv.tv_sec = MTUTIMO_DEFAULT; + atv.tv_usec = 0; callout_reset(&V_rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock); CURVNET_RESTORE(); } @@ -413,6 +424,9 @@ in6_mtutimo(void *rock) * value should be so just use that).. FIX AFTER RELENG_7 is MFC'd * see also comments in in_inithead() vfs_export.c and domain.h */ +static VNET_DEFINE(int, _in6_rt_was_here); +#define V__in6_rt_was_here VNET(_in6_rt_was_here) + int in6_inithead(void **head, int off) { @@ -425,13 +439,17 @@ in6_inithead(void **head, int off) return 1; /* only do the rest for the real thing */ rnh = *head; - KASSERT(rnh == rt_tables_get_rnh(0, AF_INET6), ("rnh?")); rnh->rnh_addaddr = in6_addroute; rnh->rnh_matchaddr = in6_matroute; - callout_init(&V_rtq_timer6, CALLOUT_MPSAFE); - callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE); - in6_rtqtimo(curvnet); /* kick off timeout first time */ - in6_mtutimo(curvnet); /* kick off timeout first time */ + + if (V__in6_rt_was_here == 0) { + callout_init(&V_rtq_timer6, CALLOUT_MPSAFE); + callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE); + in6_rtqtimo(curvnet); /* kick off timeout first time */ + in6_mtutimo(curvnet); /* kick off timeout first time */ + V__in6_rt_was_here = 1; + } + return 1; } diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index 2b3493e63a7..295ea715e36 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -129,9 +129,9 @@ VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, int)); + struct rtentry **, int, int)); static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, - struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); + struct ip6_moptions *, struct route_in6 *ro, struct ifnet **, int)); static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); @@ -217,7 +217,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct in6_ifaddr *ia6; /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) + if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, + (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) + != 0) return (error); /* @@ -281,7 +283,8 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * the outgoing interface and the destination address. */ /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) + if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, + (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) return (error); #ifdef DIAGNOSTIC @@ -504,7 +507,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, static int selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, int norouteok) + struct ifnet **retifp, struct rtentry **retrt, int norouteok, int fibnum) { int error = 0; struct ifnet *ifp = NULL; @@ -581,7 +584,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, if (ron->ro_rt == NULL) { - rtalloc((struct route *)ron); /* multi path case? */ + in6_rtalloc(ron, fibnum); /* multi path case? */ if (ron->ro_rt == NULL) { if (ron->ro_rt) { RTFREE(ron->ro_rt); @@ -616,7 +619,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, *satosin6(&ron->ro_dst) = *sin6_next; } if (ron->ro_rt == NULL) { - rtalloc((struct route *)ron); /* multi path case? */ + in6_rtalloc(ron); /* multi path case? */ if (ron->ro_rt == NULL || !(ron->ro_rt->rt_flags & RTF_LLINFO)) { if (ron->ro_rt) { @@ -661,11 +664,11 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, sa6->sin6_scope_id = 0; #ifdef RADIX_MPATH - rtalloc_mpath((struct route *)ro, - ntohl(sa6->sin6_addr.s6_addr32[3])); + rtalloc_mpath_fib((struct route *)ro, + ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum); #else - ro->ro_rt = rtalloc1(&((struct route *)ro) - ->ro_dst, 0, 0UL); + ro->ro_rt = in6_rtalloc1((struct sockaddr *) + &ro->ro_dst, 0, 0UL, fibnum); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); #endif @@ -746,7 +749,8 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, static int in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp) + struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, + int fibnum) { int error; struct route_in6 sro; @@ -758,7 +762,7 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } if ((error = selectroute(dstsock, opts, mopts, ro, retifp, - &rt, 1)) != 0) { + &rt, 1, fibnum)) != 0) { if (ro == &sro && rt && rt == sro.ro_rt) RTFREE(rt); return (error); @@ -795,7 +799,10 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } /* - * clone - meaningful only for bsdi and freebsd + * Public wrapper function to selectroute(). + * + * XXX-BZ in6_selectroute() should and will grow the FIB argument. The + * in6_selectroute_fib() function is only there for backward compat on stable. */ int in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, @@ -804,9 +811,21 @@ in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, { return (selectroute(dstsock, opts, mopts, ro, retifp, - retrt, 0)); + retrt, 0, RT_DEFAULT_FIB)); } +#ifndef BURN_BRIDGES +int +in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct ip6_moptions *mopts, struct route_in6 *ro, + struct ifnet **retifp, struct rtentry **retrt, int fibnum) +{ + + return (selectroute(dstsock, opts, mopts, ro, retifp, + retrt, 0, fibnum)); +} +#endif + /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit value specified via ioctl. @@ -830,7 +849,8 @@ in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) ro6.ro_dst.sin6_family = AF_INET6; ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); ro6.ro_dst.sin6_addr = in6p->in6p_faddr; - rtalloc((struct route *)&ro6); + in6_rtalloc(&ro6, in6p ? in6p->inp_inc.inc_fibnum : + RT_DEFAULT_FIB); if (ro6.ro_rt) { lifp = ro6.ro_rt->rt_ifp; RTFREE(ro6.ro_rt); diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index 77cb9266307..bae418ae2af 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -362,7 +362,7 @@ again: #ifdef IPFIREWALL_FORWARD again2: #endif - rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0); + rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); if (rin6.ro_rt != NULL) RT_UNLOCK(rin6.ro_rt); else { diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 245f8f4bc03..b733af4ef37 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -666,7 +666,7 @@ passin: dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; - rin6.ro_rt = rtalloc1((struct sockaddr *)dst, 0, 0); + rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); if (rin6.ro_rt) RT_UNLOCK(rin6.ro_rt); diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 5563d2b8f87..43a236d8ff5 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -142,7 +142,7 @@ static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int, static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *, - struct ifnet *, struct in6_addr *, u_long *, int *)); + struct ifnet *, struct in6_addr *, u_long *, int *, int)); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); @@ -241,6 +241,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, goto bad; } + if (inp != NULL) + M_SETFIB(m, inp->inp_inc.inc_fibnum); + finaldst = ip6->ip6_dst; bzero(&exthdrs, sizeof(exthdrs)); if (opt) { @@ -604,8 +607,8 @@ again: if (flevalid) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; - } else if ((error = in6_selectroute(&dst_sa, opt, im6o, ro, - &ifp, &rt)) != 0) { + } else if ((error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, + &ifp, &rt, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0) { switch (error) { case EHOSTUNREACH: V_ip6stat.ip6s_noroute++; @@ -773,7 +776,7 @@ again: /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, - &alwaysfrag)) != 0) + &alwaysfrag, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0) goto bad; /* @@ -1064,7 +1067,7 @@ passout: goto sendorfree; } m->m_pkthdr.rcvif = NULL; - m->m_flags = m0->m_flags & M_COPYFLAGS; + m->m_flags = m0->m_flags & M_COPYFLAGS; /* incl. FIB */ *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; @@ -1321,7 +1324,7 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, static int ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, - int *alwaysfragp) + int *alwaysfragp, int fibnum) { u_int32_t mtu = 0; int alwaysfrag = 0; @@ -1343,7 +1346,7 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; - rtalloc((struct route *)ro_pmtu); + in6_rtalloc(ro_pmtu, fibnum); } } if (ro_pmtu->ro_rt) { @@ -1981,7 +1984,8 @@ do { \ * the outgoing interface. */ error = ip6_getpmtu(&sro, NULL, NULL, - &in6p->in6p_faddr, &pmtu, NULL); + &in6p->in6p_faddr, &pmtu, NULL, + so->so_fibnum); if (sro.ro_rt) RTFREE(sro.ro_rt); if (error) diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index c9d35e0b03d..10dcbb38bab 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -445,6 +445,9 @@ int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *, int in6_selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **)); +int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *, + struct ip6_moptions *, struct route_in6 *, struct ifnet **, + struct rtentry **, int); u_int32_t ip6_randomid __P((void)); u_int32_t ip6_randomflowlabel __P((void)); #endif /* _KERNEL */ diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index ec523fce2a9..bf2cd530d5f 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -900,7 +900,10 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { struct rtentry *rt; - rt = rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0); + + /* Always use the default FIB here. */ + rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix, + 0, 0, RT_DEFAULT_FIB); if (rt == NULL) continue; /* diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index a2aaeeacb8f..290d5fba4b2 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -242,13 +242,16 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) tsin6.sin6_family = AF_INET6; tsin6.sin6_addr = taddr6; + /* Always use the default FIB. */ #ifdef RADIX_MPATH bzero(&ro, sizeof(ro)); ro.ro_dst = tsin6; - rtalloc_mpath((struct route *)&ro, RTF_ANNOUNCE); + rtalloc_mpath_fib((struct route *)&ro, RTF_ANNOUNCE, + RT_DEFAULT_FIB); rt = ro.ro_rt; #else - rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0); + rt = in6_rtalloc1((struct sockaddr *)&tsin6, 0, 0, + RT_DEFAULT_FIB); #endif need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 && rt->rt_gateway->sa_family == AF_LINK); diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index e77767f773c..792b5c99365 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -463,7 +463,7 @@ nd6_rtmsg(int cmd, struct rtentry *rt) } else ifa = NULL; - rt_missmsg(cmd, &info, rt->rt_flags, 0); + rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum); if (ifa != NULL) ifa_free(ifa); } @@ -486,9 +486,9 @@ defrouter_addreq(struct nd_defrouter *new) gate.sin6_addr = new->rtaddr; s = splnet(); - error = rtrequest(RTM_ADD, (struct sockaddr *)&def, + error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, - RTF_GATEWAY, &newrt); + RTF_GATEWAY, &newrt, RT_DEFAULT_FIB); if (newrt) { nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ RTFREE(newrt); @@ -532,9 +532,9 @@ defrouter_delreq(struct nd_defrouter *dr) def.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; - rtrequest(RTM_DELETE, (struct sockaddr *)&def, + in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, - (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt); + (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB); if (oldrt) { nd6_rtmsg(RTM_DELETE, oldrt); RTFREE(oldrt); @@ -1540,19 +1540,92 @@ pfxlist_onlink_check() } } +static int +nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) +{ + static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; + struct radix_node_head *rnh; + struct rtentry *rt; + struct sockaddr_in6 mask6; + u_long rtflags; + int error, a_failure, fibnum; + + /* + * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs. + * ifa->ifa_rtrequest = nd6_rtrequest; + */ + bzero(&mask6, sizeof(mask6)); + mask6.sin6_len = sizeof(mask6); + mask6.sin6_addr = pr->ndpr_mask; + rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP; + + a_failure = 0; + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + + rt = NULL; + error = in6_rtrequest(RTM_ADD, + (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr, + (struct sockaddr *)&mask6, rtflags, &rt, fibnum); + if (error == 0) { + KASSERT(rt != NULL, ("%s: in6_rtrequest return no " + "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__, + error, pr, ifa)); + + rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6); + /* XXX what if rhn == NULL? */ + RADIX_NODE_HEAD_LOCK(rnh); + RT_LOCK(rt); + if (rt_setgate(rt, rt_key(rt), + (struct sockaddr *)&null_sdl) == 0) { + struct sockaddr_dl *dl; + + dl = (struct sockaddr_dl *)rt->rt_gateway; + dl->sdl_type = rt->rt_ifp->if_type; + dl->sdl_index = rt->rt_ifp->if_index; + } + RADIX_NODE_HEAD_UNLOCK(rnh); + nd6_rtmsg(RTM_ADD, rt); + RT_UNLOCK(rt); + pr->ndpr_stateflags |= NDPRF_ONLINK; + } else { + char ip6buf[INET6_ADDRSTRLEN]; + char ip6bufg[INET6_ADDRSTRLEN]; + char ip6bufm[INET6_ADDRSTRLEN]; + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; + nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add " + "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, " + "flags=%lx errno = %d\n", + ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(pr->ndpr_ifp), + ip6_sprintf(ip6bufg, &sin6->sin6_addr), + ip6_sprintf(ip6bufm, &mask6.sin6_addr), + rtflags, error)); + + /* Save last error to return, see rtinit(). */ + a_failure = error; + } + + if (rt != NULL) { + RT_LOCK(rt); + RT_REMREF(rt); + RT_UNLOCK(rt); + } + } + + /* Return the last error we got. */ + return (a_failure); +} + static int nd6_prefix_onlink(struct nd_prefix *pr) { struct ifaddr *ifa; struct ifnet *ifp = pr->ndpr_ifp; - struct sockaddr_in6 mask6; struct nd_prefix *opr; - u_long rtflags; int error = 0; - struct radix_node_head *rnh; - struct rtentry *rt = NULL; char ip6buf[INET6_ADDRSTRLEN]; - struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; /* sanity check */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { @@ -1616,49 +1689,8 @@ nd6_prefix_onlink(struct nd_prefix *pr) return (0); } - /* - * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs. - * ifa->ifa_rtrequest = nd6_rtrequest; - */ - bzero(&mask6, sizeof(mask6)); - mask6.sin6_len = sizeof(mask6); - mask6.sin6_addr = pr->ndpr_mask; - rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP; - error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix, - ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt); - if (error == 0) { - if (rt != NULL) /* this should be non NULL, though */ { - rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6); - /* XXX what if rhn == NULL? */ - RADIX_NODE_HEAD_LOCK(rnh); - RT_LOCK(rt); - if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) { - ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = - rt->rt_ifp->if_type; - ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = - rt->rt_ifp->if_index; - } - RADIX_NODE_HEAD_UNLOCK(rnh); - nd6_rtmsg(RTM_ADD, rt); - RT_UNLOCK(rt); - } - pr->ndpr_stateflags |= NDPRF_ONLINK; - } else { - char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN]; - nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a" - " prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx " - "errno = %d\n", - ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), - pr->ndpr_plen, if_name(ifp), - ip6_sprintf(ip6bufg, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr), - ip6_sprintf(ip6bufm, &mask6.sin6_addr), rtflags, error)); - } + error = nd6_prefix_onlink_rtrequest(pr, ifa); - if (rt != NULL) { - RT_LOCK(rt); - RT_REMREF(rt); - RT_UNLOCK(rt); - } if (ifa != NULL) ifa_free(ifa); @@ -1672,8 +1704,9 @@ nd6_prefix_offlink(struct nd_prefix *pr) struct ifnet *ifp = pr->ndpr_ifp; struct nd_prefix *opr; struct sockaddr_in6 sa6, mask6; - struct rtentry *rt = NULL; + struct rtentry *rt; char ip6buf[INET6_ADDRSTRLEN]; + int fibnum, a_failure; /* sanity check */ if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { @@ -1693,15 +1726,28 @@ nd6_prefix_offlink(struct nd_prefix *pr) mask6.sin6_family = AF_INET6; mask6.sin6_len = sizeof(sa6); bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr)); - error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, - (struct sockaddr *)&mask6, 0, &rt); + + a_failure = 0; + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rt = NULL; + error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, + (struct sockaddr *)&mask6, 0, &rt, fibnum); + if (error == 0) { + /* report the route deletion to the routing socket. */ + if (rt != NULL) + nd6_rtmsg(RTM_DELETE, rt); + } else { + /* Save last error to return, see rtinit(). */ + a_failure = error; + } + if (rt != NULL) { + RTFREE(rt); + } + } + error = a_failure; if (error == 0) { pr->ndpr_stateflags &= ~NDPRF_ONLINK; - /* report the route deletion to the routing socket. */ - if (rt != NULL) - nd6_rtmsg(RTM_DELETE, rt); - /* * There might be the same prefix on another interface, * the prefix which could not be on-link just because we have @@ -1749,10 +1795,6 @@ nd6_prefix_offlink(struct nd_prefix *pr) if_name(ifp), error)); } - if (rt != NULL) { - RTFREE(rt); - } - return (error); } @@ -2069,6 +2111,7 @@ void rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) { struct radix_node_head *rnh; + u_int fibnum; int s = splnet(); /* We'll care only link-local addresses */ @@ -2077,13 +2120,16 @@ rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) return; } - rnh = rt_tables_get_rnh(0, AF_INET6); - if (rnh == NULL) - return; + /* XXX Do we really need to walk any but the default FIB? */ + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { + rnh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rnh == NULL) + continue; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); - RADIX_NODE_HEAD_UNLOCK(rnh); + RADIX_NODE_HEAD_LOCK(rnh); + rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); + RADIX_NODE_HEAD_UNLOCK(rnh); + } splx(s); } @@ -2116,8 +2162,8 @@ rt6_deleteroute(struct radix_node *rn, void *arg) if ((rt->rt_flags & RTF_HOST) == 0) return (0); - return (rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, - rt_mask(rt), rt->rt_flags, 0)); + return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, + rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum)); #undef SIN6 } diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 77897ed2429..38268f7ae1f 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -867,7 +867,7 @@ ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp, int dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = ip6->ip6_dst; - rtalloc(state->ro); + rtalloc_ign_fib(state->ro, 0UL, M_GETFIB(m)); } if (state->ro->ro_rt == NULL) { V_ip6stat.ip6s_noroute++; diff --git a/sys/nfs/bootp_subr.c b/sys/nfs/bootp_subr.c index 1e5e0668401..b29a0e962bf 100644 --- a/sys/nfs/bootp_subr.c +++ b/sys/nfs/bootp_subr.c @@ -1046,10 +1046,9 @@ bootpc_adjust_interface(struct bootpc_ifcontext *ifctx, clear_sinaddr(&defmask); /* XXX MRT just table 0 */ error = rtrequest_fib(RTM_ADD, - (struct sockaddr *) &defdst, - (struct sockaddr *) gw, - (struct sockaddr *) &defmask, - (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL, 0); + (struct sockaddr *) &defdst, (struct sockaddr *) gw, + (struct sockaddr *) &defmask, + (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL, RT_DEFAULT_FIB); if (error != 0) { printf("%s: RTM_ADD, error=%d\n", __func__, error); return (error); diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c index 58384e86a1b..8f1be3a45da 100644 --- a/sys/nfsclient/nfs_vfsops.c +++ b/sys/nfsclient/nfs_vfsops.c @@ -510,10 +510,10 @@ nfs_mountroot(struct mount *mp) sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ CURVNET_SET(TD_TO_VNET(td)); - error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, + error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, - RTF_UP | RTF_GATEWAY, NULL); + RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); From cebf3b1b6a5aa87044c0415965adefba19000660 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 13:12:42 +0000 Subject: [PATCH 10/18] Make ipfw verify_path6() multi-FIB aware. The "fib" and "setfib" keywords implementations need no adjustments. Sponsored by: Cisco Systems, Inc. --- sys/netinet/ipfw/ip_fw2.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c index 12a7fb9594c..1b16bc69314 100644 --- a/sys/netinet/ipfw/ip_fw2.c +++ b/sys/netinet/ipfw/ip_fw2.c @@ -496,7 +496,7 @@ search_ip6_addr_net (struct in6_addr * ip6_addr) } static int -verify_path6(struct in6_addr *src, struct ifnet *ifp) +verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) { struct route_in6 ro; struct sockaddr_in6 *dst; @@ -507,9 +507,8 @@ verify_path6(struct in6_addr *src, struct ifnet *ifp) dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(*dst); dst->sin6_addr = *src; - /* XXX MRT 0 for ipv6 at this time */ - rtalloc_ign((struct route *)&ro, 0); + in6_rtalloc_ign(&ro, 0, fib); if (ro.ro_rt == NULL) return 0; @@ -1701,7 +1700,7 @@ do { \ #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), - m->m_pkthdr.rcvif) : + m->m_pkthdr.rcvif, args->f_id.fib) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, args->f_id.fib))); @@ -1713,7 +1712,7 @@ do { \ #ifdef INET6 is_ipv6 ? verify_path6(&(args->f_id.src_ip6), - NULL) : + NULL, args->f_id.fib) : #endif verify_path(src_ip, NULL, args->f_id.fib))); break; @@ -1731,7 +1730,8 @@ do { \ #ifdef INET6 is_ipv6 ? verify_path6( &(args->f_id.src_ip6), - m->m_pkthdr.rcvif) : + m->m_pkthdr.rcvif, + args->f_id.fib) : #endif verify_path(src_ip, m->m_pkthdr.rcvif, From 0e2181f57880a0791854190c1f0f8f243259ab08 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 13:20:48 +0000 Subject: [PATCH 11/18] Extend IPv6 routing lookups in pf(4) to use the new multi-FIB KPI. Try to make the "rtable" handling work but the current version of pf(4) does not fully support it yet as especially callers of PF_MISMATCHAW() are not fully FIB-aware. OpenBSD seems to have fixed this in a later version. Prepare as much as possible. Sponsored by: Cisco Systems, Inc. --- sys/contrib/pf/net/pf.c | 83 +++++++++++++++++++---------------- sys/contrib/pf/net/pf_ioctl.c | 4 +- sys/contrib/pf/net/pf_lb.c | 7 +-- sys/contrib/pf/net/pf_norm.c | 12 ++--- sys/contrib/pf/net/pfvar.h | 18 +++++--- 5 files changed, 69 insertions(+), 55 deletions(-) diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index 5c9641bc485..7058b7dca4d 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -320,7 +320,7 @@ u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, sa_family_t); u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, - u_int16_t); + int, u_int16_t); void pf_set_rt_ifp(struct pf_state *, struct pf_addr *); int pf_check_proto_cksum(struct mbuf *, int, int, @@ -3137,7 +3137,7 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) } u_int16_t -pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) +pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) { #ifdef INET struct sockaddr_in *dst; @@ -3166,11 +3166,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; #ifdef __FreeBSD__ -#ifdef RTF_PRCLONING - rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING)); -#else /* !RTF_PRCLONING */ - in_rtalloc_ign(&ro, 0, 0); -#endif + in_rtalloc_ign(&ro, 0, rtableid); #else /* ! __FreeBSD__ */ rtalloc_noclone(&ro, NO_CLONING); #endif @@ -3186,12 +3182,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; #ifdef __FreeBSD__ -#ifdef RTF_PRCLONING - rtalloc_ign((struct route *)&ro6, - (RTF_CLONING | RTF_PRCLONING)); -#else /* !RTF_PRCLONING */ - rtalloc_ign((struct route *)&ro6, 0); -#endif + in6_rtalloc_ign(&ro6, 0, rtableid); #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro6, NO_CLONING); #endif @@ -3532,14 +3523,14 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, saddr, af, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; /* tcp/udp only. port_op always 0 in other cases */ else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; /* tcp/udp only. port_op always 0 in other cases */ else if (r->dst.port_op && !pf_match_port(r->dst.port_op, @@ -3988,9 +3979,10 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, } s->src.seqhi = htonl(arc4random()); /* Find mss option */ + int rtid = M_GETFIB(m); mss = pf_get_mss(m, off, th->th_off, pd->af); - mss = pf_calc_mss(pd->src, pd->af, mss); - mss = pf_calc_mss(pd->dst, pd->af, mss); + mss = pf_calc_mss(pd->src, pd->af, rtid, mss); + mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); s->src.mss = mss; #ifdef __FreeBSD__ pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport, @@ -4072,10 +4064,10 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); @@ -5677,7 +5669,8 @@ pf_pull_hdr(struct mbuf *m, int off, void *p, int len, } int -pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, + int rtableid) { #ifdef __FreeBSD__ #ifdef RADIX_MPATH @@ -5751,13 +5744,21 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) goto out; #ifdef __FreeBSD__ -/* XXX MRT not always INET */ /* stick with table 0 though */ -#ifdef INET - if (af == AF_INET) - in_rtalloc_ign((struct route *)&ro, 0, 0); - else + switch (af) { +#ifdef INET6 + case AF_INET6: + in6_rtalloc_ign(&ro, 0, rtableid); + break; #endif - rtalloc_ign((struct route *)&ro, 0); +#ifdef INET + case AF_INET: + in_rtalloc_ign((struct route *)&ro, 0, rtableid); + break; +#endif + default: + rtalloc_ign((struct route *)&ro, 0); /* No/default FIB. */ + break; + } #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); #endif @@ -5803,7 +5804,8 @@ out: } int -pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) +pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, + int rtableid) { struct sockaddr_in *dst; #ifdef INET6 @@ -5835,16 +5837,21 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) } #ifdef __FreeBSD__ -# ifdef RTF_PRCLONING - rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING)); -# else /* !RTF_PRCLONING */ -#ifdef INET - if (af == AF_INET) - in_rtalloc_ign((struct route *)&ro, 0, 0); - else + switch (af) { +#ifdef INET6 + case AF_INET6: + in6_rtalloc_ign(&ro, 0, rtableid); + break; #endif +#ifdef INET + case AF_INET: + in_rtalloc_ign((struct route *)&ro, 0, rtableid); + break; +#endif + default: rtalloc_ign((struct route *)&ro, 0); -# endif + break; + } #else /* ! __FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); #endif @@ -5927,7 +5934,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (r->rt == PF_FASTROUTE) { #ifdef __FreeBSD__ - in_rtalloc(ro, 0); + in_rtalloc_ign(ro, 0, M_GETFIB(m0)); #else rtalloc(ro); #endif @@ -6893,7 +6900,7 @@ done: ("pf: dropping packet with ip options\n")); } - if ((s && s->tag) || r->rtableid) + if ((s && s->tag) || r->rtableid >= 0) #ifdef __FreeBSD__ pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); #else @@ -7437,7 +7444,7 @@ done: ("pf: dropping packet with dangerous v6 headers\n")); } - if ((s && s->tag) || r->rtableid) + if ((s && s->tag) || r->rtableid >= 0) #ifdef __FreeBSD__ pf_tag_packet(m, s ? s->tag : 0, r->rtableid, pd.pf_mtag); #else diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index e78c8ffec7b..468f3d76933 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -1754,7 +1754,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } #ifdef __FreeBSD__ /* ROUTING */ - if (rule->rtableid > 0 && rule->rtableid > rt_numfibs) + if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) #else if (rule->rtableid > 0 && !rtable_exists(rule->rtableid)) #endif @@ -2035,7 +2035,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (newrule->rtableid > 0 && #ifdef __FreeBSD__ /* ROUTING */ - newrule->rtableid > rt_numfibs) + newrule->rtableid >= rt_numfibs) #else !rtable_exists(newrule->rtableid)) #endif diff --git a/sys/contrib/pf/net/pf_lb.c b/sys/contrib/pf/net/pf_lb.c index f4c9a00844c..4adc6f00e8d 100644 --- a/sys/contrib/pf/net/pf_lb.c +++ b/sys/contrib/pf/net/pf_lb.c @@ -261,7 +261,7 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, - src->neg, kif)) + src->neg, kif, M_GETFIB(m))) r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : PF_SKIP_DST_ADDR].ptr; else if (src->port_op && !pf_match_port(src->port_op, @@ -269,10 +269,11 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : PF_SKIP_DST_PORT].ptr; else if (dst != NULL && - PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL, + M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, - 0, NULL)) + 0, NULL, M_GETFIB(m))) r = TAILQ_NEXT(r, entries); else if (dst != NULL && dst->port_op && !pf_match_port(dst->port_op, dst->port[0], diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c index 6c04eee99ff..2b20c85b246 100644 --- a/sys/contrib/pf/net/pf_norm.c +++ b/sys/contrib/pf/net/pf_norm.c @@ -1163,11 +1163,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, (struct pf_addr *)&h->ip_src.s_addr, AF_INET, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; #ifdef __FreeBSD__ else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag)) @@ -1428,11 +1428,11 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, #endif else if (PF_MISMATCHAW(&r->src.addr, (struct pf_addr *)&h->ip6_src, AF_INET6, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, (struct pf_addr *)&h->ip6_dst, AF_INET6, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; @@ -1593,13 +1593,13 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, - r->src.neg, kif)) + r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, - r->dst.neg, NULL)) + r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) diff --git a/sys/contrib/pf/net/pfvar.h b/sys/contrib/pf/net/pfvar.h index fe4c0ced945..85fef1d2322 100644 --- a/sys/contrib/pf/net/pfvar.h +++ b/sys/contrib/pf/net/pfvar.h @@ -402,14 +402,18 @@ extern struct mtx pf_task_mtx; #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ -#define PF_MISMATCHAW(aw, x, af, neg, ifp) \ +/* + * XXX callers not FIB-aware in our version of pf yet. + * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio. + */ +#define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \ ( \ (((aw)->type == PF_ADDR_NOROUTE && \ - pf_routable((x), (af), NULL)) || \ + pf_routable((x), (af), NULL, (rtid))) || \ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ - pf_routable((x), (af), (ifp))) || \ + pf_routable((x), (af), (ifp), (rtid))) || \ ((aw)->type == PF_ADDR_RTLABEL && \ - !pf_rtlabel_match((x), (af), (aw))) || \ + !pf_rtlabel_match((x), (af), (aw), (rtid))) || \ ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ @@ -1977,8 +1981,10 @@ int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, u_int32_t pf_state_expires(const struct pf_state *); void pf_purge_expired_fragments(void); -int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *); -int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *); +int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *, + int); +int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *, + int); #ifdef __FreeBSD__ int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *); #else From bb4b19cae22e7c47d2b2807585219aecd50abb1e Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 13:54:25 +0000 Subject: [PATCH 12/18] Fix the upper limit bounds checking for the "rtables" keyword wrapping it in a function to dynamically query the currently supported number of FIBs by the kernel for FreeBSD. Sponsored by: Cisco Systems, Inc. --- contrib/pf/pfctl/parse.y | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/contrib/pf/pfctl/parse.y b/contrib/pf/pfctl/parse.y index 440692e1ef3..f798cacc2e5 100644 --- a/contrib/pf/pfctl/parse.y +++ b/contrib/pf/pfctl/parse.y @@ -33,6 +33,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef __FreeBSD__ +#include +#endif #include #include #include @@ -335,6 +338,7 @@ int expand_skip_interface(struct node_if *); int check_rulestate(int); int getservice(char *); int rule_label(struct pf_rule *, char *); +int rt_tableid_max(void); void mv_rules(struct pf_ruleset *, struct pf_ruleset *); void decide_address_family(struct node_host *, sa_family_t *); @@ -1174,7 +1178,7 @@ scrub_opt : NODF { scrub_opts.randomid = 1; } | RTABLE NUMBER { - if ($2 < 0 /* || $2 > RT_TABLEID_MAX */) { + if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } @@ -1322,7 +1326,7 @@ antispoof_opt : label { antispoof_opts.label = $1; } | RTABLE NUMBER { - if ($2 < 0 /* || $2 > RT_TABLEID_MAX */ ) { + if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } @@ -2361,7 +2365,7 @@ filter_opt : USER uids { filter_opts.prob = 1; } | RTABLE NUMBER { - if ($2 < 0 /* || $2 > RT_TABLEID_MAX */ ) { + if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } @@ -4190,7 +4194,7 @@ tagged : /* empty */ { $$.neg = 0; $$.name = NULL; } rtable : /* empty */ { $$ = -1; } | RTABLE NUMBER { - if ($2 < 0 /* || $2 > RT_TABLEID_MAX */ ) { + if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } @@ -6051,3 +6055,23 @@ pfctl_load_anchors(int dev, struct pfctl *pf, struct pfr_buffer *trans) return (0); } + +int +rt_tableid_max(void) +{ +#ifdef __FreeBSD__ + int fibs; + size_t l = sizeof(fibs); + + if (sysctlbyname("net.fibs", &fibs, &l, NULL, 0) == -1) + fibs = 16; /* XXX RT_MAXFIBS, at least limit it some. */ + /* + * As the OpenBSD code only compares > and not >= we need to adjust + * here given we only accept values of 0..n and want to avoid #ifdefs + * in the grammer. + */ + return (fibs - 1); +#else + return (RT_TABLEID_MAX); +#endif +} From 4fd5619bb1f7634916ad92cf907a0ddfc94ae30f Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 15:26:55 +0000 Subject: [PATCH 13/18] Teach netstat -r (display contents of routing tables) about multi-FIB for IPv6 in addition to IPv4. While here harmonize naming of variables a bit with what we use in kernel. Sponsored by: Cisco Systems, Inc. --- usr.bin/netstat/route.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/usr.bin/netstat/route.c b/usr.bin/netstat/route.c index 2b4ad2f218a..996b4715808 100644 --- a/usr.bin/netstat/route.c +++ b/usr.bin/netstat/route.c @@ -113,7 +113,6 @@ typedef union { static sa_u pt_u; -int fibnum; int do_rtent = 0; struct rtentry rtentry; struct radix_node rnode; @@ -148,8 +147,7 @@ routepr(u_long rtree) { struct radix_node_head **rnhp, *rnh, head; size_t intsize; - int i; - int numfibs; + int fam, fibnum, numfibs; intsize = sizeof(int); if (sysctlbyname("net.my_fibnum", &fibnum, &intsize, NULL, 0) == -1) @@ -181,15 +179,20 @@ routepr(u_long rtree) if (kread((u_long)(rtree), (char *)(rt_tables), (numfibs * (AF_MAX+1) * sizeof(struct radix_node_head *))) != 0) return; - for (i = 0; i <= AF_MAX; i++) { + for (fam = 0; fam <= AF_MAX; fam++) { int tmpfib; - if (i != AF_INET) - tmpfib = 0; - else + + switch (fam) { + case AF_INET6: + case AF_INET: tmpfib = fibnum; + break; + default: + tmpfib = 0; + } rnhp = (struct radix_node_head **)*rt_tables; /* Calculate the in-kernel address. */ - rnhp += tmpfib * (AF_MAX+1) + i; + rnhp += tmpfib * (AF_MAX+1) + fam; /* Read the in kernel rhn pointer. */ if (kget(rnhp, rnh) != 0) continue; @@ -198,16 +201,16 @@ routepr(u_long rtree) /* Read the rnh data. */ if (kget(rnh, head) != 0) continue; - if (i == AF_UNSPEC) { + if (fam == AF_UNSPEC) { if (Aflag && af == 0) { printf("Netmasks:\n"); p_tree(head.rnh_treetop); } - } else if (af == AF_UNSPEC || af == i) { - size_cols(i, head.rnh_treetop); - pr_family(i); + } else if (af == AF_UNSPEC || af == fam) { + size_cols(fam, head.rnh_treetop); + pr_family(fam); do_rtent = 1; - pr_rthdr(i); + pr_rthdr(fam); p_tree(head.rnh_treetop); } } From b202f3dc89e172797483d45136dbd7b62254f296 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 15:31:47 +0000 Subject: [PATCH 14/18] Install the IPv6 reject routes we do for the default FIB to all FIBs. Sponsored by: Cisco Systems, Inc. --- etc/rc.d/routing | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/etc/rc.d/routing b/etc/rc.d/routing index 616a19c0b1b..f86fd3a54f5 100755 --- a/etc/rc.d/routing +++ b/etc/rc.d/routing @@ -137,12 +137,22 @@ static_inet() static_inet6() { - local _action i + local _action i fibs _action=$1 + # get the number of FIBs supported. + fibs=`sysctl -n net.fibs` + : ${fibs:=1} + # disallow "internal" addresses to appear on the wire - route ${_action} -inet6 ::ffff:0.0.0.0 -prefixlen 96 ::1 -reject - route ${_action} -inet6 ::0.0.0.0 -prefixlen 96 ::1 -reject + i=0 + while test ${i} -lt ${fibs}; do + setfib -F ${i} route ${_action} \ + -inet6 ::ffff:0.0.0.0 -prefixlen 96 ::1 -reject + setfib -F ${i} route ${_action} \ + -inet6 ::0.0.0.0 -prefixlen 96 ::1 -reject + i=$((i + 1)) + done case ${ipv6_defaultrouter} in [Nn][Oo] | '') @@ -214,8 +224,14 @@ static_inet6() # for the host case, you will allow to omit the identifiers. # Under this configuration, the packets will go to the default # interface. - route ${_action} -inet6 fe80:: -prefixlen 10 ::1 -reject - route ${_action} -inet6 ff02:: -prefixlen 16 ::1 -reject + i=0 + while test ${i} -lt ${fibs}; do + setfib -F ${i} route ${_action} \ + -inet6 fe80:: -prefixlen 10 ::1 -reject + setfib -F ${i} route ${_action} \ + -inet6 ff02:: -prefixlen 16 ::1 -reject + i=$((i + 1)) + done case ${ipv6_default_interface} in '') From dc5177989be77d2fc6c821320ed519f5ff55a1c8 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 15:33:55 +0000 Subject: [PATCH 15/18] Document the fact that faith(4) is only available on the default FIB. Sponsored by: Cisco Systems, Inc. --- share/man/man4/faith.4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/share/man/man4/faith.4 b/share/man/man4/faith.4 index b98ba394560..f0a2df6f6c3 100644 --- a/share/man/man4/faith.4 +++ b/share/man/man4/faith.4 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd April 10, 1999 +.Dd January 23, 2012 .Dt FAITH 4 .Os .Sh NAME @@ -58,7 +58,7 @@ variable in .Xr rc.conf 5 . .Pp Special action will be taken when IPv6 TCP traffic is seen on a router, -and the routing table suggests to route it to the +and the default routing table suggests to route it to the .Nm interface. In this case, the packet will be accepted by the router, From 9836132cd1049b4c261fc131f720b1d17977ffa2 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Fri, 3 Feb 2012 15:39:13 +0000 Subject: [PATCH 16/18] Document the fact that multi-FIB support for SCTP had been backed out in r179783 as (ab)using the concept of VRFs for this had not worked. At this point SCTP in FreeBSD does not support multi-FIB, neither for IPv4 nor for IPv6. Discussed with: rrs Sponsored by: Cisco Systems, Inc. --- sys/netinet/sctp_os_bsd.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sys/netinet/sctp_os_bsd.h b/sys/netinet/sctp_os_bsd.h index 4e0dcab5352..215b93dbd52 100644 --- a/sys/netinet/sctp_os_bsd.h +++ b/sys/netinet/sctp_os_bsd.h @@ -424,6 +424,12 @@ typedef struct callout sctp_os_timer_t; typedef struct route sctp_route_t; typedef struct rtentry sctp_rtentry_t; +/* + * XXX multi-FIB support was backed out in r179783 and it seems clear that the + * VRF support as currently in FreeBSD is not ready to support multi-FIB. + * It might be best to implement multi-FIB support for both v4 and v6 indepedent + * of VRFs and leave those to a real MPLS stack. + */ #define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL) /* Future zero copy wakeup/send function */ From aaf04b7cb637af9400e6b9ae1bd531ef828a7c82 Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Tue, 14 Feb 2012 11:37:04 +0000 Subject: [PATCH 17/18] Switch from setfib(2) moving the process to a different FIB to setsockopt(2) with SO_SETFIB to only tag the socket with the right FIB. That way either setfib(1) or nc -V can be used depending on what wants to be achieved. This also allows nc to be used for simple regression testing of either feature. Sponsored by: Cisco Systems, Inc. --- contrib/netcat/netcat.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/contrib/netcat/netcat.c b/contrib/netcat/netcat.c index 9b9017d6e61..bee0fa6be2c 100644 --- a/contrib/netcat/netcat.c +++ b/contrib/netcat/netcat.c @@ -605,8 +605,10 @@ remote_connect(const char *host, const char *port, struct addrinfo hints) #endif if (rtableid) { - if (setfib(rtableid) == -1) - err(1, "setfib"); + if (setsockopt(s, SOL_SOCKET, SO_SETFIB, &rtableid, + sizeof(rtableid)) == -1) + err(1, "setsockopt(.., SO_SETFIB, %u, ..)", + rtableid); } /* Bind to a local port or source address if specified. */ @@ -678,8 +680,11 @@ local_listen(char *host, char *port, struct addrinfo hints) continue; if (rtableid) { - if (setfib(rtableid) == -1) - err(1, "setfib"); + ret = setsockopt(s, SOL_SOCKET, SO_SETFIB, &rtableid, + sizeof(rtableid)); + if (ret == -1) + err(1, "setsockopt(.., SO_SETFIB, %u, ..)", + rtableid); } ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &x, sizeof(x)); From 1b46c7f83201c70bc284b319a970764d3811e63f Mon Sep 17 00:00:00 2001 From: "Bjoern A. Zeeb" Date: Tue, 14 Feb 2012 11:51:32 +0000 Subject: [PATCH 18/18] Allow to provide a hint to in6_selectsrc() for the interface using the return ifnet double pointer. Pass that hint down to in6_selectif() to be used when i) the default FIB is queried and ii) route lookup fails because the network is not present (i.e. someone deleted the connected subnet). This hint should not be generally used from anywhere outside the neighbor discovery code. We just make use of it from nd6_ns_output(). Extend the nd6_na_output() interface by a nd6_na_output_fib() version and pass the FIB number from the NS mbuf on to NA to allow the new mbuf to inherit the FIB tag and a later lookup from ip6_output() to succeed in the aformentioned example case. Provide a wrapper function for the old public interface also used from CARP but mark it with BURN_BRIDGES to cleanup in HEAD after MFC. Sponsored by: Cisco Systems, Inc. --- sys/netinet6/in6_src.c | 30 ++++++++++++++++++++++++------ sys/netinet6/nd6_nbr.c | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index 295ea715e36..9ed3eab8aef 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -131,7 +131,8 @@ static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **, int, int)); static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, - struct ip6_moptions *, struct route_in6 *ro, struct ifnet **, int)); + struct ip6_moptions *, struct route_in6 *ro, struct ifnet **, + struct ifnet *, int)); static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); @@ -182,7 +183,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ifnet **ifpp, struct in6_addr *srcp) { struct in6_addr dst, tmp; - struct ifnet *ifp = NULL; + struct ifnet *ifp = NULL, *oifp = NULL; struct in6_ifaddr *ia = NULL, *ia_best = NULL; struct in6_pktinfo *pi = NULL; int dst_scope = -1, best_scope = -1, best_matchlen = -1; @@ -195,8 +196,18 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__)); dst = dstsock->sin6_addr; /* make a copy for local operation */ - if (ifpp) + if (ifpp) { + /* + * Save a possibly passed in ifp for in6_selectsrc. Only + * neighbor discovery code should use this feature, where + * we may know the interface but not the FIB number holding + * the connected subnet in case someone deleted it from the + * default FIB and we need to check the interface. + */ + if (*ifpp != NULL) + oifp = *ifpp; *ifpp = NULL; + } if (inp != NULL) { INP_LOCK_ASSERT(inp); @@ -217,7 +228,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct in6_ifaddr *ia6; /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, + if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) return (error); @@ -283,7 +294,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * the outgoing interface and the destination address. */ /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, + if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) return (error); @@ -750,12 +761,14 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, static int in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, - int fibnum) + struct ifnet *oifp, int fibnum) { int error; struct route_in6 sro; struct rtentry *rt = NULL; + KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__)); + if (ro == NULL) { bzero(&sro, sizeof(sro)); ro = &sro; @@ -765,6 +778,11 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, &rt, 1, fibnum)) != 0) { if (ro == &sro && rt && rt == sro.ro_rt) RTFREE(rt); + /* Help ND. See oifp comment in in6_selectsrc(). */ + if (oifp != NULL && fibnum == RT_DEFAULT_FIB) { + *retifp = oifp; + error = 0; + } return (error); } diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 290d5fba4b2..4b80dc270f0 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -85,6 +85,8 @@ static void nd6_dad_timer(struct dadq *); static void nd6_dad_ns_output(struct dadq *, struct ifaddr *); static void nd6_dad_ns_input(struct ifaddr *); static void nd6_dad_na_input(struct ifaddr *); +static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *, + const struct in6_addr *, u_long, int, struct sockaddr *, u_int); VNET_DEFINE(int, dad_ignore_ns) = 0; /* ignore NS in DAD - specwise incorrect*/ VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */ @@ -344,19 +346,20 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) in6_all = in6addr_linklocal_allnodes; if (in6_setscope(&in6_all, ifp, NULL) != 0) goto bad; - nd6_na_output(ifp, &in6_all, &taddr6, + nd6_na_output_fib(ifp, &in6_all, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | - rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL); + rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL, + M_GETFIB(m)); goto freeit; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_NEIGHBOR_SOLICIT, 0); - nd6_na_output(ifp, &saddr6, &taddr6, + nd6_na_output_fib(ifp, &saddr6, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | rflag | ND_NA_FLAG_SOLICITED, tlladdr, - proxy ? (struct sockaddr *)&proxydl : NULL); + proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m)); freeit: if (ifa != NULL) ifa_free(ifa); @@ -508,14 +511,16 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, int error; struct sockaddr_in6 dst_sa; struct in6_addr src_in; + struct ifnet *oifp; bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; + oifp = ifp; error = in6_selectsrc(&dst_sa, NULL, - NULL, &ro, NULL, NULL, &src_in); + NULL, &ro, NULL, &oifp, &src_in); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, @@ -957,13 +962,14 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * tlladdr - 1 if include target link-layer address * sdl0 - sockaddr_dl (= proxy NA) or NULL */ -void -nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, +static void +nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, - struct sockaddr *sdl0) + struct sockaddr *sdl0, u_int fibnum) { struct mbuf *m; struct m_tag *mtag; + struct ifnet *oifp; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; struct ip6_moptions im6o; @@ -999,6 +1005,7 @@ nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, if (m == NULL) return; m->m_pkthdr.rcvif = NULL; + M_SETFIB(m, fibnum); if (IN6_IS_ADDR_MULTICAST(&daddr6)) { m->m_flags |= M_MCAST; @@ -1040,7 +1047,8 @@ nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, * Select a source whose scope is the same as that of the dest. */ bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa)); - error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, NULL, &src); + oifp = ifp; + error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_na_output: source can't be " @@ -1129,6 +1137,18 @@ nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, return; } +#ifndef BURN_BRIDGES +void +nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, + const struct in6_addr *taddr6, u_long flags, int tlladdr, + struct sockaddr *sdl0) +{ + + nd6_na_output_fib(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0, + RT_DEFAULT_FIB); +} +#endif + caddr_t nd6_ifptomac(struct ifnet *ifp) {