diff --git a/share/man/man4/gif.4 b/share/man/man4/gif.4 index 76b7976e7ff..752282bcf83 100644 --- a/share/man/man4/gif.4 +++ b/share/man/man4/gif.4 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 10, 2015 +.Dd June 5, 2018 .Dt GIF 4 .Os .Sh NAME @@ -169,14 +169,6 @@ This behavior may be modified at runtime by setting the variable .Va net.link.gif.max_nesting to the desired level of nesting. -Additionally, -.Nm -tunnels are restricted to one per pair of end points. -Parallel tunnels may be enabled by setting the -.Xr sysctl 8 -variable -.Va net.link.gif.parallel_tunnels -to 1. .Sh SEE ALSO .Xr gre 4 , .Xr inet 4 , diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index a8c6fb5c9c7..3293ded9dbb 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,7 +40,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include @@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -85,8 +84,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include #endif /* INET6 */ #include @@ -98,32 +95,17 @@ __FBSDID("$FreeBSD$"); static const char gifname[] = "gif"; -/* - * gif_mtx protects a per-vnet gif_softc_list. - */ -static VNET_DEFINE(struct mtx, gif_mtx); -#define V_gif_mtx VNET(gif_mtx) -static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); -static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list); -#define V_gif_softc_list VNET(gif_softc_list) +MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static struct sx gif_ioctl_sx; SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl"); -#define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \ - NULL, MTX_DEF) -#define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx) -#define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx) -#define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx) - void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af); void (*ng_gif_attach_p)(struct ifnet *ifp); void (*ng_gif_detach_p)(struct ifnet *ifp); static int gif_check_nesting(struct ifnet *, struct mbuf *); -static int gif_set_tunnel(struct ifnet *, struct sockaddr *, - struct sockaddr *); -static void gif_delete_tunnel(struct ifnet *); +static void gif_delete_tunnel(struct gif_softc *); static int gif_ioctl(struct ifnet *, u_long, caddr_t); static int gif_transmit(struct ifnet *, struct mbuf *); static void gif_qflush(struct ifnet *); @@ -132,8 +114,6 @@ static void gif_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, gif_cloner); #define V_gif_cloner VNET(gif_cloner) -static int gifmodevent(module_t, int, void *); - SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, "Generic Tunnel Interface"); @@ -153,21 +133,6 @@ static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST; SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels"); -/* - * By default, we disallow creation of multiple tunnels between the same - * pair of addresses. Some applications require this functionality so - * we allow control over this check here. - */ -#ifdef XBONEHACK -static VNET_DEFINE(int, parallel_tunnels) = 1; -#else -static VNET_DEFINE(int, parallel_tunnels) = 0; -#endif -#define V_parallel_tunnels VNET(parallel_tunnels) -SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0, - "Allow parallel tunnels?"); - static int gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) { @@ -176,20 +141,15 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO); sc->gif_fibnum = curthread->td_proc->p_fibnum; GIF2IFP(sc) = if_alloc(IFT_GIF); - GIF_LOCK_INIT(sc); GIF2IFP(sc)->if_softc = sc; if_initname(GIF2IFP(sc), gifname, unit); GIF2IFP(sc)->if_addrlen = 0; GIF2IFP(sc)->if_mtu = GIF_MTU; GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; -#if 0 - /* turn off ingress filter */ - GIF2IFP(sc)->if_flags |= IFF_LINK2; -#endif GIF2IFP(sc)->if_ioctl = gif_ioctl; - GIF2IFP(sc)->if_transmit = gif_transmit; - GIF2IFP(sc)->if_qflush = gif_qflush; + GIF2IFP(sc)->if_transmit = gif_transmit; + GIF2IFP(sc)->if_qflush = gif_qflush; GIF2IFP(sc)->if_output = gif_output; GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; @@ -198,9 +158,6 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) if (ng_gif_attach_p != NULL) (*ng_gif_attach_p)(GIF2IFP(sc)); - GIF_LIST_LOCK(); - LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list); - GIF_LIST_UNLOCK(); return (0); } @@ -211,10 +168,7 @@ gif_clone_destroy(struct ifnet *ifp) sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; - gif_delete_tunnel(ifp); - GIF_LIST_LOCK(); - LIST_REMOVE(sc, gif_list); - GIF_LIST_UNLOCK(); + gif_delete_tunnel(sc); if (ng_gif_detach_p != NULL) (*ng_gif_detach_p)(ifp); bpfdetach(ifp); @@ -222,8 +176,8 @@ gif_clone_destroy(struct ifnet *ifp) ifp->if_softc = NULL; sx_xunlock(&gif_ioctl_sx); + GIF_WAIT(); if_free(ifp); - GIF_LOCK_DESTROY(sc); free(sc, M_GIF); } @@ -231,10 +185,14 @@ static void vnet_gif_init(const void *unused __unused) { - LIST_INIT(&V_gif_softc_list); - GIF_LIST_LOCK_INIT(); V_gif_cloner = if_clone_simple(gifname, gif_clone_create, gif_clone_destroy, 0); +#ifdef INET + in_gif_init(); +#endif +#ifdef INET6 + in6_gif_init(); +#endif } VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_init, NULL); @@ -244,7 +202,12 @@ vnet_gif_uninit(const void *unused __unused) { if_clone_detach(V_gif_cloner); - GIF_LIST_LOCK_DESTROY(); +#ifdef INET + in_gif_uninit(); +#endif +#ifdef INET6 + in6_gif_uninit(); +#endif } VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_uninit, NULL); @@ -272,65 +235,25 @@ static moduledata_t gif_mod = { DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gif, 1); -int -gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +struct gif_list * +gif_hashinit(void) { - GIF_RLOCK_TRACKER; - const struct ip *ip; - struct gif_softc *sc; - int ret; + struct gif_list *hash; + int i; - sc = (struct gif_softc *)arg; - if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0) - return (0); + hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE, + M_GIF, M_WAITOK); + for (i = 0; i < GIF_HASH_SIZE; i++) + CK_LIST_INIT(&hash[i]); - ret = 0; - GIF_RLOCK(sc); + return (hash); +} - /* no physical address */ - if (sc->gif_family == 0) - goto done; +void +gif_hashdestroy(struct gif_list *hash) +{ - switch (proto) { -#ifdef INET - case IPPROTO_IPV4: -#endif -#ifdef INET6 - case IPPROTO_IPV6: -#endif - case IPPROTO_ETHERIP: - break; - default: - goto done; - } - - /* Bail on short packets */ - M_ASSERTPKTHDR(m); - if (m->m_pkthdr.len < sizeof(struct ip)) - goto done; - - ip = mtod(m, const struct ip *); - switch (ip->ip_v) { -#ifdef INET - case 4: - if (sc->gif_family != AF_INET) - goto done; - ret = in_gif_encapcheck(m, off, proto, arg); - break; -#endif -#ifdef INET6 - case 6: - if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) - goto done; - if (sc->gif_family != AF_INET6) - goto done; - ret = in6_gif_encapcheck(m, off, proto, arg); - break; -#endif - } -done: - GIF_RUNLOCK(sc); - return (ret); + free(hash, M_GIF); } static int @@ -357,6 +280,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) } #endif error = ENETDOWN; + GIF_RLOCK(); sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || @@ -444,6 +368,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) err: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + GIF_RUNLOCK(); return (error); } @@ -616,7 +541,8 @@ gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn) break; #endif case AF_LINK: - n = sizeof(struct etherip_header) + sizeof(struct ether_header); + n = sizeof(struct etherip_header) + + sizeof(struct ether_header); if (n > m->m_len) m = m_pullup(m, n); if (m == NULL) @@ -674,20 +600,11 @@ drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } -/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ -int +static int gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - GIF_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq*)data; - struct sockaddr *dst, *src; struct gif_softc *sc; -#ifdef INET - struct sockaddr_in *sin = NULL; -#endif -#ifdef INET6 - struct sockaddr_in6 *sin6 = NULL; -#endif u_int options; int error; @@ -715,176 +632,25 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } error = 0; switch (cmd) { - case SIOCSIFPHYADDR: -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: -#endif - error = EINVAL; - switch (cmd) { -#ifdef INET - case SIOCSIFPHYADDR: - src = (struct sockaddr *) - &(((struct in_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr *) - &(((struct in_aliasreq *)data)->ifra_dstaddr); - break; -#endif -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: - src = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_dstaddr); - break; -#endif - default: - goto bad; - } - /* sa_family must be equal */ - if (src->sa_family != dst->sa_family || - src->sa_len != dst->sa_len) - goto bad; - - /* validate sa_len */ - /* check sa_family looks sane for the cmd */ - switch (src->sa_family) { -#ifdef INET - case AF_INET: - if (src->sa_len != sizeof(struct sockaddr_in)) - goto bad; - if (cmd != SIOCSIFPHYADDR) { - error = EAFNOSUPPORT; - goto bad; - } - if (satosin(src)->sin_addr.s_addr == INADDR_ANY || - satosin(dst)->sin_addr.s_addr == INADDR_ANY) { - error = EADDRNOTAVAIL; - goto bad; - } - break; -#endif -#ifdef INET6 - case AF_INET6: - if (src->sa_len != sizeof(struct sockaddr_in6)) - goto bad; - if (cmd != SIOCSIFPHYADDR_IN6) { - error = EAFNOSUPPORT; - goto bad; - } - error = EADDRNOTAVAIL; - if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) - || - IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) - goto bad; - /* - * Check validity of the scope zone ID of the - * addresses, and convert it into the kernel - * internal form if necessary. - */ - error = sa6_embedscope(satosin6(src), 0); - if (error != 0) - goto bad; - error = sa6_embedscope(satosin6(dst), 0); - if (error != 0) - goto bad; - break; -#endif - default: - error = EAFNOSUPPORT; - goto bad; - } - error = gif_set_tunnel(ifp, src, dst); - break; case SIOCDIFPHYADDR: - gif_delete_tunnel(ifp); + if (sc->gif_family == 0) + break; + gif_delete_tunnel(sc); break; +#ifdef INET + case SIOCSIFPHYADDR: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: + error = in_gif_ioctl(sc, cmd, data); + break; +#endif #ifdef INET6 + case SIOCSIFPHYADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: -#endif - if (sc->gif_family == 0) { - error = EADDRNOTAVAIL; - break; - } - GIF_RLOCK(sc); - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - case SIOCGIFPDSTADDR: - if (sc->gif_family != AF_INET) { - error = EADDRNOTAVAIL; - break; - } - sin = (struct sockaddr_in *)&ifr->ifr_addr; - memset(sin, 0, sizeof(*sin)); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - case SIOCGIFPDSTADDR_IN6: - if (sc->gif_family != AF_INET6) { - error = EADDRNOTAVAIL; - break; - } - sin6 = (struct sockaddr_in6 *) - &(((struct in6_ifreq *)data)->ifr_addr); - memset(sin6, 0, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); - break; -#endif - default: - error = EAFNOSUPPORT; - } - if (error == 0) { - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - sin->sin_addr = sc->gif_iphdr->ip_src; - break; - case SIOCGIFPDSTADDR: - sin->sin_addr = sc->gif_iphdr->ip_dst; - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - sin6->sin6_addr = sc->gif_ip6hdr->ip6_src; - break; - case SIOCGIFPDSTADDR_IN6: - sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst; - break; -#endif - } - } - GIF_RUNLOCK(sc); - if (error != 0) - break; - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - case SIOCGIFPDSTADDR: - error = prison_if(curthread->td_ucred, - (struct sockaddr *)sin); - if (error != 0) - memset(sin, 0, sizeof(*sin)); - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - case SIOCGIFPDSTADDR_IN6: - error = prison_if(curthread->td_ucred, - (struct sockaddr *)sin6); - if (error == 0) - error = sa6_recoverscope(sin6); - if (error != 0) - memset(sin6, 0, sizeof(*sin6)); -#endif - } + error = in6_gif_ioctl(sc, cmd, data); break; +#endif case SIOCGTUNFIB: ifr->ifr_fib = sc->gif_fibnum; break; @@ -908,171 +674,63 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sizeof(options)); if (error) break; - if (options & ~GIF_OPTMASK) + if (options & ~GIF_OPTMASK) { error = EINVAL; - else - sc->gif_options = options; + break; + } + if (sc->gif_options != options) { + switch (sc->gif_family) { +#ifdef INET + case AF_INET: + error = in_gif_setopts(sc, options); + break; +#endif +#ifdef INET6 + case AF_INET6: + error = in6_gif_setopts(sc, options); + break; +#endif + default: + /* No need to invoke AF-handler */ + sc->gif_options = options; + } + } break; default: error = EINVAL; break; } + if (error == 0 && sc->gif_family != 0) { + if ( +#ifdef INET + cmd == SIOCSIFPHYADDR || +#endif +#ifdef INET6 + cmd == SIOCSIFPHYADDR_IN6 || +#endif + 0) { + ifp->if_drv_flags |= IFF_DRV_RUNNING; + if_link_state_change(ifp, LINK_STATE_UP); + } + } bad: sx_xunlock(&gif_ioctl_sx); return (error); } static void -gif_detach(struct gif_softc *sc, int family) +gif_delete_tunnel(struct gif_softc *sc) { sx_assert(&gif_ioctl_sx, SA_XLOCKED); - if (sc->gif_ecookie != NULL) { - switch (family) { -#ifdef INET - case AF_INET: - ip_encap_detach(sc->gif_ecookie); - break; -#endif -#ifdef INET6 - case AF_INET6: - ip6_encap_detach(sc->gif_ecookie); - break; -#endif - } - } - sc->gif_ecookie = NULL; -} - -static int -gif_attach(struct gif_softc *sc, int af) -{ - - sx_assert(&gif_ioctl_sx, SA_XLOCKED); - switch (af) { -#ifdef INET - case AF_INET: - return (in_gif_attach(sc)); -#endif -#ifdef INET6 - case AF_INET6: - return (in6_gif_attach(sc)); -#endif - } - return (EAFNOSUPPORT); -} - -static int -gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) -{ - struct gif_softc *sc = ifp->if_softc; - struct gif_softc *tsc; -#ifdef INET - struct ip *ip; -#endif -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - void *hdr; - int error = 0; - - if (sc == NULL) - return (ENXIO); - /* Disallow parallel tunnels unless instructed otherwise. */ - if (V_parallel_tunnels == 0) { - GIF_LIST_LOCK(); - LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) { - if (tsc == sc || tsc->gif_family != src->sa_family) - continue; -#ifdef INET - if (tsc->gif_family == AF_INET && - tsc->gif_iphdr->ip_src.s_addr == - satosin(src)->sin_addr.s_addr && - tsc->gif_iphdr->ip_dst.s_addr == - satosin(dst)->sin_addr.s_addr) { - error = EADDRNOTAVAIL; - GIF_LIST_UNLOCK(); - goto bad; - } -#endif -#ifdef INET6 - if (tsc->gif_family == AF_INET6 && - IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src, - &satosin6(src)->sin6_addr) && - IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst, - &satosin6(dst)->sin6_addr)) { - error = EADDRNOTAVAIL; - GIF_LIST_UNLOCK(); - goto bad; - } -#endif - } - GIF_LIST_UNLOCK(); - } - switch (src->sa_family) { -#ifdef INET - case AF_INET: - hdr = ip = malloc(sizeof(struct ip), M_GIF, - M_WAITOK | M_ZERO); - ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr; - ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr; - break; -#endif -#ifdef INET6 - case AF_INET6: - hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF, - M_WAITOK | M_ZERO); - ip6->ip6_src = satosin6(src)->sin6_addr; - ip6->ip6_dst = satosin6(dst)->sin6_addr; - ip6->ip6_vfc = IPV6_VERSION; - break; -#endif - default: - return (EAFNOSUPPORT); - } - - if (sc->gif_family != src->sa_family) - gif_detach(sc, sc->gif_family); - if (sc->gif_family == 0 || - sc->gif_family != src->sa_family) - error = gif_attach(sc, src->sa_family); - - GIF_WLOCK(sc); - if (sc->gif_family != 0) + if (sc->gif_family != 0) { + CK_LIST_REMOVE(sc, chain); + /* Wait until it become safe to free gif_hdr */ + GIF_WAIT(); free(sc->gif_hdr, M_GIF); - sc->gif_family = src->sa_family; - sc->gif_hdr = hdr; - GIF_WUNLOCK(sc); -#if defined(INET) || defined(INET6) -bad: -#endif - if (error == 0 && sc->gif_family != 0) { - ifp->if_drv_flags |= IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_UP); - } else { - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_DOWN); } - return (error); -} - -static void -gif_delete_tunnel(struct ifnet *ifp) -{ - struct gif_softc *sc = ifp->if_softc; - int family; - - if (sc == NULL) - return; - - GIF_WLOCK(sc); - family = sc->gif_family; sc->gif_family = 0; - GIF_WUNLOCK(sc); - if (family != 0) { - gif_detach(sc, family); - free(sc->gif_hdr, M_GIF); - } - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_DOWN); + GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; + if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN); } + diff --git a/sys/net/if_gif.h b/sys/net/if_gif.h index 18e33701f8a..217b5fe7b16 100644 --- a/sys/net/if_gif.h +++ b/sys/net/if_gif.h @@ -5,6 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,14 +37,9 @@ #define _NET_IF_GIF_H_ #ifdef _KERNEL -#include "opt_inet.h" -#include "opt_inet6.h" - -#include struct ip; struct ip6_hdr; -struct encaptab; extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); @@ -55,8 +51,6 @@ extern void (*ng_gif_detach_p)(struct ifnet *ifp); struct gif_softc { struct ifnet *gif_ifp; - struct rmlock gif_lock; - const struct encaptab *gif_ecookie; int gif_family; int gif_flags; u_int gif_fibnum; @@ -65,28 +59,22 @@ struct gif_softc { union { void *hdr; struct ip *iphdr; -#ifdef INET6 struct ip6_hdr *ip6hdr; -#endif } gif_uhdr; - LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */ -}; -#define GIF2IFP(sc) ((sc)->gif_ifp) -#define GIF_LOCK_INIT(sc) rm_init(&(sc)->gif_lock, "gif softc") -#define GIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->gif_lock) -#define GIF_RLOCK_TRACKER struct rm_priotracker gif_tracker -#define GIF_RLOCK(sc) rm_rlock(&(sc)->gif_lock, &gif_tracker) -#define GIF_RUNLOCK(sc) rm_runlock(&(sc)->gif_lock, &gif_tracker) -#define GIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_RLOCKED) -#define GIF_WLOCK(sc) rm_wlock(&(sc)->gif_lock) -#define GIF_WUNLOCK(sc) rm_wunlock(&(sc)->gif_lock) -#define GIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_WLOCKED) + CK_LIST_ENTRY(gif_softc) chain; +}; +CK_LIST_HEAD(gif_list, gif_softc); +MALLOC_DECLARE(M_GIF); + +#ifndef GIF_HASH_SIZE +#define GIF_HASH_SIZE (1 << 4) +#endif + +#define GIF2IFP(sc) ((sc)->gif_ifp) #define gif_iphdr gif_uhdr.iphdr #define gif_hdr gif_uhdr.hdr -#ifdef INET6 #define gif_ip6hdr gif_uhdr.ip6hdr -#endif #define GIF_MTU (1280) /* Default MTU */ #define GIF_MTU_MIN (1280) /* Minimum MTU */ @@ -108,21 +96,29 @@ struct etherip_header { /* mbuf adjust factor to force 32-bit alignment of IP header */ #define ETHERIP_ALIGN 2 +#define GIF_RLOCK() epoch_enter_preempt(net_epoch_preempt) +#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) +#define GIF_WAIT() epoch_wait_preempt(net_epoch_preempt) + /* Prototypes */ +struct gif_list *gif_hashinit(void); +void gif_hashdestroy(struct gif_list *); + void gif_input(struct mbuf *, struct ifnet *, int, uint8_t); int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); -int gif_encapcheck(const struct mbuf *, int, int, void *); -#ifdef INET + +void in_gif_init(void); +void in_gif_uninit(void); int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t); -int in_gif_encapcheck(const struct mbuf *, int, int, void *); -int in_gif_attach(struct gif_softc *); -#endif -#ifdef INET6 +int in_gif_ioctl(struct gif_softc *, u_long, caddr_t); +int in_gif_setopts(struct gif_softc *, u_int); + +void in6_gif_init(void); +void in6_gif_uninit(void); int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t); -int in6_gif_encapcheck(const struct mbuf *, int, int, void *); -int in6_gif_attach(struct gif_softc *); -#endif +int in6_gif_ioctl(struct gif_softc *, u_long, caddr_t); +int in6_gif_setopts(struct gif_softc *, u_int); #endif /* _KERNEL */ #define GIFGOPTS _IOWR('i', 150, struct ifreq) diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 835760c8a4d..5f4af01b199 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,9 +39,8 @@ __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include -#include -#include #include +#include #include #include #include @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -75,15 +76,155 @@ static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL; SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_gif_ttl), 0, "Default TTL value for encapsulated packets"); +/* + * We keep interfaces in a hash table using src+dst as key. + * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list. + */ +static VNET_DEFINE(struct gif_list *, ipv4_hashtbl) = NULL; +static VNET_DEFINE(struct gif_list, ipv4_list) = CK_LIST_HEAD_INITIALIZER(); +#define V_ipv4_hashtbl VNET(ipv4_hashtbl) +#define V_ipv4_list VNET(ipv4_list) + +#define GIF_HASH(src, dst) (V_ipv4_hashtbl[\ + in_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)]) +#define GIF_HASH_SC(sc) GIF_HASH((sc)->gif_iphdr->ip_src.s_addr,\ + (sc)->gif_iphdr->ip_dst.s_addr) +static uint32_t +in_gif_hashval(in_addr_t src, in_addr_t dst) +{ + uint32_t ret; + + ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); + return (fnv_32_buf(&dst, sizeof(dst), ret)); +} + +static int +in_gif_checkdup(const struct gif_softc *sc, in_addr_t src, in_addr_t dst) +{ + struct gif_softc *tmp; + + if (sc->gif_family == AF_INET && + sc->gif_iphdr->ip_src.s_addr == src && + sc->gif_iphdr->ip_dst.s_addr == dst) + return (EEXIST); + + CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) { + if (tmp == sc) + continue; + if (tmp->gif_iphdr->ip_src.s_addr == src && + tmp->gif_iphdr->ip_dst.s_addr == dst) + return (EADDRNOTAVAIL); + } + return (0); +} + +static void +in_gif_attach(struct gif_softc *sc) +{ + + if (sc->gif_options & GIF_IGNORE_SOURCE) + CK_LIST_INSERT_HEAD(&V_ipv4_list, sc, chain); + else + CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain); +} + +int +in_gif_setopts(struct gif_softc *sc, u_int options) +{ + + /* NOTE: we are protected with gif_ioctl_sx lock */ + MPASS(sc->gif_family == AF_INET); + MPASS(sc->gif_options != options); + + if ((options & GIF_IGNORE_SOURCE) != + (sc->gif_options & GIF_IGNORE_SOURCE)) { + CK_LIST_REMOVE(sc, chain); + sc->gif_options = options; + in_gif_attach(sc); + } + return (0); +} + +int +in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) +{ + struct ifreq *ifr = (struct ifreq *)data; + struct sockaddr_in *dst, *src; + struct ip *ip; + int error; + + /* NOTE: we are protected with gif_ioctl_sx lock */ + error = EINVAL; + switch (cmd) { + case SIOCSIFPHYADDR: + src = &((struct in_aliasreq *)data)->ifra_addr; + dst = &((struct in_aliasreq *)data)->ifra_dstaddr; + + /* sanity checks */ + if (src->sin_family != dst->sin_family || + src->sin_family != AF_INET || + src->sin_len != dst->sin_len || + src->sin_len != sizeof(*src)) + break; + if (src->sin_addr.s_addr == INADDR_ANY || + dst->sin_addr.s_addr == INADDR_ANY) { + error = EADDRNOTAVAIL; + break; + } + if (V_ipv4_hashtbl == NULL) + V_ipv4_hashtbl = gif_hashinit(); + error = in_gif_checkdup(sc, src->sin_addr.s_addr, + dst->sin_addr.s_addr); + if (error == EADDRNOTAVAIL) + break; + if (error == EEXIST) { + /* Addresses are the same. Just return. */ + error = 0; + break; + } + ip = malloc(sizeof(*ip), M_GIF, M_WAITOK | M_ZERO); + ip->ip_src.s_addr = src->sin_addr.s_addr; + ip->ip_dst.s_addr = dst->sin_addr.s_addr; + if (sc->gif_family != 0) { + /* Detach existing tunnel first */ + CK_LIST_REMOVE(sc, chain); + GIF_WAIT(); + free(sc->gif_hdr, M_GIF); + /* XXX: should we notify about link state change? */ + } + sc->gif_family = AF_INET; + sc->gif_iphdr = ip; + in_gif_attach(sc); + break; + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + if (sc->gif_family != AF_INET) { + error = EADDRNOTAVAIL; + break; + } + src = (struct sockaddr_in *)&ifr->ifr_addr; + memset(src, 0, sizeof(*src)); + src->sin_family = AF_INET; + src->sin_len = sizeof(*src); + src->sin_addr = (cmd == SIOCGIFPSRCADDR) ? + sc->gif_iphdr->ip_src: sc->gif_iphdr->ip_dst; + error = prison_if(curthread->td_ucred, (struct sockaddr *)src); + if (error != 0) + memset(src, 0, sizeof(*src)); + break; + } + return (error); +} + int in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) { - GIF_RLOCK_TRACKER; struct gif_softc *sc = ifp->if_softc; struct ip *ip; int len; /* prepend new IP header */ + MPASS(in_epoch()); len = sizeof(struct ip); #ifndef __NO_STRICT_ALIGNMENT if (proto == IPPROTO_ETHERIP) @@ -102,15 +243,9 @@ in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) } #endif ip = mtod(m, struct ip *); - GIF_RLOCK(sc); - if (sc->gif_family != AF_INET) { - m_freem(m); - GIF_RUNLOCK(sc); - return (ENETDOWN); - } - bcopy(sc->gif_iphdr, ip, sizeof(struct ip)); - GIF_RUNLOCK(sc); + MPASS(sc->gif_family == AF_INET); + bcopy(sc->gif_iphdr, ip, sizeof(struct ip)); ip->ip_p = proto; /* version will be set in ip_output() */ ip->ip_ttl = V_ip_gif_ttl; @@ -128,6 +263,7 @@ in_gif_input(struct mbuf *m, int off, int proto, void *arg) struct ip *ip; uint8_t ecn; + MPASS(in_epoch()); if (sc == NULL) { m_freem(m); KMOD_IPSTAT_INC(ips_nogif); @@ -146,61 +282,122 @@ in_gif_input(struct mbuf *m, int off, int proto, void *arg) return (IPPROTO_DONE); } -/* - * we know that we are in IFF_UP, outer address available, and outer family - * matched the physical addr family. see gif_encapcheck(). - */ -int -in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +static int +in_gif_lookup(const struct mbuf *m, int off, int proto, void **arg) { const struct ip *ip; struct gif_softc *sc; int ret; - /* sanity check done in caller */ - sc = (struct gif_softc *)arg; - GIF_RLOCK_ASSERT(sc); - - /* check for address match */ + MPASS(in_epoch()); ip = mtod(m, const struct ip *); - if (sc->gif_iphdr->ip_src.s_addr != ip->ip_dst.s_addr) + /* + * NOTE: it is safe to iterate without any locking here, because softc + * can be reclaimed only when we are not within net_epoch_preempt + * section, but ip_encap lookup+input are executed in epoch section. + */ + ret = 0; + CK_LIST_FOREACH(sc, &GIF_HASH(ip->ip_dst.s_addr, + ip->ip_src.s_addr), chain) { + /* + * This is an inbound packet, its ip_dst is source address + * in softc. + */ + if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr && + sc->gif_iphdr->ip_dst.s_addr == ip->ip_src.s_addr) { + ret = ENCAP_DRV_LOOKUP; + goto done; + } + } + /* + * No exact match. + * Check the list of interfaces with GIF_IGNORE_SOURCE flag. + */ + CK_LIST_FOREACH(sc, &V_ipv4_list, chain) { + if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr) { + ret = 32 + 8; /* src + proto */ + goto done; + } + } + return (0); +done: + if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0) return (0); - ret = 32 + 8; /* src + proto */ - if (sc->gif_iphdr->ip_dst.s_addr != ip->ip_src.s_addr) { - if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0) - return (0); - } else - ret += 32; - /* ingress filters on outer source */ if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) { struct nhop4_basic nh4; struct in_addr dst; dst = ip->ip_src; - if (fib4_lookup_nh_basic(sc->gif_fibnum, dst, 0, 0, &nh4) != 0) return (0); - if (nh4.nh_ifp != m->m_pkthdr.rcvif) return (0); } + *arg = sc; return (ret); } -static const struct encap_config ipv4_encap_cfg = { - .proto = -1, - .min_length = sizeof(struct ip), - .exact_match = (sizeof(in_addr_t) << 4) + 8, - .check = gif_encapcheck, - .input = in_gif_input +static struct { + const struct encap_config encap; + const struct encaptab *cookie; +} ipv4_encap_cfg[] = { + { + .encap = { + .proto = IPPROTO_IPV4, + .min_length = 2 * sizeof(struct ip), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = in_gif_lookup, + .input = in_gif_input + }, + }, +#ifdef INET6 + { + .encap = { + .proto = IPPROTO_IPV6, + .min_length = sizeof(struct ip) + + sizeof(struct ip6_hdr), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = in_gif_lookup, + .input = in_gif_input + }, + }, +#endif + { + .encap = { + .proto = IPPROTO_ETHERIP, + .min_length = sizeof(struct ip) + + sizeof(struct etherip_header) + + sizeof(struct ether_header), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = in_gif_lookup, + .input = in_gif_input + }, + } }; -int -in_gif_attach(struct gif_softc *sc) +void +in_gif_init(void) { + int i; - KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL")); - sc->gif_ecookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK); - return (0); + if (!IS_DEFAULT_VNET(curvnet)) + return; + for (i = 0; i < nitems(ipv4_encap_cfg); i++) + ipv4_encap_cfg[i].cookie = ip_encap_attach( + &ipv4_encap_cfg[i].encap, NULL, M_WAITOK); } + +void +in_gif_uninit(void) +{ + int i; + + if (IS_DEFAULT_VNET(curvnet)) { + for (i = 0; i < nitems(ipv4_encap_cfg); i++) + ip_encap_detach(ipv4_encap_cfg[i].cookie); + } + if (V_ipv4_hashtbl != NULL) + gif_hashdestroy(V_ipv4_hashtbl); +} + diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index 0fc0e66cab0..1834660ce1e 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,19 +39,18 @@ __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include -#include -#include #include +#include #include #include #include #include #include -#include #include #include #include +#include #include #include #include @@ -60,18 +60,15 @@ __FBSDID("$FreeBSD$"); #include #ifdef INET #include +#include #endif #include -#ifdef INET6 #include #include #include -#endif -#include -#ifdef INET6 +#include #include #include -#endif #include @@ -84,15 +81,167 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_gif_hlim), 0, "Default hop limit for encapsulated packets"); +/* + * We keep interfaces in a hash table using src+dst as key. + * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list. + */ +static VNET_DEFINE(struct gif_list *, ipv6_hashtbl) = NULL; +static VNET_DEFINE(struct gif_list, ipv6_list) = CK_LIST_HEAD_INITIALIZER(); +#define V_ipv6_hashtbl VNET(ipv6_hashtbl) +#define V_ipv6_list VNET(ipv6_list) + +#define GIF_HASH(src, dst) (V_ipv6_hashtbl[\ + in6_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)]) +#define GIF_HASH_SC(sc) GIF_HASH(&(sc)->gif_ip6hdr->ip6_src,\ + &(sc)->gif_ip6hdr->ip6_dst) +static uint32_t +in6_gif_hashval(const struct in6_addr *src, const struct in6_addr *dst) +{ + uint32_t ret; + + ret = fnv_32_buf(src, sizeof(*src), FNV1_32_INIT); + return (fnv_32_buf(dst, sizeof(*dst), ret)); +} + +static int +in6_gif_checkdup(const struct gif_softc *sc, const struct in6_addr *src, + const struct in6_addr *dst) +{ + struct gif_softc *tmp; + + if (sc->gif_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, src) && + IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, dst)) + return (EEXIST); + + CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) { + if (tmp == sc) + continue; + if (IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_src, src) && + IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_dst, dst)) + return (EADDRNOTAVAIL); + } + return (0); +} + +static void +in6_gif_attach(struct gif_softc *sc) +{ + + if (sc->gif_options & GIF_IGNORE_SOURCE) + CK_LIST_INSERT_HEAD(&V_ipv6_list, sc, chain); + else + CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain); +} + +int +in6_gif_setopts(struct gif_softc *sc, u_int options) +{ + + /* NOTE: we are protected with gif_ioctl_sx lock */ + MPASS(sc->gif_family == AF_INET6); + MPASS(sc->gif_options != options); + + if ((options & GIF_IGNORE_SOURCE) != + (sc->gif_options & GIF_IGNORE_SOURCE)) { + CK_LIST_REMOVE(sc, chain); + sc->gif_options = options; + in6_gif_attach(sc); + } + return (0); +} + +int +in6_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) +{ + struct in6_ifreq *ifr = (struct in6_ifreq *)data; + struct sockaddr_in6 *dst, *src; + struct ip6_hdr *ip6; + int error; + + /* NOTE: we are protected with gif_ioctl_sx lock */ + error = EINVAL; + switch (cmd) { + case SIOCSIFPHYADDR_IN6: + src = &((struct in6_aliasreq *)data)->ifra_addr; + dst = &((struct in6_aliasreq *)data)->ifra_dstaddr; + + /* sanity checks */ + if (src->sin6_family != dst->sin6_family || + src->sin6_family != AF_INET6 || + src->sin6_len != dst->sin6_len || + src->sin6_len != sizeof(*src)) + break; + if (IN6_IS_ADDR_UNSPECIFIED(&src->sin6_addr) || + IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr)) { + error = EADDRNOTAVAIL; + break; + } + /* + * Check validity of the scope zone ID of the + * addresses, and convert it into the kernel + * internal form if necessary. + */ + if ((error = sa6_embedscope(src, 0)) != 0 || + (error = sa6_embedscope(dst, 0)) != 0) + break; + + if (V_ipv6_hashtbl == NULL) + V_ipv6_hashtbl = gif_hashinit(); + error = in6_gif_checkdup(sc, &src->sin6_addr, + &dst->sin6_addr); + if (error == EADDRNOTAVAIL) + break; + if (error == EEXIST) { + /* Addresses are the same. Just return. */ + error = 0; + break; + } + ip6 = malloc(sizeof(*ip6), M_GIF, M_WAITOK | M_ZERO); + ip6->ip6_src = src->sin6_addr; + ip6->ip6_dst = dst->sin6_addr; + if (sc->gif_family != 0) { + /* Detach existing tunnel first */ + CK_LIST_REMOVE(sc, chain); + GIF_WAIT(); + free(sc->gif_hdr, M_GIF); + /* XXX: should we notify about link state change? */ + } + sc->gif_family = AF_INET6; + sc->gif_ip6hdr = ip6; + in6_gif_attach(sc); + break; + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + if (sc->gif_family != AF_INET6) { + error = EADDRNOTAVAIL; + break; + } + src = (struct sockaddr_in6 *)&ifr->ifr_addr; + memset(src, 0, sizeof(*src)); + src->sin6_family = AF_INET6; + src->sin6_len = sizeof(*src); + src->sin6_addr = (cmd == SIOCGIFPSRCADDR_IN6) ? + sc->gif_ip6hdr->ip6_src: sc->gif_ip6hdr->ip6_dst; + error = prison_if(curthread->td_ucred, (struct sockaddr *)src); + if (error == 0) + error = sa6_recoverscope(src); + if (error != 0) + memset(src, 0, sizeof(*src)); + break; + } + return (error); +} + int in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) { - GIF_RLOCK_TRACKER; struct gif_softc *sc = ifp->if_softc; struct ip6_hdr *ip6; int len; /* prepend new IP header */ + MPASS(in_epoch()); len = sizeof(struct ip6_hdr); #ifndef __NO_STRICT_ALIGNMENT if (proto == IPPROTO_ETHERIP) @@ -112,14 +261,8 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) #endif ip6 = mtod(m, struct ip6_hdr *); - GIF_RLOCK(sc); - if (sc->gif_family != AF_INET6) { - m_freem(m); - GIF_RUNLOCK(sc); - return (ENETDOWN); - } + MPASS(sc->gif_family == AF_INET6); bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr)); - GIF_RUNLOCK(sc); ip6->ip6_flow |= htonl((uint32_t)ecn << 20); ip6->ip6_nxt = proto; @@ -140,6 +283,7 @@ in6_gif_input(struct mbuf *m, int off, int proto, void *arg) struct ip6_hdr *ip6; uint8_t ecn; + MPASS(in_epoch()); if (sc == NULL) { m_freem(m); IP6STAT_INC(ip6s_nogif); @@ -158,64 +302,123 @@ in6_gif_input(struct mbuf *m, int off, int proto, void *arg) return (IPPROTO_DONE); } -/* - * we know that we are in IFF_UP, outer address available, and outer family - * matched the physical addr family. see gif_encapcheck(). - */ -int -in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +static int +in6_gif_lookup(const struct mbuf *m, int off, int proto, void **arg) { const struct ip6_hdr *ip6; struct gif_softc *sc; int ret; - /* sanity check done in caller */ - sc = (struct gif_softc *)arg; - GIF_RLOCK_ASSERT(sc); - + MPASS(in_epoch()); /* - * Check for address match. Note that the check is for an incoming - * packet. We should compare the *source* address in our configuration - * and the *destination* address of the packet, and vice versa. + * NOTE: it is safe to iterate without any locking here, because softc + * can be reclaimed only when we are not within net_epoch_preempt + * section, but ip_encap lookup+input are executed in epoch section. */ ip6 = mtod(m, const struct ip6_hdr *); - if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst)) + ret = 0; + CK_LIST_FOREACH(sc, &GIF_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) { + /* + * This is an inbound packet, its ip6_dst is source address + * in softc. + */ + if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, + &ip6->ip6_dst) && + IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, + &ip6->ip6_src)) { + ret = ENCAP_DRV_LOOKUP; + goto done; + } + } + /* + * No exact match. + * Check the list of interfaces with GIF_IGNORE_SOURCE flag. + */ + CK_LIST_FOREACH(sc, &V_ipv6_list, chain) { + if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, + &ip6->ip6_dst)) { + ret = 128 + 8; /* src + proto */ + goto done; + } + } + return (0); +done: + if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0) return (0); - ret = 128; - if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) { - if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0) - return (0); - } else - ret += 128; - /* ingress filters on outer source */ if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) { struct nhop6_basic nh6; - /* XXX empty scope id */ - if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, 0, 0, 0, - &nh6) != 0) + if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, + ntohs(in6_getscope(&ip6->ip6_src)), 0, 0, &nh6) != 0) return (0); if (nh6.nh_ifp != m->m_pkthdr.rcvif) return (0); } + *arg = sc; return (ret); } -static const struct encap_config ipv6_encap_cfg = { - .proto = -1, - .min_length = sizeof(struct ip6_hdr), - .exact_match = (sizeof(struct in6_addr) << 4) + 8, - .check = gif_encapcheck, - .input = in6_gif_input +static struct { + const struct encap_config encap; + const struct encaptab *cookie; +} ipv6_encap_cfg[] = { +#ifdef INET + { + .encap = { + .proto = IPPROTO_IPV4, + .min_length = sizeof(struct ip6_hdr) + + sizeof(struct ip), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = in6_gif_lookup, + .input = in6_gif_input + }, + }, +#endif + { + .encap = { + .proto = IPPROTO_IPV6, + .min_length = 2 * sizeof(struct ip6_hdr), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = in6_gif_lookup, + .input = in6_gif_input + }, + }, + { + .encap = { + .proto = IPPROTO_ETHERIP, + .min_length = sizeof(struct ip6_hdr) + + sizeof(struct etherip_header) + + sizeof(struct ether_header), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = in6_gif_lookup, + .input = in6_gif_input + }, + } }; -int -in6_gif_attach(struct gif_softc *sc) +void +in6_gif_init(void) { + int i; - KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL")); - sc->gif_ecookie = ip6_encap_attach(&ipv6_encap_cfg, sc, M_WAITOK); - return (0); + if (!IS_DEFAULT_VNET(curvnet)) + return; + for (i = 0; i < nitems(ipv6_encap_cfg); i++) + ipv6_encap_cfg[i].cookie = ip6_encap_attach( + &ipv6_encap_cfg[i].encap, NULL, M_WAITOK); +} + +void +in6_gif_uninit(void) +{ + int i; + + if (IS_DEFAULT_VNET(curvnet)) { + for (i = 0; i < nitems(ipv6_encap_cfg); i++) + ip6_encap_detach(ipv6_encap_cfg[i].cookie); + } + if (V_ipv6_hashtbl != NULL) + gif_hashdestroy(V_ipv6_hashtbl); }