diff --git a/sys/net/if.c b/sys/net/if.c index 3b303fe42e9..de63b936684 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -311,30 +311,19 @@ VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); /* Table of ifnet by index. */ -static int if_index; -static int if_indexlim = 8; -static struct ifnet **ifindex_table; +VNET_DEFINE_STATIC(int, if_index); +#define V_if_index VNET(if_index) +VNET_DEFINE_STATIC(int, if_indexlim) = 8; +#define V_if_indexlim VNET(if_indexlim) +VNET_DEFINE_STATIC(struct ifnet **, ifindex_table); +#define V_ifindex_table VNET(ifindex_table) SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Variables global to all interfaces"); -static int -sysctl_ifcount(SYSCTL_HANDLER_ARGS) -{ - int rv = 0; - - IFNET_RLOCK(); - for (int i = 1; i <= if_index; i++) - if (ifindex_table[i] != NULL && - ifindex_table[i]->if_vnet == curvnet) - rv = i; - IFNET_RUNLOCK(); - - return (sysctl_handle_int(oidp, &rv, 0, req)); -} -SYSCTL_PROC(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, - CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RD, NULL, 0, sysctl_ifcount, "I", - "Maximum known interface index"); +SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, + CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(if_index), 0, + "Number of configured interfaces"); /* * The global network interface list (V_ifnet) and related state (such as @@ -363,19 +352,13 @@ MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex(u_int idx) { - struct ifnet *ifp; NET_EPOCH_ASSERT(); - if (__predict_false(idx > if_index)) + if (__predict_false(idx > V_if_index)) return (NULL); - ifp = ck_pr_load_ptr(&ifindex_table[idx]); - - if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet) - ifp = NULL; - - return (ifp); + return (ck_pr_load_ptr(&V_ifindex_table[idx])); } struct ifnet * @@ -391,6 +374,58 @@ ifnet_byindex_ref(u_int idx) return (ifp); } +/* + * Allocate 
an ifindex array entry. + */ +static void +ifindex_alloc(struct ifnet *ifp) +{ + u_short idx; + + IFNET_WLOCK(); + /* + * Try to find an empty slot below V_if_index. If we fail, take the + * next slot. + */ + for (idx = 1; idx <= V_if_index; idx++) { + if (V_ifindex_table[idx] == NULL) + break; + } + + /* Catch if_index overflow. */ + if (idx >= V_if_indexlim) { + struct ifnet **new, **old; + int newlim; + + newlim = V_if_indexlim * 2; + new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO); + memcpy(new, V_ifindex_table, V_if_indexlim * sizeof(*new)); + old = V_ifindex_table; + ck_pr_store_ptr(&V_ifindex_table, new); + V_if_indexlim = newlim; + epoch_wait_preempt(net_epoch_preempt); + free(old, M_IFNET); + } + if (idx > V_if_index) + V_if_index = idx; + + ifp->if_index = idx; + ck_pr_store_ptr(&V_ifindex_table[idx], ifp); + IFNET_WUNLOCK(); +} + +static void +ifindex_free(u_short idx) +{ + + IFNET_WLOCK_ASSERT(); + + ck_pr_store_ptr(&V_ifindex_table[idx], NULL); + while (V_if_index > 0 && + V_ifindex_table[V_if_index] == NULL) + V_if_index--; +} + /* * Network interface utility routines. * @@ -398,26 +433,35 @@ ifnet_byindex_ref(u_int idx) * parameters. 
*/ -static void -if_init(void *arg __unused) -{ - - ifindex_table = malloc(if_indexlim * sizeof(*ifindex_table), - M_IFNET, M_WAITOK | M_ZERO); -} -SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL); - static void vnet_if_init(const void *unused __unused) { CK_STAILQ_INIT(&V_ifnet); CK_STAILQ_INIT(&V_ifg_head); + V_ifindex_table = malloc(V_if_indexlim * sizeof(*V_ifindex_table), + M_IFNET, M_WAITOK | M_ZERO); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); +#ifdef VIMAGE +static void +vnet_if_uninit(const void *unused __unused) +{ + + VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " + "not empty", __func__, __LINE__, &V_ifnet)); + VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " + "not empty", __func__, __LINE__, &V_ifg_head)); + + free((caddr_t)V_ifindex_table, M_IFNET); +} +VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, + vnet_if_uninit, NULL); +#endif + static void if_link_ifnet(struct ifnet *ifp) { @@ -510,7 +554,6 @@ static struct ifnet * if_alloc_domain(u_char type, int numa_domain) { struct ifnet *ifp; - u_short idx; KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large")); if (numa_domain == IF_NODOM) @@ -550,37 +593,7 @@ if_alloc_domain(u_char type, int numa_domain) ifp->if_get_counter = if_get_counter_default; ifp->if_pcp = IFNET_PCP_NONE; - /* Allocate an ifindex array entry. */ - IFNET_WLOCK(); - /* - * Try to find an empty slot below if_index. If we fail, take the - * next slot. - */ - for (idx = 1; idx <= if_index; idx++) { - if (ifindex_table[idx] == NULL) - break; - } - - /* Catch if_index overflow. 
*/ - if (idx >= if_indexlim) { - struct ifnet **new, **old; - int newlim; - - newlim = if_indexlim * 2; - new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO); - memcpy(new, ifindex_table, if_indexlim * sizeof(*new)); - old = ifindex_table; - ck_pr_store_ptr(&ifindex_table, new); - if_indexlim = newlim; - epoch_wait_preempt(net_epoch_preempt); - free(old, M_IFNET); - } - if (idx > if_index) - if_index = idx; - - ifp->if_index = idx; - ck_pr_store_ptr(&ifindex_table[idx], ifp); - IFNET_WUNLOCK(); + ifindex_alloc(ifp); return (ifp); } @@ -650,18 +663,23 @@ if_free(struct ifnet *ifp) * epoch and then dereferencing ifp while we perform if_free(), * and after if_free() finished, too. * - * This early index freeing was important back when ifindex was - * virtualized and interface would outlive the vnet. + * The reason is the VIMAGE. For some reason it was designed + * to require all sockets drained before destroying, but not all + * ifnets. A vnet destruction calls if_vmove() on ifnet, which + * causes ID change. But ID change and a possible misidentification + * of an ifnet later is a lesser problem, as it doesn't crash kernel. + * A worse problem is that removed interface may outlive the vnet it + * belongs to! The if_free_deferred() would see ifp->if_vnet freed. 
*/ + CURVNET_SET_QUIET(ifp->if_vnet); IFNET_WLOCK(); - MPASS(ifindex_table[ifp->if_index] == ifp); - ck_pr_store_ptr(&ifindex_table[ifp->if_index], NULL); - while (if_index > 0 && ifindex_table[if_index] == NULL) - if_index--; + MPASS(V_ifindex_table[ifp->if_index] == ifp); + ifindex_free(ifp->if_index); IFNET_WUNLOCK(); if (refcount_release(&ifp->if_refcount)) NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx); + CURVNET_RESTORE(); } /* @@ -805,7 +823,7 @@ if_attach_internal(struct ifnet *ifp, bool vmove) struct sockaddr_dl *sdl; struct ifaddr *ifa; - MPASS(ifindex_table[ifp->if_index] == ifp); + MPASS(V_ifindex_table[ifp->if_index] == ifp); #ifdef VIMAGE ifp->if_vnet = curvnet; @@ -1255,6 +1273,17 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet) if (rc != 0) return (rc); + /* + * Unlink the ifnet from ifindex_table[] in current vnet, and shrink + * the if_index for that vnet if possible. + * + * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, + * or we'd lock on one vnet and unlock on another. + */ + IFNET_WLOCK(); + ifindex_free(ifp->if_index); + IFNET_WUNLOCK(); + /* * Perform interface-specific reassignment tasks, if provided by * the driver. @@ -1266,6 +1295,7 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet) * Switch to the context of the target vnet. */ CURVNET_SET_QUIET(new_vnet); + ifindex_alloc(ifp); if_attach_internal(ifp, true); #ifdef DEV_BPF @@ -1901,6 +1931,7 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; + const struct sockaddr_dl *sdl; NET_EPOCH_ASSERT(); /* @@ -1908,9 +1939,14 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) * so do that if we can. */ if (af == AF_LINK) { - ifp = ifnet_byindex( - ((const struct sockaddr_dl *)addr)->sdl_index); - return (ifp ? 
ifp->if_addr : NULL); + sdl = (const struct sockaddr_dl *)addr; + if (sdl->sdl_index && sdl->sdl_index <= V_if_index) { + ifp = ifnet_byindex(sdl->sdl_index); + if (ifp == NULL) + return (NULL); + + return (ifp->if_addr); + } } /* @@ -4546,16 +4582,24 @@ DB_SHOW_COMMAND(ifnet, db_show_ifnet) DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets) { + VNET_ITERATOR_DECL(vnet_iter); struct ifnet *ifp; u_short idx; - for (idx = 1; idx <= if_index; idx++) { - ifp = ifindex_table[idx]; - if (ifp == NULL) - continue; - db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); - if (db_pager_quit) - break; + VNET_FOREACH(vnet_iter) { + CURVNET_SET_QUIET(vnet_iter); +#ifdef VIMAGE + db_printf("vnet=%p\n", curvnet); +#endif + for (idx = 1; idx <= V_if_index; idx++) { + ifp = V_ifindex_table[idx]; + if (ifp == NULL) + continue; + db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); + if (db_pager_quit) + break; + } + CURVNET_RESTORE(); } } #endif /* DDB */