mirror of
https://github.com/opnsense/src.git
synced 2026-06-09 00:32:25 -04:00
SMR protection for inpcbs
With the introduction of epoch(9) synchronization to the network stack, the inpcb database became protected by the network epoch together with static network data (interfaces, addresses, etc). However, inpcbs aren't static in nature: they are created and destroyed all the time, which creates some traffic on the epoch(9) garbage collector. A fairly new feature of uma(9) - Safe Memory Reclamation - allows memory to be freed safely in page-sized batches, with virtually zero overhead compared to uma_zfree(). However, unlike epoch(9), it puts a stricter requirement on access to the protected memory, needing a critical(9) section to access it. Details: - The database is already built on CK lists, thanks to epoch(9). - For write access nothing is changed. - For a lookup in the database an SMR section is now required. Once the desired inpcb is found we need to transition from the SMR section to the r/w lock on the inpcb itself, with a check that the inpcb isn't yet freed. This requires some complexity, since the SMR section itself is a critical(9) section. The complexity is hidden from KPI users in inp_smr_lock(). - For an inpcb list traversal (a pcblist sysctl, or broadcast notification) a new KPI is also provided that hides the internals of the database - inp_next(struct inpcb_iterator *). Reviewed by: rrs Differential revision: https://reviews.freebsd.org/D33022
This commit is contained in:
parent
565655f4e3
commit
de2d47842e
20 changed files with 1158 additions and 1335 deletions
|
|
@ -564,15 +564,15 @@ static struct witness_order_list_entry order_lists[] = {
|
||||||
/*
|
/*
|
||||||
* UDP/IP
|
* UDP/IP
|
||||||
*/
|
*/
|
||||||
{ "udp", &lock_class_mtx_sleep },
|
|
||||||
{ "udpinp", &lock_class_rw },
|
{ "udpinp", &lock_class_rw },
|
||||||
|
{ "udp", &lock_class_mtx_sleep },
|
||||||
{ "so_snd", &lock_class_mtx_sleep },
|
{ "so_snd", &lock_class_mtx_sleep },
|
||||||
{ NULL, NULL },
|
{ NULL, NULL },
|
||||||
/*
|
/*
|
||||||
* TCP/IP
|
* TCP/IP
|
||||||
*/
|
*/
|
||||||
{ "tcp", &lock_class_mtx_sleep },
|
|
||||||
{ "tcpinp", &lock_class_rw },
|
{ "tcpinp", &lock_class_rw },
|
||||||
|
{ "tcp", &lock_class_mtx_sleep },
|
||||||
{ "so_snd", &lock_class_mtx_sleep },
|
{ "so_snd", &lock_class_mtx_sleep },
|
||||||
{ NULL, NULL },
|
{ NULL, NULL },
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
|
|
@ -854,10 +854,6 @@ ktls_try_toe(struct socket *so, struct ktls_session *tls, int direction)
|
||||||
|
|
||||||
inp = so->so_pcb;
|
inp = so->so_pcb;
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
if (inp->inp_flags2 & INP_FREED) {
|
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
return (ECONNRESET);
|
|
||||||
}
|
|
||||||
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
|
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
return (ECONNRESET);
|
return (ECONNRESET);
|
||||||
|
|
@ -909,10 +905,6 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force,
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
INP_RLOCK(inp);
|
INP_RLOCK(inp);
|
||||||
if (inp->inp_flags2 & INP_FREED) {
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
return (ECONNRESET);
|
|
||||||
}
|
|
||||||
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
|
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
|
||||||
INP_RUNLOCK(inp);
|
INP_RUNLOCK(inp);
|
||||||
return (ECONNRESET);
|
return (ECONNRESET);
|
||||||
|
|
@ -2716,8 +2708,7 @@ ktls_disable_ifnet_help(void *context, int pending __unused)
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
so = inp->inp_socket;
|
so = inp->inp_socket;
|
||||||
MPASS(so != NULL);
|
MPASS(so != NULL);
|
||||||
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
|
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
|
||||||
(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2729,7 +2720,6 @@ ktls_disable_ifnet_help(void *context, int pending __unused)
|
||||||
counter_u64_add(ktls_ifnet_disable_ok, 1);
|
counter_u64_add(ktls_ifnet_disable_ok, 1);
|
||||||
/* ktls_set_tx_mode() drops inp wlock, so recheck flags */
|
/* ktls_set_tx_mode() drops inp wlock, so recheck flags */
|
||||||
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0 &&
|
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0 &&
|
||||||
(inp->inp_flags2 & INP_FREED) == 0 &&
|
|
||||||
(tp = intotcpcb(inp)) != NULL &&
|
(tp = intotcpcb(inp)) != NULL &&
|
||||||
tp->t_fb->tfb_hwtls_change != NULL)
|
tp->t_fb->tfb_hwtls_change != NULL)
|
||||||
(*tp->t_fb->tfb_hwtls_change)(tp, 0);
|
(*tp->t_fb->tfb_hwtls_change)(tp, 0);
|
||||||
|
|
|
||||||
|
|
@ -113,6 +113,7 @@ __FBSDID("$FreeBSD$");
|
||||||
|
|
||||||
#define INPCBLBGROUP_SIZMIN 8
|
#define INPCBLBGROUP_SIZMIN 8
|
||||||
#define INPCBLBGROUP_SIZMAX 256
|
#define INPCBLBGROUP_SIZMAX 256
|
||||||
|
#define INP_FREED 0x00000200 /* See in_pcb.h. */
|
||||||
|
|
||||||
static struct callout ipport_tick_callout;
|
static struct callout ipport_tick_callout;
|
||||||
|
|
||||||
|
|
@ -145,7 +146,6 @@ VNET_DEFINE_STATIC(int, ipport_tcplastcount);
|
||||||
|
|
||||||
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
|
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
|
||||||
|
|
||||||
static void in_pcbremlists(struct inpcb *inp);
|
|
||||||
#ifdef INET
|
#ifdef INET
|
||||||
static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
|
static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
|
||||||
struct in_addr faddr, u_int fport_arg,
|
struct in_addr faddr, u_int fport_arg,
|
||||||
|
|
@ -514,38 +514,43 @@ inpcb_fini(void *mem, int size)
|
||||||
INP_LOCK_DESTROY(inp);
|
INP_LOCK_DESTROY(inp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Make sure it is safe to use hashinit(9) on CK_LIST. */
|
||||||
|
CTASSERT(sizeof(struct inpcbhead) == sizeof(LIST_HEAD(, inpcb)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize an inpcbinfo -- we should be able to reduce the number of
|
* Initialize an inpcbinfo -- we should be able to reduce the number of
|
||||||
* arguments in time.
|
* arguments in time.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
|
in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
|
||||||
struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
|
u_int hash_nelements, int porthash_nelements, char *inpcbzone_name,
|
||||||
char *inpcbzone_name, uma_init inpcbzone_init, u_int hashfields)
|
uma_init inpcbzone_init)
|
||||||
{
|
{
|
||||||
|
|
||||||
porthash_nelements = imin(porthash_nelements, IPPORT_MAX + 1);
|
mtx_init(&pcbinfo->ipi_lock, name, NULL, MTX_DEF);
|
||||||
|
mtx_init(&pcbinfo->ipi_hash_lock, "pcbinfohash", NULL, MTX_DEF);
|
||||||
INP_INFO_LOCK_INIT(pcbinfo, name);
|
|
||||||
INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */
|
|
||||||
INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist");
|
|
||||||
#ifdef VIMAGE
|
#ifdef VIMAGE
|
||||||
pcbinfo->ipi_vnet = curvnet;
|
pcbinfo->ipi_vnet = curvnet;
|
||||||
#endif
|
#endif
|
||||||
pcbinfo->ipi_listhead = listhead;
|
CK_LIST_INIT(&pcbinfo->ipi_listhead);
|
||||||
CK_LIST_INIT(pcbinfo->ipi_listhead);
|
|
||||||
pcbinfo->ipi_count = 0;
|
pcbinfo->ipi_count = 0;
|
||||||
pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
|
pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
|
||||||
&pcbinfo->ipi_hashmask);
|
&pcbinfo->ipi_hashmask);
|
||||||
|
porthash_nelements = imin(porthash_nelements, IPPORT_MAX + 1);
|
||||||
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
|
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
|
||||||
&pcbinfo->ipi_porthashmask);
|
&pcbinfo->ipi_porthashmask);
|
||||||
pcbinfo->ipi_lbgrouphashbase = hashinit(porthash_nelements, M_PCB,
|
pcbinfo->ipi_lbgrouphashbase = hashinit(porthash_nelements, M_PCB,
|
||||||
&pcbinfo->ipi_lbgrouphashmask);
|
&pcbinfo->ipi_lbgrouphashmask);
|
||||||
pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
|
pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
|
||||||
NULL, NULL, inpcbzone_init, inpcb_fini, UMA_ALIGN_PTR, 0);
|
NULL, NULL, inpcbzone_init, inpcb_fini, UMA_ALIGN_PTR,
|
||||||
|
UMA_ZONE_SMR);
|
||||||
uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
|
uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
|
||||||
uma_zone_set_warning(pcbinfo->ipi_zone,
|
uma_zone_set_warning(pcbinfo->ipi_zone,
|
||||||
"kern.ipc.maxsockets limit reached");
|
"kern.ipc.maxsockets limit reached");
|
||||||
|
pcbinfo->ipi_smr = uma_zone_get_smr(pcbinfo->ipi_zone);
|
||||||
|
pcbinfo->ipi_portzone = uma_zcreate(inpcbzone_name,
|
||||||
|
sizeof(struct inpcbport), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
|
||||||
|
uma_zone_set_smr(pcbinfo->ipi_portzone, pcbinfo->ipi_smr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -564,9 +569,8 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
|
||||||
hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
|
hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
|
||||||
pcbinfo->ipi_lbgrouphashmask);
|
pcbinfo->ipi_lbgrouphashmask);
|
||||||
uma_zdestroy(pcbinfo->ipi_zone);
|
uma_zdestroy(pcbinfo->ipi_zone);
|
||||||
INP_LIST_LOCK_DESTROY(pcbinfo);
|
mtx_destroy(&pcbinfo->ipi_hash_lock);
|
||||||
INP_HASH_LOCK_DESTROY(pcbinfo);
|
mtx_destroy(&pcbinfo->ipi_lock);
|
||||||
INP_INFO_LOCK_DESTROY(pcbinfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -580,7 +584,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
error = 0;
|
error = 0;
|
||||||
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
|
inp = uma_zalloc_smr(pcbinfo->ipi_zone, M_NOWAIT);
|
||||||
if (inp == NULL)
|
if (inp == NULL)
|
||||||
return (ENOBUFS);
|
return (ENOBUFS);
|
||||||
bzero(&inp->inp_start_zero, inp_zero_size);
|
bzero(&inp->inp_start_zero, inp_zero_size);
|
||||||
|
|
@ -612,33 +616,38 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
|
||||||
if (V_ip6_v6only)
|
if (V_ip6_v6only)
|
||||||
inp->inp_flags |= IN6P_IPV6_V6ONLY;
|
inp->inp_flags |= IN6P_IPV6_V6ONLY;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
INP_WLOCK(inp);
|
|
||||||
INP_LIST_WLOCK(pcbinfo);
|
|
||||||
CK_LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
|
|
||||||
pcbinfo->ipi_count++;
|
|
||||||
so->so_pcb = (caddr_t)inp;
|
|
||||||
#ifdef INET6
|
|
||||||
if (V_ip6_auto_flowlabel)
|
if (V_ip6_auto_flowlabel)
|
||||||
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
|
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
|
||||||
#endif
|
#endif
|
||||||
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
|
|
||||||
refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Routes in inpcb's can cache L2 as well; they are guaranteed
|
* Routes in inpcb's can cache L2 as well; they are guaranteed
|
||||||
* to be cleaned up.
|
* to be cleaned up.
|
||||||
*/
|
*/
|
||||||
inp->inp_route.ro_flags = RT_LLE_CACHE;
|
inp->inp_route.ro_flags = RT_LLE_CACHE;
|
||||||
INP_LIST_WUNLOCK(pcbinfo);
|
#ifdef TCPHPTS
|
||||||
|
/*
|
||||||
|
* If using hpts lets drop a random number in so
|
||||||
|
* not all new connections fall on the same CPU.
|
||||||
|
*/
|
||||||
|
inp->inp_hpts_cpu = inp->inp_input_cpu = hpts_random_cpu(inp);
|
||||||
|
#endif
|
||||||
|
refcount_init(&inp->inp_refcount, 1); /* Reference from socket. */
|
||||||
|
INP_WLOCK(inp);
|
||||||
|
INP_INFO_WLOCK(pcbinfo);
|
||||||
|
pcbinfo->ipi_count++;
|
||||||
|
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
|
||||||
|
CK_LIST_INSERT_HEAD(&pcbinfo->ipi_listhead, inp, inp_list);
|
||||||
|
INP_INFO_WUNLOCK(pcbinfo);
|
||||||
|
so->so_pcb = inp;
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
|
||||||
#if defined(IPSEC) || defined(IPSEC_SUPPORT) || defined(MAC)
|
#if defined(IPSEC) || defined(IPSEC_SUPPORT) || defined(MAC)
|
||||||
out:
|
out:
|
||||||
if (error != 0) {
|
crfree(inp->inp_cred);
|
||||||
crfree(inp->inp_cred);
|
uma_zfree_smr(pcbinfo->ipi_zone, inp);
|
||||||
uma_zfree(pcbinfo->ipi_zone, inp);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return (error);
|
return (error);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef INET
|
#ifdef INET
|
||||||
|
|
@ -1504,193 +1513,275 @@ in_pcbdetach(struct inpcb *inp)
|
||||||
inp->inp_socket = NULL;
|
inp->inp_socket = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* inpcb hash lookups are protected by SMR section.
|
||||||
|
*
|
||||||
|
* Once desired pcb has been found, switching from SMR section to a pcb
|
||||||
|
* lock is performed with inp_smr_lock(). We can not use INP_(W|R)LOCK
|
||||||
|
* here because SMR is a critical section.
|
||||||
|
* In 99%+ cases inp_smr_lock() would obtain the lock immediately.
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
inp_lock(struct inpcb *inp, const inp_lookup_t lock)
|
||||||
|
{
|
||||||
|
|
||||||
|
lock == INPLOOKUP_RLOCKPCB ?
|
||||||
|
rw_rlock(&inp->inp_lock) : rw_wlock(&inp->inp_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
inp_unlock(struct inpcb *inp, const inp_lookup_t lock)
|
||||||
|
{
|
||||||
|
|
||||||
|
lock == INPLOOKUP_RLOCKPCB ?
|
||||||
|
rw_runlock(&inp->inp_lock) : rw_wunlock(&inp->inp_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
inp_trylock(struct inpcb *inp, const inp_lookup_t lock)
|
||||||
|
{
|
||||||
|
|
||||||
|
return (lock == INPLOOKUP_RLOCKPCB ?
|
||||||
|
rw_try_rlock(&inp->inp_lock) : rw_try_wlock(&inp->inp_lock));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
in_pcbrele(struct inpcb *inp, const inp_lookup_t lock)
|
||||||
|
{
|
||||||
|
|
||||||
|
return (lock == INPLOOKUP_RLOCKPCB ?
|
||||||
|
in_pcbrele_rlocked(inp) : in_pcbrele_wlocked(inp));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
inp_smr_lock(struct inpcb *inp, const inp_lookup_t lock)
|
||||||
|
{
|
||||||
|
|
||||||
|
MPASS(lock == INPLOOKUP_RLOCKPCB || lock == INPLOOKUP_WLOCKPCB);
|
||||||
|
SMR_ASSERT_ENTERED(inp->inp_pcbinfo->ipi_smr);
|
||||||
|
|
||||||
|
if (__predict_true(inp_trylock(inp, lock))) {
|
||||||
|
if (__predict_false(inp->inp_flags & INP_FREED)) {
|
||||||
|
smr_exit(inp->inp_pcbinfo->ipi_smr);
|
||||||
|
inp_unlock(inp, lock);
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
smr_exit(inp->inp_pcbinfo->ipi_smr);
|
||||||
|
return (true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (__predict_true(refcount_acquire_if_not_zero(&inp->inp_refcount))) {
|
||||||
|
smr_exit(inp->inp_pcbinfo->ipi_smr);
|
||||||
|
inp_lock(inp, lock);
|
||||||
|
if (__predict_false(in_pcbrele(inp, lock)))
|
||||||
|
return (false);
|
||||||
|
/*
|
||||||
|
* inp acquired through refcount & lock for sure didn't go
|
||||||
|
* through uma_zfree(). However, it may have already gone
|
||||||
|
* through in_pcbfree() and has another reference, that
|
||||||
|
* prevented its release by our in_pcbrele().
|
||||||
|
*/
|
||||||
|
if (__predict_false(inp->inp_flags & INP_FREED)) {
|
||||||
|
inp_unlock(inp, lock);
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
return (true);
|
||||||
|
} else {
|
||||||
|
smr_exit(inp->inp_pcbinfo->ipi_smr);
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* inp_next() - inpcb hash/list traversal iterator
|
||||||
|
*
|
||||||
|
* Requires initialized struct inpcb_iterator for context.
|
||||||
|
* The structure can be initialized with INP_ITERATOR() or INP_ALL_ITERATOR().
|
||||||
|
*
|
||||||
|
* - Iterator can have either write-lock or read-lock semantics, that can not
|
||||||
|
* be changed later.
|
||||||
|
* - Iterator can iterate either over all pcbs list (INP_ALL_LIST), or through
|
||||||
|
* a single hash slot. Note: only rip_input() does the latter.
|
||||||
|
* - Iterator may have optional bool matching function. The matching function
|
||||||
|
* will be executed for each inpcb in the SMR context, so it can not acquire
|
||||||
|
* locks and can safely access only immutable fields of inpcb.
|
||||||
|
*
|
||||||
|
* A freshly initialized iterator has NULL inpcb in its context and that
|
||||||
|
* means that inp_next() call would return the very first inpcb on the list
|
||||||
|
* locked with desired semantic. In all following calls the context pointer
|
||||||
|
* shall hold the current inpcb pointer. The KPI user is not supposed to
|
||||||
|
* unlock the current inpcb! Upon end of traversal inp_next() will return NULL
|
||||||
|
* and write NULL to its context. After end of traversal an iterator can be
|
||||||
|
* reused.
|
||||||
|
*
|
||||||
|
* List traversals have the following features/constraints:
|
||||||
|
* - New entries won't be seen, as they are always added to the head of a list.
|
||||||
|
* - Removed entries won't stop traversal as long as they are not added to
|
||||||
|
* a different list. This is violated by in_pcbrehash().
|
||||||
|
*/
|
||||||
|
#define II_LIST_FIRST(ipi, hash) \
|
||||||
|
(((hash) == INP_ALL_LIST) ? \
|
||||||
|
CK_LIST_FIRST(&(ipi)->ipi_listhead) : \
|
||||||
|
CK_LIST_FIRST(&(ipi)->ipi_hashbase[(hash)]))
|
||||||
|
#define II_LIST_NEXT(inp, hash) \
|
||||||
|
(((hash) == INP_ALL_LIST) ? \
|
||||||
|
CK_LIST_NEXT((inp), inp_list) : \
|
||||||
|
CK_LIST_NEXT((inp), inp_hash))
|
||||||
|
#define II_LOCK_ASSERT(inp, lock) \
|
||||||
|
rw_assert(&(inp)->inp_lock, \
|
||||||
|
(lock) == INPLOOKUP_RLOCKPCB ? RA_RLOCKED : RA_WLOCKED )
|
||||||
|
struct inpcb *
|
||||||
|
inp_next(struct inpcb_iterator *ii)
|
||||||
|
{
|
||||||
|
const struct inpcbinfo *ipi = ii->ipi;
|
||||||
|
inp_match_t *match = ii->match;
|
||||||
|
void *ctx = ii->ctx;
|
||||||
|
inp_lookup_t lock = ii->lock;
|
||||||
|
int hash = ii->hash;
|
||||||
|
struct inpcb *inp;
|
||||||
|
|
||||||
|
if (ii->inp == NULL) { /* First call. */
|
||||||
|
smr_enter(ipi->ipi_smr);
|
||||||
|
/* This is unrolled CK_LIST_FOREACH(). */
|
||||||
|
for (inp = II_LIST_FIRST(ipi, hash);
|
||||||
|
inp != NULL;
|
||||||
|
inp = II_LIST_NEXT(inp, hash)) {
|
||||||
|
if (match != NULL && (match)(inp, ctx) == false)
|
||||||
|
continue;
|
||||||
|
if (__predict_true(inp_smr_lock(inp, lock)))
|
||||||
|
break;
|
||||||
|
else {
|
||||||
|
smr_enter(ipi->ipi_smr);
|
||||||
|
MPASS(inp != II_LIST_FIRST(ipi, hash));
|
||||||
|
inp = II_LIST_FIRST(ipi, hash);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inp == NULL)
|
||||||
|
smr_exit(ipi->ipi_smr);
|
||||||
|
else
|
||||||
|
ii->inp = inp;
|
||||||
|
|
||||||
|
return (inp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not a first call. */
|
||||||
|
smr_enter(ipi->ipi_smr);
|
||||||
|
restart:
|
||||||
|
inp = ii->inp;
|
||||||
|
II_LOCK_ASSERT(inp, lock);
|
||||||
|
next:
|
||||||
|
inp = II_LIST_NEXT(inp, hash);
|
||||||
|
if (inp == NULL) {
|
||||||
|
smr_exit(ipi->ipi_smr);
|
||||||
|
goto found;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (match != NULL && (match)(inp, ctx) == false)
|
||||||
|
goto next;
|
||||||
|
|
||||||
|
if (__predict_true(inp_trylock(inp, lock))) {
|
||||||
|
if (__predict_false(inp->inp_flags & INP_FREED)) {
|
||||||
|
/*
|
||||||
|
* Entries are never inserted in middle of a list, thus
|
||||||
|
* as long as we are in SMR, we can continue traversal.
|
||||||
|
* Jumping to 'restart' should yield the same result,
|
||||||
|
* but could produce unnecessary looping. Could this
|
||||||
|
* looping be unbounded?
|
||||||
|
*/
|
||||||
|
inp_unlock(inp, lock);
|
||||||
|
goto next;
|
||||||
|
} else {
|
||||||
|
smr_exit(ipi->ipi_smr);
|
||||||
|
goto found;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Can't obtain lock immediately, thus going hard. Once we exit the
|
||||||
|
* SMR section we can no longer jump to 'next', and our only stable
|
||||||
|
* anchoring point is ii->inp, which we keep locked for this case, so
|
||||||
|
* we jump to 'restart'.
|
||||||
|
*/
|
||||||
|
if (__predict_true(refcount_acquire_if_not_zero(&inp->inp_refcount))) {
|
||||||
|
smr_exit(ipi->ipi_smr);
|
||||||
|
inp_lock(inp, lock);
|
||||||
|
if (__predict_false(in_pcbrele(inp, lock))) {
|
||||||
|
smr_enter(ipi->ipi_smr);
|
||||||
|
goto restart;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* See comment in inp_smr_lock().
|
||||||
|
*/
|
||||||
|
if (__predict_false(inp->inp_flags & INP_FREED)) {
|
||||||
|
inp_unlock(inp, lock);
|
||||||
|
smr_enter(ipi->ipi_smr);
|
||||||
|
goto restart;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
goto next;
|
||||||
|
|
||||||
|
found:
|
||||||
|
inp_unlock(ii->inp, lock);
|
||||||
|
ii->inp = inp;
|
||||||
|
|
||||||
|
return (ii->inp);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* in_pcbref() bumps the reference count on an inpcb in order to maintain
|
* in_pcbref() bumps the reference count on an inpcb in order to maintain
|
||||||
* stability of an inpcb pointer despite the inpcb lock being released. This
|
* stability of an inpcb pointer despite the inpcb lock being released or
|
||||||
* is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
|
* SMR section exited.
|
||||||
* but where the inpcb lock may already held.
|
|
||||||
*
|
*
|
||||||
* in_pcbref() should be used only to provide brief memory stability, and
|
* To free a reference later in_pcbrele_(r|w)locked() must be performed.
|
||||||
* must always be followed by a call to INP_WLOCK() and in_pcbrele() to
|
|
||||||
* garbage collect the inpcb if it has been in_pcbfree()'d from another
|
|
||||||
* context. Until in_pcbrele() has returned that the inpcb is still valid,
|
|
||||||
* lock and rele are the *only* safe operations that may be performed on the
|
|
||||||
* inpcb.
|
|
||||||
*
|
|
||||||
* While the inpcb will not be freed, releasing the inpcb lock means that the
|
|
||||||
* connection's state may change, so the caller should be careful to
|
|
||||||
* revalidate any cached state on reacquiring the lock. Drop the reference
|
|
||||||
* using in_pcbrele().
|
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
in_pcbref(struct inpcb *inp)
|
in_pcbref(struct inpcb *inp)
|
||||||
{
|
{
|
||||||
|
u_int old __diagused;
|
||||||
|
|
||||||
KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
|
old = refcount_acquire(&inp->inp_refcount);
|
||||||
|
KASSERT(old > 0, ("%s: refcount 0", __func__));
|
||||||
refcount_acquire(&inp->inp_refcount);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Drop a refcount on an inpcb elevated using in_pcbref(); because a call to
|
* Drop a refcount on an inpcb elevated using in_pcbref(), potentially
|
||||||
* in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
|
* freeing the pcb if the reference was the very last one.
|
||||||
* return a flag indicating whether or not the inpcb remains valid. If it is
|
|
||||||
* valid, we return with the inpcb lock held.
|
|
||||||
*
|
|
||||||
* Notice that, unlike in_pcbref(), the inpcb lock must be held to drop a
|
|
||||||
* reference on an inpcb. Historically more work was done here (actually, in
|
|
||||||
* in_pcbfree_internal()) but has been moved to in_pcbfree() to avoid the
|
|
||||||
* need for the pcbinfo lock in in_pcbrele(). Deferring the free is entirely
|
|
||||||
* about memory stability (and continued use of the write lock).
|
|
||||||
*/
|
*/
|
||||||
int
|
bool
|
||||||
in_pcbrele_rlocked(struct inpcb *inp)
|
in_pcbrele_rlocked(struct inpcb *inp)
|
||||||
{
|
{
|
||||||
struct inpcbinfo *pcbinfo;
|
|
||||||
|
|
||||||
KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
|
|
||||||
|
|
||||||
INP_RLOCK_ASSERT(inp);
|
INP_RLOCK_ASSERT(inp);
|
||||||
|
|
||||||
if (refcount_release(&inp->inp_refcount) == 0) {
|
if (refcount_release(&inp->inp_refcount) == 0)
|
||||||
/*
|
return (false);
|
||||||
* If the inpcb has been freed, let the caller know, even if
|
|
||||||
* this isn't the last reference.
|
|
||||||
*/
|
|
||||||
if (inp->inp_flags2 & INP_FREED) {
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
return (1);
|
|
||||||
}
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
|
MPASS(inp->inp_flags & INP_FREED);
|
||||||
#ifdef TCPHPTS
|
MPASS(inp->inp_socket == NULL);
|
||||||
if (inp->inp_in_hpts || inp->inp_in_input) {
|
MPASS(inp->inp_in_hpts == 0);
|
||||||
struct tcp_hpts_entry *hpts;
|
MPASS(inp->inp_in_input == 0);
|
||||||
/*
|
|
||||||
* We should not be on the hpts at
|
|
||||||
* this point in any form. we must
|
|
||||||
* get the lock to be sure.
|
|
||||||
*/
|
|
||||||
hpts = tcp_hpts_lock(inp);
|
|
||||||
if (inp->inp_in_hpts)
|
|
||||||
panic("Hpts:%p inp:%p at free still on hpts",
|
|
||||||
hpts, inp);
|
|
||||||
mtx_unlock(&hpts->p_mtx);
|
|
||||||
hpts = tcp_input_lock(inp);
|
|
||||||
if (inp->inp_in_input)
|
|
||||||
panic("Hpts:%p inp:%p at free still on input hpts",
|
|
||||||
hpts, inp);
|
|
||||||
mtx_unlock(&hpts->p_mtx);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
INP_RUNLOCK(inp);
|
INP_RUNLOCK(inp);
|
||||||
pcbinfo = inp->inp_pcbinfo;
|
uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
|
||||||
uma_zfree(pcbinfo->ipi_zone, inp);
|
return (true);
|
||||||
return (1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
bool
|
||||||
in_pcbrele_wlocked(struct inpcb *inp)
|
in_pcbrele_wlocked(struct inpcb *inp)
|
||||||
{
|
{
|
||||||
struct inpcbinfo *pcbinfo;
|
|
||||||
|
|
||||||
KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
|
|
||||||
|
|
||||||
INP_WLOCK_ASSERT(inp);
|
INP_WLOCK_ASSERT(inp);
|
||||||
|
|
||||||
if (refcount_release(&inp->inp_refcount) == 0) {
|
if (refcount_release(&inp->inp_refcount) == 0)
|
||||||
/*
|
return (false);
|
||||||
* If the inpcb has been freed, let the caller know, even if
|
|
||||||
* this isn't the last reference.
|
|
||||||
*/
|
|
||||||
if (inp->inp_flags2 & INP_FREED) {
|
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
return (1);
|
|
||||||
}
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
|
MPASS(inp->inp_flags & INP_FREED);
|
||||||
#ifdef TCPHPTS
|
MPASS(inp->inp_socket == NULL);
|
||||||
if (inp->inp_in_hpts || inp->inp_in_input) {
|
MPASS(inp->inp_in_hpts == 0);
|
||||||
struct tcp_hpts_entry *hpts;
|
MPASS(inp->inp_in_input == 0);
|
||||||
/*
|
|
||||||
* We should not be on the hpts at
|
|
||||||
* this point in any form. we must
|
|
||||||
* get the lock to be sure.
|
|
||||||
*/
|
|
||||||
hpts = tcp_hpts_lock(inp);
|
|
||||||
if (inp->inp_in_hpts)
|
|
||||||
panic("Hpts:%p inp:%p at free still on hpts",
|
|
||||||
hpts, inp);
|
|
||||||
mtx_unlock(&hpts->p_mtx);
|
|
||||||
hpts = tcp_input_lock(inp);
|
|
||||||
if (inp->inp_in_input)
|
|
||||||
panic("Hpts:%p inp:%p at free still on input hpts",
|
|
||||||
hpts, inp);
|
|
||||||
mtx_unlock(&hpts->p_mtx);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
pcbinfo = inp->inp_pcbinfo;
|
uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
|
||||||
uma_zfree(pcbinfo->ipi_zone, inp);
|
return (true);
|
||||||
return (1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
inpcbport_free(epoch_context_t ctx)
|
|
||||||
{
|
|
||||||
struct inpcbport *phd;
|
|
||||||
|
|
||||||
phd = __containerof(ctx, struct inpcbport, phd_epoch_ctx);
|
|
||||||
free(phd, M_PCB);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
in_pcbfree_deferred(epoch_context_t ctx)
|
|
||||||
{
|
|
||||||
struct inpcb *inp;
|
|
||||||
int released __unused;
|
|
||||||
|
|
||||||
inp = __containerof(ctx, struct inpcb, inp_epoch_ctx);
|
|
||||||
|
|
||||||
INP_WLOCK(inp);
|
|
||||||
CURVNET_SET(inp->inp_vnet);
|
|
||||||
#ifdef INET
|
|
||||||
struct ip_moptions *imo = inp->inp_moptions;
|
|
||||||
inp->inp_moptions = NULL;
|
|
||||||
#endif
|
|
||||||
/* XXXRW: Do as much as possible here. */
|
|
||||||
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
|
|
||||||
if (inp->inp_sp != NULL)
|
|
||||||
ipsec_delete_pcbpolicy(inp);
|
|
||||||
#endif
|
|
||||||
#ifdef INET6
|
|
||||||
struct ip6_moptions *im6o = NULL;
|
|
||||||
if (inp->inp_vflag & INP_IPV6PROTO) {
|
|
||||||
ip6_freepcbopts(inp->in6p_outputopts);
|
|
||||||
im6o = inp->in6p_moptions;
|
|
||||||
inp->in6p_moptions = NULL;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if (inp->inp_options)
|
|
||||||
(void)m_free(inp->inp_options);
|
|
||||||
inp->inp_vflag = 0;
|
|
||||||
crfree(inp->inp_cred);
|
|
||||||
#ifdef MAC
|
|
||||||
mac_inpcb_destroy(inp);
|
|
||||||
#endif
|
|
||||||
released = in_pcbrele_wlocked(inp);
|
|
||||||
MPASS(released);
|
|
||||||
#ifdef INET6
|
|
||||||
ip6_freemoptions(im6o);
|
|
||||||
#endif
|
|
||||||
#ifdef INET
|
|
||||||
inp_freemoptions(imo);
|
|
||||||
#endif
|
|
||||||
CURVNET_RESTORE();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -1698,32 +1789,81 @@ in_pcbfree_deferred(epoch_context_t ctx)
|
||||||
* reference count, which should occur only after the inpcb has been detached
|
* reference count, which should occur only after the inpcb has been detached
|
||||||
* from its socket. If another thread holds a temporary reference (acquired
|
* from its socket. If another thread holds a temporary reference (acquired
|
||||||
* using in_pcbref()) then the free is deferred until that reference is
|
* using in_pcbref()) then the free is deferred until that reference is
|
||||||
* released using in_pcbrele(), but the inpcb is still unlocked. Almost all
|
* released using in_pcbrele_(r|w)locked(), but the inpcb is still unlocked.
|
||||||
* work, including removal from global lists, is done in this context, where
|
* Almost all work, including removal from global lists, is done in this
|
||||||
* the pcbinfo lock is held.
|
* context, where the pcbinfo lock is held.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
in_pcbfree(struct inpcb *inp)
|
in_pcbfree(struct inpcb *inp)
|
||||||
{
|
{
|
||||||
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
|
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
|
||||||
|
#ifdef INET
|
||||||
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
|
struct ip_moptions *imo;
|
||||||
KASSERT((inp->inp_flags2 & INP_FREED) == 0,
|
#endif
|
||||||
("%s: called twice for pcb %p", __func__, inp));
|
#ifdef INET6
|
||||||
if (inp->inp_flags2 & INP_FREED) {
|
struct ip6_moptions *im6o;
|
||||||
INP_WUNLOCK(inp);
|
#endif
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
INP_WLOCK_ASSERT(inp);
|
INP_WLOCK_ASSERT(inp);
|
||||||
INP_LIST_WLOCK(pcbinfo);
|
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
|
||||||
in_pcbremlists(inp);
|
KASSERT((inp->inp_flags & INP_FREED) == 0,
|
||||||
INP_LIST_WUNLOCK(pcbinfo);
|
("%s: called twice for pcb %p", __func__, inp));
|
||||||
|
|
||||||
|
inp->inp_flags |= INP_FREED;
|
||||||
|
INP_INFO_WLOCK(pcbinfo);
|
||||||
|
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
|
||||||
|
pcbinfo->ipi_count--;
|
||||||
|
CK_LIST_REMOVE(inp, inp_list);
|
||||||
|
INP_INFO_WUNLOCK(pcbinfo);
|
||||||
|
|
||||||
|
if (inp->inp_flags & INP_INHASHLIST) {
|
||||||
|
struct inpcbport *phd = inp->inp_phd;
|
||||||
|
|
||||||
|
INP_HASH_WLOCK(pcbinfo);
|
||||||
|
/* XXX: Only do if SO_REUSEPORT_LB set? */
|
||||||
|
in_pcbremlbgrouphash(inp);
|
||||||
|
|
||||||
|
CK_LIST_REMOVE(inp, inp_hash);
|
||||||
|
CK_LIST_REMOVE(inp, inp_portlist);
|
||||||
|
if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) {
|
||||||
|
CK_LIST_REMOVE(phd, phd_hash);
|
||||||
|
uma_zfree_smr(pcbinfo->ipi_portzone, phd);
|
||||||
|
}
|
||||||
|
INP_HASH_WUNLOCK(pcbinfo);
|
||||||
|
inp->inp_flags &= ~INP_INHASHLIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
crfree(inp->inp_cred);
|
||||||
RO_INVALIDATE_CACHE(&inp->inp_route);
|
RO_INVALIDATE_CACHE(&inp->inp_route);
|
||||||
/* mark as destruction in progress */
|
#ifdef MAC
|
||||||
inp->inp_flags2 |= INP_FREED;
|
mac_inpcb_destroy(inp);
|
||||||
INP_WUNLOCK(inp);
|
#endif
|
||||||
NET_EPOCH_CALL(in_pcbfree_deferred, &inp->inp_epoch_ctx);
|
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
|
||||||
|
if (inp->inp_sp != NULL)
|
||||||
|
ipsec_delete_pcbpolicy(inp);
|
||||||
|
#endif
|
||||||
|
#ifdef INET
|
||||||
|
if (inp->inp_options)
|
||||||
|
(void)m_free(inp->inp_options);
|
||||||
|
imo = inp->inp_moptions;
|
||||||
|
#endif
|
||||||
|
#ifdef INET6
|
||||||
|
if (inp->inp_vflag & INP_IPV6PROTO) {
|
||||||
|
ip6_freepcbopts(inp->in6p_outputopts);
|
||||||
|
im6o = inp->in6p_moptions;
|
||||||
|
} else
|
||||||
|
im6o = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (__predict_false(in_pcbrele_wlocked(inp) == false)) {
|
||||||
|
INP_WUNLOCK(inp);
|
||||||
|
}
|
||||||
|
#ifdef INET6
|
||||||
|
ip6_freemoptions(im6o);
|
||||||
|
#endif
|
||||||
|
#ifdef INET
|
||||||
|
inp_freemoptions(imo);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -1764,7 +1904,7 @@ in_pcbdrop(struct inpcb *inp)
|
||||||
CK_LIST_REMOVE(inp, inp_portlist);
|
CK_LIST_REMOVE(inp, inp_portlist);
|
||||||
if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) {
|
if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) {
|
||||||
CK_LIST_REMOVE(phd, phd_hash);
|
CK_LIST_REMOVE(phd, phd_hash);
|
||||||
NET_EPOCH_CALL(inpcbport_free, &phd->phd_epoch_ctx);
|
uma_zfree_smr(inp->inp_pcbinfo->ipi_portzone, phd);
|
||||||
}
|
}
|
||||||
INP_HASH_WUNLOCK(inp->inp_pcbinfo);
|
INP_HASH_WUNLOCK(inp->inp_pcbinfo);
|
||||||
inp->inp_flags &= ~INP_INHASHLIST;
|
inp->inp_flags &= ~INP_INHASHLIST;
|
||||||
|
|
@ -1835,7 +1975,7 @@ in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
|
||||||
struct inpcb *inp, *inp_temp;
|
struct inpcb *inp, *inp_temp;
|
||||||
|
|
||||||
INP_INFO_WLOCK(pcbinfo);
|
INP_INFO_WLOCK(pcbinfo);
|
||||||
CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
|
CK_LIST_FOREACH_SAFE(inp, &pcbinfo->ipi_listhead, inp_list, inp_temp) {
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
#ifdef INET6
|
#ifdef INET6
|
||||||
if ((inp->inp_vflag & INP_IPV4) == 0) {
|
if ((inp->inp_vflag & INP_IPV4) == 0) {
|
||||||
|
|
@ -1854,49 +1994,57 @@ in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
INP_INFO_WUNLOCK(pcbinfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
inp_v4_multi_match(const struct inpcb *inp, void *v __unused)
|
||||||
|
{
|
||||||
|
|
||||||
|
if ((inp->inp_vflag & INP_IPV4) && inp->inp_moptions != NULL)
|
||||||
|
return (true);
|
||||||
|
else
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
|
in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
|
||||||
{
|
{
|
||||||
|
struct inpcb_iterator inpi = INP_ITERATOR(pcbinfo, INPLOOKUP_WLOCKPCB,
|
||||||
|
inp_v4_multi_match, NULL);
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
struct in_multi *inm;
|
struct in_multi *inm;
|
||||||
struct in_mfilter *imf;
|
struct in_mfilter *imf;
|
||||||
struct ip_moptions *imo;
|
struct ip_moptions *imo;
|
||||||
|
|
||||||
INP_INFO_WLOCK(pcbinfo);
|
IN_MULTI_LOCK_ASSERT();
|
||||||
CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
|
|
||||||
INP_WLOCK(inp);
|
|
||||||
imo = inp->inp_moptions;
|
|
||||||
if ((inp->inp_vflag & INP_IPV4) &&
|
|
||||||
imo != NULL) {
|
|
||||||
/*
|
|
||||||
* Unselect the outgoing interface if it is being
|
|
||||||
* detached.
|
|
||||||
*/
|
|
||||||
if (imo->imo_multicast_ifp == ifp)
|
|
||||||
imo->imo_multicast_ifp = NULL;
|
|
||||||
|
|
||||||
/*
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
* Drop multicast group membership if we joined
|
INP_WLOCK_ASSERT(inp);
|
||||||
* through the interface being detached.
|
|
||||||
*
|
imo = inp->inp_moptions;
|
||||||
* XXX This can all be deferred to an epoch_call
|
/*
|
||||||
*/
|
* Unselect the outgoing interface if it is being
|
||||||
|
* detached.
|
||||||
|
*/
|
||||||
|
if (imo->imo_multicast_ifp == ifp)
|
||||||
|
imo->imo_multicast_ifp = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Drop multicast group membership if we joined
|
||||||
|
* through the interface being detached.
|
||||||
|
*
|
||||||
|
* XXX This can all be deferred to an epoch_call
|
||||||
|
*/
|
||||||
restart:
|
restart:
|
||||||
IP_MFILTER_FOREACH(imf, &imo->imo_head) {
|
IP_MFILTER_FOREACH(imf, &imo->imo_head) {
|
||||||
if ((inm = imf->imf_inm) == NULL)
|
if ((inm = imf->imf_inm) == NULL)
|
||||||
continue;
|
continue;
|
||||||
if (inm->inm_ifp != ifp)
|
if (inm->inm_ifp != ifp)
|
||||||
continue;
|
continue;
|
||||||
ip_mfilter_remove(&imo->imo_head, imf);
|
ip_mfilter_remove(&imo->imo_head, imf);
|
||||||
IN_MULTI_LOCK_ASSERT();
|
in_leavegroup_locked(inm, NULL);
|
||||||
in_leavegroup_locked(inm, NULL);
|
ip_mfilter_free(imf);
|
||||||
ip_mfilter_free(imf);
|
goto restart;
|
||||||
goto restart;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
}
|
}
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -1918,7 +2066,6 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
|
||||||
|
|
||||||
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
|
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
|
||||||
("%s: invalid lookup flags %d", __func__, lookupflags));
|
("%s: invalid lookup flags %d", __func__, lookupflags));
|
||||||
|
|
||||||
INP_HASH_LOCK_ASSERT(pcbinfo);
|
INP_HASH_LOCK_ASSERT(pcbinfo);
|
||||||
|
|
||||||
if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
|
if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
|
||||||
|
|
@ -2081,8 +2228,9 @@ in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lookup PCB in hash list, using pcbinfo tables. This variation assumes
|
* Lookup PCB in hash list, using pcbinfo tables. This variation assumes
|
||||||
* that the caller has locked the hash list, and will not perform any further
|
* that the caller has either locked the hash list, which usually happens
|
||||||
* locking or reference operations on either the hash list or the connection.
|
* for bind(2) operations, or is in SMR section, which happens when sorting
|
||||||
|
* out incoming packets.
|
||||||
*/
|
*/
|
||||||
static struct inpcb *
|
static struct inpcb *
|
||||||
in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
|
in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
|
||||||
|
|
@ -2223,20 +2371,15 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
|
||||||
{
|
{
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
|
|
||||||
|
smr_enter(pcbinfo->ipi_smr);
|
||||||
inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
|
inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
|
||||||
lookupflags & INPLOOKUP_WILDCARD, ifp, numa_domain);
|
lookupflags & INPLOOKUP_WILDCARD, ifp, numa_domain);
|
||||||
if (inp != NULL) {
|
if (inp != NULL) {
|
||||||
if (lookupflags & INPLOOKUP_WLOCKPCB) {
|
if (__predict_false(inp_smr_lock(inp,
|
||||||
INP_WLOCK(inp);
|
(lookupflags & INPLOOKUP_LOCKMASK)) == false))
|
||||||
} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
} else
|
|
||||||
panic("%s: locking bug", __func__);
|
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
INP_UNLOCK(inp);
|
|
||||||
inp = NULL;
|
inp = NULL;
|
||||||
}
|
} else
|
||||||
}
|
smr_exit(pcbinfo->ipi_smr);
|
||||||
|
|
||||||
return (inp);
|
return (inp);
|
||||||
}
|
}
|
||||||
|
|
@ -2331,11 +2474,10 @@ in_pcbinshash(struct inpcb *inp)
|
||||||
* If none exists, malloc one and tack it on.
|
* If none exists, malloc one and tack it on.
|
||||||
*/
|
*/
|
||||||
if (phd == NULL) {
|
if (phd == NULL) {
|
||||||
phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT);
|
phd = uma_zalloc_smr(pcbinfo->ipi_portzone, M_NOWAIT);
|
||||||
if (phd == NULL) {
|
if (phd == NULL) {
|
||||||
return (ENOBUFS); /* XXX */
|
return (ENOBUFS); /* XXX */
|
||||||
}
|
}
|
||||||
bzero(&phd->phd_epoch_ctx, sizeof(struct epoch_context));
|
|
||||||
phd->phd_port = inp->inp_lport;
|
phd->phd_port = inp->inp_lport;
|
||||||
CK_LIST_INIT(&phd->phd_pcblist);
|
CK_LIST_INIT(&phd->phd_pcblist);
|
||||||
CK_LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
|
CK_LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
|
||||||
|
|
@ -2353,6 +2495,10 @@ in_pcbinshash(struct inpcb *inp)
|
||||||
* changed. NOTE: This does not handle the case of the lport changing (the
|
* changed. NOTE: This does not handle the case of the lport changing (the
|
||||||
* hashed port list would have to be updated as well), so the lport must
|
* hashed port list would have to be updated as well), so the lport must
|
||||||
* not change after in_pcbinshash() has been called.
|
* not change after in_pcbinshash() has been called.
|
||||||
|
*
|
||||||
|
* XXXGL: a race between this function and SMR-protected hash iterator
|
||||||
|
* will lead to iterator traversing a possibly wrong hash list. However,
|
||||||
|
* this race has most likely existed since the change from rwlock to epoch.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
in_pcbrehash(struct inpcb *inp)
|
in_pcbrehash(struct inpcb *inp)
|
||||||
|
|
@ -2381,39 +2527,6 @@ in_pcbrehash(struct inpcb *inp)
|
||||||
CK_LIST_INSERT_HEAD(head, inp, inp_hash);
|
CK_LIST_INSERT_HEAD(head, inp, inp_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Remove PCB from various lists.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
in_pcbremlists(struct inpcb *inp)
|
|
||||||
{
|
|
||||||
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
|
|
||||||
|
|
||||||
INP_WLOCK_ASSERT(inp);
|
|
||||||
INP_LIST_WLOCK_ASSERT(pcbinfo);
|
|
||||||
|
|
||||||
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
|
|
||||||
if (inp->inp_flags & INP_INHASHLIST) {
|
|
||||||
struct inpcbport *phd = inp->inp_phd;
|
|
||||||
|
|
||||||
INP_HASH_WLOCK(pcbinfo);
|
|
||||||
|
|
||||||
/* XXX: Only do if SO_REUSEPORT_LB set? */
|
|
||||||
in_pcbremlbgrouphash(inp);
|
|
||||||
|
|
||||||
CK_LIST_REMOVE(inp, inp_hash);
|
|
||||||
CK_LIST_REMOVE(inp, inp_portlist);
|
|
||||||
if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) {
|
|
||||||
CK_LIST_REMOVE(phd, phd_hash);
|
|
||||||
NET_EPOCH_CALL(inpcbport_free, &phd->phd_epoch_ctx);
|
|
||||||
}
|
|
||||||
INP_HASH_WUNLOCK(pcbinfo);
|
|
||||||
inp->inp_flags &= ~INP_INHASHLIST;
|
|
||||||
}
|
|
||||||
CK_LIST_REMOVE(inp, inp_list);
|
|
||||||
pcbinfo->ipi_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for alternatives when higher level complains
|
* Check for alternatives when higher level complains
|
||||||
* about service problems. For now, invalidate cached
|
* about service problems. For now, invalidate cached
|
||||||
|
|
@ -2548,15 +2661,12 @@ inp_unlock_assert(struct inpcb *inp)
|
||||||
void
|
void
|
||||||
inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
|
inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
|
||||||
{
|
{
|
||||||
|
struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_tcbinfo,
|
||||||
|
INPLOOKUP_WLOCKPCB);
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
|
|
||||||
INP_INFO_WLOCK(&V_tcbinfo);
|
while ((inp = inp_next(&inpi)) != NULL)
|
||||||
CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
|
|
||||||
INP_WLOCK(inp);
|
|
||||||
func(inp, arg);
|
func(inp, arg);
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
}
|
|
||||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct socket *
|
struct socket *
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,9 @@
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
#include <sys/lock.h>
|
#include <sys/lock.h>
|
||||||
|
#include <sys/proc.h>
|
||||||
#include <sys/rwlock.h>
|
#include <sys/rwlock.h>
|
||||||
|
#include <sys/smr.h>
|
||||||
#include <net/vnet.h>
|
#include <net/vnet.h>
|
||||||
#include <vm/uma.h>
|
#include <vm/uma.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -133,32 +135,19 @@ struct in_conninfo {
|
||||||
* struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
|
* struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
|
||||||
* IPv6 sockets. In the case of TCP and UDP, further per-connection state is
|
* IPv6 sockets. In the case of TCP and UDP, further per-connection state is
|
||||||
* hung off of inp_ppcb most of the time. Almost all fields of struct inpcb
|
* hung off of inp_ppcb most of the time. Almost all fields of struct inpcb
|
||||||
* are static after creation or protected by a per-inpcb rwlock, inp_lock. A
|
* are static after creation or protected by a per-inpcb rwlock, inp_lock.
|
||||||
* few fields are protected by multiple locks as indicated in the locking notes
|
|
||||||
* below. For these fields, all of the listed locks must be write-locked for
|
|
||||||
* any modifications. However, these fields can be safely read while any one of
|
|
||||||
* the listed locks are read-locked. This model can permit greater concurrency
|
|
||||||
* for read operations. For example, connections can be looked up while only
|
|
||||||
* holding a read lock on the global pcblist lock. This is important for
|
|
||||||
* performance when attempting to find the connection for a packet given its IP
|
|
||||||
* and port tuple.
|
|
||||||
*
|
*
|
||||||
* One noteworthy exception is that the global pcbinfo lock follows a different
|
* An inpcb database is indexed by an addresses/ports hash as well as a list of
|
||||||
* set of rules in relation to the inp_list field. Rather than being
|
* all pcbs that belong to a certain proto. Database lookups or list traversals
|
||||||
* write-locked for modifications and read-locked for list iterations, it must
|
* are to be performed inside an SMR section. Once the desired PCB is found, its own
|
||||||
* be read-locked during modifications and write-locked during list iterations.
|
* lock is to be obtained and SMR section exited.
|
||||||
* This ensures that the relatively rare global list iterations safely walk a
|
|
||||||
* stable snapshot of connections while allowing more common list modifications
|
|
||||||
* to safely grab the pcblist lock just while adding or removing a connection
|
|
||||||
* from the global list.
|
|
||||||
*
|
*
|
||||||
* Key:
|
* Key:
|
||||||
* (b) - Protected by the hpts lock.
|
* (b) - Protected by the hpts lock.
|
||||||
* (c) - Constant after initialization
|
* (c) - Constant after initialization
|
||||||
* (e) - Protected by the net_epoch_prempt epoch
|
* (e) - Protected by the SMR section
|
||||||
* (i) - Protected by the inpcb lock
|
* (i) - Protected by the inpcb lock
|
||||||
* (p) - Protected by the pcbinfo lock for the inpcb
|
* (p) - Protected by the pcbinfo lock for the inpcb
|
||||||
* (l) - Protected by the pcblist lock for the inpcb
|
|
||||||
* (h) - Protected by the pcbhash lock for the inpcb
|
* (h) - Protected by the pcbhash lock for the inpcb
|
||||||
* (s) - Protected by another subsystem's locks
|
* (s) - Protected by another subsystem's locks
|
||||||
* (x) - Undefined locking
|
* (x) - Undefined locking
|
||||||
|
|
@ -219,17 +208,13 @@ struct in_conninfo {
|
||||||
* socket has been freed), or there may be close(2)-related races.
|
* socket has been freed), or there may be close(2)-related races.
|
||||||
*
|
*
|
||||||
* The inp_vflag field is overloaded, and would otherwise ideally be (c).
|
* The inp_vflag field is overloaded, and would otherwise ideally be (c).
|
||||||
*
|
|
||||||
* TODO: Currently only the TCP stack is leveraging the global pcbinfo lock
|
|
||||||
* read-lock usage during modification, this model can be applied to other
|
|
||||||
* protocols (especially SCTP).
|
|
||||||
*/
|
*/
|
||||||
struct icmp6_filter;
|
struct icmp6_filter;
|
||||||
struct inpcbpolicy;
|
struct inpcbpolicy;
|
||||||
struct m_snd_tag;
|
struct m_snd_tag;
|
||||||
struct inpcb {
|
struct inpcb {
|
||||||
/* Cache line #1 (amd64) */
|
/* Cache line #1 (amd64) */
|
||||||
CK_LIST_ENTRY(inpcb) inp_hash; /* [w](h/i) [r](e/i) hash list */
|
CK_LIST_ENTRY(inpcb) inp_hash; /* (w:h/r:e) hash list */
|
||||||
struct rwlock inp_lock;
|
struct rwlock inp_lock;
|
||||||
/* Cache line #2 (amd64) */
|
/* Cache line #2 (amd64) */
|
||||||
#define inp_start_zero inp_hpts
|
#define inp_start_zero inp_hpts
|
||||||
|
|
@ -311,8 +296,8 @@ struct inpcb {
|
||||||
int in6p_cksum;
|
int in6p_cksum;
|
||||||
short in6p_hops;
|
short in6p_hops;
|
||||||
};
|
};
|
||||||
CK_LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
|
CK_LIST_ENTRY(inpcb) inp_portlist; /* (r:e/w:h) port list */
|
||||||
struct inpcbport *inp_phd; /* (i/h) head of this list */
|
struct inpcbport *inp_phd; /* (r:e/w:h) head of this list */
|
||||||
inp_gen_t inp_gencnt; /* (c) generation count */
|
inp_gen_t inp_gencnt; /* (c) generation count */
|
||||||
void *spare_ptr; /* Spare pointer. */
|
void *spare_ptr; /* Spare pointer. */
|
||||||
rt_gen_t inp_rt_cookie; /* generation for route entry */
|
rt_gen_t inp_rt_cookie; /* generation for route entry */
|
||||||
|
|
@ -320,10 +305,7 @@ struct inpcb {
|
||||||
struct route inp_route;
|
struct route inp_route;
|
||||||
struct route_in6 inp_route6;
|
struct route_in6 inp_route6;
|
||||||
};
|
};
|
||||||
CK_LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
|
CK_LIST_ENTRY(inpcb) inp_list; /* (r:e/w:p) all PCBs for proto */
|
||||||
/* (e[r]) for list iteration */
|
|
||||||
/* (p[w]/l) for addition/removal */
|
|
||||||
struct epoch_context inp_epoch_ctx;
|
|
||||||
};
|
};
|
||||||
#endif /* _KERNEL */
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
|
|
@ -396,80 +378,58 @@ void in_pcbtoxinpcb(const struct inpcb *, struct xinpcb *);
|
||||||
#endif
|
#endif
|
||||||
#endif /* _SYS_SOCKETVAR_H_ */
|
#endif /* _SYS_SOCKETVAR_H_ */
|
||||||
|
|
||||||
struct inpcbport {
|
#ifdef _KERNEL
|
||||||
struct epoch_context phd_epoch_ctx;
|
/*
|
||||||
CK_LIST_ENTRY(inpcbport) phd_hash;
|
|
||||||
struct inpcbhead phd_pcblist;
|
|
||||||
u_short phd_port;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*-
|
|
||||||
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
|
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
|
||||||
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
|
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
|
||||||
*
|
*
|
||||||
* Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and
|
* The pcbs are protected with SMR section and thus all lists in inpcbinfo
|
||||||
* ipi_list_lock:
|
* are CK-lists. Locking is required to insert a pcb into the database. Two
|
||||||
* - ipi_lock covering the global pcb list stability during loop iteration,
|
* locks are provided: one for the hash and one for the global list of pcbs,
|
||||||
* - ipi_hash_lock covering the hashed lookup tables,
|
* as well as overall count and generation count.
|
||||||
* - ipi_list_lock covering mutable global fields (such as the global
|
|
||||||
* pcb list)
|
|
||||||
*
|
|
||||||
* The lock order is:
|
|
||||||
*
|
|
||||||
* ipi_lock (before)
|
|
||||||
* inpcb locks (before)
|
|
||||||
* ipi_list locks (before)
|
|
||||||
*
|
*
|
||||||
* Locking key:
|
* Locking key:
|
||||||
*
|
*
|
||||||
* (c) Constant or nearly constant after initialisation
|
* (c) Constant or nearly constant after initialisation
|
||||||
* (e) - Protected by the net_epoch_prempt epoch
|
* (e) Protected by SMR section
|
||||||
* (g) Locked by ipi_lock
|
* (g) Locked by ipi_lock
|
||||||
* (l) Locked by ipi_list_lock
|
* (h) Locked by ipi_hash_lock
|
||||||
* (h) Read using either net_epoch_preempt or inpcb lock; write requires both ipi_hash_lock and inpcb lock
|
|
||||||
* (x) Synchronisation properties poorly defined
|
|
||||||
*/
|
*/
|
||||||
struct inpcbinfo {
|
struct inpcbinfo {
|
||||||
/*
|
/*
|
||||||
* Global lock protecting inpcb list modification
|
* Global lock protecting inpcb list modification
|
||||||
*/
|
*/
|
||||||
struct mtx ipi_lock;
|
struct mtx ipi_lock;
|
||||||
|
struct inpcbhead ipi_listhead; /* (r:e/w:g) */
|
||||||
/*
|
u_int ipi_count; /* (g) */
|
||||||
* Global list of inpcbs on the protocol.
|
|
||||||
*/
|
|
||||||
struct inpcbhead *ipi_listhead; /* [r](e) [w](g/l) */
|
|
||||||
u_int ipi_count; /* (l) */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generation count -- incremented each time a connection is allocated
|
* Generation count -- incremented each time a connection is allocated
|
||||||
* or freed.
|
* or freed.
|
||||||
*/
|
*/
|
||||||
u_quad_t ipi_gencnt; /* (l) */
|
u_quad_t ipi_gencnt; /* (g) */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fields associated with port lookup and allocation.
|
* Fields associated with port lookup and allocation.
|
||||||
*/
|
*/
|
||||||
u_short ipi_lastport; /* (x) */
|
u_short ipi_lastport; /* (h) */
|
||||||
u_short ipi_lastlow; /* (x) */
|
u_short ipi_lastlow; /* (h) */
|
||||||
u_short ipi_lasthi; /* (x) */
|
u_short ipi_lasthi; /* (h) */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* UMA zone from which inpcbs are allocated for this protocol.
|
* UMA zone from which inpcbs are allocated for this protocol.
|
||||||
*/
|
*/
|
||||||
struct uma_zone *ipi_zone; /* (c) */
|
uma_zone_t ipi_zone; /* (c) */
|
||||||
|
uma_zone_t ipi_portzone; /* (c) */
|
||||||
/*
|
smr_t ipi_smr; /* (c) */
|
||||||
* Global lock protecting modification hash lookup tables.
|
|
||||||
*/
|
|
||||||
struct mtx ipi_hash_lock;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Global hash of inpcbs, hashed by local and foreign addresses and
|
* Global hash of inpcbs, hashed by local and foreign addresses and
|
||||||
* port numbers.
|
* port numbers.
|
||||||
*/
|
*/
|
||||||
struct inpcbhead *ipi_hashbase; /* (h) */
|
struct mtx ipi_hash_lock;
|
||||||
u_long ipi_hashmask; /* (h) */
|
struct inpcbhead *ipi_hashbase; /* (r:e/w:h) */
|
||||||
|
u_long ipi_hashmask; /* (c) */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Global hash of inpcbs, hashed by only local port number.
|
* Global hash of inpcbs, hashed by only local port number.
|
||||||
|
|
@ -481,26 +441,15 @@ struct inpcbinfo {
|
||||||
* Load balance groups used for the SO_REUSEPORT_LB option,
|
* Load balance groups used for the SO_REUSEPORT_LB option,
|
||||||
* hashed by local port.
|
* hashed by local port.
|
||||||
*/
|
*/
|
||||||
struct inpcblbgrouphead *ipi_lbgrouphashbase; /* (h) */
|
struct inpcblbgrouphead *ipi_lbgrouphashbase; /* (r:e/w:h) */
|
||||||
u_long ipi_lbgrouphashmask; /* (h) */
|
u_long ipi_lbgrouphashmask; /* (h) */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Pointer to network stack instance
|
* Pointer to network stack instance
|
||||||
*/
|
*/
|
||||||
struct vnet *ipi_vnet; /* (c) */
|
struct vnet *ipi_vnet; /* (c) */
|
||||||
|
|
||||||
/*
|
|
||||||
* general use 2
|
|
||||||
*/
|
|
||||||
void *ipi_pspare[2];
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Global lock protecting global inpcb list, inpcb count, etc.
|
|
||||||
*/
|
|
||||||
struct rwlock ipi_list_lock;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef _KERNEL
|
|
||||||
/*
|
/*
|
||||||
* Load balance groups used for the SO_REUSEPORT_LB socket option. Each group
|
* Load balance groups used for the SO_REUSEPORT_LB socket option. Each group
|
||||||
* (or unique address:port combination) can be re-used at most
|
* (or unique address:port combination) can be re-used at most
|
||||||
|
|
@ -523,7 +472,7 @@ struct inpcblbgroup {
|
||||||
};
|
};
|
||||||
|
|
||||||
#define INP_LOCK_INIT(inp, d, t) \
|
#define INP_LOCK_INIT(inp, d, t) \
|
||||||
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
|
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
|
||||||
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
|
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
|
||||||
#define INP_RLOCK(inp) rw_rlock(&(inp)->inp_lock)
|
#define INP_RLOCK(inp) rw_rlock(&(inp)->inp_lock)
|
||||||
#define INP_WLOCK(inp) rw_wlock(&(inp)->inp_lock)
|
#define INP_WLOCK(inp) rw_wlock(&(inp)->inp_lock)
|
||||||
|
|
@ -571,51 +520,21 @@ int inp_so_options(const struct inpcb *inp);
|
||||||
|
|
||||||
#endif /* _KERNEL */
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
#define INP_INFO_LOCK_INIT(ipi, d) \
|
#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
|
||||||
mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE)
|
|
||||||
#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock)
|
|
||||||
#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
|
|
||||||
#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock)
|
|
||||||
#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
|
#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
|
||||||
#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
|
#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
|
||||||
#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock))
|
#define INP_INFO_LOCK_ASSERT(ipi) MPASS(SMR_ENTERED((ipi)->ipi_smr) || \
|
||||||
|
mtx_owned(&(ipi)->ipi_lock))
|
||||||
#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
|
#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
|
||||||
#define INP_INFO_WUNLOCK_ASSERT(ipi) \
|
#define INP_INFO_WUNLOCK_ASSERT(ipi) \
|
||||||
mtx_assert(&(ipi)->ipi_lock, MA_NOTOWNED)
|
mtx_assert(&(ipi)->ipi_lock, MA_NOTOWNED)
|
||||||
|
|
||||||
#define INP_LIST_LOCK_INIT(ipi, d) \
|
|
||||||
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
|
|
||||||
#define INP_LIST_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_list_lock)
|
|
||||||
#define INP_LIST_RLOCK(ipi) rw_rlock(&(ipi)->ipi_list_lock)
|
|
||||||
#define INP_LIST_WLOCK(ipi) rw_wlock(&(ipi)->ipi_list_lock)
|
|
||||||
#define INP_LIST_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_list_lock)
|
|
||||||
#define INP_LIST_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_list_lock)
|
|
||||||
#define INP_LIST_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_list_lock)
|
|
||||||
#define INP_LIST_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_list_lock)
|
|
||||||
#define INP_LIST_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_list_lock)
|
|
||||||
#define INP_LIST_LOCK_ASSERT(ipi) \
|
|
||||||
rw_assert(&(ipi)->ipi_list_lock, RA_LOCKED)
|
|
||||||
#define INP_LIST_RLOCK_ASSERT(ipi) \
|
|
||||||
rw_assert(&(ipi)->ipi_list_lock, RA_RLOCKED)
|
|
||||||
#define INP_LIST_WLOCK_ASSERT(ipi) \
|
|
||||||
rw_assert(&(ipi)->ipi_list_lock, RA_WLOCKED)
|
|
||||||
#define INP_LIST_UNLOCK_ASSERT(ipi) \
|
|
||||||
rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
|
|
||||||
|
|
||||||
#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF)
|
|
||||||
#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock)
|
|
||||||
#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
|
#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
|
||||||
#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
|
#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
|
||||||
#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock))
|
#define INP_HASH_LOCK_ASSERT(ipi) MPASS(SMR_ENTERED((ipi)->ipi_smr) || \
|
||||||
#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED);
|
mtx_owned(&(ipi)->ipi_hash_lock))
|
||||||
|
#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, \
|
||||||
#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
|
MA_OWNED)
|
||||||
MTX_DEF | MTX_DUPOK)
|
|
||||||
#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock)
|
|
||||||
|
|
||||||
#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock)
|
|
||||||
#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED)
|
|
||||||
#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock)
|
|
||||||
|
|
||||||
#define INP_PCBHASH(faddr, lport, fport, mask) \
|
#define INP_PCBHASH(faddr, lport, fport, mask) \
|
||||||
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
|
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
|
||||||
|
|
@ -644,7 +563,7 @@ int inp_so_options(const struct inpcb *inp);
|
||||||
#define INP_ANONPORT 0x00000040 /* port chosen for user */
|
#define INP_ANONPORT 0x00000040 /* port chosen for user */
|
||||||
#define INP_RECVIF 0x00000080 /* receive incoming interface */
|
#define INP_RECVIF 0x00000080 /* receive incoming interface */
|
||||||
#define INP_MTUDISC 0x00000100 /* user can do MTU discovery */
|
#define INP_MTUDISC 0x00000100 /* user can do MTU discovery */
|
||||||
/* 0x000200 unused: was INP_FAITH */
|
/* INP_FREED 0x00000200 private to in_pcb.c */
|
||||||
#define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */
|
#define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */
|
||||||
#define INP_DONTFRAG 0x00000800 /* don't fragment packet */
|
#define INP_DONTFRAG 0x00000800 /* don't fragment packet */
|
||||||
#define INP_BINDANY 0x00001000 /* allow bind to any address */
|
#define INP_BINDANY 0x00001000 /* allow bind to any address */
|
||||||
|
|
@ -682,7 +601,7 @@ int inp_so_options(const struct inpcb *inp);
|
||||||
#define INP_MBUF_ACKCMP 0x00000002 /* TCP mbuf ack compression ok */
|
#define INP_MBUF_ACKCMP 0x00000002 /* TCP mbuf ack compression ok */
|
||||||
/* 0x00000004 */
|
/* 0x00000004 */
|
||||||
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
|
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
|
||||||
#define INP_FREED 0x00000010 /* inp itself is not valid */
|
/* 0x00000010 */
|
||||||
#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */
|
#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */
|
||||||
#define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */
|
#define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */
|
||||||
#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
|
#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
|
||||||
|
|
@ -702,15 +621,19 @@ int inp_so_options(const struct inpcb *inp);
|
||||||
#define INP_2PCP_BASE INP_2PCP_BIT0
|
#define INP_2PCP_BASE INP_2PCP_BIT0
|
||||||
#define INP_2PCP_MASK (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2)
|
#define INP_2PCP_MASK (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2)
|
||||||
#define INP_2PCP_SHIFT 18 /* shift PCP field in/out of inp_flags2 */
|
#define INP_2PCP_SHIFT 18 /* shift PCP field in/out of inp_flags2 */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Flags passed to in_pcblookup*() functions.
|
* Flags passed to in_pcblookup*(), inp_smr_lock() and inp_next().
|
||||||
*/
|
*/
|
||||||
#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */
|
typedef enum {
|
||||||
#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */
|
INPLOOKUP_WILDCARD = 0x00000001, /* Allow wildcard sockets. */
|
||||||
#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */
|
INPLOOKUP_RLOCKPCB = 0x00000002, /* Return inpcb read-locked. */
|
||||||
|
INPLOOKUP_WLOCKPCB = 0x00000004, /* Return inpcb write-locked. */
|
||||||
|
} inp_lookup_t;
|
||||||
|
|
||||||
#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \
|
#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \
|
||||||
INPLOOKUP_WLOCKPCB)
|
INPLOOKUP_WLOCKPCB)
|
||||||
|
#define INPLOOKUP_LOCKMASK (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)
|
||||||
|
|
||||||
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
|
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
|
||||||
|
|
||||||
|
|
@ -718,13 +641,6 @@ int inp_so_options(const struct inpcb *inp);
|
||||||
|
|
||||||
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
|
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
|
||||||
|
|
||||||
/*
|
|
||||||
* Constants for pcbinfo.ipi_hashfields.
|
|
||||||
*/
|
|
||||||
#define IPI_HASHFIELDS_NONE 0
|
|
||||||
#define IPI_HASHFIELDS_2TUPLE 1
|
|
||||||
#define IPI_HASHFIELDS_4TUPLE 2
|
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
VNET_DECLARE(int, ipport_reservedhigh);
|
VNET_DECLARE(int, ipport_reservedhigh);
|
||||||
VNET_DECLARE(int, ipport_reservedlow);
|
VNET_DECLARE(int, ipport_reservedlow);
|
||||||
|
|
@ -755,8 +671,8 @@ VNET_DECLARE(int, ipport_tcpallocs);
|
||||||
#define V_ipport_tcpallocs VNET(ipport_tcpallocs)
|
#define V_ipport_tcpallocs VNET(ipport_tcpallocs)
|
||||||
|
|
||||||
void in_pcbinfo_destroy(struct inpcbinfo *);
|
void in_pcbinfo_destroy(struct inpcbinfo *);
|
||||||
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
|
void in_pcbinfo_init(struct inpcbinfo *, const char *, u_int, int, char *,
|
||||||
int, int, char *, uma_init, u_int);
|
uma_init);
|
||||||
|
|
||||||
int in_pcbbind_check_bindmulti(const struct inpcb *ni,
|
int in_pcbbind_check_bindmulti(const struct inpcb *ni,
|
||||||
const struct inpcb *oi);
|
const struct inpcb *oi);
|
||||||
|
|
@ -788,8 +704,37 @@ void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
|
||||||
int, struct inpcb *(*)(struct inpcb *, int));
|
int, struct inpcb *(*)(struct inpcb *, int));
|
||||||
void in_pcbref(struct inpcb *);
|
void in_pcbref(struct inpcb *);
|
||||||
void in_pcbrehash(struct inpcb *);
|
void in_pcbrehash(struct inpcb *);
|
||||||
int in_pcbrele_rlocked(struct inpcb *);
|
bool in_pcbrele_rlocked(struct inpcb *);
|
||||||
int in_pcbrele_wlocked(struct inpcb *);
|
bool in_pcbrele_wlocked(struct inpcb *);
|
||||||
|
|
||||||
|
typedef bool inp_match_t(const struct inpcb *, void *);
|
||||||
|
struct inpcb_iterator {
|
||||||
|
const struct inpcbinfo *ipi;
|
||||||
|
struct inpcb *inp;
|
||||||
|
inp_match_t *match;
|
||||||
|
void *ctx;
|
||||||
|
int hash;
|
||||||
|
#define INP_ALL_LIST -1
|
||||||
|
const inp_lookup_t lock;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Note: sparse initializers guarantee .inp = NULL. */
|
||||||
|
#define INP_ITERATOR(_ipi, _lock, _match, _ctx) \
|
||||||
|
{ \
|
||||||
|
.ipi = (_ipi), \
|
||||||
|
.lock = (_lock), \
|
||||||
|
.hash = INP_ALL_LIST, \
|
||||||
|
.match = (_match), \
|
||||||
|
.ctx = (_ctx), \
|
||||||
|
}
|
||||||
|
#define INP_ALL_ITERATOR(_ipi, _lock) \
|
||||||
|
{ \
|
||||||
|
.ipi = (_ipi), \
|
||||||
|
.lock = (_lock), \
|
||||||
|
.hash = INP_ALL_LIST, \
|
||||||
|
}
|
||||||
|
|
||||||
|
struct inpcb *inp_next(struct inpcb_iterator *);
|
||||||
void in_losing(struct inpcb *);
|
void in_losing(struct inpcb *);
|
||||||
void in_pcbsetsolabel(struct socket *so);
|
void in_pcbsetsolabel(struct socket *so);
|
||||||
int in_getpeeraddr(struct socket *so, struct sockaddr **nam);
|
int in_getpeeraddr(struct socket *so, struct sockaddr **nam);
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,7 @@
|
||||||
* Definitions shared between netinet/in_pcb.c and netinet6/in6_pcb.c
|
* Definitions shared between netinet/in_pcb.c and netinet6/in6_pcb.c
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
bool inp_smr_lock(struct inpcb *, const inp_lookup_t);
|
||||||
int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *,
|
int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *,
|
||||||
struct ucred *, int);
|
struct ucred *, int);
|
||||||
int in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa,
|
int in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa,
|
||||||
|
|
@ -52,4 +53,10 @@ int in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa,
|
||||||
struct inpcb * in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_short,
|
struct inpcb * in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_short,
|
||||||
int, struct ucred *);
|
int, struct ucred *);
|
||||||
|
|
||||||
|
struct inpcbport {
|
||||||
|
struct inpcbhead phd_pcblist;
|
||||||
|
CK_LIST_ENTRY(inpcbport) phd_hash;
|
||||||
|
u_short phd_port;
|
||||||
|
};
|
||||||
|
|
||||||
#endif /* !_NETINET_IN_PCB_VAR_H_ */
|
#endif /* !_NETINET_IN_PCB_VAR_H_ */
|
||||||
|
|
|
||||||
|
|
@ -111,10 +111,7 @@ __FBSDID("$FreeBSD$");
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Internal variables. */
|
/* Internal variables. */
|
||||||
VNET_DEFINE_STATIC(struct inpcbhead, divcb);
|
|
||||||
VNET_DEFINE_STATIC(struct inpcbinfo, divcbinfo);
|
VNET_DEFINE_STATIC(struct inpcbinfo, divcbinfo);
|
||||||
|
|
||||||
#define V_divcb VNET(divcb)
|
|
||||||
#define V_divcbinfo VNET(divcbinfo)
|
#define V_divcbinfo VNET(divcbinfo)
|
||||||
|
|
||||||
static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */
|
static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */
|
||||||
|
|
@ -154,8 +151,7 @@ div_init(void)
|
||||||
* allocate one-entry hash lists than it is to check all over the
|
* allocate one-entry hash lists than it is to check all over the
|
||||||
* place for hashbase == NULL.
|
* place for hashbase == NULL.
|
||||||
*/
|
*/
|
||||||
in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb",
|
in_pcbinfo_init(&V_divcbinfo, "div", 1, 1, "divcb", div_inpcb_init);
|
||||||
div_inpcb_init, IPI_HASHFIELDS_NONE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -181,6 +177,14 @@ div_input(struct mbuf **mp, int *offp, int proto)
|
||||||
return (IPPROTO_DONE);
|
return (IPPROTO_DONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
div_port_match(const struct inpcb *inp, void *v)
|
||||||
|
{
|
||||||
|
uint16_t nport = *(uint16_t *)v;
|
||||||
|
|
||||||
|
return (inp->inp_lport == nport);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Divert a packet by passing it up to the divert socket at port 'port'.
|
* Divert a packet by passing it up to the divert socket at port 'port'.
|
||||||
*
|
*
|
||||||
|
|
@ -195,6 +199,8 @@ divert_packet(struct mbuf *m, bool incoming)
|
||||||
struct socket *sa;
|
struct socket *sa;
|
||||||
u_int16_t nport;
|
u_int16_t nport;
|
||||||
struct sockaddr_in divsrc;
|
struct sockaddr_in divsrc;
|
||||||
|
struct inpcb_iterator inpi = INP_ITERATOR(&V_divcbinfo,
|
||||||
|
INPLOOKUP_RLOCKPCB, div_port_match, &nport);
|
||||||
struct m_tag *mtag;
|
struct m_tag *mtag;
|
||||||
|
|
||||||
NET_EPOCH_ASSERT();
|
NET_EPOCH_ASSERT();
|
||||||
|
|
@ -288,27 +294,20 @@ divert_packet(struct mbuf *m, bool incoming)
|
||||||
|
|
||||||
/* Put packet on socket queue, if any */
|
/* Put packet on socket queue, if any */
|
||||||
sa = NULL;
|
sa = NULL;
|
||||||
|
/* nport is inp_next's context. */
|
||||||
nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info));
|
nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info));
|
||||||
CK_LIST_FOREACH(inp, &V_divcb, inp_list) {
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
|
sa = inp->inp_socket;
|
||||||
|
SOCKBUF_LOCK(&sa->so_rcv);
|
||||||
|
if (sbappendaddr_locked(&sa->so_rcv,
|
||||||
|
(struct sockaddr *)&divsrc, m, NULL) == 0) {
|
||||||
|
soroverflow_locked(sa);
|
||||||
|
sa = NULL; /* force mbuf reclaim below */
|
||||||
|
} else
|
||||||
|
sorwakeup_locked(sa);
|
||||||
/* XXX why does only one socket match? */
|
/* XXX why does only one socket match? */
|
||||||
if (inp->inp_lport == nport) {
|
INP_RUNLOCK(inp);
|
||||||
INP_RLOCK(inp);
|
break;
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
sa = inp->inp_socket;
|
|
||||||
SOCKBUF_LOCK(&sa->so_rcv);
|
|
||||||
if (sbappendaddr_locked(&sa->so_rcv,
|
|
||||||
(struct sockaddr *)&divsrc, m,
|
|
||||||
(struct mbuf *)0) == 0) {
|
|
||||||
soroverflow_locked(sa);
|
|
||||||
sa = NULL; /* force mbuf reclaim below */
|
|
||||||
} else
|
|
||||||
sorwakeup_locked(sa);
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (sa == NULL) {
|
if (sa == NULL) {
|
||||||
m_freem(m);
|
m_freem(m);
|
||||||
|
|
@ -603,14 +602,10 @@ div_attach(struct socket *so, int proto, struct thread *td)
|
||||||
error = soreserve(so, div_sendspace, div_recvspace);
|
error = soreserve(so, div_sendspace, div_recvspace);
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
INP_INFO_WLOCK(&V_divcbinfo);
|
|
||||||
error = in_pcballoc(so, &V_divcbinfo);
|
error = in_pcballoc(so, &V_divcbinfo);
|
||||||
if (error) {
|
if (error)
|
||||||
INP_INFO_WUNLOCK(&V_divcbinfo);
|
|
||||||
return error;
|
return error;
|
||||||
}
|
|
||||||
inp = (struct inpcb *)so->so_pcb;
|
inp = (struct inpcb *)so->so_pcb;
|
||||||
INP_INFO_WUNLOCK(&V_divcbinfo);
|
|
||||||
inp->inp_ip_p = proto;
|
inp->inp_ip_p = proto;
|
||||||
inp->inp_vflag |= INP_IPV4;
|
inp->inp_vflag |= INP_IPV4;
|
||||||
inp->inp_flags |= INP_HDRINCL;
|
inp->inp_flags |= INP_HDRINCL;
|
||||||
|
|
@ -625,11 +620,9 @@ div_detach(struct socket *so)
|
||||||
|
|
||||||
inp = sotoinpcb(so);
|
inp = sotoinpcb(so);
|
||||||
KASSERT(inp != NULL, ("div_detach: inp == NULL"));
|
KASSERT(inp != NULL, ("div_detach: inp == NULL"));
|
||||||
INP_INFO_WLOCK(&V_divcbinfo);
|
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
in_pcbdetach(inp);
|
in_pcbdetach(inp);
|
||||||
in_pcbfree(inp);
|
in_pcbfree(inp);
|
||||||
INP_INFO_WUNLOCK(&V_divcbinfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
@ -652,13 +645,11 @@ div_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
|
||||||
if (nam->sa_len != sizeof(struct sockaddr_in))
|
if (nam->sa_len != sizeof(struct sockaddr_in))
|
||||||
return EINVAL;
|
return EINVAL;
|
||||||
((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
|
((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
|
||||||
INP_INFO_WLOCK(&V_divcbinfo);
|
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
INP_HASH_WLOCK(&V_divcbinfo);
|
INP_HASH_WLOCK(&V_divcbinfo);
|
||||||
error = in_pcbbind(inp, nam, td->td_ucred);
|
error = in_pcbbind(inp, nam, td->td_ucred);
|
||||||
INP_HASH_WUNLOCK(&V_divcbinfo);
|
INP_HASH_WUNLOCK(&V_divcbinfo);
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
INP_INFO_WUNLOCK(&V_divcbinfo);
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -697,8 +688,9 @@ div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
|
||||||
static int
|
static int
|
||||||
div_pcblist(SYSCTL_HANDLER_ARGS)
|
div_pcblist(SYSCTL_HANDLER_ARGS)
|
||||||
{
|
{
|
||||||
|
struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_divcbinfo,
|
||||||
|
INPLOOKUP_RLOCKPCB);
|
||||||
struct xinpgen xig;
|
struct xinpgen xig;
|
||||||
struct epoch_tracker et;
|
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
|
|
@ -726,21 +718,18 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
NET_EPOCH_ENTER(et);
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead);
|
|
||||||
inp != NULL;
|
|
||||||
inp = CK_LIST_NEXT(inp, inp_list)) {
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
if (inp->inp_gencnt <= xig.xig_gen) {
|
if (inp->inp_gencnt <= xig.xig_gen) {
|
||||||
struct xinpcb xi;
|
struct xinpcb xi;
|
||||||
|
|
||||||
in_pcbtoxinpcb(inp, &xi);
|
in_pcbtoxinpcb(inp, &xi);
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
||||||
} else
|
if (error) {
|
||||||
INP_RUNLOCK(inp);
|
INP_RUNLOCK(inp);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
|
|
||||||
if (!error) {
|
if (!error) {
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
|
|
@ -223,25 +223,11 @@ static void
|
||||||
in_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
|
in_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
|
||||||
const struct sockaddr *sa, void *ctx)
|
const struct sockaddr *sa, void *ctx)
|
||||||
{
|
{
|
||||||
struct epoch_tracker et;
|
|
||||||
struct gre_socket *gs;
|
struct gre_socket *gs;
|
||||||
struct gre_softc *sc;
|
struct gre_softc *sc;
|
||||||
in_addr_t dst;
|
in_addr_t dst;
|
||||||
|
|
||||||
NET_EPOCH_ENTER(et);
|
NET_EPOCH_ASSERT();
|
||||||
/*
|
|
||||||
* udp_append() holds reference to inp, it is safe to check
|
|
||||||
* inp_flags2 without INP_RLOCK().
|
|
||||||
* If socket was closed before we have entered NET_EPOCH section,
|
|
||||||
* INP_FREED flag should be set. Otherwise it should be safe to
|
|
||||||
* make access to ctx data, because gre_so will be freed by
|
|
||||||
* gre_sofree() via NET_EPOCH_CALL().
|
|
||||||
*/
|
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
m_freem(m);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
gs = (struct gre_socket *)ctx;
|
gs = (struct gre_socket *)ctx;
|
||||||
dst = ((const struct sockaddr_in *)sa)->sin_addr.s_addr;
|
dst = ((const struct sockaddr_in *)sa)->sin_addr.s_addr;
|
||||||
|
|
@ -251,11 +237,9 @@ in_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
|
||||||
}
|
}
|
||||||
if (sc != NULL && (GRE2IFP(sc)->if_flags & IFF_UP) != 0){
|
if (sc != NULL && (GRE2IFP(sc)->if_flags & IFF_UP) != 0){
|
||||||
gre_input(m, off + sizeof(struct udphdr), IPPROTO_UDP, sc);
|
gre_input(m, off + sizeof(struct udphdr), IPPROTO_UDP, sc);
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
m_freem(m);
|
m_freem(m);
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
|
||||||
|
|
@ -87,10 +87,7 @@ SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
|
||||||
&VNET_NAME(ip_defttl), 0,
|
&VNET_NAME(ip_defttl), 0,
|
||||||
"Maximum TTL on IP packets");
|
"Maximum TTL on IP packets");
|
||||||
|
|
||||||
VNET_DEFINE(struct inpcbhead, ripcb);
|
|
||||||
VNET_DEFINE(struct inpcbinfo, ripcbinfo);
|
VNET_DEFINE(struct inpcbinfo, ripcbinfo);
|
||||||
|
|
||||||
#define V_ripcb VNET(ripcb)
|
|
||||||
#define V_ripcbinfo VNET(ripcbinfo)
|
#define V_ripcbinfo VNET(ripcbinfo)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -160,7 +157,7 @@ rip_inshash(struct inpcb *inp)
|
||||||
struct inpcbhead *pcbhash;
|
struct inpcbhead *pcbhash;
|
||||||
int hash;
|
int hash;
|
||||||
|
|
||||||
INP_INFO_WLOCK_ASSERT(pcbinfo);
|
INP_HASH_WLOCK_ASSERT(pcbinfo);
|
||||||
INP_WLOCK_ASSERT(inp);
|
INP_WLOCK_ASSERT(inp);
|
||||||
|
|
||||||
if (inp->inp_ip_p != 0 &&
|
if (inp->inp_ip_p != 0 &&
|
||||||
|
|
@ -178,7 +175,7 @@ static void
|
||||||
rip_delhash(struct inpcb *inp)
|
rip_delhash(struct inpcb *inp)
|
||||||
{
|
{
|
||||||
|
|
||||||
INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
|
INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
|
||||||
INP_WLOCK_ASSERT(inp);
|
INP_WLOCK_ASSERT(inp);
|
||||||
|
|
||||||
CK_LIST_REMOVE(inp, inp_hash);
|
CK_LIST_REMOVE(inp, inp_hash);
|
||||||
|
|
@ -212,8 +209,8 @@ void
|
||||||
rip_init(void)
|
rip_init(void)
|
||||||
{
|
{
|
||||||
|
|
||||||
in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
|
in_pcbinfo_init(&V_ripcbinfo, "rip", INP_PCBHASH_RAW_SIZE, 1, "ripcb",
|
||||||
1, "ripcb", rip_inpcb_init, IPI_HASHFIELDS_NONE);
|
rip_inpcb_init);
|
||||||
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
|
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
|
||||||
EVENTHANDLER_PRI_ANY);
|
EVENTHANDLER_PRI_ANY);
|
||||||
}
|
}
|
||||||
|
|
@ -230,47 +227,90 @@ VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL);
|
||||||
|
|
||||||
#ifdef INET
|
#ifdef INET
|
||||||
static int
|
static int
|
||||||
rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
|
rip_append(struct inpcb *inp, struct ip *ip, struct mbuf *m,
|
||||||
struct sockaddr_in *ripsrc)
|
struct sockaddr_in *ripsrc)
|
||||||
{
|
{
|
||||||
int policyfail = 0;
|
struct socket *so = inp->inp_socket;
|
||||||
|
struct mbuf *n, *opts = NULL;
|
||||||
|
|
||||||
INP_LOCK_ASSERT(last);
|
INP_LOCK_ASSERT(inp);
|
||||||
|
|
||||||
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
|
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
|
||||||
/* check AH/ESP integrity. */
|
/* check AH/ESP integrity. */
|
||||||
if (IPSEC_ENABLED(ipv4)) {
|
if (IPSEC_ENABLED(ipv4) && IPSEC_CHECK_POLICY(ipv4, m, inp) != 0)
|
||||||
if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0)
|
return (0);
|
||||||
policyfail = 1;
|
|
||||||
}
|
|
||||||
#endif /* IPSEC */
|
#endif /* IPSEC */
|
||||||
#ifdef MAC
|
#ifdef MAC
|
||||||
if (!policyfail && mac_inpcb_check_deliver(last, n) != 0)
|
if (mac_inpcb_check_deliver(inp, m) != 0)
|
||||||
policyfail = 1;
|
return (0);
|
||||||
#endif
|
#endif
|
||||||
/* Check the minimum TTL for socket. */
|
/* Check the minimum TTL for socket. */
|
||||||
if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl)
|
if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
|
||||||
policyfail = 1;
|
return (0);
|
||||||
if (!policyfail) {
|
|
||||||
struct mbuf *opts = NULL;
|
|
||||||
struct socket *so;
|
|
||||||
|
|
||||||
so = last->inp_socket;
|
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
|
||||||
if ((last->inp_flags & INP_CONTROLOPTS) ||
|
return (0);
|
||||||
(so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
|
|
||||||
ip_savecontrol(last, &opts, ip, n);
|
if ((inp->inp_flags & INP_CONTROLOPTS) ||
|
||||||
SOCKBUF_LOCK(&so->so_rcv);
|
(so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
|
||||||
if (sbappendaddr_locked(&so->so_rcv,
|
ip_savecontrol(inp, &opts, ip, n);
|
||||||
(struct sockaddr *)ripsrc, n, opts) == 0) {
|
SOCKBUF_LOCK(&so->so_rcv);
|
||||||
soroverflow_locked(so);
|
if (sbappendaddr_locked(&so->so_rcv,
|
||||||
m_freem(n);
|
(struct sockaddr *)ripsrc, n, opts) == 0) {
|
||||||
if (opts)
|
soroverflow_locked(so);
|
||||||
m_freem(opts);
|
|
||||||
} else
|
|
||||||
sorwakeup_locked(so);
|
|
||||||
} else
|
|
||||||
m_freem(n);
|
m_freem(n);
|
||||||
return (policyfail);
|
if (opts)
|
||||||
|
m_freem(opts);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
sorwakeup_locked(so);
|
||||||
|
|
||||||
|
return (1);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct rip_inp_match_ctx {
|
||||||
|
struct ip *ip;
|
||||||
|
int proto;
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool
|
||||||
|
rip_inp_match1(const struct inpcb *inp, void *v)
|
||||||
|
{
|
||||||
|
struct rip_inp_match_ctx *ctx = v;
|
||||||
|
|
||||||
|
if (inp->inp_ip_p != ctx->proto)
|
||||||
|
return (false);
|
||||||
|
#ifdef INET6
|
||||||
|
/* XXX inp locking */
|
||||||
|
if ((inp->inp_vflag & INP_IPV4) == 0)
|
||||||
|
return (false);
|
||||||
|
#endif
|
||||||
|
if (inp->inp_laddr.s_addr != ctx->ip->ip_dst.s_addr)
|
||||||
|
return (false);
|
||||||
|
if (inp->inp_faddr.s_addr != ctx->ip->ip_src.s_addr)
|
||||||
|
return (false);
|
||||||
|
return (true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
rip_inp_match2(const struct inpcb *inp, void *v)
|
||||||
|
{
|
||||||
|
struct rip_inp_match_ctx *ctx = v;
|
||||||
|
|
||||||
|
if (inp->inp_ip_p && inp->inp_ip_p != ctx->proto)
|
||||||
|
return (false);
|
||||||
|
#ifdef INET6
|
||||||
|
/* XXX inp locking */
|
||||||
|
if ((inp->inp_vflag & INP_IPV4) == 0)
|
||||||
|
return (false);
|
||||||
|
#endif
|
||||||
|
if (!in_nullhost(inp->inp_laddr) &&
|
||||||
|
!in_hosteq(inp->inp_laddr, ctx->ip->ip_dst))
|
||||||
|
return (false);
|
||||||
|
if (!in_nullhost(inp->inp_faddr) &&
|
||||||
|
!in_hosteq(inp->inp_faddr, ctx->ip->ip_src))
|
||||||
|
return (false);
|
||||||
|
return (true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -280,102 +320,57 @@ rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
|
||||||
int
|
int
|
||||||
rip_input(struct mbuf **mp, int *offp, int proto)
|
rip_input(struct mbuf **mp, int *offp, int proto)
|
||||||
{
|
{
|
||||||
|
struct rip_inp_match_ctx ctx = {
|
||||||
|
.ip = mtod(*mp, struct ip *),
|
||||||
|
.proto = proto,
|
||||||
|
};
|
||||||
|
struct inpcb_iterator inpi = INP_ITERATOR(&V_ripcbinfo,
|
||||||
|
INPLOOKUP_RLOCKPCB, rip_inp_match1, &ctx);
|
||||||
struct ifnet *ifp;
|
struct ifnet *ifp;
|
||||||
struct mbuf *m = *mp;
|
struct mbuf *m = *mp;
|
||||||
struct ip *ip = mtod(m, struct ip *);
|
struct inpcb *inp;
|
||||||
struct inpcb *inp, *last;
|
|
||||||
struct sockaddr_in ripsrc;
|
struct sockaddr_in ripsrc;
|
||||||
int hash;
|
int appended;
|
||||||
|
|
||||||
NET_EPOCH_ASSERT();
|
|
||||||
|
|
||||||
*mp = NULL;
|
*mp = NULL;
|
||||||
|
appended = 0;
|
||||||
|
|
||||||
bzero(&ripsrc, sizeof(ripsrc));
|
bzero(&ripsrc, sizeof(ripsrc));
|
||||||
ripsrc.sin_len = sizeof(ripsrc);
|
ripsrc.sin_len = sizeof(ripsrc);
|
||||||
ripsrc.sin_family = AF_INET;
|
ripsrc.sin_family = AF_INET;
|
||||||
ripsrc.sin_addr = ip->ip_src;
|
ripsrc.sin_addr = ctx.ip->ip_src;
|
||||||
last = NULL;
|
|
||||||
|
|
||||||
ifp = m->m_pkthdr.rcvif;
|
ifp = m->m_pkthdr.rcvif;
|
||||||
|
|
||||||
hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
|
inpi.hash = INP_PCBHASH_RAW(proto, ctx.ip->ip_src.s_addr,
|
||||||
ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
|
ctx.ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
|
||||||
CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
if (inp->inp_ip_p != proto)
|
INP_RLOCK_ASSERT(inp);
|
||||||
continue;
|
if (jailed_without_vnet(inp->inp_cred) &&
|
||||||
#ifdef INET6
|
prison_check_ip4(inp->inp_cred, &ctx.ip->ip_dst) != 0) {
|
||||||
/* XXX inp locking */
|
|
||||||
if ((inp->inp_vflag & INP_IPV4) == 0)
|
|
||||||
continue;
|
|
||||||
#endif
|
|
||||||
if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
|
|
||||||
continue;
|
|
||||||
if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
|
|
||||||
continue;
|
|
||||||
if (last != NULL) {
|
|
||||||
struct mbuf *n;
|
|
||||||
|
|
||||||
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
|
|
||||||
if (n != NULL)
|
|
||||||
(void) rip_append(last, ip, n, &ripsrc);
|
|
||||||
/* XXX count dropped packet */
|
|
||||||
INP_RUNLOCK(last);
|
|
||||||
last = NULL;
|
|
||||||
}
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED))
|
|
||||||
goto skip_1;
|
|
||||||
if (jailed_without_vnet(inp->inp_cred)) {
|
|
||||||
/*
|
/*
|
||||||
* XXX: If faddr was bound to multicast group,
|
* XXX: If faddr was bound to multicast group,
|
||||||
* jailed raw socket will drop datagram.
|
* jailed raw socket will drop datagram.
|
||||||
*/
|
*/
|
||||||
if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
|
continue;
|
||||||
goto skip_1;
|
|
||||||
}
|
}
|
||||||
last = inp;
|
appended += rip_append(inp, ctx.ip, m, &ripsrc);
|
||||||
continue;
|
|
||||||
skip_1:
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
}
|
}
|
||||||
CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
|
|
||||||
if (inp->inp_ip_p && inp->inp_ip_p != proto)
|
|
||||||
continue;
|
|
||||||
#ifdef INET6
|
|
||||||
/* XXX inp locking */
|
|
||||||
if ((inp->inp_vflag & INP_IPV4) == 0)
|
|
||||||
continue;
|
|
||||||
#endif
|
|
||||||
if (!in_nullhost(inp->inp_laddr) &&
|
|
||||||
!in_hosteq(inp->inp_laddr, ip->ip_dst))
|
|
||||||
continue;
|
|
||||||
if (!in_nullhost(inp->inp_faddr) &&
|
|
||||||
!in_hosteq(inp->inp_faddr, ip->ip_src))
|
|
||||||
continue;
|
|
||||||
if (last != NULL) {
|
|
||||||
struct mbuf *n;
|
|
||||||
|
|
||||||
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
|
inpi.hash = 0;
|
||||||
if (n != NULL)
|
inpi.match = rip_inp_match2;
|
||||||
(void) rip_append(last, ip, n, &ripsrc);
|
MPASS(inpi.inp == NULL);
|
||||||
/* XXX count dropped packet */
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
INP_RUNLOCK(last);
|
INP_RLOCK_ASSERT(inp);
|
||||||
last = NULL;
|
if (jailed_without_vnet(inp->inp_cred) &&
|
||||||
}
|
!IN_MULTICAST(ntohl(ctx.ip->ip_dst.s_addr)) &&
|
||||||
INP_RLOCK(inp);
|
prison_check_ip4(inp->inp_cred, &ctx.ip->ip_dst) != 0)
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED))
|
|
||||||
goto skip_2;
|
|
||||||
if (jailed_without_vnet(inp->inp_cred)) {
|
|
||||||
/*
|
/*
|
||||||
* Allow raw socket in jail to receive multicast;
|
* Allow raw socket in jail to receive multicast;
|
||||||
* assume process had PRIV_NETINET_RAW at attach,
|
* assume process had PRIV_NETINET_RAW at attach,
|
||||||
* and fall through into normal filter path if so.
|
* and fall through into normal filter path if so.
|
||||||
*/
|
*/
|
||||||
if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
|
continue;
|
||||||
prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
|
|
||||||
goto skip_2;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* If this raw socket has multicast state, and we
|
* If this raw socket has multicast state, and we
|
||||||
* have received a multicast, check if this socket
|
* have received a multicast, check if this socket
|
||||||
|
|
@ -383,7 +378,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
|
||||||
* the responsibility of the transport layer.
|
* the responsibility of the transport layer.
|
||||||
*/
|
*/
|
||||||
if (inp->inp_moptions != NULL &&
|
if (inp->inp_moptions != NULL &&
|
||||||
IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
|
IN_MULTICAST(ntohl(ctx.ip->ip_dst.s_addr))) {
|
||||||
/*
|
/*
|
||||||
* If the incoming datagram is for IGMP, allow it
|
* If the incoming datagram is for IGMP, allow it
|
||||||
* through unconditionally to the raw socket.
|
* through unconditionally to the raw socket.
|
||||||
|
|
@ -405,7 +400,7 @@ rip_input(struct mbuf **mp, int *offp, int proto)
|
||||||
bzero(&group, sizeof(struct sockaddr_in));
|
bzero(&group, sizeof(struct sockaddr_in));
|
||||||
group.sin_len = sizeof(struct sockaddr_in);
|
group.sin_len = sizeof(struct sockaddr_in);
|
||||||
group.sin_family = AF_INET;
|
group.sin_family = AF_INET;
|
||||||
group.sin_addr = ip->ip_dst;
|
group.sin_addr = ctx.ip->ip_dst;
|
||||||
|
|
||||||
blocked = imo_multi_filter(inp->inp_moptions,
|
blocked = imo_multi_filter(inp->inp_moptions,
|
||||||
ifp,
|
ifp,
|
||||||
|
|
@ -415,27 +410,18 @@ rip_input(struct mbuf **mp, int *offp, int proto)
|
||||||
|
|
||||||
if (blocked != MCAST_PASS) {
|
if (blocked != MCAST_PASS) {
|
||||||
IPSTAT_INC(ips_notmember);
|
IPSTAT_INC(ips_notmember);
|
||||||
goto skip_2;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
last = inp;
|
appended += rip_append(inp, ctx.ip, m, &ripsrc);
|
||||||
continue;
|
|
||||||
skip_2:
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
}
|
|
||||||
if (last != NULL) {
|
|
||||||
if (rip_append(last, ip, m, &ripsrc) != 0)
|
|
||||||
IPSTAT_INC(ips_delivered);
|
|
||||||
INP_RUNLOCK(last);
|
|
||||||
} else {
|
|
||||||
if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
|
|
||||||
IPSTAT_INC(ips_noproto);
|
|
||||||
IPSTAT_DEC(ips_delivered);
|
|
||||||
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
|
|
||||||
} else {
|
|
||||||
m_freem(m);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (appended == 0 &&
|
||||||
|
inetsw[ip_protox[ctx.ip->ip_p]].pr_input == rip_input) {
|
||||||
|
IPSTAT_INC(ips_noproto);
|
||||||
|
IPSTAT_DEC(ips_delivered);
|
||||||
|
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
|
||||||
|
} else
|
||||||
|
m_freem(m);
|
||||||
return (IPPROTO_DONE);
|
return (IPPROTO_DONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -898,18 +884,16 @@ rip_attach(struct socket *so, int proto, struct thread *td)
|
||||||
error = soreserve(so, rip_sendspace, rip_recvspace);
|
error = soreserve(so, rip_sendspace, rip_recvspace);
|
||||||
if (error)
|
if (error)
|
||||||
return (error);
|
return (error);
|
||||||
INP_INFO_WLOCK(&V_ripcbinfo);
|
|
||||||
error = in_pcballoc(so, &V_ripcbinfo);
|
error = in_pcballoc(so, &V_ripcbinfo);
|
||||||
if (error) {
|
if (error)
|
||||||
INP_INFO_WUNLOCK(&V_ripcbinfo);
|
|
||||||
return (error);
|
return (error);
|
||||||
}
|
|
||||||
inp = (struct inpcb *)so->so_pcb;
|
inp = (struct inpcb *)so->so_pcb;
|
||||||
inp->inp_vflag |= INP_IPV4;
|
inp->inp_vflag |= INP_IPV4;
|
||||||
inp->inp_ip_p = proto;
|
inp->inp_ip_p = proto;
|
||||||
inp->inp_ip_ttl = V_ip_defttl;
|
inp->inp_ip_ttl = V_ip_defttl;
|
||||||
|
INP_HASH_WLOCK(&V_ripcbinfo);
|
||||||
rip_inshash(inp);
|
rip_inshash(inp);
|
||||||
INP_INFO_WUNLOCK(&V_ripcbinfo);
|
INP_HASH_WUNLOCK(&V_ripcbinfo);
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
@ -924,9 +908,10 @@ rip_detach(struct socket *so)
|
||||||
KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
|
KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
|
||||||
("rip_detach: not closed"));
|
("rip_detach: not closed"));
|
||||||
|
|
||||||
INP_INFO_WLOCK(&V_ripcbinfo);
|
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
|
INP_HASH_WLOCK(&V_ripcbinfo);
|
||||||
rip_delhash(inp);
|
rip_delhash(inp);
|
||||||
|
INP_HASH_WUNLOCK(&V_ripcbinfo);
|
||||||
if (so == V_ip_mrouter && ip_mrouter_done)
|
if (so == V_ip_mrouter && ip_mrouter_done)
|
||||||
ip_mrouter_done();
|
ip_mrouter_done();
|
||||||
if (ip_rsvp_force_done)
|
if (ip_rsvp_force_done)
|
||||||
|
|
@ -935,7 +920,6 @@ rip_detach(struct socket *so)
|
||||||
ip_rsvp_done();
|
ip_rsvp_done();
|
||||||
in_pcbdetach(inp);
|
in_pcbdetach(inp);
|
||||||
in_pcbfree(inp);
|
in_pcbfree(inp);
|
||||||
INP_INFO_WUNLOCK(&V_ripcbinfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -944,16 +928,16 @@ rip_dodisconnect(struct socket *so, struct inpcb *inp)
|
||||||
struct inpcbinfo *pcbinfo;
|
struct inpcbinfo *pcbinfo;
|
||||||
|
|
||||||
pcbinfo = inp->inp_pcbinfo;
|
pcbinfo = inp->inp_pcbinfo;
|
||||||
INP_INFO_WLOCK(pcbinfo);
|
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
|
INP_HASH_WLOCK(pcbinfo);
|
||||||
rip_delhash(inp);
|
rip_delhash(inp);
|
||||||
inp->inp_faddr.s_addr = INADDR_ANY;
|
inp->inp_faddr.s_addr = INADDR_ANY;
|
||||||
rip_inshash(inp);
|
rip_inshash(inp);
|
||||||
|
INP_HASH_WUNLOCK(pcbinfo);
|
||||||
SOCK_LOCK(so);
|
SOCK_LOCK(so);
|
||||||
so->so_state &= ~SS_ISCONNECTED;
|
so->so_state &= ~SS_ISCONNECTED;
|
||||||
SOCK_UNLOCK(so);
|
SOCK_UNLOCK(so);
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -1019,13 +1003,13 @@ rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
|
||||||
ifa_ifwithaddr_check((struct sockaddr *)addr) == 0))
|
ifa_ifwithaddr_check((struct sockaddr *)addr) == 0))
|
||||||
return (EADDRNOTAVAIL);
|
return (EADDRNOTAVAIL);
|
||||||
|
|
||||||
INP_INFO_WLOCK(&V_ripcbinfo);
|
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
|
INP_HASH_WLOCK(&V_ripcbinfo);
|
||||||
rip_delhash(inp);
|
rip_delhash(inp);
|
||||||
inp->inp_laddr = addr->sin_addr;
|
inp->inp_laddr = addr->sin_addr;
|
||||||
rip_inshash(inp);
|
rip_inshash(inp);
|
||||||
|
INP_HASH_WUNLOCK(&V_ripcbinfo);
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
INP_INFO_WUNLOCK(&V_ripcbinfo);
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1045,14 +1029,14 @@ rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
|
||||||
inp = sotoinpcb(so);
|
inp = sotoinpcb(so);
|
||||||
KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
|
KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
|
||||||
|
|
||||||
INP_INFO_WLOCK(&V_ripcbinfo);
|
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
|
INP_HASH_WLOCK(&V_ripcbinfo);
|
||||||
rip_delhash(inp);
|
rip_delhash(inp);
|
||||||
inp->inp_faddr = addr->sin_addr;
|
inp->inp_faddr = addr->sin_addr;
|
||||||
rip_inshash(inp);
|
rip_inshash(inp);
|
||||||
|
INP_HASH_WUNLOCK(&V_ripcbinfo);
|
||||||
soisconnected(so);
|
soisconnected(so);
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
INP_INFO_WUNLOCK(&V_ripcbinfo);
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1118,8 +1102,9 @@ release:
|
||||||
static int
|
static int
|
||||||
rip_pcblist(SYSCTL_HANDLER_ARGS)
|
rip_pcblist(SYSCTL_HANDLER_ARGS)
|
||||||
{
|
{
|
||||||
|
struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_ripcbinfo,
|
||||||
|
INPLOOKUP_RLOCKPCB);
|
||||||
struct xinpgen xig;
|
struct xinpgen xig;
|
||||||
struct epoch_tracker et;
|
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
|
|
@ -1147,24 +1132,19 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
|
||||||
if (error)
|
if (error)
|
||||||
return (error);
|
return (error);
|
||||||
|
|
||||||
NET_EPOCH_ENTER(et);
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead);
|
|
||||||
inp != NULL;
|
|
||||||
inp = CK_LIST_NEXT(inp, inp_list)) {
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
if (inp->inp_gencnt <= xig.xig_gen &&
|
if (inp->inp_gencnt <= xig.xig_gen &&
|
||||||
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
|
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
|
||||||
struct xinpcb xi;
|
struct xinpcb xi;
|
||||||
|
|
||||||
in_pcbtoxinpcb(inp, &xi);
|
in_pcbtoxinpcb(inp, &xi);
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
||||||
if (error)
|
if (error) {
|
||||||
|
INP_RUNLOCK(inp);
|
||||||
break;
|
break;
|
||||||
} else
|
}
|
||||||
INP_RUNLOCK(inp);
|
}
|
||||||
}
|
}
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
|
|
||||||
if (!error) {
|
if (!error) {
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
|
|
@ -579,28 +579,10 @@ again:
|
||||||
static void
|
static void
|
||||||
tcp_remove_hpts_ref(struct inpcb *inp, struct tcp_hpts_entry *hpts, int line)
|
tcp_remove_hpts_ref(struct inpcb *inp, struct tcp_hpts_entry *hpts, int line)
|
||||||
{
|
{
|
||||||
int32_t add_freed;
|
|
||||||
int32_t ret;
|
int32_t ret;
|
||||||
|
|
||||||
if (inp->inp_flags2 & INP_FREED) {
|
|
||||||
/*
|
|
||||||
* Need to play a special trick so that in_pcbrele_wlocked
|
|
||||||
* does not return 1 when it really should have returned 0.
|
|
||||||
*/
|
|
||||||
add_freed = 1;
|
|
||||||
inp->inp_flags2 &= ~INP_FREED;
|
|
||||||
} else {
|
|
||||||
add_freed = 0;
|
|
||||||
}
|
|
||||||
#ifndef INP_REF_DEBUG
|
|
||||||
ret = in_pcbrele_wlocked(inp);
|
ret = in_pcbrele_wlocked(inp);
|
||||||
#else
|
|
||||||
ret = __in_pcbrele_wlocked(inp, line);
|
|
||||||
#endif
|
|
||||||
KASSERT(ret != 1, ("inpcb:%p release ret 1", inp));
|
KASSERT(ret != 1, ("inpcb:%p release ret 1", inp));
|
||||||
if (add_freed) {
|
|
||||||
inp->inp_flags2 |= INP_FREED;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -1291,8 +1273,7 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
|
||||||
#ifdef VIMAGE
|
#ifdef VIMAGE
|
||||||
CURVNET_SET(inp->inp_vnet);
|
CURVNET_SET(inp->inp_vnet);
|
||||||
#endif
|
#endif
|
||||||
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
|
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
|
||||||
(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
out:
|
out:
|
||||||
hpts->p_inp = NULL;
|
hpts->p_inp = NULL;
|
||||||
if (in_pcbrele_wlocked(inp) == 0) {
|
if (in_pcbrele_wlocked(inp) == 0) {
|
||||||
|
|
@ -1593,8 +1574,7 @@ again:
|
||||||
hpts->p_inp = NULL;
|
hpts->p_inp = NULL;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
|
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
|
||||||
(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
out_now:
|
out_now:
|
||||||
KASSERT(mtx_owned(&hpts->p_mtx) == 0,
|
KASSERT(mtx_owned(&hpts->p_mtx) == 0,
|
||||||
("Hpts:%p owns mtx prior-to lock line:%d",
|
("Hpts:%p owns mtx prior-to lock line:%d",
|
||||||
|
|
|
||||||
|
|
@ -244,8 +244,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
|
||||||
&VNET_NAME(tcp_autorcvbuf_max), 0,
|
&VNET_NAME(tcp_autorcvbuf_max), 0,
|
||||||
"Max size of automatic receive buffer");
|
"Max size of automatic receive buffer");
|
||||||
|
|
||||||
VNET_DEFINE(struct inpcbhead, tcb);
|
|
||||||
#define tcb6 tcb /* for KAME src sync over BSD*'s */
|
|
||||||
VNET_DEFINE(struct inpcbinfo, tcbinfo);
|
VNET_DEFINE(struct inpcbinfo, tcbinfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
|
|
@ -1310,8 +1310,7 @@ tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
|
||||||
|
|
||||||
/* Check if the inp is dead, Jim. */
|
/* Check if the inp is dead, Jim. */
|
||||||
if (tp == NULL ||
|
if (tp == NULL ||
|
||||||
(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) ||
|
(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
|
||||||
(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
return (TCP_LRO_CANNOT);
|
return (TCP_LRO_CANNOT);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1376,6 +1376,8 @@ deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce,
|
||||||
* to the default stack.
|
* to the default stack.
|
||||||
*/
|
*/
|
||||||
if (force && blk->tfb_refcnt) {
|
if (force && blk->tfb_refcnt) {
|
||||||
|
struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_tcbinfo,
|
||||||
|
INPLOOKUP_WLOCKPCB);
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
struct tcpcb *tp;
|
struct tcpcb *tp;
|
||||||
VNET_ITERATOR_DECL(vnet_iter);
|
VNET_ITERATOR_DECL(vnet_iter);
|
||||||
|
|
@ -1385,22 +1387,14 @@ deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce,
|
||||||
VNET_LIST_RLOCK();
|
VNET_LIST_RLOCK();
|
||||||
VNET_FOREACH(vnet_iter) {
|
VNET_FOREACH(vnet_iter) {
|
||||||
CURVNET_SET(vnet_iter);
|
CURVNET_SET(vnet_iter);
|
||||||
INP_INFO_WLOCK(&V_tcbinfo);
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
CK_LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
|
if (inp->inp_flags & INP_TIMEWAIT)
|
||||||
INP_WLOCK(inp);
|
|
||||||
if (inp->inp_flags & INP_TIMEWAIT) {
|
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
tp = intotcpcb(inp);
|
tp = intotcpcb(inp);
|
||||||
if (tp == NULL || tp->t_fb != blk) {
|
if (tp == NULL || tp->t_fb != blk)
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
tcp_switch_back_to_default(tp);
|
tcp_switch_back_to_default(tp);
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
}
|
}
|
||||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
||||||
CURVNET_RESTORE();
|
CURVNET_RESTORE();
|
||||||
}
|
}
|
||||||
VNET_LIST_RUNLOCK();
|
VNET_LIST_RUNLOCK();
|
||||||
|
|
@ -1488,8 +1482,8 @@ tcp_init(void)
|
||||||
"clipped from %d to %d.\n", __func__, oldhashsize,
|
"clipped from %d to %d.\n", __func__, oldhashsize,
|
||||||
hashsize);
|
hashsize);
|
||||||
}
|
}
|
||||||
in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
|
in_pcbinfo_init(&V_tcbinfo, "tcp", hashsize, hashsize,
|
||||||
"tcp_inpcb", tcp_inpcb_init, IPI_HASHFIELDS_4TUPLE);
|
"tcp_inpcb", tcp_inpcb_init);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These have to be type stable for the benefit of the timers.
|
* These have to be type stable for the benefit of the timers.
|
||||||
|
|
@ -1599,9 +1593,9 @@ tcp_destroy(void *unused __unused)
|
||||||
* Sleep to let all tcpcb timers really disappear and cleanup.
|
* Sleep to let all tcpcb timers really disappear and cleanup.
|
||||||
*/
|
*/
|
||||||
for (;;) {
|
for (;;) {
|
||||||
INP_LIST_RLOCK(&V_tcbinfo);
|
INP_INFO_WLOCK(&V_tcbinfo);
|
||||||
n = V_tcbinfo.ipi_count;
|
n = V_tcbinfo.ipi_count;
|
||||||
INP_LIST_RUNLOCK(&V_tcbinfo);
|
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
break;
|
break;
|
||||||
pause("tcpdes", hz / 10);
|
pause("tcpdes", hz / 10);
|
||||||
|
|
@ -2309,6 +2303,8 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
struct tcpcb *tp;
|
struct tcpcb *tp;
|
||||||
VNET_ITERATOR_DECL(vnet_iter);
|
VNET_ITERATOR_DECL(vnet_iter);
|
||||||
|
struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_tcbinfo,
|
||||||
|
INPLOOKUP_WLOCKPCB);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check all active control blocks across all network stacks and change
|
* Check all active control blocks across all network stacks and change
|
||||||
|
|
@ -2318,17 +2314,12 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
|
||||||
VNET_LIST_RLOCK();
|
VNET_LIST_RLOCK();
|
||||||
VNET_FOREACH(vnet_iter) {
|
VNET_FOREACH(vnet_iter) {
|
||||||
CURVNET_SET(vnet_iter);
|
CURVNET_SET(vnet_iter);
|
||||||
INP_INFO_WLOCK(&V_tcbinfo);
|
|
||||||
/*
|
/*
|
||||||
* New connections already part way through being initialised
|
* XXXGL: would new accept(2)d connections use algo being
|
||||||
* with the CC algo we're removing will not race with this code
|
* unloaded?
|
||||||
* because the INP_INFO_WLOCK is held during initialisation. We
|
|
||||||
* therefore don't enter the loop below until the connection
|
|
||||||
* list has stabilised.
|
|
||||||
*/
|
*/
|
||||||
newalgo = CC_DEFAULT_ALGO();
|
newalgo = CC_DEFAULT_ALGO();
|
||||||
CK_LIST_FOREACH(inp, &V_tcb, inp_list) {
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
INP_WLOCK(inp);
|
|
||||||
/* Important to skip tcptw structs. */
|
/* Important to skip tcptw structs. */
|
||||||
if (!(inp->inp_flags & INP_TIMEWAIT) &&
|
if (!(inp->inp_flags & INP_TIMEWAIT) &&
|
||||||
(tp = intotcpcb(inp)) != NULL) {
|
(tp = intotcpcb(inp)) != NULL) {
|
||||||
|
|
@ -2362,7 +2353,6 @@ tcp_ccalgounload(struct cc_algo *unload_algo)
|
||||||
* need to try again.
|
* need to try again.
|
||||||
*/
|
*/
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
||||||
CURVNET_RESTORE();
|
CURVNET_RESTORE();
|
||||||
VNET_LIST_RUNLOCK();
|
VNET_LIST_RUNLOCK();
|
||||||
return (err);
|
return (err);
|
||||||
|
|
@ -2379,9 +2369,7 @@ proceed:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
}
|
}
|
||||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
||||||
CURVNET_RESTORE();
|
CURVNET_RESTORE();
|
||||||
}
|
}
|
||||||
VNET_LIST_RUNLOCK();
|
VNET_LIST_RUNLOCK();
|
||||||
|
|
@ -2399,7 +2387,6 @@ tcp_drop(struct tcpcb *tp, int errno)
|
||||||
struct socket *so = tp->t_inpcb->inp_socket;
|
struct socket *so = tp->t_inpcb->inp_socket;
|
||||||
|
|
||||||
NET_EPOCH_ASSERT();
|
NET_EPOCH_ASSERT();
|
||||||
INP_INFO_LOCK_ASSERT(&V_tcbinfo);
|
|
||||||
INP_WLOCK_ASSERT(tp->t_inpcb);
|
INP_WLOCK_ASSERT(tp->t_inpcb);
|
||||||
|
|
||||||
if (TCPS_HAVERCVDSYN(tp->t_state)) {
|
if (TCPS_HAVERCVDSYN(tp->t_state)) {
|
||||||
|
|
@ -2585,7 +2572,6 @@ tcp_close(struct tcpcb *tp)
|
||||||
struct inpcb *inp = tp->t_inpcb;
|
struct inpcb *inp = tp->t_inpcb;
|
||||||
struct socket *so;
|
struct socket *so;
|
||||||
|
|
||||||
INP_INFO_LOCK_ASSERT(&V_tcbinfo);
|
|
||||||
INP_WLOCK_ASSERT(inp);
|
INP_WLOCK_ASSERT(inp);
|
||||||
|
|
||||||
#ifdef TCP_OFFLOAD
|
#ifdef TCP_OFFLOAD
|
||||||
|
|
@ -2624,6 +2610,8 @@ tcp_close(struct tcpcb *tp)
|
||||||
void
|
void
|
||||||
tcp_drain(void)
|
tcp_drain(void)
|
||||||
{
|
{
|
||||||
|
struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_tcbinfo,
|
||||||
|
INPLOOKUP_WLOCKPCB);
|
||||||
VNET_ITERATOR_DECL(vnet_iter);
|
VNET_ITERATOR_DECL(vnet_iter);
|
||||||
|
|
||||||
if (!do_tcpdrain)
|
if (!do_tcpdrain)
|
||||||
|
|
@ -2643,13 +2631,9 @@ tcp_drain(void)
|
||||||
* where we're really low on mbufs, this is potentially
|
* where we're really low on mbufs, this is potentially
|
||||||
* useful.
|
* useful.
|
||||||
*/
|
*/
|
||||||
INP_INFO_WLOCK(&V_tcbinfo);
|
while ((inpb = inp_next(&inpi)) != NULL) {
|
||||||
CK_LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
|
if (inpb->inp_flags & INP_TIMEWAIT)
|
||||||
INP_WLOCK(inpb);
|
|
||||||
if (inpb->inp_flags & INP_TIMEWAIT) {
|
|
||||||
INP_WUNLOCK(inpb);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
if ((tcpb = intotcpcb(inpb)) != NULL) {
|
if ((tcpb = intotcpcb(inpb)) != NULL) {
|
||||||
tcp_reass_flush(tcpb);
|
tcp_reass_flush(tcpb);
|
||||||
tcp_clean_sackreport(tcpb);
|
tcp_clean_sackreport(tcpb);
|
||||||
|
|
@ -2664,9 +2648,7 @@ tcp_drain(void)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
INP_WUNLOCK(inpb);
|
|
||||||
}
|
}
|
||||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
|
||||||
CURVNET_RESTORE();
|
CURVNET_RESTORE();
|
||||||
}
|
}
|
||||||
VNET_LIST_RUNLOCK_NOSLEEP();
|
VNET_LIST_RUNLOCK_NOSLEEP();
|
||||||
|
|
@ -2685,7 +2667,6 @@ tcp_notify(struct inpcb *inp, int error)
|
||||||
{
|
{
|
||||||
struct tcpcb *tp;
|
struct tcpcb *tp;
|
||||||
|
|
||||||
INP_INFO_LOCK_ASSERT(&V_tcbinfo);
|
|
||||||
INP_WLOCK_ASSERT(inp);
|
INP_WLOCK_ASSERT(inp);
|
||||||
|
|
||||||
if ((inp->inp_flags & INP_TIMEWAIT) ||
|
if ((inp->inp_flags & INP_TIMEWAIT) ||
|
||||||
|
|
@ -2731,9 +2712,10 @@ tcp_notify(struct inpcb *inp, int error)
|
||||||
static int
|
static int
|
||||||
tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||||
{
|
{
|
||||||
struct epoch_tracker et;
|
struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_tcbinfo,
|
||||||
struct inpcb *inp;
|
INPLOOKUP_RLOCKPCB);
|
||||||
struct xinpgen xig;
|
struct xinpgen xig;
|
||||||
|
struct inpcb *inp;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
if (req->newptr != NULL)
|
if (req->newptr != NULL)
|
||||||
|
|
@ -2766,11 +2748,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||||
if (error)
|
if (error)
|
||||||
return (error);
|
return (error);
|
||||||
|
|
||||||
NET_EPOCH_ENTER(et);
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead);
|
|
||||||
inp != NULL;
|
|
||||||
inp = CK_LIST_NEXT(inp, inp_list)) {
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
if (inp->inp_gencnt <= xig.xig_gen) {
|
if (inp->inp_gencnt <= xig.xig_gen) {
|
||||||
int crerr;
|
int crerr;
|
||||||
|
|
||||||
|
|
@ -2791,17 +2769,15 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||||
struct xtcpcb xt;
|
struct xtcpcb xt;
|
||||||
|
|
||||||
tcp_inptoxtp(inp, &xt);
|
tcp_inptoxtp(inp, &xt);
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
error = SYSCTL_OUT(req, &xt, sizeof xt);
|
error = SYSCTL_OUT(req, &xt, sizeof xt);
|
||||||
if (error)
|
if (error) {
|
||||||
|
INP_RUNLOCK(inp);
|
||||||
break;
|
break;
|
||||||
else
|
} else
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
}
|
}
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
|
|
||||||
if (!error) {
|
if (!error) {
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
|
|
@ -908,7 +908,6 @@ VNET_DECLARE(int, tcp_sc_rst_sock_fail);
|
||||||
VNET_DECLARE(int, tcp_sendspace);
|
VNET_DECLARE(int, tcp_sendspace);
|
||||||
VNET_DECLARE(int, tcp_udp_tunneling_overhead);
|
VNET_DECLARE(int, tcp_udp_tunneling_overhead);
|
||||||
VNET_DECLARE(int, tcp_udp_tunneling_port);
|
VNET_DECLARE(int, tcp_udp_tunneling_port);
|
||||||
VNET_DECLARE(struct inpcbhead, tcb);
|
|
||||||
VNET_DECLARE(struct inpcbinfo, tcbinfo);
|
VNET_DECLARE(struct inpcbinfo, tcbinfo);
|
||||||
|
|
||||||
#define V_tcp_do_lrd VNET(tcp_do_lrd)
|
#define V_tcp_do_lrd VNET(tcp_do_lrd)
|
||||||
|
|
@ -917,7 +916,6 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
|
||||||
#define V_tcp_do_newcwv VNET(tcp_do_newcwv)
|
#define V_tcp_do_newcwv VNET(tcp_do_newcwv)
|
||||||
#define V_drop_synfin VNET(drop_synfin)
|
#define V_drop_synfin VNET(drop_synfin)
|
||||||
#define V_path_mtu_discovery VNET(path_mtu_discovery)
|
#define V_path_mtu_discovery VNET(path_mtu_discovery)
|
||||||
#define V_tcb VNET(tcb)
|
|
||||||
#define V_tcbinfo VNET(tcbinfo)
|
#define V_tcbinfo VNET(tcbinfo)
|
||||||
#define V_tcp_abc_l_var VNET(tcp_abc_l_var)
|
#define V_tcp_abc_l_var VNET(tcp_abc_l_var)
|
||||||
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
|
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
|
||||||
|
|
|
||||||
|
|
@ -147,9 +147,7 @@ u_long udp_recvspace = 40 * (1024 +
|
||||||
SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
|
SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
|
||||||
&udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
|
&udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
|
||||||
|
|
||||||
VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */
|
|
||||||
VNET_DEFINE(struct inpcbinfo, udbinfo);
|
VNET_DEFINE(struct inpcbinfo, udbinfo);
|
||||||
VNET_DEFINE(struct inpcbhead, ulitecb);
|
|
||||||
VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
|
VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
|
||||||
VNET_DEFINE_STATIC(uma_zone_t, udpcb_zone);
|
VNET_DEFINE_STATIC(uma_zone_t, udpcb_zone);
|
||||||
#define V_udpcb_zone VNET(udpcb_zone)
|
#define V_udpcb_zone VNET(udpcb_zone)
|
||||||
|
|
@ -211,8 +209,8 @@ udp_init(void)
|
||||||
* Once we can calculate the flowid that way and re-establish
|
* Once we can calculate the flowid that way and re-establish
|
||||||
* a 4-tuple, flip this to 4-tuple.
|
* a 4-tuple, flip this to 4-tuple.
|
||||||
*/
|
*/
|
||||||
in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
|
in_pcbinfo_init(&V_udbinfo, "udp", UDBHASHSIZE, UDBHASHSIZE,
|
||||||
"udp_inpcb", udp_inpcb_init, IPI_HASHFIELDS_2TUPLE);
|
"udp_inpcb", udp_inpcb_init);
|
||||||
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
|
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
|
||||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
|
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
|
||||||
uma_zone_set_max(V_udpcb_zone, maxsockets);
|
uma_zone_set_max(V_udpcb_zone, maxsockets);
|
||||||
|
|
@ -225,9 +223,8 @@ void
|
||||||
udplite_init(void)
|
udplite_init(void)
|
||||||
{
|
{
|
||||||
|
|
||||||
in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
|
in_pcbinfo_init(&V_ulitecbinfo, "udplite", UDBHASHSIZE,
|
||||||
UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init,
|
UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init);
|
||||||
IPI_HASHFIELDS_2TUPLE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -393,6 +390,123 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
udp_multi_match(const struct inpcb *inp, void *v)
|
||||||
|
{
|
||||||
|
struct ip *ip = v;
|
||||||
|
struct udphdr *uh = (struct udphdr *)(ip + 1);
|
||||||
|
|
||||||
|
if (inp->inp_lport != uh->uh_dport)
|
||||||
|
return (false);
|
||||||
|
#ifdef INET6
|
||||||
|
if ((inp->inp_vflag & INP_IPV4) == 0)
|
||||||
|
return (false);
|
||||||
|
#endif
|
||||||
|
if (inp->inp_laddr.s_addr != INADDR_ANY &&
|
||||||
|
inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
|
||||||
|
return (false);
|
||||||
|
if (inp->inp_faddr.s_addr != INADDR_ANY &&
|
||||||
|
inp->inp_faddr.s_addr != ip->ip_src.s_addr)
|
||||||
|
return (false);
|
||||||
|
if (inp->inp_fport != 0 &&
|
||||||
|
inp->inp_fport != uh->uh_sport)
|
||||||
|
return (false);
|
||||||
|
|
||||||
|
return (true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
udp_multi_input(struct mbuf *m, int proto, struct sockaddr_in *udp_in)
|
||||||
|
{
|
||||||
|
struct ip *ip = mtod(m, struct ip *);
|
||||||
|
struct inpcb_iterator inpi = INP_ITERATOR(udp_get_inpcbinfo(proto),
|
||||||
|
INPLOOKUP_RLOCKPCB, udp_multi_match, ip);
|
||||||
|
struct udphdr *uh = (struct udphdr *)(ip + 1);
|
||||||
|
struct inpcb *inp;
|
||||||
|
struct mbuf *n;
|
||||||
|
int appends = 0;
|
||||||
|
|
||||||
|
MPASS(ip->ip_hl == sizeof(struct ip) >> 2);
|
||||||
|
|
||||||
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
|
/*
|
||||||
|
* XXXRW: Because we weren't holding either the inpcb
|
||||||
|
* or the hash lock when we checked for a match
|
||||||
|
* before, we should probably recheck now that the
|
||||||
|
* inpcb lock is held.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Handle socket delivery policy for any-source
|
||||||
|
* and source-specific multicast. [RFC3678]
|
||||||
|
*/
|
||||||
|
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
|
||||||
|
struct ip_moptions *imo;
|
||||||
|
struct sockaddr_in group;
|
||||||
|
int blocked;
|
||||||
|
|
||||||
|
imo = inp->inp_moptions;
|
||||||
|
if (imo == NULL)
|
||||||
|
continue;
|
||||||
|
bzero(&group, sizeof(struct sockaddr_in));
|
||||||
|
group.sin_len = sizeof(struct sockaddr_in);
|
||||||
|
group.sin_family = AF_INET;
|
||||||
|
group.sin_addr = ip->ip_dst;
|
||||||
|
|
||||||
|
blocked = imo_multi_filter(imo, m->m_pkthdr.rcvif,
|
||||||
|
(struct sockaddr *)&group,
|
||||||
|
(struct sockaddr *)&udp_in[0]);
|
||||||
|
if (blocked != MCAST_PASS) {
|
||||||
|
if (blocked == MCAST_NOTGMEMBER)
|
||||||
|
IPSTAT_INC(ips_notmember);
|
||||||
|
if (blocked == MCAST_NOTSMEMBER ||
|
||||||
|
blocked == MCAST_MUTED)
|
||||||
|
UDPSTAT_INC(udps_filtermcast);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
|
||||||
|
if (proto == IPPROTO_UDPLITE)
|
||||||
|
UDPLITE_PROBE(receive, NULL, inp, ip, inp, uh);
|
||||||
|
else
|
||||||
|
UDP_PROBE(receive, NULL, inp, ip, inp, uh);
|
||||||
|
if (udp_append(inp, ip, n, sizeof(struct ip), udp_in)) {
|
||||||
|
INP_RUNLOCK(inp);
|
||||||
|
break;
|
||||||
|
} else
|
||||||
|
appends++;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Don't look for additional matches if this one does
|
||||||
|
* not have either the SO_REUSEPORT or SO_REUSEADDR
|
||||||
|
* socket options set. This heuristic avoids
|
||||||
|
* searching through all pcbs in the common case of a
|
||||||
|
* non-shared port. It assumes that an application
|
||||||
|
* will never clear these options after setting them.
|
||||||
|
*/
|
||||||
|
if ((inp->inp_socket->so_options &
|
||||||
|
(SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0) {
|
||||||
|
INP_RUNLOCK(inp);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_freem(m);
|
||||||
|
|
||||||
|
if (appends == 0) {
|
||||||
|
/*
|
||||||
|
* No matching pcb found; discard datagram. (No need
|
||||||
|
* to send an ICMP Port Unreachable for a broadcast
|
||||||
|
* or multicast datagram.)
|
||||||
|
*/
|
||||||
|
UDPSTAT_INC(udps_noport);
|
||||||
|
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)))
|
||||||
|
UDPSTAT_INC(udps_noportmcast);
|
||||||
|
else
|
||||||
|
UDPSTAT_INC(udps_noportbcast);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (IPPROTO_DONE);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
udp_input(struct mbuf **mp, int *offp, int proto)
|
udp_input(struct mbuf **mp, int *offp, int proto)
|
||||||
{
|
{
|
||||||
|
|
@ -519,140 +633,15 @@ udp_input(struct mbuf **mp, int *offp, int proto)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pcbinfo = udp_get_inpcbinfo(proto);
|
|
||||||
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
|
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
|
||||||
in_broadcast(ip->ip_dst, ifp)) {
|
in_broadcast(ip->ip_dst, ifp))
|
||||||
struct inpcb *last;
|
return (udp_multi_input(m, proto, udp_in));
|
||||||
struct inpcbhead *pcblist;
|
|
||||||
|
|
||||||
NET_EPOCH_ASSERT();
|
pcbinfo = udp_get_inpcbinfo(proto);
|
||||||
|
|
||||||
pcblist = udp_get_pcblist(proto);
|
|
||||||
last = NULL;
|
|
||||||
CK_LIST_FOREACH(inp, pcblist, inp_list) {
|
|
||||||
if (inp->inp_lport != uh->uh_dport)
|
|
||||||
continue;
|
|
||||||
#ifdef INET6
|
|
||||||
if ((inp->inp_vflag & INP_IPV4) == 0)
|
|
||||||
continue;
|
|
||||||
#endif
|
|
||||||
if (inp->inp_laddr.s_addr != INADDR_ANY &&
|
|
||||||
inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
|
|
||||||
continue;
|
|
||||||
if (inp->inp_faddr.s_addr != INADDR_ANY &&
|
|
||||||
inp->inp_faddr.s_addr != ip->ip_src.s_addr)
|
|
||||||
continue;
|
|
||||||
if (inp->inp_fport != 0 &&
|
|
||||||
inp->inp_fport != uh->uh_sport)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
|
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* XXXRW: Because we weren't holding either the inpcb
|
|
||||||
* or the hash lock when we checked for a match
|
|
||||||
* before, we should probably recheck now that the
|
|
||||||
* inpcb lock is held.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Handle socket delivery policy for any-source
|
|
||||||
* and source-specific multicast. [RFC3678]
|
|
||||||
*/
|
|
||||||
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
|
|
||||||
struct ip_moptions *imo;
|
|
||||||
struct sockaddr_in group;
|
|
||||||
int blocked;
|
|
||||||
|
|
||||||
imo = inp->inp_moptions;
|
|
||||||
if (imo == NULL) {
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
bzero(&group, sizeof(struct sockaddr_in));
|
|
||||||
group.sin_len = sizeof(struct sockaddr_in);
|
|
||||||
group.sin_family = AF_INET;
|
|
||||||
group.sin_addr = ip->ip_dst;
|
|
||||||
|
|
||||||
blocked = imo_multi_filter(imo, ifp,
|
|
||||||
(struct sockaddr *)&group,
|
|
||||||
(struct sockaddr *)&udp_in[0]);
|
|
||||||
if (blocked != MCAST_PASS) {
|
|
||||||
if (blocked == MCAST_NOTGMEMBER)
|
|
||||||
IPSTAT_INC(ips_notmember);
|
|
||||||
if (blocked == MCAST_NOTSMEMBER ||
|
|
||||||
blocked == MCAST_MUTED)
|
|
||||||
UDPSTAT_INC(udps_filtermcast);
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (last != NULL) {
|
|
||||||
struct mbuf *n;
|
|
||||||
|
|
||||||
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) !=
|
|
||||||
NULL) {
|
|
||||||
if (proto == IPPROTO_UDPLITE)
|
|
||||||
UDPLITE_PROBE(receive, NULL, last, ip,
|
|
||||||
last, uh);
|
|
||||||
else
|
|
||||||
UDP_PROBE(receive, NULL, last, ip, last,
|
|
||||||
uh);
|
|
||||||
if (udp_append(last, ip, n, iphlen,
|
|
||||||
udp_in)) {
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
goto badunlocked;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Release PCB lock taken on previous pass. */
|
|
||||||
INP_RUNLOCK(last);
|
|
||||||
}
|
|
||||||
last = inp;
|
|
||||||
/*
|
|
||||||
* Don't look for additional matches if this one does
|
|
||||||
* not have either the SO_REUSEPORT or SO_REUSEADDR
|
|
||||||
* socket options set. This heuristic avoids
|
|
||||||
* searching through all pcbs in the common case of a
|
|
||||||
* non-shared port. It assumes that an application
|
|
||||||
* will never clear these options after setting them.
|
|
||||||
*/
|
|
||||||
if ((last->inp_socket->so_options &
|
|
||||||
(SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (last == NULL) {
|
|
||||||
/*
|
|
||||||
* No matching pcb found; discard datagram. (No need
|
|
||||||
* to send an ICMP Port Unreachable for a broadcast
|
|
||||||
* or multicast datagram.)
|
|
||||||
*/
|
|
||||||
UDPSTAT_INC(udps_noport);
|
|
||||||
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)))
|
|
||||||
UDPSTAT_INC(udps_noportmcast);
|
|
||||||
else
|
|
||||||
UDPSTAT_INC(udps_noportbcast);
|
|
||||||
goto badunlocked;
|
|
||||||
}
|
|
||||||
if (proto == IPPROTO_UDPLITE)
|
|
||||||
UDPLITE_PROBE(receive, NULL, last, ip, last, uh);
|
|
||||||
else
|
|
||||||
UDP_PROBE(receive, NULL, last, ip, last, uh);
|
|
||||||
if (udp_append(last, ip, m, iphlen, udp_in) == 0)
|
|
||||||
INP_RUNLOCK(last);
|
|
||||||
return (IPPROTO_DONE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Locate pcb for datagram.
|
* Locate pcb for datagram.
|
||||||
*/
|
*
|
||||||
|
|
||||||
/*
|
|
||||||
* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
|
* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
|
||||||
*/
|
*/
|
||||||
if ((m->m_flags & M_IP_NEXTHOP) &&
|
if ((m->m_flags & M_IP_NEXTHOP) &&
|
||||||
|
|
@ -852,8 +841,9 @@ udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
|
||||||
static int
|
static int
|
||||||
udp_pcblist(SYSCTL_HANDLER_ARGS)
|
udp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||||
{
|
{
|
||||||
|
struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo,
|
||||||
|
INPLOOKUP_RLOCKPCB);
|
||||||
struct xinpgen xig;
|
struct xinpgen xig;
|
||||||
struct epoch_tracker et;
|
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
|
|
@ -881,24 +871,19 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||||
if (error)
|
if (error)
|
||||||
return (error);
|
return (error);
|
||||||
|
|
||||||
NET_EPOCH_ENTER(et);
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead);
|
|
||||||
inp != NULL;
|
|
||||||
inp = CK_LIST_NEXT(inp, inp_list)) {
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
if (inp->inp_gencnt <= xig.xig_gen &&
|
if (inp->inp_gencnt <= xig.xig_gen &&
|
||||||
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
|
cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
|
||||||
struct xinpcb xi;
|
struct xinpcb xi;
|
||||||
|
|
||||||
in_pcbtoxinpcb(inp, &xi);
|
in_pcbtoxinpcb(inp, &xi);
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
||||||
if (error)
|
if (error) {
|
||||||
|
INP_RUNLOCK(inp);
|
||||||
break;
|
break;
|
||||||
} else
|
}
|
||||||
INP_RUNLOCK(inp);
|
}
|
||||||
}
|
}
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
|
|
||||||
if (!error) {
|
if (!error) {
|
||||||
/*
|
/*
|
||||||
|
|
@ -1284,15 +1269,16 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
|
||||||
laddr = inp->inp_laddr;
|
laddr = inp->inp_laddr;
|
||||||
lport = inp->inp_lport;
|
lport = inp->inp_lport;
|
||||||
if (src.sin_family == AF_INET) {
|
if (src.sin_family == AF_INET) {
|
||||||
INP_HASH_LOCK_ASSERT(pcbinfo);
|
|
||||||
if ((lport == 0) ||
|
if ((lport == 0) ||
|
||||||
(laddr.s_addr == INADDR_ANY &&
|
(laddr.s_addr == INADDR_ANY &&
|
||||||
src.sin_addr.s_addr == INADDR_ANY)) {
|
src.sin_addr.s_addr == INADDR_ANY)) {
|
||||||
error = EINVAL;
|
error = EINVAL;
|
||||||
goto release;
|
goto release;
|
||||||
}
|
}
|
||||||
|
INP_HASH_WLOCK(pcbinfo);
|
||||||
error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
|
error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
|
||||||
&laddr.s_addr, &lport, td->td_ucred);
|
&laddr.s_addr, &lport, td->td_ucred);
|
||||||
|
INP_HASH_WUNLOCK(pcbinfo);
|
||||||
if (error)
|
if (error)
|
||||||
goto release;
|
goto release;
|
||||||
}
|
}
|
||||||
|
|
@ -1335,12 +1321,14 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
|
||||||
inp->inp_lport == 0 ||
|
inp->inp_lport == 0 ||
|
||||||
sin->sin_addr.s_addr == INADDR_ANY ||
|
sin->sin_addr.s_addr == INADDR_ANY ||
|
||||||
sin->sin_addr.s_addr == INADDR_BROADCAST) {
|
sin->sin_addr.s_addr == INADDR_BROADCAST) {
|
||||||
INP_HASH_LOCK_ASSERT(pcbinfo);
|
INP_HASH_WLOCK(pcbinfo);
|
||||||
error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
|
error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
|
||||||
&lport, &faddr.s_addr, &fport, NULL,
|
&lport, &faddr.s_addr, &fport, NULL,
|
||||||
td->td_ucred);
|
td->td_ucred);
|
||||||
if (error)
|
if (error) {
|
||||||
|
INP_HASH_WUNLOCK(pcbinfo);
|
||||||
goto release;
|
goto release;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXXRW: Why not commit the port if the address is
|
* XXXRW: Why not commit the port if the address is
|
||||||
|
|
@ -1357,7 +1345,6 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
|
||||||
if (prison_flag(td->td_ucred, PR_IP4))
|
if (prison_flag(td->td_ucred, PR_IP4))
|
||||||
inp->inp_laddr = laddr;
|
inp->inp_laddr = laddr;
|
||||||
inp->inp_lport = lport;
|
inp->inp_lport = lport;
|
||||||
INP_HASH_WLOCK(pcbinfo);
|
|
||||||
error = in_pcbinshash(inp);
|
error = in_pcbinshash(inp);
|
||||||
INP_HASH_WUNLOCK(pcbinfo);
|
INP_HASH_WUNLOCK(pcbinfo);
|
||||||
if (error != 0) {
|
if (error != 0) {
|
||||||
|
|
@ -1366,7 +1353,8 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
|
||||||
goto release;
|
goto release;
|
||||||
}
|
}
|
||||||
inp->inp_flags |= INP_ANONPORT;
|
inp->inp_flags |= INP_ANONPORT;
|
||||||
}
|
} else
|
||||||
|
INP_HASH_WUNLOCK(pcbinfo);
|
||||||
} else {
|
} else {
|
||||||
faddr = sin->sin_addr;
|
faddr = sin->sin_addr;
|
||||||
fport = sin->sin_port;
|
fport = sin->sin_port;
|
||||||
|
|
@ -1560,12 +1548,9 @@ udp_attach(struct socket *so, int proto, struct thread *td)
|
||||||
error = soreserve(so, udp_sendspace, udp_recvspace);
|
error = soreserve(so, udp_sendspace, udp_recvspace);
|
||||||
if (error)
|
if (error)
|
||||||
return (error);
|
return (error);
|
||||||
INP_INFO_WLOCK(pcbinfo);
|
|
||||||
error = in_pcballoc(so, pcbinfo);
|
error = in_pcballoc(so, pcbinfo);
|
||||||
if (error) {
|
if (error)
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
return (error);
|
return (error);
|
||||||
}
|
|
||||||
|
|
||||||
inp = sotoinpcb(so);
|
inp = sotoinpcb(so);
|
||||||
inp->inp_vflag |= INP_IPV4;
|
inp->inp_vflag |= INP_IPV4;
|
||||||
|
|
@ -1577,12 +1562,10 @@ udp_attach(struct socket *so, int proto, struct thread *td)
|
||||||
if (error) {
|
if (error) {
|
||||||
in_pcbdetach(inp);
|
in_pcbdetach(inp);
|
||||||
in_pcbfree(inp);
|
in_pcbfree(inp);
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
#endif /* INET */
|
#endif /* INET */
|
||||||
|
|
@ -1718,14 +1701,12 @@ udp_detach(struct socket *so)
|
||||||
KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
|
KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
|
||||||
KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
|
KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
|
||||||
("udp_detach: not disconnected"));
|
("udp_detach: not disconnected"));
|
||||||
INP_INFO_WLOCK(pcbinfo);
|
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
up = intoudpcb(inp);
|
up = intoudpcb(inp);
|
||||||
KASSERT(up != NULL, ("%s: up == NULL", __func__));
|
KASSERT(up != NULL, ("%s: up == NULL", __func__));
|
||||||
inp->inp_ppcb = NULL;
|
inp->inp_ppcb = NULL;
|
||||||
in_pcbdetach(inp);
|
in_pcbdetach(inp);
|
||||||
in_pcbfree(inp);
|
in_pcbfree(inp);
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
udp_discardcb(up);
|
udp_discardcb(up);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -136,13 +136,9 @@ void kmod_udpstat_inc(int statnum);
|
||||||
SYSCTL_DECL(_net_inet_udp);
|
SYSCTL_DECL(_net_inet_udp);
|
||||||
|
|
||||||
extern struct pr_usrreqs udp_usrreqs;
|
extern struct pr_usrreqs udp_usrreqs;
|
||||||
VNET_DECLARE(struct inpcbhead, udb);
|
|
||||||
VNET_DECLARE(struct inpcbinfo, udbinfo);
|
VNET_DECLARE(struct inpcbinfo, udbinfo);
|
||||||
VNET_DECLARE(struct inpcbhead, ulitecb);
|
|
||||||
VNET_DECLARE(struct inpcbinfo, ulitecbinfo);
|
VNET_DECLARE(struct inpcbinfo, ulitecbinfo);
|
||||||
#define V_udb VNET(udb)
|
|
||||||
#define V_udbinfo VNET(udbinfo)
|
#define V_udbinfo VNET(udbinfo)
|
||||||
#define V_ulitecb VNET(ulitecb)
|
|
||||||
#define V_ulitecbinfo VNET(ulitecbinfo)
|
#define V_ulitecbinfo VNET(ulitecbinfo)
|
||||||
|
|
||||||
extern u_long udp_sendspace;
|
extern u_long udp_sendspace;
|
||||||
|
|
@ -165,12 +161,6 @@ udp_get_inpcbinfo(int protocol)
|
||||||
return (protocol == IPPROTO_UDP) ? &V_udbinfo : &V_ulitecbinfo;
|
return (protocol == IPPROTO_UDP) ? &V_udbinfo : &V_ulitecbinfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline struct inpcbhead *
|
|
||||||
udp_get_pcblist(int protocol)
|
|
||||||
{
|
|
||||||
return (protocol == IPPROTO_UDP) ? &V_udb : &V_ulitecb;
|
|
||||||
}
|
|
||||||
|
|
||||||
int udp_newudpcb(struct inpcb *);
|
int udp_newudpcb(struct inpcb *);
|
||||||
void udp_discardcb(struct udpcb *);
|
void udp_discardcb(struct udpcb *);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -124,14 +124,12 @@ VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
|
||||||
#endif /* VIMAGE */
|
#endif /* VIMAGE */
|
||||||
|
|
||||||
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
|
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
|
||||||
VNET_DECLARE(struct inpcbhead, ripcb);
|
|
||||||
VNET_DECLARE(int, icmp6errppslim);
|
VNET_DECLARE(int, icmp6errppslim);
|
||||||
VNET_DEFINE_STATIC(int, icmp6errpps_count) = 0;
|
VNET_DEFINE_STATIC(int, icmp6errpps_count) = 0;
|
||||||
VNET_DEFINE_STATIC(struct timeval, icmp6errppslim_last);
|
VNET_DEFINE_STATIC(struct timeval, icmp6errppslim_last);
|
||||||
VNET_DECLARE(int, icmp6_nodeinfo);
|
VNET_DECLARE(int, icmp6_nodeinfo);
|
||||||
|
|
||||||
#define V_ripcbinfo VNET(ripcbinfo)
|
#define V_ripcbinfo VNET(ripcbinfo)
|
||||||
#define V_ripcb VNET(ripcb)
|
|
||||||
#define V_icmp6errppslim VNET(icmp6errppslim)
|
#define V_icmp6errppslim VNET(icmp6errppslim)
|
||||||
#define V_icmp6errpps_count VNET(icmp6errpps_count)
|
#define V_icmp6errpps_count VNET(icmp6errpps_count)
|
||||||
#define V_icmp6errppslim_last VNET(icmp6errppslim_last)
|
#define V_icmp6errppslim_last VNET(icmp6errppslim_last)
|
||||||
|
|
@ -1875,21 +1873,39 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
|
||||||
return (copied);
|
return (copied);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
icmp6_rip6_match(const struct inpcb *inp, void *v)
|
||||||
|
{
|
||||||
|
struct ip6_hdr *ip6 = v;
|
||||||
|
|
||||||
|
if ((inp->inp_vflag & INP_IPV6) == 0)
|
||||||
|
return (false);
|
||||||
|
if (inp->inp_ip_p != IPPROTO_ICMPV6)
|
||||||
|
return (false);
|
||||||
|
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
|
||||||
|
!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
|
||||||
|
return (false);
|
||||||
|
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
|
||||||
|
!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
|
||||||
|
return (false);
|
||||||
|
return (true);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX almost dup'ed code with rip6_input.
|
* XXX almost dup'ed code with rip6_input.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
icmp6_rip6_input(struct mbuf **mp, int off)
|
icmp6_rip6_input(struct mbuf **mp, int off)
|
||||||
{
|
{
|
||||||
struct mbuf *m = *mp;
|
struct mbuf *n, *m = *mp;
|
||||||
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
|
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
|
||||||
|
struct inpcb_iterator inpi = INP_ITERATOR(&V_ripcbinfo,
|
||||||
|
INPLOOKUP_RLOCKPCB, icmp6_rip6_match, ip6);
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
struct inpcb *last = NULL;
|
|
||||||
struct sockaddr_in6 fromsa;
|
struct sockaddr_in6 fromsa;
|
||||||
struct icmp6_hdr *icmp6;
|
struct icmp6_hdr *icmp6;
|
||||||
struct mbuf *opts = NULL;
|
struct mbuf *opts = NULL;
|
||||||
|
int delivered = 0;
|
||||||
NET_EPOCH_ASSERT();
|
|
||||||
|
|
||||||
/* This is assumed to be safe; icmp6_input() does a pullup. */
|
/* This is assumed to be safe; icmp6_input() does a pullup. */
|
||||||
icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
|
icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
|
||||||
|
|
@ -1908,125 +1924,64 @@ icmp6_rip6_input(struct mbuf **mp, int off)
|
||||||
return (IPPROTO_DONE);
|
return (IPPROTO_DONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
if ((inp->inp_vflag & INP_IPV6) == 0)
|
|
||||||
continue;
|
|
||||||
if (inp->inp_ip_p != IPPROTO_ICMPV6)
|
|
||||||
continue;
|
|
||||||
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
|
|
||||||
!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
|
|
||||||
continue;
|
|
||||||
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
|
|
||||||
!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
|
|
||||||
continue;
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
|
if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
|
||||||
inp->in6p_icmp6filt)) {
|
inp->in6p_icmp6filt))
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
/*
|
||||||
if (last != NULL) {
|
* Recent network drivers tend to allocate a single
|
||||||
struct mbuf *n = NULL;
|
* mbuf cluster, rather than to make a couple of
|
||||||
|
* mbufs without clusters. Also, since the IPv6 code
|
||||||
/*
|
* path tries to avoid m_pullup(), it is highly
|
||||||
* Recent network drivers tend to allocate a single
|
* probable that we still have an mbuf cluster here
|
||||||
* mbuf cluster, rather than to make a couple of
|
* even though the necessary length can be stored in an
|
||||||
* mbufs without clusters. Also, since the IPv6 code
|
* mbuf's internal buffer.
|
||||||
* path tries to avoid m_pullup(), it is highly
|
* Meanwhile, the default size of the receive socket
|
||||||
* probable that we still have an mbuf cluster here
|
* buffer for raw sockets is not so large. This means
|
||||||
* even though the necessary length can be stored in an
|
* the possibility of packet loss is relatively higher
|
||||||
* mbuf's internal buffer.
|
* than before. To avoid this scenario, we copy the
|
||||||
* Meanwhile, the default size of the receive socket
|
* received data to a separate mbuf that does not use
|
||||||
* buffer for raw sockets is not so large. This means
|
* a cluster, if possible.
|
||||||
* the possibility of packet loss is relatively higher
|
* XXX: it is better to copy the data after stripping
|
||||||
* than before. To avoid this scenario, we copy the
|
* intermediate headers.
|
||||||
* received data to a separate mbuf that does not use
|
*/
|
||||||
* a cluster, if possible.
|
|
||||||
* XXX: it is better to copy the data after stripping
|
|
||||||
* intermediate headers.
|
|
||||||
*/
|
|
||||||
if ((m->m_flags & M_EXT) && m->m_next == NULL &&
|
|
||||||
m->m_len <= MHLEN) {
|
|
||||||
n = m_get(M_NOWAIT, m->m_type);
|
|
||||||
if (n != NULL) {
|
|
||||||
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
|
|
||||||
bcopy(m->m_data, n->m_data,
|
|
||||||
m->m_len);
|
|
||||||
n->m_len = m->m_len;
|
|
||||||
} else {
|
|
||||||
m_free(n);
|
|
||||||
n = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (n != NULL ||
|
|
||||||
(n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
|
|
||||||
if (last->inp_flags & INP_CONTROLOPTS)
|
|
||||||
ip6_savecontrol(last, n, &opts);
|
|
||||||
/* strip intermediate headers */
|
|
||||||
m_adj(n, off);
|
|
||||||
SOCKBUF_LOCK(&last->inp_socket->so_rcv);
|
|
||||||
if (sbappendaddr_locked(
|
|
||||||
&last->inp_socket->so_rcv,
|
|
||||||
(struct sockaddr *)&fromsa, n, opts)
|
|
||||||
== 0) {
|
|
||||||
soroverflow_locked(last->inp_socket);
|
|
||||||
m_freem(n);
|
|
||||||
if (opts) {
|
|
||||||
m_freem(opts);
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
sorwakeup_locked(last->inp_socket);
|
|
||||||
opts = NULL;
|
|
||||||
}
|
|
||||||
INP_RUNLOCK(last);
|
|
||||||
}
|
|
||||||
last = inp;
|
|
||||||
}
|
|
||||||
if (last != NULL) {
|
|
||||||
if (last->inp_flags & INP_CONTROLOPTS)
|
|
||||||
ip6_savecontrol(last, m, &opts);
|
|
||||||
/* strip intermediate headers */
|
|
||||||
m_adj(m, off);
|
|
||||||
|
|
||||||
/* avoid using mbuf clusters if possible (see above) */
|
|
||||||
if ((m->m_flags & M_EXT) && m->m_next == NULL &&
|
if ((m->m_flags & M_EXT) && m->m_next == NULL &&
|
||||||
m->m_len <= MHLEN) {
|
m->m_len <= MHLEN) {
|
||||||
struct mbuf *n;
|
|
||||||
|
|
||||||
n = m_get(M_NOWAIT, m->m_type);
|
n = m_get(M_NOWAIT, m->m_type);
|
||||||
if (n != NULL) {
|
if (n != NULL) {
|
||||||
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
|
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
|
||||||
bcopy(m->m_data, n->m_data, m->m_len);
|
bcopy(m->m_data, n->m_data, m->m_len);
|
||||||
n->m_len = m->m_len;
|
n->m_len = m->m_len;
|
||||||
|
|
||||||
m_freem(m);
|
|
||||||
m = n;
|
|
||||||
} else {
|
} else {
|
||||||
m_freem(n);
|
m_free(n);
|
||||||
n = NULL;
|
n = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} else
|
||||||
SOCKBUF_LOCK(&last->inp_socket->so_rcv);
|
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
|
||||||
if (sbappendaddr_locked(&last->inp_socket->so_rcv,
|
if (n == NULL)
|
||||||
(struct sockaddr *)&fromsa, m, opts) == 0) {
|
continue;
|
||||||
m_freem(m);
|
if (inp->inp_flags & INP_CONTROLOPTS)
|
||||||
|
ip6_savecontrol(inp, n, &opts);
|
||||||
|
/* strip intermediate headers */
|
||||||
|
m_adj(n, off);
|
||||||
|
SOCKBUF_LOCK(&inp->inp_socket->so_rcv);
|
||||||
|
if (sbappendaddr_locked(&inp->inp_socket->so_rcv,
|
||||||
|
(struct sockaddr *)&fromsa, n, opts) == 0) {
|
||||||
|
soroverflow_locked(inp->inp_socket);
|
||||||
|
m_freem(n);
|
||||||
if (opts)
|
if (opts)
|
||||||
m_freem(opts);
|
m_freem(opts);
|
||||||
soroverflow_locked(last->inp_socket);
|
} else {
|
||||||
} else
|
sorwakeup_locked(inp->inp_socket);
|
||||||
sorwakeup_locked(last->inp_socket);
|
delivered++;
|
||||||
INP_RUNLOCK(last);
|
}
|
||||||
} else {
|
opts = NULL;
|
||||||
m_freem(m);
|
|
||||||
IP6STAT_DEC(ip6s_delivered);
|
|
||||||
}
|
}
|
||||||
|
m_freem(m);
|
||||||
*mp = NULL;
|
*mp = NULL;
|
||||||
|
if (delivered == 0)
|
||||||
|
IP6STAT_DEC(ip6s_delivered);
|
||||||
return (IPPROTO_DONE);
|
return (IPPROTO_DONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -673,13 +673,21 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
|
||||||
* Call the protocol specific routine (if any) to report
|
* Call the protocol specific routine (if any) to report
|
||||||
* any errors for each matching socket.
|
* any errors for each matching socket.
|
||||||
*/
|
*/
|
||||||
|
static bool
|
||||||
|
inp_match6(const struct inpcb *inp, void *v __unused)
|
||||||
|
{
|
||||||
|
|
||||||
|
return ((inp->inp_vflag & INP_IPV6) != 0);
|
||||||
|
}
|
||||||
void
|
void
|
||||||
in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
|
in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
|
||||||
u_int fport_arg, const struct sockaddr *src, u_int lport_arg,
|
u_int fport_arg, const struct sockaddr *src, u_int lport_arg,
|
||||||
int cmd, void *cmdarg,
|
int cmd, void *cmdarg,
|
||||||
struct inpcb *(*notify)(struct inpcb *, int))
|
struct inpcb *(*notify)(struct inpcb *, int))
|
||||||
{
|
{
|
||||||
struct inpcb *inp, *inp_temp;
|
struct inpcb_iterator inpi = INP_ITERATOR(pcbinfo, INPLOOKUP_WLOCKPCB,
|
||||||
|
inp_match6, NULL);
|
||||||
|
struct inpcb *inp;
|
||||||
struct sockaddr_in6 sa6_src, *sa6_dst;
|
struct sockaddr_in6 sa6_src, *sa6_dst;
|
||||||
u_short fport = fport_arg, lport = lport_arg;
|
u_short fport = fport_arg, lport = lport_arg;
|
||||||
u_int32_t flowinfo;
|
u_int32_t flowinfo;
|
||||||
|
|
@ -715,14 +723,8 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
|
||||||
notify = in6_rtchange;
|
notify = in6_rtchange;
|
||||||
}
|
}
|
||||||
errno = inet6ctlerrmap[cmd];
|
errno = inet6ctlerrmap[cmd];
|
||||||
INP_INFO_WLOCK(pcbinfo);
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
|
INP_WLOCK_ASSERT(inp);
|
||||||
INP_WLOCK(inp);
|
|
||||||
if ((inp->inp_vflag & INP_IPV6) == 0) {
|
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the error designates a new path MTU for a destination
|
* If the error designates a new path MTU for a destination
|
||||||
* and the application (associated with this socket) wanted to
|
* and the application (associated with this socket) wanted to
|
||||||
|
|
@ -754,18 +756,13 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
|
||||||
!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
|
!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
|
||||||
&sa6_src.sin6_addr)) ||
|
&sa6_src.sin6_addr)) ||
|
||||||
(fport && inp->inp_fport != fport)) {
|
(fport && inp->inp_fport != fport)) {
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
do_notify:
|
do_notify:
|
||||||
if (notify) {
|
if (notify)
|
||||||
if ((*notify)(inp, errno))
|
(*notify)(inp, errno);
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
} else
|
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
}
|
}
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -866,49 +863,54 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
in6_multi_match(const struct inpcb *inp, void *v __unused)
|
||||||
|
{
|
||||||
|
|
||||||
|
if ((inp->inp_vflag & INP_IPV6) && inp->in6p_moptions != NULL)
|
||||||
|
return (true);
|
||||||
|
else
|
||||||
|
return (false);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
|
in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
|
||||||
{
|
{
|
||||||
|
struct inpcb_iterator inpi = INP_ITERATOR(pcbinfo, INPLOOKUP_RLOCKPCB,
|
||||||
|
in6_multi_match, NULL);
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
struct in6_multi *inm;
|
struct in6_multi *inm;
|
||||||
struct in6_mfilter *imf;
|
struct in6_mfilter *imf;
|
||||||
struct ip6_moptions *im6o;
|
struct ip6_moptions *im6o;
|
||||||
|
|
||||||
INP_INFO_WLOCK(pcbinfo);
|
IN6_MULTI_LOCK_ASSERT();
|
||||||
CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
|
|
||||||
INP_WLOCK(inp);
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) {
|
INP_RLOCK_ASSERT(inp);
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
im6o = inp->in6p_moptions;
|
im6o = inp->in6p_moptions;
|
||||||
if ((inp->inp_vflag & INP_IPV6) && im6o != NULL) {
|
/*
|
||||||
/*
|
* Unselect the outgoing ifp for multicast if it
|
||||||
* Unselect the outgoing ifp for multicast if it
|
* is being detached.
|
||||||
* is being detached.
|
*/
|
||||||
*/
|
if (im6o->im6o_multicast_ifp == ifp)
|
||||||
if (im6o->im6o_multicast_ifp == ifp)
|
im6o->im6o_multicast_ifp = NULL;
|
||||||
im6o->im6o_multicast_ifp = NULL;
|
/*
|
||||||
/*
|
* Drop multicast group membership if we joined
|
||||||
* Drop multicast group membership if we joined
|
* through the interface being detached.
|
||||||
* through the interface being detached.
|
*/
|
||||||
*/
|
|
||||||
restart:
|
restart:
|
||||||
IP6_MFILTER_FOREACH(imf, &im6o->im6o_head) {
|
IP6_MFILTER_FOREACH(imf, &im6o->im6o_head) {
|
||||||
if ((inm = imf->im6f_in6m) == NULL)
|
if ((inm = imf->im6f_in6m) == NULL)
|
||||||
continue;
|
continue;
|
||||||
if (inm->in6m_ifp != ifp)
|
if (inm->in6m_ifp != ifp)
|
||||||
continue;
|
continue;
|
||||||
ip6_mfilter_remove(&im6o->im6o_head, imf);
|
ip6_mfilter_remove(&im6o->im6o_head, imf);
|
||||||
IN6_MULTI_LOCK_ASSERT();
|
in6_leavegroup_locked(inm, NULL);
|
||||||
in6_leavegroup_locked(inm, NULL);
|
ip6_mfilter_free(imf);
|
||||||
ip6_mfilter_free(imf);
|
goto restart;
|
||||||
goto restart;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
INP_WUNLOCK(inp);
|
|
||||||
}
|
}
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -1124,20 +1126,16 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
|
||||||
{
|
{
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
|
|
||||||
|
smr_enter(pcbinfo->ipi_smr);
|
||||||
inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
|
inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
|
||||||
lookupflags & INPLOOKUP_WILDCARD, ifp, numa_domain);
|
lookupflags & INPLOOKUP_WILDCARD, ifp, numa_domain);
|
||||||
if (inp != NULL) {
|
if (inp != NULL) {
|
||||||
if (lookupflags & INPLOOKUP_WLOCKPCB) {
|
if (__predict_false(inp_smr_lock(inp,
|
||||||
INP_WLOCK(inp);
|
(lookupflags & INPLOOKUP_LOCKMASK)) == false))
|
||||||
} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
} else
|
|
||||||
panic("%s: locking bug", __func__);
|
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
INP_UNLOCK(inp);
|
|
||||||
inp = NULL;
|
inp = NULL;
|
||||||
}
|
} else
|
||||||
}
|
smr_exit(pcbinfo->ipi_smr);
|
||||||
|
|
||||||
return (inp);
|
return (inp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -216,30 +216,15 @@ static void
|
||||||
in6_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
|
in6_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
|
||||||
const struct sockaddr *sa, void *ctx)
|
const struct sockaddr *sa, void *ctx)
|
||||||
{
|
{
|
||||||
struct epoch_tracker et;
|
|
||||||
struct gre_socket *gs;
|
struct gre_socket *gs;
|
||||||
struct gre_softc *sc;
|
struct gre_softc *sc;
|
||||||
struct sockaddr_in6 dst;
|
struct sockaddr_in6 dst;
|
||||||
|
|
||||||
NET_EPOCH_ENTER(et);
|
NET_EPOCH_ASSERT();
|
||||||
/*
|
|
||||||
* udp_append() holds reference to inp, it is safe to check
|
|
||||||
* inp_flags2 without INP_RLOCK().
|
|
||||||
* If socket was closed before we have entered NET_EPOCH section,
|
|
||||||
* INP_FREED flag should be set. Otherwise it should be safe to
|
|
||||||
* make access to ctx data, because gre_so will be freed by
|
|
||||||
* gre_sofree() via NET_EPOCH_CALL().
|
|
||||||
*/
|
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
m_freem(m);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
gs = (struct gre_socket *)ctx;
|
gs = (struct gre_socket *)ctx;
|
||||||
dst = *(const struct sockaddr_in6 *)sa;
|
dst = *(const struct sockaddr_in6 *)sa;
|
||||||
if (sa6_embedscope(&dst, 0)) {
|
if (sa6_embedscope(&dst, 0)) {
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
m_freem(m);
|
m_freem(m);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -249,11 +234,9 @@ in6_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp,
|
||||||
}
|
}
|
||||||
if (sc != NULL && (GRE2IFP(sc)->if_flags & IFF_UP) != 0){
|
if (sc != NULL && (GRE2IFP(sc)->if_flags & IFF_UP) != 0){
|
||||||
gre_input(m, off + sizeof(struct udphdr), IPPROTO_UDP, sc);
|
gre_input(m, off + sizeof(struct udphdr), IPPROTO_UDP, sc);
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
m_freem(m);
|
m_freem(m);
|
||||||
NET_EPOCH_EXIT(et);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
|
||||||
|
|
@ -119,9 +119,7 @@ __FBSDID("$FreeBSD$");
|
||||||
* Raw interface to IP6 protocol.
|
* Raw interface to IP6 protocol.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
VNET_DECLARE(struct inpcbhead, ripcb);
|
|
||||||
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
|
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
|
||||||
#define V_ripcb VNET(ripcb)
|
|
||||||
#define V_ripcbinfo VNET(ripcbinfo)
|
#define V_ripcbinfo VNET(ripcbinfo)
|
||||||
|
|
||||||
extern u_long rip_sendspace;
|
extern u_long rip_sendspace;
|
||||||
|
|
@ -153,6 +151,33 @@ int (*ip6_mrouter_done)(void);
|
||||||
int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *);
|
int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *);
|
||||||
int (*mrt6_ioctl)(u_long, caddr_t);
|
int (*mrt6_ioctl)(u_long, caddr_t);
|
||||||
|
|
||||||
|
struct rip6_inp_match_ctx {
|
||||||
|
struct ip6_hdr *ip6;
|
||||||
|
int proto;
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool
|
||||||
|
rip6_inp_match(const struct inpcb *inp, void *v)
|
||||||
|
{
|
||||||
|
struct rip6_inp_match_ctx *c = v;
|
||||||
|
struct ip6_hdr *ip6 = c->ip6;
|
||||||
|
int proto = c->proto;
|
||||||
|
|
||||||
|
/* XXX inp locking */
|
||||||
|
if ((inp->inp_vflag & INP_IPV6) == 0)
|
||||||
|
return (false);
|
||||||
|
if (inp->inp_ip_p && inp->inp_ip_p != proto)
|
||||||
|
return (false);
|
||||||
|
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
|
||||||
|
!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
|
||||||
|
return (false);
|
||||||
|
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
|
||||||
|
!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
|
||||||
|
return (false);
|
||||||
|
|
||||||
|
return (true);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Setup generic address and protocol structures for raw_input routine, then
|
* Setup generic address and protocol structures for raw_input routine, then
|
||||||
* pass them along with mbuf chain.
|
* pass them along with mbuf chain.
|
||||||
|
|
@ -161,12 +186,15 @@ int
|
||||||
rip6_input(struct mbuf **mp, int *offp, int proto)
|
rip6_input(struct mbuf **mp, int *offp, int proto)
|
||||||
{
|
{
|
||||||
struct ifnet *ifp;
|
struct ifnet *ifp;
|
||||||
struct mbuf *m = *mp;
|
struct mbuf *n, *m = *mp;
|
||||||
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
|
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
|
||||||
struct inpcb *inp;
|
struct inpcb *inp;
|
||||||
struct inpcb *last = NULL;
|
|
||||||
struct mbuf *opts = NULL;
|
struct mbuf *opts = NULL;
|
||||||
struct sockaddr_in6 fromsa;
|
struct sockaddr_in6 fromsa;
|
||||||
|
struct rip6_inp_match_ctx ctx = { .ip6 = ip6, .proto = proto };
|
||||||
|
struct inpcb_iterator inpi = INP_ITERATOR(&V_ripcbinfo,
|
||||||
|
INPLOOKUP_RLOCKPCB, rip6_inp_match, &ctx);
|
||||||
|
int delivered = 0;
|
||||||
|
|
||||||
NET_EPOCH_ASSERT();
|
NET_EPOCH_ASSERT();
|
||||||
|
|
||||||
|
|
@ -176,70 +204,27 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
|
||||||
|
|
||||||
ifp = m->m_pkthdr.rcvif;
|
ifp = m->m_pkthdr.rcvif;
|
||||||
|
|
||||||
CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
/* XXX inp locking */
|
INP_RLOCK_ASSERT(inp);
|
||||||
if ((inp->inp_vflag & INP_IPV6) == 0)
|
|
||||||
continue;
|
|
||||||
if (inp->inp_ip_p &&
|
|
||||||
inp->inp_ip_p != proto)
|
|
||||||
continue;
|
|
||||||
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
|
|
||||||
!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
|
|
||||||
continue;
|
|
||||||
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
|
|
||||||
!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
|
|
||||||
continue;
|
|
||||||
if (last != NULL) {
|
|
||||||
struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
|
|
||||||
|
|
||||||
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
|
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
|
||||||
/*
|
/*
|
||||||
* Check AH/ESP integrity.
|
* Check AH/ESP integrity.
|
||||||
*/
|
*/
|
||||||
if (IPSEC_ENABLED(ipv6)) {
|
if (IPSEC_ENABLED(ipv6) &&
|
||||||
if (n != NULL &&
|
IPSEC_CHECK_POLICY(ipv6, m, inp) != 0) {
|
||||||
IPSEC_CHECK_POLICY(ipv6, n, last) != 0) {
|
/* Do not inject data into pcb. */
|
||||||
m_freem(n);
|
continue;
|
||||||
/* Do not inject data into pcb. */
|
|
||||||
n = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* IPSEC */
|
|
||||||
if (n) {
|
|
||||||
if (last->inp_flags & INP_CONTROLOPTS ||
|
|
||||||
last->inp_socket->so_options & SO_TIMESTAMP)
|
|
||||||
ip6_savecontrol(last, n, &opts);
|
|
||||||
/* strip intermediate headers */
|
|
||||||
m_adj(n, *offp);
|
|
||||||
if (sbappendaddr(&last->inp_socket->so_rcv,
|
|
||||||
(struct sockaddr *)&fromsa,
|
|
||||||
n, opts) == 0) {
|
|
||||||
soroverflow(last->inp_socket);
|
|
||||||
m_freem(n);
|
|
||||||
if (opts)
|
|
||||||
m_freem(opts);
|
|
||||||
RIP6STAT_INC(rip6s_fullsock);
|
|
||||||
} else
|
|
||||||
sorwakeup(last->inp_socket);
|
|
||||||
opts = NULL;
|
|
||||||
}
|
|
||||||
INP_RUNLOCK(last);
|
|
||||||
last = NULL;
|
|
||||||
}
|
}
|
||||||
INP_RLOCK(inp);
|
#endif /* IPSEC */
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED))
|
if (jailed_without_vnet(inp->inp_cred) &&
|
||||||
goto skip_2;
|
!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
|
||||||
if (jailed_without_vnet(inp->inp_cred)) {
|
prison_check_ip6(inp->inp_cred, &ip6->ip6_dst) != 0)
|
||||||
/*
|
/*
|
||||||
* Allow raw socket in jail to receive multicast;
|
* Allow raw socket in jail to receive multicast;
|
||||||
* assume process had PRIV_NETINET_RAW at attach,
|
* assume process had PRIV_NETINET_RAW at attach,
|
||||||
* and fall through into normal filter path if so.
|
* and fall through into normal filter path if so.
|
||||||
*/
|
*/
|
||||||
if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
|
continue;
|
||||||
prison_check_ip6(inp->inp_cred,
|
|
||||||
&ip6->ip6_dst) != 0)
|
|
||||||
goto skip_2;
|
|
||||||
}
|
|
||||||
if (inp->in6p_cksum != -1) {
|
if (inp->in6p_cksum != -1) {
|
||||||
RIP6STAT_INC(rip6s_isum);
|
RIP6STAT_INC(rip6s_isum);
|
||||||
if (m->m_pkthdr.len - (*offp + inp->in6p_cksum) < 2 ||
|
if (m->m_pkthdr.len - (*offp + inp->in6p_cksum) < 2 ||
|
||||||
|
|
@ -251,8 +236,9 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
|
||||||
* ICMP6 message. Set proto to IPPROTO_NONE
|
* ICMP6 message. Set proto to IPPROTO_NONE
|
||||||
* to achieve that.
|
* to achieve that.
|
||||||
*/
|
*/
|
||||||
|
INP_RUNLOCK(inp);
|
||||||
proto = IPPROTO_NONE;
|
proto = IPPROTO_NONE;
|
||||||
goto skip_2;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
|
@ -298,43 +284,30 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
|
||||||
}
|
}
|
||||||
if (blocked != MCAST_PASS) {
|
if (blocked != MCAST_PASS) {
|
||||||
IP6STAT_INC(ip6s_notmember);
|
IP6STAT_INC(ip6s_notmember);
|
||||||
goto skip_2;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
last = inp;
|
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
|
||||||
continue;
|
continue;
|
||||||
skip_2:
|
if (inp->inp_flags & INP_CONTROLOPTS ||
|
||||||
INP_RUNLOCK(inp);
|
inp->inp_socket->so_options & SO_TIMESTAMP)
|
||||||
}
|
ip6_savecontrol(inp, n, &opts);
|
||||||
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
|
/* strip intermediate headers */
|
||||||
/*
|
m_adj(n, *offp);
|
||||||
* Check AH/ESP integrity.
|
if (sbappendaddr(&inp->inp_socket->so_rcv,
|
||||||
*/
|
(struct sockaddr *)&fromsa, n, opts) == 0) {
|
||||||
if (IPSEC_ENABLED(ipv6) && last != NULL &&
|
soroverflow(inp->inp_socket);
|
||||||
IPSEC_CHECK_POLICY(ipv6, m, last) != 0) {
|
m_freem(n);
|
||||||
m_freem(m);
|
|
||||||
IP6STAT_DEC(ip6s_delivered);
|
|
||||||
/* Do not inject data into pcb. */
|
|
||||||
INP_RUNLOCK(last);
|
|
||||||
} else
|
|
||||||
#endif /* IPSEC */
|
|
||||||
if (last != NULL) {
|
|
||||||
if (last->inp_flags & INP_CONTROLOPTS ||
|
|
||||||
last->inp_socket->so_options & SO_TIMESTAMP)
|
|
||||||
ip6_savecontrol(last, m, &opts);
|
|
||||||
/* Strip intermediate headers. */
|
|
||||||
m_adj(m, *offp);
|
|
||||||
if (sbappendaddr(&last->inp_socket->so_rcv,
|
|
||||||
(struct sockaddr *)&fromsa, m, opts) == 0) {
|
|
||||||
soroverflow(last->inp_socket);
|
|
||||||
m_freem(m);
|
|
||||||
if (opts)
|
if (opts)
|
||||||
m_freem(opts);
|
m_freem(opts);
|
||||||
RIP6STAT_INC(rip6s_fullsock);
|
RIP6STAT_INC(rip6s_fullsock);
|
||||||
} else
|
} else {
|
||||||
sorwakeup(last->inp_socket);
|
sorwakeup(inp->inp_socket);
|
||||||
INP_RUNLOCK(last);
|
delivered++;
|
||||||
} else {
|
}
|
||||||
|
opts = NULL;
|
||||||
|
}
|
||||||
|
if (delivered == 0) {
|
||||||
RIP6STAT_INC(rip6s_nosock);
|
RIP6STAT_INC(rip6s_nosock);
|
||||||
if (m->m_flags & M_MCAST)
|
if (m->m_flags & M_MCAST)
|
||||||
RIP6STAT_INC(rip6s_nosockmcast);
|
RIP6STAT_INC(rip6s_nosockmcast);
|
||||||
|
|
@ -345,7 +318,8 @@ skip_2:
|
||||||
ICMP6_PARAMPROB_NEXTHEADER,
|
ICMP6_PARAMPROB_NEXTHEADER,
|
||||||
ip6_get_prevhdr(m, *offp));
|
ip6_get_prevhdr(m, *offp));
|
||||||
IP6STAT_DEC(ip6s_delivered);
|
IP6STAT_DEC(ip6s_delivered);
|
||||||
}
|
} else
|
||||||
|
m_freem(m);
|
||||||
return (IPPROTO_DONE);
|
return (IPPROTO_DONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -678,15 +652,12 @@ rip6_attach(struct socket *so, int proto, struct thread *td)
|
||||||
filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT);
|
filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT);
|
||||||
if (filter == NULL)
|
if (filter == NULL)
|
||||||
return (ENOMEM);
|
return (ENOMEM);
|
||||||
INP_INFO_WLOCK(&V_ripcbinfo);
|
|
||||||
error = in_pcballoc(so, &V_ripcbinfo);
|
error = in_pcballoc(so, &V_ripcbinfo);
|
||||||
if (error) {
|
if (error) {
|
||||||
INP_INFO_WUNLOCK(&V_ripcbinfo);
|
|
||||||
free(filter, M_PCB);
|
free(filter, M_PCB);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
inp = (struct inpcb *)so->so_pcb;
|
inp = (struct inpcb *)so->so_pcb;
|
||||||
INP_INFO_WUNLOCK(&V_ripcbinfo);
|
|
||||||
inp->inp_vflag |= INP_IPV6;
|
inp->inp_vflag |= INP_IPV6;
|
||||||
inp->inp_ip_p = (long)proto;
|
inp->inp_ip_p = (long)proto;
|
||||||
inp->in6p_hops = -1; /* use kernel default */
|
inp->in6p_hops = -1; /* use kernel default */
|
||||||
|
|
@ -708,12 +679,10 @@ rip6_detach(struct socket *so)
|
||||||
if (so == V_ip6_mrouter && ip6_mrouter_done)
|
if (so == V_ip6_mrouter && ip6_mrouter_done)
|
||||||
ip6_mrouter_done();
|
ip6_mrouter_done();
|
||||||
/* xxx: RSVP */
|
/* xxx: RSVP */
|
||||||
INP_INFO_WLOCK(&V_ripcbinfo);
|
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
free(inp->in6p_icmp6filt, M_PCB);
|
free(inp->in6p_icmp6filt, M_PCB);
|
||||||
in_pcbdetach(inp);
|
in_pcbdetach(inp);
|
||||||
in_pcbfree(inp);
|
in_pcbfree(inp);
|
||||||
INP_INFO_WUNLOCK(&V_ripcbinfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* XXXRW: This can't ever be called. */
|
/* XXXRW: This can't ever be called. */
|
||||||
|
|
|
||||||
|
|
@ -207,6 +207,137 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off,
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct udp6_multi_match_ctx {
|
||||||
|
struct ip6_hdr *ip6;
|
||||||
|
struct udphdr *uh;
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool
|
||||||
|
udp6_multi_match(const struct inpcb *inp, void *v)
|
||||||
|
{
|
||||||
|
struct udp6_multi_match_ctx *ctx = v;
|
||||||
|
|
||||||
|
if ((inp->inp_vflag & INP_IPV6) == 0)
|
||||||
|
return(false);
|
||||||
|
if (inp->inp_lport != ctx->uh->uh_dport)
|
||||||
|
return(false);
|
||||||
|
if (inp->inp_fport != 0 && inp->inp_fport != ctx->uh->uh_sport)
|
||||||
|
return(false);
|
||||||
|
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
|
||||||
|
!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ctx->ip6->ip6_dst))
|
||||||
|
return (false);
|
||||||
|
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
|
||||||
|
(!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ctx->ip6->ip6_src) ||
|
||||||
|
inp->inp_fport != ctx->uh->uh_sport))
|
||||||
|
return (false);
|
||||||
|
|
||||||
|
return (true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
udp6_multi_input(struct mbuf *m, int off, int proto,
|
||||||
|
struct sockaddr_in6 *fromsa)
|
||||||
|
{
|
||||||
|
struct udp6_multi_match_ctx ctx;
|
||||||
|
struct inpcb_iterator inpi = INP_ITERATOR(udp_get_inpcbinfo(proto),
|
||||||
|
INPLOOKUP_RLOCKPCB, udp6_multi_match, &ctx);
|
||||||
|
struct inpcb *inp;
|
||||||
|
struct ip6_moptions *imo;
|
||||||
|
struct mbuf *n;
|
||||||
|
int appends = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In the event that laddr should be set to the link-local
|
||||||
|
* address (this happens in RIPng), the multicast address
|
||||||
|
* specified in the received packet will not match laddr. To
|
||||||
|
* handle this situation, matching is relaxed if the
|
||||||
|
* receiving interface is the same as one specified in the
|
||||||
|
* socket and if the destination multicast address matches
|
||||||
|
* one of the multicast groups specified in the socket.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* KAME note: traditionally we dropped udpiphdr from mbuf
|
||||||
|
* here. We need udphdr for IPsec processing so we do that
|
||||||
|
* later.
|
||||||
|
*/
|
||||||
|
ctx.ip6 = mtod(m, struct ip6_hdr *);
|
||||||
|
ctx.uh = (struct udphdr *)((char *)ctx.ip6 + off);
|
||||||
|
while ((inp = inp_next(&inpi)) != NULL) {
|
||||||
|
INP_RLOCK_ASSERT(inp);
|
||||||
|
/*
|
||||||
|
* XXXRW: Because we weren't holding either the inpcb
|
||||||
|
* or the hash lock when we checked for a match
|
||||||
|
* before, we should probably recheck now that the
|
||||||
|
* inpcb lock is (supposed to be) held.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Handle socket delivery policy for any-source
|
||||||
|
* and source-specific multicast. [RFC3678]
|
||||||
|
*/
|
||||||
|
if ((imo = inp->in6p_moptions) != NULL) {
|
||||||
|
struct sockaddr_in6 mcaddr;
|
||||||
|
int blocked;
|
||||||
|
|
||||||
|
bzero(&mcaddr, sizeof(struct sockaddr_in6));
|
||||||
|
mcaddr.sin6_len = sizeof(struct sockaddr_in6);
|
||||||
|
mcaddr.sin6_family = AF_INET6;
|
||||||
|
mcaddr.sin6_addr = ctx.ip6->ip6_dst;
|
||||||
|
|
||||||
|
blocked = im6o_mc_filter(imo, m->m_pkthdr.rcvif,
|
||||||
|
(struct sockaddr *)&mcaddr,
|
||||||
|
(struct sockaddr *)&fromsa[0]);
|
||||||
|
if (blocked != MCAST_PASS) {
|
||||||
|
if (blocked == MCAST_NOTGMEMBER)
|
||||||
|
IP6STAT_INC(ip6s_notmember);
|
||||||
|
if (blocked == MCAST_NOTSMEMBER ||
|
||||||
|
blocked == MCAST_MUTED)
|
||||||
|
UDPSTAT_INC(udps_filtermcast);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
|
||||||
|
if (proto == IPPROTO_UDPLITE)
|
||||||
|
UDPLITE_PROBE(receive, NULL, inp, ctx.ip6,
|
||||||
|
inp, ctx.uh);
|
||||||
|
else
|
||||||
|
UDP_PROBE(receive, NULL, inp, ctx.ip6, inp,
|
||||||
|
ctx.uh);
|
||||||
|
if (udp6_append(inp, n, off, fromsa)) {
|
||||||
|
INP_RUNLOCK(inp);
|
||||||
|
break;
|
||||||
|
} else
|
||||||
|
appends++;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Don't look for additional matches if this one does
|
||||||
|
* not have either the SO_REUSEPORT or SO_REUSEADDR
|
||||||
|
* socket options set. This heuristic avoids
|
||||||
|
* searching through all pcbs in the common case of a
|
||||||
|
* non-shared port. It assumes that an application
|
||||||
|
* will never clear these options after setting them.
|
||||||
|
*/
|
||||||
|
if ((inp->inp_socket->so_options &
|
||||||
|
(SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0) {
|
||||||
|
INP_RUNLOCK(inp);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_freem(m);
|
||||||
|
|
||||||
|
if (appends == 0) {
|
||||||
|
/*
|
||||||
|
* No matching pcb found; discard datagram. (No need
|
||||||
|
* to send an ICMP Port Unreachable for a broadcast
|
||||||
|
* or multicast datgram.)
|
||||||
|
*/
|
||||||
|
UDPSTAT_INC(udps_noport);
|
||||||
|
UDPSTAT_INC(udps_noportmcast);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (IPPROTO_DONE);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
udp6_input(struct mbuf **mp, int *offp, int proto)
|
udp6_input(struct mbuf **mp, int *offp, int proto)
|
||||||
{
|
{
|
||||||
|
|
@ -311,144 +442,11 @@ skip_checksum:
|
||||||
fromsa[1].sin6_port = uh->uh_dport;
|
fromsa[1].sin6_port = uh->uh_dport;
|
||||||
|
|
||||||
pcbinfo = udp_get_inpcbinfo(nxt);
|
pcbinfo = udp_get_inpcbinfo(nxt);
|
||||||
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
|
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
|
||||||
struct inpcb *last;
|
|
||||||
struct inpcbhead *pcblist;
|
|
||||||
struct ip6_moptions *imo;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* In the event that laddr should be set to the link-local
|
|
||||||
* address (this happens in RIPng), the multicast address
|
|
||||||
* specified in the received packet will not match laddr. To
|
|
||||||
* handle this situation, matching is relaxed if the
|
|
||||||
* receiving interface is the same as one specified in the
|
|
||||||
* socket and if the destination multicast address matches
|
|
||||||
* one of the multicast groups specified in the socket.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* KAME note: traditionally we dropped udpiphdr from mbuf
|
|
||||||
* here. We need udphdr for IPsec processing so we do that
|
|
||||||
* later.
|
|
||||||
*/
|
|
||||||
pcblist = udp_get_pcblist(nxt);
|
|
||||||
last = NULL;
|
|
||||||
CK_LIST_FOREACH(inp, pcblist, inp_list) {
|
|
||||||
if ((inp->inp_vflag & INP_IPV6) == 0)
|
|
||||||
continue;
|
|
||||||
if (inp->inp_lport != uh->uh_dport)
|
|
||||||
continue;
|
|
||||||
if (inp->inp_fport != 0 &&
|
|
||||||
inp->inp_fport != uh->uh_sport)
|
|
||||||
continue;
|
|
||||||
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
|
|
||||||
if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
|
|
||||||
&ip6->ip6_dst))
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
|
|
||||||
if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
|
|
||||||
&ip6->ip6_src) ||
|
|
||||||
inp->inp_fport != uh->uh_sport)
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
INP_RLOCK(inp);
|
|
||||||
|
|
||||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) {
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* XXXRW: Because we weren't holding either the inpcb
|
|
||||||
* or the hash lock when we checked for a match
|
|
||||||
* before, we should probably recheck now that the
|
|
||||||
* inpcb lock is (supposed to be) held.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Handle socket delivery policy for any-source
|
|
||||||
* and source-specific multicast. [RFC3678]
|
|
||||||
*/
|
|
||||||
imo = inp->in6p_moptions;
|
|
||||||
if (imo != NULL) {
|
|
||||||
struct sockaddr_in6 mcaddr;
|
|
||||||
int blocked;
|
|
||||||
|
|
||||||
bzero(&mcaddr, sizeof(struct sockaddr_in6));
|
|
||||||
mcaddr.sin6_len = sizeof(struct sockaddr_in6);
|
|
||||||
mcaddr.sin6_family = AF_INET6;
|
|
||||||
mcaddr.sin6_addr = ip6->ip6_dst;
|
|
||||||
|
|
||||||
blocked = im6o_mc_filter(imo, ifp,
|
|
||||||
(struct sockaddr *)&mcaddr,
|
|
||||||
(struct sockaddr *)&fromsa[0]);
|
|
||||||
if (blocked != MCAST_PASS) {
|
|
||||||
if (blocked == MCAST_NOTGMEMBER)
|
|
||||||
IP6STAT_INC(ip6s_notmember);
|
|
||||||
if (blocked == MCAST_NOTSMEMBER ||
|
|
||||||
blocked == MCAST_MUTED)
|
|
||||||
UDPSTAT_INC(udps_filtermcast);
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (last != NULL) {
|
|
||||||
struct mbuf *n;
|
|
||||||
|
|
||||||
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) !=
|
|
||||||
NULL) {
|
|
||||||
if (nxt == IPPROTO_UDPLITE)
|
|
||||||
UDPLITE_PROBE(receive, NULL,
|
|
||||||
last, ip6, last, uh);
|
|
||||||
else
|
|
||||||
UDP_PROBE(receive, NULL, last,
|
|
||||||
ip6, last, uh);
|
|
||||||
if (udp6_append(last, n, off,
|
|
||||||
fromsa)) {
|
|
||||||
INP_RUNLOCK(inp);
|
|
||||||
goto badunlocked;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Release PCB lock taken on previous pass. */
|
|
||||||
INP_RUNLOCK(last);
|
|
||||||
}
|
|
||||||
last = inp;
|
|
||||||
/*
|
|
||||||
* Don't look for additional matches if this one does
|
|
||||||
* not have either the SO_REUSEPORT or SO_REUSEADDR
|
|
||||||
* socket options set. This heuristic avoids
|
|
||||||
* searching through all pcbs in the common case of a
|
|
||||||
* non-shared port. It assumes that an application
|
|
||||||
* will never clear these options after setting them.
|
|
||||||
*/
|
|
||||||
if ((last->inp_socket->so_options &
|
|
||||||
(SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (last == NULL) {
|
|
||||||
/*
|
|
||||||
* No matching pcb found; discard datagram. (No need
|
|
||||||
* to send an ICMP Port Unreachable for a broadcast
|
|
||||||
* or multicast datagram.)
|
|
||||||
*/
|
|
||||||
UDPSTAT_INC(udps_noport);
|
|
||||||
UDPSTAT_INC(udps_noportmcast);
|
|
||||||
goto badunlocked;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nxt == IPPROTO_UDPLITE)
|
|
||||||
UDPLITE_PROBE(receive, NULL, last, ip6, last, uh);
|
|
||||||
else
|
|
||||||
UDP_PROBE(receive, NULL, last, ip6, last, uh);
|
|
||||||
if (udp6_append(last, m, off, fromsa) == 0)
|
|
||||||
INP_RUNLOCK(last);
|
|
||||||
*mp = NULL;
|
*mp = NULL;
|
||||||
return (IPPROTO_DONE);
|
return (udp6_multi_input(m, off, proto, fromsa));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Locate pcb for datagram.
|
* Locate pcb for datagram.
|
||||||
*/
|
*/
|
||||||
|
|
@ -1043,12 +1041,9 @@ udp6_attach(struct socket *so, int proto, struct thread *td)
|
||||||
if (error)
|
if (error)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
INP_INFO_WLOCK(pcbinfo);
|
|
||||||
error = in_pcballoc(so, pcbinfo);
|
error = in_pcballoc(so, pcbinfo);
|
||||||
if (error) {
|
if (error)
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
return (error);
|
return (error);
|
||||||
}
|
|
||||||
inp = (struct inpcb *)so->so_pcb;
|
inp = (struct inpcb *)so->so_pcb;
|
||||||
inp->inp_vflag |= INP_IPV6;
|
inp->inp_vflag |= INP_IPV6;
|
||||||
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
|
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
|
||||||
|
|
@ -1067,11 +1062,9 @@ udp6_attach(struct socket *so, int proto, struct thread *td)
|
||||||
if (error) {
|
if (error) {
|
||||||
in_pcbdetach(inp);
|
in_pcbdetach(inp);
|
||||||
in_pcbfree(inp);
|
in_pcbfree(inp);
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
INP_WUNLOCK(inp);
|
INP_WUNLOCK(inp);
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1275,13 +1268,11 @@ udp6_detach(struct socket *so)
|
||||||
inp = sotoinpcb(so);
|
inp = sotoinpcb(so);
|
||||||
KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
|
KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
|
||||||
|
|
||||||
INP_INFO_WLOCK(pcbinfo);
|
|
||||||
INP_WLOCK(inp);
|
INP_WLOCK(inp);
|
||||||
up = intoudpcb(inp);
|
up = intoudpcb(inp);
|
||||||
KASSERT(up != NULL, ("%s: up == NULL", __func__));
|
KASSERT(up != NULL, ("%s: up == NULL", __func__));
|
||||||
in_pcbdetach(inp);
|
in_pcbdetach(inp);
|
||||||
in_pcbfree(inp);
|
in_pcbfree(inp);
|
||||||
INP_INFO_WUNLOCK(pcbinfo);
|
|
||||||
udp_discardcb(up);
|
udp_discardcb(up);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue