mirror of
https://github.com/opnsense/src.git
synced 2026-06-11 09:41:03 -04:00
sockets: remove the socket-on-stack hack from sorflush()
The hack can be tracked down to 4.4BSD, where copy was performed
under splimp() and then after splx() dom_dispose was called.
Stevens has a chapter on this function, but he doesn't answer why
this trick is necessary. Why can't we call into dom_dispose under
splimp()? Anyway, with multithreaded kernel the hack seems to be
necessary to avoid LORs between socket buffer lock and different
filesystem locks, especially network file systems.
The new socket buffers KPI sbcut() from 1d2df300e9 allow us to get
rid of the hack.
Reviewed by: markj
Differential revision: https://reviews.freebsd.org/D35125
This commit is contained in:
parent
42f2fa9953
commit
a982ce0442
4 changed files with 24 additions and 31 deletions
|
|
@ -2944,22 +2944,12 @@ done:
|
|||
void
|
||||
sorflush(struct socket *so)
|
||||
{
|
||||
struct socket aso;
|
||||
struct protosw *pr;
|
||||
int error;
|
||||
|
||||
VNET_SO_ASSERT(so);
|
||||
|
||||
/*
|
||||
* In order to avoid calling dom_dispose with the socket buffer mutex
|
||||
* held, we make a partial copy of the socket buffer and clear the
|
||||
* original. The new socket buffer copy won't have initialized locks so
|
||||
* we can only call routines that won't use or assert those locks.
|
||||
* Ideally calling socantrcvmore() would prevent data from being added
|
||||
* to the buffer, but currently it merely prevents buffered data from
|
||||
* being read by userspace. We make this effort to free buffered data
|
||||
* nonetheless.
|
||||
*
|
||||
* Dislodge threads currently blocked in receive and wait to acquire
|
||||
* a lock against other simultaneous readers before clearing the
|
||||
* socket buffer. Don't let our acquire be interrupted by a signal
|
||||
|
|
@ -2974,28 +2964,15 @@ sorflush(struct socket *so)
|
|||
return;
|
||||
}
|
||||
|
||||
SOCK_RECVBUF_LOCK(so);
|
||||
bzero(&aso, sizeof(aso));
|
||||
aso.so_pcb = so->so_pcb;
|
||||
bcopy(&so->so_rcv.sb_startzero, &aso.so_rcv.sb_startzero,
|
||||
offsetof(struct sockbuf, sb_endzero) -
|
||||
offsetof(struct sockbuf, sb_startzero));
|
||||
bzero(&so->so_rcv.sb_startzero,
|
||||
offsetof(struct sockbuf, sb_endzero) -
|
||||
offsetof(struct sockbuf, sb_startzero));
|
||||
SOCK_RECVBUF_UNLOCK(so);
|
||||
SOCK_IO_RECV_UNLOCK(so);
|
||||
|
||||
/*
|
||||
* Dispose of special rights and flush the copied socket. Don't call
|
||||
* any unsafe routines (that rely on locks being initialized) on aso.
|
||||
*/
|
||||
pr = so->so_proto;
|
||||
if (pr->pr_flags & PR_RIGHTS) {
|
||||
MPASS(pr->pr_domain->dom_dispose != NULL);
|
||||
(*pr->pr_domain->dom_dispose)(&aso);
|
||||
(*pr->pr_domain->dom_dispose)(so);
|
||||
} else {
|
||||
sbrelease(&so->so_rcv, so);
|
||||
SOCK_IO_RECV_UNLOCK(so);
|
||||
}
|
||||
sbrelease_internal(&aso.so_rcv, so);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -2767,7 +2767,9 @@ unp_dispose_mbuf(struct mbuf *m)
|
|||
static void
|
||||
unp_dispose(struct socket *so)
|
||||
{
|
||||
struct sockbuf *sb = &so->so_rcv;
|
||||
struct unpcb *unp;
|
||||
struct mbuf *m;
|
||||
|
||||
MPASS(!SOLISTENING(so));
|
||||
|
||||
|
|
@ -2775,7 +2777,21 @@ unp_dispose(struct socket *so)
|
|||
UNP_LINK_WLOCK();
|
||||
unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
|
||||
UNP_LINK_WUNLOCK();
|
||||
unp_dispose_mbuf(so->so_rcv.sb_mb);
|
||||
|
||||
/*
|
||||
* Grab our special mbufs before calling sbrelease().
|
||||
*/
|
||||
SOCK_RECVBUF_LOCK(so);
|
||||
m = sbcut_locked(sb, sb->sb_ccc);
|
||||
KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
|
||||
("%s: ccc %u mb %p mbcnt %u", __func__,
|
||||
sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
|
||||
sbrelease_locked(sb, so);
|
||||
SOCK_RECVBUF_UNLOCK(so);
|
||||
if (SOCK_IO_RECV_OWNED(so))
|
||||
SOCK_IO_RECV_UNLOCK(so);
|
||||
|
||||
unp_dispose_mbuf(m);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -83,7 +83,6 @@ struct sockbuf {
|
|||
struct mtx *sb_mtx; /* sockbuf lock */
|
||||
struct selinfo *sb_sel; /* process selecting read/write */
|
||||
short sb_state; /* (a) socket state on sockbuf */
|
||||
#define sb_startzero sb_flags
|
||||
short sb_flags; /* (a) flags, see above */
|
||||
struct mbuf *sb_mb; /* (a) the mbuf chain */
|
||||
struct mbuf *sb_mbtail; /* (a) the last mbuf in the chain */
|
||||
|
|
@ -108,7 +107,6 @@ struct sockbuf {
|
|||
struct mbuf *sb_mtlstail; /* (a) last mbuf in TLS chain */
|
||||
int (*sb_upcall)(struct socket *, void *, int); /* (a) */
|
||||
void *sb_upcallarg; /* (a) */
|
||||
#define sb_endzero sb_tls_seqno
|
||||
uint64_t sb_tls_seqno; /* (a) TLS seqno */
|
||||
struct ktls_session *sb_tls_info; /* (a + b) TLS state */
|
||||
TAILQ_HEAD(, kaiocb) sb_aiojobq; /* (a) pending AIO ops */
|
||||
|
|
|
|||
|
|
@ -301,10 +301,12 @@ typedef enum { SO_RCV, SO_SND } sb_which;
|
|||
soiolock((so), &(so)->so_snd_sx, (flags))
|
||||
#define SOCK_IO_SEND_UNLOCK(so) \
|
||||
soiounlock(&(so)->so_snd_sx)
|
||||
#define SOCK_IO_SEND_OWNED(so) sx_xlocked(&(so)->so_snd_sx)
|
||||
#define SOCK_IO_RECV_LOCK(so, flags) \
|
||||
soiolock((so), &(so)->so_rcv_sx, (flags))
|
||||
#define SOCK_IO_RECV_UNLOCK(so) \
|
||||
soiounlock(&(so)->so_rcv_sx)
|
||||
#define SOCK_IO_RECV_OWNED(so) sx_xlocked(&(so)->so_rcv_sx)
|
||||
|
||||
/*
|
||||
* Do we need to notify the other side when I/O is possible?
|
||||
|
|
|
|||
Loading…
Reference in a new issue