sockets: enable protocol specific socket buffers

Split struct sockbuf into common shared fields and a protocol-specific
union, where protocols are free to implement whatever buffer they
want.  Such protocols should mark themselves with PR_SOCKBUF and are
expected to initialize their buffers in their pr_attach and tear
them down in their pr_detach.

Reviewed by:		markj
Differential revision:	https://reviews.freebsd.org/D35299
This commit is contained in:
Gleb Smirnoff 2022-06-24 09:09:10 -07:00
parent 315167c0de
commit a4fc41423f
3 changed files with 67 additions and 34 deletions

View file

@ -418,8 +418,6 @@ soalloc(struct vnet *vnet)
* a feature to change class of an existing lock, so we use DUPOK. * a feature to change class of an existing lock, so we use DUPOK.
*/ */
mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
so->so_snd.sb_mtx = &so->so_snd_mtx;
so->so_rcv.sb_mtx = &so->so_rcv_mtx;
mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF); mtx_init(&so->so_snd_mtx, "so_snd", NULL, MTX_DEF);
mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF); mtx_init(&so->so_rcv_mtx, "so_rcv", NULL, MTX_DEF);
so->so_rcv.sb_sel = &so->so_rdsel; so->so_rcv.sb_sel = &so->so_rdsel;
@ -557,6 +555,10 @@ socreate(int dom, struct socket **aso, int type, int proto,
so_rdknl_assert_lock); so_rdknl_assert_lock);
knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
so_wrknl_assert_lock); so_wrknl_assert_lock);
if ((prp->pr_flags & PR_SOCKBUF) == 0) {
so->so_snd.sb_mtx = &so->so_snd_mtx;
so->so_rcv.sb_mtx = &so->so_rcv_mtx;
}
/* /*
* Auto-sizing of socket buffers is managed by the protocols and * Auto-sizing of socket buffers is managed by the protocols and
* the appropriate flags must be set in the pru_attach function. * the appropriate flags must be set in the pru_attach function.
@ -756,6 +758,10 @@ sonewconn(struct socket *head, int connstatus)
__func__, head->so_pcb); __func__, head->so_pcb);
return (NULL); return (NULL);
} }
if ((so->so_proto->pr_flags & PR_SOCKBUF) == 0) {
so->so_snd.sb_mtx = &so->so_snd_mtx;
so->so_rcv.sb_mtx = &so->so_rcv_mtx;
}
if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
sodealloc(so); sodealloc(so);
log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
@ -1207,7 +1213,7 @@ sofree(struct socket *so)
* socket exist anywhere else in the stack. Therefore, no locks need * socket exist anywhere else in the stack. Therefore, no locks need
* to be acquired or held. * to be acquired or held.
*/ */
if (!SOLISTENING(so)) { if (!(pr->pr_flags & PR_SOCKBUF) && !SOLISTENING(so)) {
sbdestroy(so, SO_SND); sbdestroy(so, SO_SND);
sbdestroy(so, SO_RCV); sbdestroy(so, SO_RCV);
} }

View file

@ -114,6 +114,8 @@ struct protosw {
* and the protocol understands the MSG_EOF flag. The first property is * and the protocol understands the MSG_EOF flag. The first property is
* only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed * only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed
* anyhow). * anyhow).
* PR_SOCKBUF requires the protocol to initialize and destroy its socket
* buffers in its pr_attach and pr_detach.
*/ */
#define PR_ATOMIC 0x01 /* exchange atomic messages only */ #define PR_ATOMIC 0x01 /* exchange atomic messages only */
#define PR_ADDR 0x02 /* addresses given with messages */ #define PR_ADDR 0x02 /* addresses given with messages */
@ -123,6 +125,7 @@ struct protosw {
#define PR_IMPLOPCL 0x20 /* implied open/close */ #define PR_IMPLOPCL 0x20 /* implied open/close */
#define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */ #define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */
#define PR_CAPATTACH 0x80 /* socket can attach in cap mode */ #define PR_CAPATTACH 0x80 /* socket can attach in cap mode */
#define PR_SOCKBUF 0x100 /* private implementation of buffers */
/* /*
* In earlier BSD network stacks, a single pr_usrreq() function pointer was * In earlier BSD network stacks, a single pr_usrreq() function pointer was

View file

@ -75,41 +75,65 @@ struct thread;
struct selinfo; struct selinfo;
/* /*
* Variables for socket buffering. * Socket buffer
* *
* Locking key to struct sockbuf: * A buffer starts with the fields that are accessed by I/O multiplexing
* (a) locked by SOCKBUF_LOCK(). * APIs like select(2), kevent(2) or AIO and thus are shared between different
* buffer implementations. They are protected by the SOCK_RECVBUF_LOCK()
* or SOCK_SENDBUF_LOCK() of the owning socket.
*
* XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
* methods.
*
* Protocol specific implementations follow in a union.
*/ */
struct sockbuf { struct sockbuf {
struct mtx *sb_mtx; /* sockbuf lock */
struct selinfo *sb_sel; /* process selecting read/write */ struct selinfo *sb_sel; /* process selecting read/write */
short sb_state; /* (a) socket state on sockbuf */ short sb_state; /* socket state on sockbuf */
short sb_flags; /* (a) flags, see above */ short sb_flags; /* flags, see above */
struct mbuf *sb_mb; /* (a) the mbuf chain */ u_int sb_acc; /* available chars in buffer */
struct mbuf *sb_mbtail; /* (a) the last mbuf in the chain */ u_int sb_ccc; /* claimed chars in buffer */
struct mbuf *sb_lastrecord; /* (a) first mbuf of last u_int sb_mbcnt; /* chars of mbufs used */
* record in socket buffer */ u_int sb_ctl; /* non-data chars in buffer */
struct mbuf *sb_sndptr; /* (a) pointer into mbuf chain */ u_int sb_hiwat; /* max actual char count */
struct mbuf *sb_fnrdy; /* (a) pointer to first not ready buffer */ u_int sb_lowat; /* low water mark */
u_int sb_sndptroff; /* (a) byte offset of ptr into chain */ u_int sb_mbmax; /* max chars of mbufs to use */
u_int sb_acc; /* (a) available chars in buffer */ sbintime_t sb_timeo; /* timeout for read/write */
u_int sb_ccc; /* (a) claimed chars in buffer */ int (*sb_upcall)(struct socket *, void *, int);
u_int sb_hiwat; /* (a) max actual char count */ void *sb_upcallarg;
u_int sb_mbcnt; /* (a) chars of mbufs used */ TAILQ_HEAD(, kaiocb) sb_aiojobq; /* pending AIO ops */
u_int sb_mbmax; /* (a) max chars of mbufs to use */ struct task sb_aiotask; /* AIO task */
u_int sb_ctl; /* (a) non-data chars in buffer */ union {
u_int sb_tlscc; /* (a) TLS chain characters */ /*
u_int sb_tlsdcc; /* (a) TLS characters being decrypted */ * Classic BSD one-size-fits-all socket buffer, capable of
int sb_lowat; /* (a) low water mark */ * doing streams and datagrams. The stream part is able
sbintime_t sb_timeo; /* (a) timeout for read/write */ * to perform special features:
struct mbuf *sb_mtls; /* (a) TLS mbuf chain */ * - not ready data (sendfile)
struct mbuf *sb_mtlstail; /* (a) last mbuf in TLS chain */ * - TLS
int (*sb_upcall)(struct socket *, void *, int); /* (a) */ */
void *sb_upcallarg; /* (a) */ struct {
uint64_t sb_tls_seqno; /* (a) TLS seqno */ /* compat: sockbuf lock pointer */
struct ktls_session *sb_tls_info; /* (a + b) TLS state */ struct mtx *sb_mtx;
TAILQ_HEAD(, kaiocb) sb_aiojobq; /* (a) pending AIO ops */ /* first and last mbufs in the chain */
struct task sb_aiotask; /* AIO task */ struct mbuf *sb_mb;
struct mbuf *sb_mbtail;
/* first mbuf of last record in socket buffer */
struct mbuf *sb_lastrecord;
/* pointer to data to send next (TCP) */
struct mbuf *sb_sndptr;
/* pointer to first not ready buffer */
struct mbuf *sb_fnrdy;
/* byte offset of ptr into chain, used with sb_sndptr */
u_int sb_sndptroff;
/* TLS */
u_int sb_tlscc; /* TLS chain characters */
u_int sb_tlsdcc; /* characters being decrypted */
struct mbuf *sb_mtls; /* TLS mbuf chain */
struct mbuf *sb_mtlstail; /* last mbuf in TLS chain */
uint64_t sb_tls_seqno; /* TLS seqno */
struct ktls_session *sb_tls_info; /* TLS state */
};
};
}; };
#endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */ #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */