From 81158452be7803fda8aa36feb4464d5301dce85c Mon Sep 17 00:00:00 2001 From: Robert Watson Date: Mon, 18 Oct 2004 22:19:43 +0000 Subject: [PATCH] Push acquisition of the accept mutex out of sofree() into the caller (sorele()/sotryfree()): - This permits the caller to acquire the accept mutex before the socket mutex, avoiding sofree() having to drop the socket mutex and re-order, which could lead to races permitting more than one thread to enter sofree() after a socket is ready to be free'd. - This also covers clearing of the so_pcb weak socket reference from the protocol to the socket, preventing races in clearing and evaluation of the reference such that sofree() might be called more than once on the same socket. This appears to close a race I was able to easily trigger by repeatedly opening and resetting TCP connections to a host, in which the tcp_close() code called as a result of the RST raced with the close() of the accepted socket in the user process resulting in simultaneous attempts to de-allocate the same socket. The new locking increases the overhead for operations that may potentially free the socket, so we will want to revise the synchronization strategy here as we normalize the reference counting model for sockets. The use of the accept mutex in freeing of sockets that are not listen sockets is primarily motivated by the potential need to remove the socket from the incomplete connection queue on its parent (listen) socket, so cleaning up the reference model here may allow us to substantially weaken the synchronization requirements. RELENG_5_3 candidate. MFC after: 3 days Reviewed by: dwhite Discussed with: gnn, dwhite, green Reported by: Marc UBM Bocklet Reported by: Vlad --- sys/kern/kern_descrip.c | 1 + sys/kern/uipc_socket.c | 7 ++++--- sys/kern/uipc_usrreq.c | 1 + sys/net/raw_cb.c | 1 + sys/net/raw_usrreq.c | 1 + sys/netatalk/ddp_pcb.c | 1 + sys/netatm/atm_socket.c | 1 + sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c | 1 + sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c | 2 ++ .../bluetooth/socket/ng_btsocket_l2cap_raw.c | 1 + sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c | 1 + sys/netinet/in_pcb.c | 1 + sys/netinet/tcp_subr.c | 1 + sys/netinet/tcp_timewait.c | 1 + sys/netinet6/in6_pcb.c | 1 + sys/netipx/ipx_pcb.c | 1 + sys/netipx/ipx_usrreq.c | 1 + sys/netnatm/natm.c | 2 ++ sys/sys/socketvar.h | 12 ++++++++++-- 19 files changed, 33 insertions(+), 5 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 769b8db72a2..9a448b4e938 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -2063,6 +2063,7 @@ fputsock(struct socket *so) { NET_ASSERT_GIANT(); + ACCEPT_LOCK(); SOCK_LOCK(so); sorele(so); } diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 17ecd6a39c1..1a74cffdd27 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -227,6 +227,7 @@ socreate(dom, aso, type, proto, cred, td) SOCK_UNLOCK(so); error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); if (error) { + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state |= SS_NOFDREF; sorele(so); @@ -333,9 +334,8 @@ sofree(so) { struct socket *head; - SOCK_UNLOCK(so); - ACCEPT_LOCK(); - SOCK_LOCK(so); + ACCEPT_LOCK_ASSERT(); + SOCK_LOCK_ASSERT(so); if (so->so_pcb != NULL || (so->so_state & SS_NOFDREF) == 0 || so->so_count != 0) { @@ -467,6 +467,7 @@ drop: error = error2; } discard: + ACCEPT_LOCK(); SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); so->so_state |= SS_NOFDREF; diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 07cdbc7a9f7..6b97f94ad23 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -140,6 +140,7 @@ uipc_abort(struct socket *so) unp_drop(unp, ECONNABORTED); unp_detach(unp); UNP_UNLOCK_ASSERT(); + ACCEPT_LOCK(); SOCK_LOCK(so); sotryfree(so); return (0); diff --git a/sys/net/raw_cb.c b/sys/net/raw_cb.c index 04b45163a3b..b6ab2083a0f 100644 --- a/sys/net/raw_cb.c +++ b/sys/net/raw_cb.c @@ -98,6 +98,7 @@ raw_detach(rp) { struct socket *so = rp->rcb_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = 0; sotryfree(so); diff --git a/sys/net/raw_usrreq.c b/sys/net/raw_usrreq.c index 7517743dfa8..f444b571ba4 100644 --- a/sys/net/raw_usrreq.c +++ b/sys/net/raw_usrreq.c @@ -147,6 +147,7 @@ raw_uabort(struct socket *so) return EINVAL; raw_disconnect(rp); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); sotryfree(so); return 0; diff --git a/sys/netatalk/ddp_pcb.c b/sys/netatalk/ddp_pcb.c index 8073d553af8..dd69850cd8f 100644 --- a/sys/netatalk/ddp_pcb.c +++ b/sys/netatalk/ddp_pcb.c @@ -282,6 +282,7 @@ at_pcbdetach(struct socket *so, struct ddpcb *ddp) DDP_LOCK_ASSERT(ddp); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netatm/atm_socket.c b/sys/netatm/atm_socket.c index 6a027e2a090..99dbaf6ceac 100644 --- a/sys/netatm/atm_socket.c +++ b/sys/netatm/atm_socket.c @@ -173,6 +173,7 @@ atm_sock_detach(so) /* * Break links and free control blocks */ + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c index d4df5bda758..7c347c370a9 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c @@ -1417,6 +1417,7 @@ ng_btsocket_hci_raw_detach(struct socket *so) bzero(pcb, sizeof(*pcb)); FREE(pcb, M_NETGRAPH_BTSOCKET_HCI_RAW); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c index f52bafa2113..cb91fd79a85 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c @@ -1804,6 +1804,7 @@ ng_btsocket_l2cap_rtclean(void *context, int pending) FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); @@ -2347,6 +2348,7 @@ ng_btsocket_l2cap_detach(struct socket *so) FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c index 8103f2790bb..97e924748e5 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c @@ -1129,6 +1129,7 @@ ng_btsocket_l2cap_raw_detach(struct socket *so) bzero(pcb, sizeof(*pcb)); FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP_RAW); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c index a81afac7c37..1ba72114a1e 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c @@ -724,6 +724,7 @@ ng_btsocket_rfcomm_detach(struct socket *so) FREE(pcb, M_NETGRAPH_BTSOCKET_RFCOMM); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index ec256555617..11e5b0dd0d5 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -687,6 +687,7 @@ in_pcbdetach(inp) inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); if (so) { + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 7619916a341..03610b390d9 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1680,6 +1680,7 @@ tcp_twstart(tp) } tcp_discardcb(tp); so = inp->inp_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; tw->tw_cred = crhold(so->so_cred); diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 7619916a341..03610b390d9 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -1680,6 +1680,7 @@ tcp_twstart(tp) } tcp_discardcb(tp); so = inp->inp_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; tw->tw_cred = crhold(so->so_cred); diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 9bcbeb82d3c..e7941030aa8 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -436,6 +436,7 @@ in6_pcbdetach(inp) in_pcbremlists(inp); if (so) { + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netipx/ipx_pcb.c b/sys/netipx/ipx_pcb.c index 2be7e1df92c..b1e2cabd3dc 100644 --- a/sys/netipx/ipx_pcb.c +++ b/sys/netipx/ipx_pcb.c @@ -268,6 +268,7 @@ ipx_pcbdetach(ipxp) { struct socket *so = ipxp->ipxp_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = 0; sotryfree(so); diff --git a/sys/netipx/ipx_usrreq.c b/sys/netipx/ipx_usrreq.c index f07cecb8754..cff94bc4ba3 100644 --- a/sys/netipx/ipx_usrreq.c +++ b/sys/netipx/ipx_usrreq.c @@ -424,6 +424,7 @@ ipx_usr_abort(so) ipx_pcbdetach(ipxp); splx(s); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); sotryfree(so); return (0); diff --git a/sys/netnatm/natm.c b/sys/netnatm/natm.c index 5613b6a9e66..f999f32346a 100644 --- a/sys/netnatm/natm.c +++ b/sys/netnatm/natm.c @@ -135,6 +135,7 @@ natm_usr_detach(struct socket *so) * we turn on 'drain' *before* we sofree. */ npcb_free(npcb, NPCB_DESTROY); /* drain */ + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); @@ -464,6 +465,7 @@ struct proc *p; */ npcb_free(npcb, NPCB_DESTROY); /* drain */ + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index cc2481474c4..391ed3f7b80 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -158,6 +158,8 @@ struct socket { * until such time as it proves to be a good idea. */ extern struct mtx accept_mtx; +#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED) +#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED) #define ACCEPT_LOCK() mtx_lock(&accept_mtx) #define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx) @@ -344,21 +346,27 @@ struct xsocket { } while (0) #define sorele(so) do { \ + ACCEPT_LOCK_ASSERT(); \ SOCK_LOCK_ASSERT(so); \ if ((so)->so_count <= 0) \ panic("sorele"); \ if (--(so)->so_count == 0) \ sofree(so); \ - else \ + else { \ SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ } while (0) #define sotryfree(so) do { \ + ACCEPT_LOCK_ASSERT(); \ SOCK_LOCK_ASSERT(so); \ if ((so)->so_count == 0) \ sofree(so); \ - else \ + else { \ SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ } while(0) /*