inpcb: Remove bogus SO_REUSEPORT(_LB) checks in in_pcbbind()

This check for SO_REUSEPORT was added way back in commit 52b65dbe85.
Per the commit log, this commit restricted this port-stealing check to
unicast addresses, and then only if the existing socket does not have
SO_REUSEPORT set.  In other words, if there exists a socket bound to
INADDR_ANY, and we bind a socket to INADDR_ANY with the same port, then
the two sockets need not be owned by the same user if the existing
socket has SO_REUSEPORT set.

This is a surprising semantic; bugzilla PR 7713 gives some additional
context.  That PR makes a case for the behaviour described above when
binding to a multicast address.  But, the SO_REUSEPORT check is only
applied when binding to a non-multicast address, so it doesn't really
make sense.  In the PR the committer notes that "unicast applications
don't set SO_REUSEPORT", which makes some sense, but also refers to
"multicast applications that bind to INADDR_ANY", which sounds a bit
suspicious.

OpenBSD performs the multicast check, but not the SO_REUSEPORT check.
DragonflyBSD removed the SO_REUSEPORT (and INADDR_ANY) checks back in
2014 (commit 0323d5fde12a4).  NetBSD explicitly copied our logic and
still has it.

The plot thickens: 20 years later, SO_REUSEPORT_LB was ported from
DragonflyBSD: this option provides similar semantics to SO_REUSEPORT,
but for unicast addresses it causes incoming connections/datagrams to be
distributed among all sockets in the group.  This commit (1a43cff92a)
inverted the check for SO_REUSEPORT while adding one for
SO_REUSEPORT_LB; this appears to have been inadvertent.  However:
- apparently no one has noticed that the semantics were changed;
- sockets belonging to different users can now be bound to the same port
  so long as they belong to a single lbgroup bound to INADDR_ANY, which
  is not correct.

Simply remove the SO_REUSEPORT(_LB) checks, as their original
justification was dubious and their current implementation is wrong; add
some tests.

Reviewed by:	glebius
MFC after:	1 month
Sponsored by:	Klara, Inc.
Sponsored by:	Stormshield
Differential Revision:	https://reviews.freebsd.org/D47832
This commit is contained in:
Mark Johnston 2024-12-12 14:06:06 +00:00
parent a600aabe9b
commit 4f02a7d739
3 changed files with 241 additions and 7 deletions

View file

@ -925,9 +925,7 @@ in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
(inp->inp_socket->so_type != SOCK_STREAM || (inp->inp_socket->so_type != SOCK_STREAM ||
in_nullhost(t->inp_faddr)) && in_nullhost(t->inp_faddr)) &&
(!in_nullhost(laddr) || (!in_nullhost(laddr) ||
!in_nullhost(t->inp_laddr) || !in_nullhost(t->inp_laddr)) &&
(t->inp_socket->so_options & SO_REUSEPORT) ||
(t->inp_socket->so_options & SO_REUSEPORT_LB) == 0) &&
(inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) (inp->inp_cred->cr_uid != t->inp_cred->cr_uid))
return (EADDRINUSE); return (EADDRINUSE);
} }

View file

@ -245,9 +245,7 @@ in6_pcbbind_avail(struct inpcb *inp, const struct sockaddr_in6 *sin6,
(inp->inp_socket->so_type != SOCK_STREAM || (inp->inp_socket->so_type != SOCK_STREAM ||
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
(!IN6_IS_ADDR_UNSPECIFIED(laddr) || (!IN6_IS_ADDR_UNSPECIFIED(laddr) ||
!IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr)) &&
(t->inp_socket->so_options & SO_REUSEPORT) ||
(t->inp_socket->so_options & SO_REUSEPORT_LB) == 0) &&
(inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) (inp->inp_cred->cr_uid != t->inp_cred->cr_uid))
return (EADDRINUSE); return (EADDRINUSE);

View file

@ -2,6 +2,7 @@
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
* *
* Copyright (c) 2019 Bjoern A. Zeeb * Copyright (c) 2019 Bjoern A. Zeeb
* Copyright (c) 2024 Stormshield
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions
@ -25,11 +26,17 @@
* SUCH DAMAGE. * SUCH DAMAGE.
*/ */
#include <sys/cdefs.h> #include <sys/param.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/wait.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <errno.h> #include <errno.h>
#include <poll.h> #include <poll.h>
#include <pwd.h>
#include <stdio.h>
#include <unistd.h>
#include <atf-c.h> #include <atf-c.h>
@ -281,6 +288,235 @@ ATF_TC_BODY(socket_afinet_stream_reconnect, tc)
ATF_CHECK_EQ(0, rc); ATF_CHECK_EQ(0, rc);
} }
/*
* Make sure that unprivileged users can't set the IP_BINDANY or IPV6_BINDANY
* socket options.
*/
ATF_TC(socket_afinet_bindany);
ATF_TC_HEAD(socket_afinet_bindany, tc)
{
atf_tc_set_md_var(tc, "require.user", "unprivileged");
}
ATF_TC_BODY(socket_afinet_bindany, tc)
{
int s;
s = socket(AF_INET, SOCK_STREAM, 0);
ATF_REQUIRE(s >= 0);
ATF_REQUIRE_ERRNO(EPERM,
setsockopt(s, IPPROTO_IP, IP_BINDANY, &(int){1}, sizeof(int)) ==
-1);
ATF_REQUIRE(close(s) == 0);
s = socket(AF_INET, SOCK_DGRAM, 0);
ATF_REQUIRE(s >= 0);
ATF_REQUIRE_ERRNO(EPERM,
setsockopt(s, IPPROTO_IP, IP_BINDANY, &(int){1}, sizeof(int)) ==
-1);
ATF_REQUIRE(close(s) == 0);
s = socket(AF_INET6, SOCK_STREAM, 0);
ATF_REQUIRE(s >= 0);
ATF_REQUIRE_ERRNO(EPERM,
setsockopt(s, IPPROTO_IPV6, IPV6_BINDANY, &(int){1}, sizeof(int)) ==
-1);
ATF_REQUIRE(close(s) == 0);
s = socket(AF_INET6, SOCK_DGRAM, 0);
ATF_REQUIRE(s >= 0);
ATF_REQUIRE_ERRNO(EPERM,
setsockopt(s, IPPROTO_IPV6, IPV6_BINDANY, &(int){1}, sizeof(int)) ==
-1);
ATF_REQUIRE(close(s) == 0);
}
/*
* Bind a socket to the specified address, optionally dropping privileges and
* setting one of the SO_REUSE* options first.
*
* Returns true if the bind succeeded, and false if it failed with EADDRINUSE.
*/
static bool
child_bind(const atf_tc_t *tc, int type, struct sockaddr *sa, int opt, bool unpriv)
{
const char *user;
pid_t child;
if (unpriv) {
if (!atf_tc_has_config_var(tc, "unprivileged_user"))
atf_tc_skip("unprivileged_user not set");
user = atf_tc_get_config_var(tc, "unprivileged_user");
} else {
user = NULL;
}
child = fork();
ATF_REQUIRE(child != -1);
if (child == 0) {
int s;
if (user != NULL) {
struct passwd *passwd;
passwd = getpwnam(user);
if (seteuid(passwd->pw_uid) != 0)
_exit(1);
}
s = socket(sa->sa_family, type, 0);
if (s < 0)
_exit(2);
if (bind(s, sa, sa->sa_len) == 0)
_exit(3);
if (errno != EADDRINUSE)
_exit(4);
if (opt != 0) {
if (setsockopt(s, SOL_SOCKET, opt, &(int){1},
sizeof(int)) != 0)
_exit(5);
}
if (bind(s, sa, sa->sa_len) == 0)
_exit(6);
if (errno != EADDRINUSE)
_exit(7);
_exit(0);
} else {
int status;
ATF_REQUIRE_EQ(waitpid(child, &status, 0), child);
ATF_REQUIRE(WIFEXITED(status));
status = WEXITSTATUS(status);
ATF_REQUIRE_MSG(status == 0 || status == 6,
"child exited with %d", status);
return (status == 6);
}
}
static bool
child_bind_priv(const atf_tc_t *tc, int type, struct sockaddr *sa, int opt)
{
return (child_bind(tc, type, sa, opt, false));
}
static bool
child_bind_unpriv(const atf_tc_t *tc, int type, struct sockaddr *sa, int opt)
{
return (child_bind(tc, type, sa, opt, true));
}
static int
bind_socket(int domain, int type, int opt, bool unspec, struct sockaddr *sa)
{
socklen_t slen;
int s;
s = socket(domain, type, 0);
ATF_REQUIRE(s >= 0);
if (domain == AF_INET) {
struct sockaddr_in sin;
bzero(&sin, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_len = sizeof(sin);
sin.sin_addr.s_addr = htonl(unspec ?
INADDR_ANY : INADDR_LOOPBACK);
sin.sin_port = htons(0);
ATF_REQUIRE(bind(s, (struct sockaddr *)&sin, sizeof(sin)) == 0);
slen = sizeof(sin);
} else /* if (domain == AF_INET6) */ {
struct sockaddr_in6 sin6;
bzero(&sin6, sizeof(sin6));
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(sin6);
sin6.sin6_addr = unspec ? in6addr_any : in6addr_loopback;
sin6.sin6_port = htons(0);
ATF_REQUIRE(bind(s, (struct sockaddr *)&sin6, sizeof(sin6)) == 0);
slen = sizeof(sin6);
}
if (opt != 0) {
ATF_REQUIRE(setsockopt(s, SOL_SOCKET, opt, &(int){1},
sizeof(int)) == 0);
}
ATF_REQUIRE(getsockname(s, sa, &slen) == 0);
return (s);
}
static void
multibind_test(const atf_tc_t *tc, int domain, int type)
{
struct sockaddr_storage ss;
int opts[4] = { 0, SO_REUSEADDR, SO_REUSEPORT, SO_REUSEPORT_LB };
int s;
bool flags[2] = { false, true };
bool res;
for (size_t flagi = 0; flagi < nitems(flags); flagi++) {
for (size_t opti = 0; opti < nitems(opts); opti++) {
s = bind_socket(domain, type, opts[opti], flags[flagi],
(struct sockaddr *)&ss);
for (size_t optj = 0; optj < nitems(opts); optj++) {
int opt;
opt = opts[optj];
res = child_bind_priv(tc, type,
(struct sockaddr *)&ss, opt);
/*
* Multi-binding is only allowed when both
* sockets have SO_REUSEPORT or SO_REUSEPORT_LB
* set.
*/
if (opts[opti] != 0 &&
opts[opti] != SO_REUSEADDR && opti == optj)
ATF_REQUIRE(res);
else
ATF_REQUIRE(!res);
res = child_bind_unpriv(tc, type,
(struct sockaddr *)&ss, opt);
/*
* Multi-binding is only allowed when both
* sockets have the same owner.
*
* XXX-MJ we for some reason permit this when
* binding to the unspecified address, but I
* don't think that's right
*/
if (flags[flagi] && opts[opti] != 0 &&
opts[opti] != SO_REUSEADDR && opti == optj)
ATF_REQUIRE(res);
else
ATF_REQUIRE(!res);
}
ATF_REQUIRE(close(s) == 0);
}
}
}
/*
* Try to bind two sockets to the same address/port tuple. Under some
* conditions this is permitted.
*/
ATF_TC(socket_afinet_multibind);
ATF_TC_HEAD(socket_afinet_multibind, tc)
{
atf_tc_set_md_var(tc, "require.user", "root");
atf_tc_set_md_var(tc, "require.config", "unprivileged_user");
}
ATF_TC_BODY(socket_afinet_multibind, tc)
{
multibind_test(tc, AF_INET, SOCK_STREAM);
multibind_test(tc, AF_INET, SOCK_DGRAM);
multibind_test(tc, AF_INET6, SOCK_STREAM);
multibind_test(tc, AF_INET6, SOCK_DGRAM);
}
ATF_TP_ADD_TCS(tp) ATF_TP_ADD_TCS(tp)
{ {
ATF_TP_ADD_TC(tp, socket_afinet); ATF_TP_ADD_TC(tp, socket_afinet);
@ -289,6 +525,8 @@ ATF_TP_ADD_TCS(tp)
ATF_TP_ADD_TC(tp, socket_afinet_poll_no_rdhup); ATF_TP_ADD_TC(tp, socket_afinet_poll_no_rdhup);
ATF_TP_ADD_TC(tp, socket_afinet_poll_rdhup); ATF_TP_ADD_TC(tp, socket_afinet_poll_rdhup);
ATF_TP_ADD_TC(tp, socket_afinet_stream_reconnect); ATF_TP_ADD_TC(tp, socket_afinet_stream_reconnect);
ATF_TP_ADD_TC(tp, socket_afinet_bindany);
ATF_TP_ADD_TC(tp, socket_afinet_multibind);
return atf_no_error(); return atf_no_error();
} }