pf: Let rdr rules modify the src port if doing so would avoid a conflict

If NAT rules cause inbound connections to different external IPs to be
mapped to the same internal IP, and some application uses the same
source port for multiple such connections, rdr translation may result in
conflicts that cause some of the connections to be dropped.

Address this by letting rdr rules detect state conflicts and modulate
the source port to avoid them.

Reviewed by:	kp, allanjude
MFC after:	3 months
Sponsored by:	Klara, Inc.
Sponsored by:	Modirum
Differential Revision:	https://reviews.freebsd.org/D44488

(cherry picked from commit 9897a66923a3e79c22fcbd4bc80afae9eb9f277c)
This commit is contained in:
Mark Johnston 2024-08-19 14:08:44 +00:00
parent f5b3ce0d75
commit 08d0ebe560
5 changed files with 191 additions and 9 deletions

View file

@ -27,7 +27,7 @@
.\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd November 17, 2023
.Dd June 24, 2024
.Dt PF.CONF 5
.Os
.Sh NAME
@ -1403,9 +1403,14 @@ or
.Xr udp 4
connections; implicitly in the case of
.Ar nat
rules and explicitly in the case of
rules and both implicitly and explicitly in the case of
.Ar rdr
rules.
A
.Ar rdr
rule may cause the source port to be modified if doing so avoids a conflict
with an existing connection.
A random source port in the range 50001-65535 is chosen in this case.
Port numbers are never translated with a
.Ar binat
rule.

View file

@ -600,7 +600,7 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
{
struct pf_krule *r = NULL;
struct pf_addr *naddr;
uint16_t *nport;
uint16_t *nportp;
uint16_t low, high;
PF_RULES_RASSERT();
@ -643,9 +643,8 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
return (NULL);
}
/* XXX We only modify one side for now. */
naddr = &(*nkp)->addr[1];
nport = &(*nkp)->port[1];
nportp = &(*nkp)->port[1];
switch (r->action) {
case PF_NAT:
@ -658,7 +657,7 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
}
if (r->rpool.mape.offset > 0) {
if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
sport, daddr, dport, naddr, nport, sn)) {
sport, daddr, dport, naddr, nportp, sn)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: MAP-E port allocation (%u/%u/%u)"
" failed\n",
@ -668,7 +667,7 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
goto notrans;
}
} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
daddr, dport, naddr, nport, low, high, sn)) {
daddr, dport, naddr, nportp, low, high, sn)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: NAT proxy port allocation (%u-%u) failed\n",
r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
@ -742,6 +741,9 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
}
break;
case PF_RDR: {
struct pf_state_key_cmp key;
uint16_t cut, low, high, nport;
if (pf_map_addr(pd->af, r, saddr, naddr, NULL, NULL, sn))
goto notrans;
if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
@ -762,9 +764,63 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
/* Wrap around if necessary. */
if (tmp_nport > 65535)
tmp_nport -= 65535;
*nport = htons((uint16_t)tmp_nport);
nport = htons((uint16_t)tmp_nport);
} else if (r->rpool.proxy_port[0])
*nport = htons(r->rpool.proxy_port[0]);
nport = htons(r->rpool.proxy_port[0]);
else
nport = dport;
/*
* Update the destination port.
*/
*nportp = nport;
/*
* Do we have a source port conflict in the stack state? Try to
* modulate the source port if so. Note that this is racy since
* the state lookup may not find any matches here but will once
* pf_create_state() actually instantiates the state.
*/
bzero(&key, sizeof(key));
key.af = pd->af;
key.proto = pd->proto;
key.port[0] = sport;
PF_ACPY(&key.addr[0], saddr, key.af);
key.port[1] = nport;
PF_ACPY(&key.addr[1], naddr, key.af);
if (!pf_find_state_all_exists(&key, PF_OUT))
break;
low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
high = 65535;
cut = arc4random() % (1 + high - low) + low;
for (uint32_t tmp = cut;
tmp <= high && tmp <= UINT16_MAX; tmp++) {
key.port[0] = htons(tmp);
if (!pf_find_state_all_exists(&key, PF_OUT)) {
/* Update the source port. */
(*nkp)->port[0] = htons(tmp);
goto out;
}
}
for (uint32_t tmp = cut - 1; tmp >= low; tmp--) {
key.port[0] = htons(tmp);
if (!pf_find_state_all_exists(&key, PF_OUT)) {
/* Update the source port. */
(*nkp)->port[0] = htons(tmp);
goto out;
}
}
DPFPRINTF(PF_DEBUG_MISC,
("pf: RDR source port allocation failed\n"));
if (0) {
out:
DPFPRINTF(PF_DEBUG_MISC,
("pf: RDR source port allocation %u->%u\n",
ntohs(sport), ntohs((*nkp)->port[0])));
}
break;
}
default:

View file

@ -64,6 +64,7 @@ ${PACKAGE}FILES+= CVE-2019-5597.py \
frag-overreplace.py \
pfsync_defer.py \
pft_ether.py \
rdr-srcport.py \
utils.subr
${PACKAGE}FILESMODE_CVE-2019-5597.py= 0555

View file

@ -0,0 +1,20 @@
#
# A helper script which accepts TCP connections and writes the remote port
# number to the stream.
#
import socket
def main():
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s.bind(('0.0.0.0', 8888))
s.listen(5)
while True:
cs, addr = s.accept()
cs.sendall(str(addr[1]).encode())
cs.close()
if __name__ == '__main__':
main()

View file

@ -121,7 +121,107 @@ tcp_v6_cleanup()
pft_cleanup
}
atf_test_case "srcport" "cleanup"
srcport_head()
{
atf_set descr 'TCP rdr srcport modulation'
atf_set require.user root
atf_set require.progs python3
atf_set timeout 9999
}
#
# Test that rdr works for multiple TCP with same srcip and srcport.
#
# Four jails, a, b, c, d, are used:
# - jail d runs a server on port 8888,
# - jail a makes connections to the server, routed through jails b and c,
# - jail b uses NAT to rewrite source addresses and ports to the same 2-tuple,
# avoiding the need to use SO_REUSEADDR in jail a,
# - jail c uses a redirect rule to map the destination address to the same
# address and port, resulting in a NAT state conflict.
#
# In this case, the rdr rule should also rewrite the source port (again) to
# resolve the state conflict.
#
srcport_body()
{
pft_init
j="rdr:srcport"
epair1=$(vnet_mkepair)
epair2=$(vnet_mkepair)
epair3=$(vnet_mkepair)
echo $epair_one
echo $epair_two
vnet_mkjail ${j}a ${epair1}a
vnet_mkjail ${j}b ${epair1}b ${epair2}a
vnet_mkjail ${j}c ${epair2}b ${epair3}a
vnet_mkjail ${j}d ${epair3}b
# configure addresses for a
jexec ${j}a ifconfig lo0 up
jexec ${j}a ifconfig ${epair1}a inet 198.51.100.50/24 up
jexec ${j}a ifconfig ${epair1}a inet alias 198.51.100.51/24
jexec ${j}a ifconfig ${epair1}a inet alias 198.51.100.52/24
# configure addresses for b
jexec ${j}b ifconfig lo0 up
jexec ${j}b ifconfig ${epair1}b inet 198.51.100.1/24 up
jexec ${j}b ifconfig ${epair2}a inet 198.51.101.2/24 up
# configure addresses for c
jexec ${j}c ifconfig lo0 up
jexec ${j}c ifconfig ${epair2}b inet 198.51.101.3/24 up
jexec ${j}c ifconfig ${epair2}b inet alias 198.51.101.4/24
jexec ${j}c ifconfig ${epair2}b inet alias 198.51.101.5/24
jexec ${j}c ifconfig ${epair3}a inet 203.0.113.1/24 up
# configure addresses for d
jexec ${j}d ifconfig lo0 up
jexec ${j}d ifconfig ${epair3}b inet 203.0.113.50/24 up
jexec ${j}b sysctl net.inet.ip.forwarding=1
jexec ${j}c sysctl net.inet.ip.forwarding=1
jexec ${j}b pfctl -e
jexec ${j}c pfctl -e
pft_set_rules ${j}b \
"set debug misc" \
"nat on ${epair2}a inet from 198.51.100.0/24 to any -> ${epair2}a static-port"
pft_set_rules ${j}c \
"set debug misc" \
"rdr on ${epair2}b proto tcp from any to ${epair2}b port 7777 -> 203.0.113.50 port 8888"
jexec ${j}a route add default 198.51.100.1
jexec ${j}c route add 198.51.100.0/24 198.51.101.2
jexec ${j}d route add 198.51.101.0/24 203.0.113.1
jexec ${j}d python3 $(atf_get_srcdir)/rdr-srcport.py &
sleep 1
echo a | jexec ${j}a nc -w 3 -s 198.51.100.50 -p 1234 198.51.101.3 7777 > port1
jexec ${j}a nc -s 198.51.100.51 -p 1234 198.51.101.4 7777 > port2 &
jexec ${j}a nc -s 198.51.100.52 -p 1234 198.51.101.5 7777 > port3 &
sleep 1
atf_check -o inline:"1234" cat port1
atf_check -o match:"[0-9]+" -o not-inline:"1234" cat port2
atf_check -o match:"[0-9]+" -o not-inline:"1234" cat port3
}
srcport_cleanup()
{
pft_cleanup
}
atf_init_test_cases()
{
atf_add_test_case "tcp_v6"
atf_add_test_case "srcport"
}