mirror of
https://github.com/opnsense/src.git
synced 2026-02-18 18:20:26 -05:00
sctp, tcp, udp: improve deferred computation of checksums
When the SCTP, TCP, or UDP implementation sends a packet, it does not compute the corresponding checksum but defers it. The network layer determines whether the network interface selected for the packet has the requested capability and computes the checksum in software if the selected network interface doesn't have the requested capability. Do this not only for packets being sent by the local SCTP, TCP, and UDP stack, but also when forwarding packets. Furthermore, when such packets are delivered to a local SCTP, TCP, or UDP stack, do not compute or validate the checksum, since such packets have never been on the wire. This makes it possible to support checksum offloading also in the case of local virtual machines or jails. Support for epair, vtnet, and tap interfaces will be added in separate commits. Reviewed by: kp, rgrimes, tuexen, manpages Differential Revision: https://reviews.freebsd.org/D51475 (cherry picked from commit bcb298fa9e23c1192c5707086a67d3b396186abc)
This commit is contained in:
parent
2f2e8368a3
commit
2927ebde30
10 changed files with 142 additions and 20 deletions
|
|
@ -22,7 +22,7 @@
|
|||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.Dd July 29, 2025
|
||||
.Dd August 1, 2025
|
||||
.Dt MBUF 9
|
||||
.Os
|
||||
.\"
|
||||
|
|
@ -1102,8 +1102,7 @@ of a packet contains two fields used for that purpose,
|
|||
.Vt int Va csum_flags
|
||||
and
|
||||
.Vt int Va csum_data .
|
||||
The meaning of those fields depends on the direction a packet flows in,
|
||||
and on whether the packet is fragmented.
|
||||
The meaning of those fields depends on whether the packet is fragmented.
|
||||
Henceforth,
|
||||
.Va csum_flags
|
||||
or
|
||||
|
|
@ -1117,14 +1116,14 @@ in the
|
|||
.Vt mbuf chain
|
||||
containing the packet.
|
||||
.Pp
|
||||
On output, the computation of the checksum is delayed until the outgoing
|
||||
interface has been determined for a packet.
|
||||
When a packet is sent by SCTP, TCP, or UDP, the computation of the checksum
|
||||
is delayed until the outgoing interface has been determined for a packet.
|
||||
The interface-specific field
|
||||
.Va ifnet.if_data.ifi_hwassist
|
||||
(see
|
||||
.Xr ifnet 9 )
|
||||
is consulted for the capabilities of the interface to assist in
|
||||
computing checksums.
|
||||
is consulted by IP for the capabilities of the network interface selected for
|
||||
output to assist in computing checksums.
|
||||
The
|
||||
.Va csum_flags
|
||||
field of the packet header is set to indicate which actions the interface
|
||||
|
|
@ -1163,8 +1162,8 @@ defined by the TCP and UDP specifications.
|
|||
In the case of SCTP, the checksum field will be initially
|
||||
set by the SCTP implementation to 0.
|
||||
.Pp
|
||||
On input, an interface indicates the actions it has performed
|
||||
on a packet by setting one or more of the following flags in
|
||||
When a packet is received by an interface, it indicates the actions it has
|
||||
performed on a packet by setting one or more of the following flags in
|
||||
.Va csum_flags
|
||||
associated with the packet:
|
||||
.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
|
||||
|
|
@ -1215,6 +1214,21 @@ is not relevant and
|
|||
in
|
||||
.Va csum_flags
|
||||
is not set, since SCTP does not use a pseudo header checksum.
|
||||
.Pp
|
||||
If IP delivers a packet with the flags
|
||||
.Dv CSUM_SCTP ,
|
||||
.Dv CSUM_TCP ,
|
||||
or
|
||||
.Dv CSUM_UDP
|
||||
set in
|
||||
.Va csum_flags
|
||||
to a local SCTP, TCP, or UDP stack, the packet will be processed without
|
||||
computing or validating the checksum, since the packet has not been on the
|
||||
wire.
|
||||
This can happen if the packet was handled by a virtual interface such as
|
||||
.Xr tap 4
|
||||
or
|
||||
.Xr epair 4 .
|
||||
.Sh STRESS TESTING
|
||||
When running a kernel compiled with the option
|
||||
.Dv MBUF_STRESS_TEST ,
|
||||
|
|
|
|||
|
|
@ -69,6 +69,7 @@
|
|||
|
||||
#include <sys/cdefs.h>
|
||||
#include "opt_ipstealth.h"
|
||||
#include "opt_sctp.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
|
@ -102,6 +103,10 @@
|
|||
|
||||
#include <machine/in_cksum.h>
|
||||
|
||||
#if defined(SCTP) || defined(SCTP_SUPPORT)
|
||||
#include <netinet/sctp_crc32.h>
|
||||
#endif
|
||||
|
||||
#define V_ipsendredirects VNET(ipsendredirects)
|
||||
|
||||
static struct mbuf *
|
||||
|
|
@ -453,6 +458,23 @@ passout:
|
|||
} else
|
||||
gw = (const struct sockaddr *)dst;
|
||||
|
||||
/*
|
||||
* If TCP/UDP header still needs a valid checksum and interface will not
|
||||
* calculate it for us, do it here.
|
||||
*/
|
||||
if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &
|
||||
~nh->nh_ifp->if_hwassist)) {
|
||||
in_delayed_cksum(m);
|
||||
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
|
||||
}
|
||||
#if defined(SCTP) || defined(SCTP_SUPPORT)
|
||||
if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP_SCTP &
|
||||
~nh->nh_ifp->if_hwassist)) {
|
||||
sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
|
||||
m->m_pkthdr.csum_flags &= ~CSUM_IP_SCTP;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Handle redirect case. */
|
||||
redest.s_addr = 0;
|
||||
if (V_ipsendredirects && osrc.s_addr == ip->ip_src.s_addr &&
|
||||
|
|
|
|||
|
|
@ -5780,7 +5780,11 @@ sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
|
|||
goto out;
|
||||
}
|
||||
ecn_bits = ip->ip_tos;
|
||||
if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
|
||||
if (m->m_pkthdr.csum_flags & (CSUM_SCTP_VALID | CSUM_IP_SCTP)) {
|
||||
/*
|
||||
* Packets with CSUM_IP_SCTP were sent from the local host using
|
||||
* checksum offloading. Checksum not required.
|
||||
*/
|
||||
SCTP_STAT_INCR(sctps_recvhwcrc);
|
||||
compute_crc = 0;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -667,6 +667,12 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
|
|||
th->th_sum = in6_cksum_pseudo(ip6, tlen,
|
||||
IPPROTO_TCP, m->m_pkthdr.csum_data);
|
||||
th->th_sum ^= 0xffff;
|
||||
} else if (m->m_pkthdr.csum_flags & CSUM_IP6_TCP) {
|
||||
/*
|
||||
* Packet from local host (maybe from a VM).
|
||||
* Checksum not required.
|
||||
*/
|
||||
th->th_sum = 0;
|
||||
} else
|
||||
th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen);
|
||||
if (th->th_sum) {
|
||||
|
|
@ -727,6 +733,12 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
|
|||
htonl(m->m_pkthdr.csum_data + tlen +
|
||||
IPPROTO_TCP));
|
||||
th->th_sum ^= 0xffff;
|
||||
} else if (m->m_pkthdr.csum_flags & CSUM_IP_TCP) {
|
||||
/*
|
||||
* Packet from local host (maybe from a VM).
|
||||
* Checksum not required.
|
||||
*/
|
||||
th->th_sum = 0;
|
||||
} else {
|
||||
struct ipovly *ipov = (struct ipovly *)ip;
|
||||
|
||||
|
|
|
|||
|
|
@ -561,6 +561,12 @@ udp_input(struct mbuf **mp, int *offp, int proto)
|
|||
ip->ip_dst.s_addr, htonl((u_short)len +
|
||||
m->m_pkthdr.csum_data + proto));
|
||||
uh_sum ^= 0xffff;
|
||||
} else if (m->m_pkthdr.csum_flags & CSUM_IP_UDP) {
|
||||
/*
|
||||
* Packet from local host (maybe from a VM).
|
||||
* Checksum not required.
|
||||
*/
|
||||
uh_sum = 0;
|
||||
} else {
|
||||
char b[offsetof(struct ipovly, ih_src)];
|
||||
struct ipovly *ipov = (struct ipovly *)ip;
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
#include <sys/cdefs.h>
|
||||
#include "opt_inet6.h"
|
||||
#include "opt_ipstealth.h"
|
||||
#include "opt_sctp.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
|
@ -54,6 +55,10 @@
|
|||
#include <netinet6/ip6_var.h>
|
||||
#include <netinet6/nd6.h>
|
||||
|
||||
#if defined(SCTP) || defined(SCTP_SUPPORT)
|
||||
#include <netinet/sctp_crc32.h>
|
||||
#endif
|
||||
|
||||
static int
|
||||
ip6_findroute(struct nhop_object **pnh, const struct sockaddr_in6 *dst,
|
||||
struct mbuf *m)
|
||||
|
|
@ -269,6 +274,29 @@ passout:
|
|||
ip6->ip6_hlim -= IPV6_HLIMDEC;
|
||||
}
|
||||
|
||||
/*
|
||||
* If TCP/UDP header still needs a valid checksum and interface will not
|
||||
* calculate it for us, do it here.
|
||||
*/
|
||||
if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
|
||||
~nh->nh_ifp->if_hwassist)) {
|
||||
int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
|
||||
|
||||
if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len)
|
||||
goto drop;
|
||||
in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset);
|
||||
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
|
||||
}
|
||||
#if defined(SCTP) || defined(SCTP_SUPPORT)
|
||||
if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP &
|
||||
~nh->nh_ifp->if_hwassist)) {
|
||||
int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
|
||||
|
||||
sctp_delayed_cksum(m, offset);
|
||||
m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP;
|
||||
}
|
||||
#endif
|
||||
|
||||
m_clrprotoflags(m); /* Avoid confusing lower layers. */
|
||||
IP_PROBE(send, NULL, NULL, ip6, nifp, NULL, ip6);
|
||||
|
||||
|
|
|
|||
|
|
@ -75,6 +75,10 @@
|
|||
|
||||
#include <netipsec/ipsec_support.h>
|
||||
|
||||
#if defined(SCTP) || defined(SCTP_SUPPORT)
|
||||
#include <netinet/sctp_crc32.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Forward a packet. If some error occurs return the sender
|
||||
* an icmp packet. Note we can't always generate a meaningful
|
||||
|
|
@ -394,6 +398,29 @@ pass:
|
|||
goto bad;
|
||||
}
|
||||
|
||||
/*
|
||||
* If TCP/UDP header still needs a valid checksum and interface will not
|
||||
* calculate it for us, do it here.
|
||||
*/
|
||||
if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
|
||||
~nh->nh_ifp->if_hwassist)) {
|
||||
int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
|
||||
|
||||
if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len)
|
||||
goto bad;
|
||||
in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset);
|
||||
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
|
||||
}
|
||||
#if defined(SCTP) || defined(SCTP_SUPPORT)
|
||||
if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP &
|
||||
~nh->nh_ifp->if_hwassist)) {
|
||||
int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
|
||||
|
||||
sctp_delayed_cksum(m, offset);
|
||||
m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Currently LLE layer stores embedded IPv6 addresses */
|
||||
if (IN6_IS_SCOPE_LINKLOCAL(&dst.sin6_addr)) {
|
||||
in6_set_unicast_scopeid(&dst.sin6_addr, dst.sin6_scope_id);
|
||||
|
|
|
|||
|
|
@ -139,7 +139,11 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
|
|||
goto out;
|
||||
}
|
||||
ecn_bits = IPV6_TRAFFIC_CLASS(ip6);
|
||||
if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
|
||||
if (m->m_pkthdr.csum_flags & (CSUM_SCTP_VALID | CSUM_IP6_SCTP)) {
|
||||
/*
|
||||
* Packets with CSUM_IP6_SCTP were sent from the local host using
|
||||
* checksum offloading. Checksum not required.
|
||||
*/
|
||||
SCTP_STAT_INCR(sctps_recvhwcrc);
|
||||
compute_crc = 0;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -435,6 +435,12 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
|
|||
uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
|
||||
m->m_pkthdr.csum_data);
|
||||
uh_sum ^= 0xffff;
|
||||
} else if (m->m_pkthdr.csum_flags & CSUM_IP6_UDP) {
|
||||
/*
|
||||
* Packet from local host (maybe from a VM).
|
||||
* Checksum not required.
|
||||
*/
|
||||
uh_sum = 0;
|
||||
} else
|
||||
uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
|
||||
|
||||
|
|
|
|||
|
|
@ -642,16 +642,15 @@ m_epg_pagelen(const struct mbuf *m, int pidx, int pgoff)
|
|||
|
||||
/*
|
||||
* Flags indicating checksum, segmentation and other offload work to be
|
||||
* done, or already done, by hardware or lower layers. It is split into
|
||||
* separate inbound and outbound flags.
|
||||
* done, or already done, by hardware or lower layers.
|
||||
*
|
||||
* Outbound flags that are set by upper protocol layers requesting lower
|
||||
* Flags that are set by upper protocol layers requesting lower
|
||||
* layers, or ideally the hardware, to perform these offloading tasks.
|
||||
* For outbound packets this field and its flags can be directly tested
|
||||
* against ifnet if_hwassist. Note that the outbound and the inbound flags do
|
||||
* not collide right now but they could be allowed to (as long as the flags are
|
||||
* scrubbed appropriately when the direction of an mbuf changes). CSUM_BITS
|
||||
* would also have to split into CSUM_BITS_TX and CSUM_BITS_RX.
|
||||
* Before passing packets to a network interface this field and its flags can
|
||||
* be directly tested against ifnet if_hwassist. Note that the flags
|
||||
* CSUM_IP_SCTP, CSUM_IP_TCP, and CSUM_IP_UDP can appear on input processing
|
||||
* of SCTP, TCP, and UDP. In such a case the checksum will not be computed or
|
||||
* validated by SCTP, TCP, or UDP, since the packet has not been on the wire.
|
||||
*
|
||||
* CSUM_INNER_<x> is the same as CSUM_<x> but it applies to the inner frame.
|
||||
* The CSUM_ENCAP_<x> bits identify the outer encapsulation.
|
||||
|
|
@ -680,7 +679,7 @@ m_epg_pagelen(const struct mbuf *m, int pidx, int pgoff)
|
|||
#define CSUM_ENCAP_VXLAN 0x00040000 /* VXLAN outer encapsulation */
|
||||
#define CSUM_ENCAP_RSVD1 0x00080000
|
||||
|
||||
/* Inbound checksum support where the checksum was verified by hardware. */
|
||||
/* Flags used to indicate that the checksum was verified by hardware. */
|
||||
#define CSUM_INNER_L3_CALC 0x00100000
|
||||
#define CSUM_INNER_L3_VALID 0x00200000
|
||||
#define CSUM_INNER_L4_CALC 0x00400000
|
||||
|
|
|
|||
Loading…
Reference in a new issue