opnsense-src/sys/dev/hyperv/netvsc/hn_rndis.c
Wei Hu a491581f3f Hyper-V: hn: Enable vSwitch RSC support in hn netvsc driver
Receive Segment Coalescing (RSC) in the vSwitch is a feature available in
Windows Server 2019 hosts and later. It reduces the per packet processing
overhead by coalescing multiple TCP segments when possible. This happens
mostly when TCP traffic flows between different guests on the same host.
This patch adds netvsc driver support for this feature.

The patch also updates NVS version to 6.1 as needed for RSC
enablement.

MFC after:	2 weeks
Sponsored by:	Microsoft
Differential Revision:	https://reviews.freebsd.org/D29075
2021-03-12 04:35:16 +00:00

1061 lines
29 KiB
C

/*-
* Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet6.h"
#include "opt_inet.h"
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <machine/atomic.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_media.h>
#include <net/rndis.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp_lro.h>
#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>
#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>
/*
 * Request IDs up to HN_RNDIS_RID_COMPAT_MAX form the "compat" range.  The
 * control requests built in this file assert that their IDs are above it.
 */
#define HN_RNDIS_RID_COMPAT_MASK 0xffff
#define HN_RNDIS_RID_COMPAT_MAX HN_RNDIS_RID_COMPAT_MASK
/* Value placed in rm_max_xfersz of the RNDIS initialize request. */
#define HN_RNDIS_XFER_SIZE 2048
/*
 * Capability bit sets tested by hn_rndis_conf_offload().  Each TX checksum
 * or LSOv2 feature using one of these sets is enabled only when every bit
 * of the set is present in the capabilities reported by the host.
 */
#define HN_NDIS_TXCSUM_CAP_IP4 \
(NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT)
#define HN_NDIS_TXCSUM_CAP_TCP4 \
(NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT)
#define HN_NDIS_TXCSUM_CAP_TCP6 \
(NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \
NDIS_TXCSUM_CAP_IP6EXT)
#define HN_NDIS_TXCSUM_CAP_UDP6 \
(NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT)
#define HN_NDIS_LSOV2_CAP_IP6 \
(NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT)
/*
 * Forward declarations of the file-local helpers.
 */
/* Transaction execution: send a prepared request and wait for the reply. */
static const void *hn_rndis_xact_exec1(struct hn_softc *,
struct vmbus_xact *, size_t,
struct hn_nvs_sendctx *, size_t *);
static const void *hn_rndis_xact_execute(struct hn_softc *,
struct vmbus_xact *, uint32_t, size_t, size_t *,
uint32_t);
/* OID query/set primitives built on top of the transaction helpers. */
static int hn_rndis_query(struct hn_softc *, uint32_t,
const void *, size_t, void *, size_t *);
static int hn_rndis_query2(struct hn_softc *, uint32_t,
const void *, size_t, void *, size_t *, size_t);
static int hn_rndis_set(struct hn_softc *, uint32_t,
const void *, size_t);
/* RNDIS session setup/teardown and offload configuration. */
static int hn_rndis_init(struct hn_softc *);
static int hn_rndis_halt(struct hn_softc *);
static int hn_rndis_conf_offload(struct hn_softc *, int);
static int hn_rndis_query_hwcaps(struct hn_softc *,
struct ndis_offload *);
/*
 * Allocate the request ID for a new RNDIS control message.
 *
 * The low 16 bits of the per-softc counter sc->hn_rndis_rid are moved into
 * the upper 16 bits of the returned value, so the result is always a
 * non-zero multiple of 0x10000.
 */
static __inline uint32_t
hn_rndis_rid(struct hn_softc *sc)
{
	uint32_t rid;

	/*
	 * Only the low 16 bits of the counter survive in the returned ID,
	 * so every counter value whose low 16 bits are zero (0, 0x10000,
	 * 0x20000, ...) must be skipped, not just 0.  Otherwise the ID
	 * handed out would be 0, which is not above the compat range that
	 * the callers assert on.
	 */
	do {
		rid = atomic_fetchadd_int(&sc->hn_rndis_rid, 1) & 0xffff;
	} while (rid == 0);

	/* Use upper 16 bits for non-compat RNDIS messages. */
	return (rid << 16);
}
/*
 * Dispatch a received RNDIS control message.
 *
 * INITIALIZE/QUERY/SET/KEEPALIVE completions are handed to the pending
 * transaction through vmbus_xact_ctx_wakeup(); RESET completions and
 * unrecognized message types are only logged.
 */
void
hn_rndis_rx_ctrl(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_msghdr *msg;
	const struct rndis_comp_hdr *cmplt;
	uint32_t type;

	KASSERT(dlen >= sizeof(*msg), ("invalid RNDIS msg\n"));
	msg = data;
	type = msg->rm_type;

	if (type == REMOTE_NDIS_RESET_CMPLT) {
		/*
		 * A reset completion carries no rid.  hn(4) never issues
		 * RESET, so this message is not expected to show up.
		 */
		if_printf(sc->hn_ifp, "RESET cmplt received\n");
		return;
	}

	if (type != REMOTE_NDIS_INITIALIZE_CMPLT &&
	    type != REMOTE_NDIS_QUERY_CMPLT &&
	    type != REMOTE_NDIS_SET_CMPLT &&
	    type != REMOTE_NDIS_KEEPALIVE_CMPLT /* unused */) {
		if_printf(sc->hn_ifp, "unknown RNDIS msg 0x%x\n", type);
		return;
	}

	/* A completion must at least contain the common completion header. */
	if (dlen < sizeof(*cmplt)) {
		if_printf(sc->hn_ifp, "invalid RNDIS cmplt\n");
		return;
	}
	cmplt = data;
	KASSERT(cmplt->rm_rid > HN_RNDIS_RID_COMPAT_MAX,
	    ("invalid RNDIS rid 0x%08x\n", cmplt->rm_rid));

	/* Hand the completion to whoever is waiting on the transaction. */
	vmbus_xact_ctx_wakeup(sc->hn_xact, cmplt, dlen);
}
/*
 * Query OID_802_3_PERMANENT_ADDRESS and store the result in eaddr.
 *
 * Returns 0 on success, the query error, or EINVAL when the reply is not
 * exactly ETHER_ADDR_LEN bytes long.
 */
int
hn_rndis_get_eaddr(struct hn_softc *sc, uint8_t *eaddr)
{
	size_t len = ETHER_ADDR_LEN;
	int rc;

	rc = hn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, NULL, 0,
	    eaddr, &len);
	if (rc != 0)
		return (rc);

	if (len == ETHER_ADDR_LEN)
		return (0);

	if_printf(sc->hn_ifp, "invalid eaddr len %zu\n", len);
	return (EINVAL);
}
/*
 * Query OID_GEN_MEDIA_CONNECT_STATUS and store the result in *link_status.
 *
 * Returns 0 on success, the query error, or EINVAL when the reply is not
 * exactly sizeof(uint32_t) bytes long.
 */
int
hn_rndis_get_linkstatus(struct hn_softc *sc, uint32_t *link_status)
{
	size_t len = sizeof(*link_status);
	int rc;

	rc = hn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0,
	    link_status, &len);
	if (rc != 0)
		return (rc);

	if (len == sizeof(uint32_t))
		return (0);

	if_printf(sc->hn_ifp, "invalid link status len %zu\n", len);
	return (EINVAL);
}
/*
 * Query OID_GEN_MAXIMUM_FRAME_SIZE and store the result in *mtu.
 *
 * Returns 0 on success, the query error, or EINVAL when the reply is not
 * exactly sizeof(uint32_t) bytes long.
 */
int
hn_rndis_get_mtu(struct hn_softc *sc, uint32_t *mtu)
{
	size_t len = sizeof(*mtu);
	int rc;

	rc = hn_rndis_query(sc, OID_GEN_MAXIMUM_FRAME_SIZE, NULL, 0,
	    mtu, &len);
	if (rc != 0)
		return (rc);

	if (len == sizeof(uint32_t))
		return (0);

	if_printf(sc->hn_ifp, "invalid mtu len %zu\n", len);
	return (EINVAL);
}
static const void *
hn_rndis_xact_exec1(struct hn_softc *sc, struct vmbus_xact *xact, size_t reqlen,
struct hn_nvs_sendctx *sndc, size_t *comp_len)
{
struct vmbus_gpa gpa[HN_XACT_REQ_PGCNT];
int gpa_cnt, error;
bus_addr_t paddr;
KASSERT(reqlen <= HN_XACT_REQ_SIZE && reqlen > 0,
("invalid request length %zu", reqlen));
/*
* Setup the SG list.
*/
paddr = vmbus_xact_req_paddr(xact);
KASSERT((paddr & PAGE_MASK) == 0,
("vmbus xact request is not page aligned 0x%jx", (uintmax_t)paddr));
for (gpa_cnt = 0; gpa_cnt < HN_XACT_REQ_PGCNT; ++gpa_cnt) {
int len = PAGE_SIZE;
if (reqlen == 0)
break;
if (reqlen < len)
len = reqlen;
gpa[gpa_cnt].gpa_page = atop(paddr) + gpa_cnt;
gpa[gpa_cnt].gpa_len = len;
gpa[gpa_cnt].gpa_ofs = 0;
reqlen -= len;
}
KASSERT(reqlen == 0, ("still have %zu request data left", reqlen));
/*
* Send this RNDIS control message and wait for its completion
* message.
*/
vmbus_xact_activate(xact);
error = hn_nvs_send_rndis_ctrl(sc->hn_prichan, sndc, gpa, gpa_cnt);
if (error) {
vmbus_xact_deactivate(xact);
if_printf(sc->hn_ifp, "RNDIS ctrl send failed: %d\n", error);
return (NULL);
}
return (vmbus_chan_xact_wait(sc->hn_prichan, xact, comp_len,
HN_CAN_SLEEP(sc)));
}
/*
 * Execute a prepared RNDIS request and validate its completion.
 *
 * On entry *comp_len0 is the minimum acceptable completion length; on
 * success it is replaced by the actual length and the completion is
 * returned.  NULL is returned when the exchange fails or the completion is
 * too short, has an unexpected type (comp_type) or carries a different
 * request ID than rid.
 */
static const void *
hn_rndis_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, uint32_t rid,
    size_t reqlen, size_t *comp_len0, uint32_t comp_type)
{
	const struct rndis_comp_hdr *cmplt;
	const size_t need = *comp_len0;
	size_t got;

	KASSERT(rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid rid %u\n", rid));
	KASSERT(need >= sizeof(*cmplt),
	    ("invalid minimum complete len %zu", need));

	/* Run the transaction the caller has set up. */
	cmplt = hn_rndis_xact_exec1(sc, xact, reqlen, &hn_nvs_sendctx_none,
	    &got);
	if (cmplt == NULL)
		return (NULL);

	/* Received length must cover what the caller asked for. */
	if (got < need) {
		if (got < sizeof(*cmplt)) {
			if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu\n",
			    got);
		} else {
			/* Long enough for rm_status to be meaningful. */
			if_printf(sc->hn_ifp,
			    "invalid RNDIS comp len %zu, status 0x%08x\n",
			    got, cmplt->rm_status);
		}
		return (NULL);
	}

	/* So must the length the message claims for itself. */
	if (cmplt->rm_len < need) {
		if_printf(sc->hn_ifp, "invalid RNDIS comp msglen %u\n",
		    cmplt->rm_len);
		return (NULL);
	}

	/* It has to be the completion of the request that was sent. */
	if (cmplt->rm_type != comp_type) {
		if_printf(sc->hn_ifp,
		    "unexpected RNDIS comp 0x%08x, expect 0x%08x\n",
		    cmplt->rm_type, comp_type);
		return (NULL);
	}
	if (cmplt->rm_rid != rid) {
		if_printf(sc->hn_ifp,
		    "RNDIS comp rid mismatch %u, expect %u\n",
		    cmplt->rm_rid, rid);
		return (NULL);
	}

	*comp_len0 = got;
	return (cmplt);
}
/*
 * Convenience wrapper around hn_rndis_query2() that requires the
 * completion to carry at least as many data bytes as the output buffer
 * holds (*odlen0 on entry).
 */
static int
hn_rndis_query(struct hn_softc *sc, uint32_t oid,
    const void *idata, size_t idlen, void *odata, size_t *odlen0)
{
	const size_t min_odlen = *odlen0;

	return (hn_rndis_query2(sc, oid, idata, idlen, odata, odlen0,
	    min_odlen));
}
/*
 * Issue an RNDIS query for 'oid' and copy the returned information buffer
 * into odata.
 *
 * idata/idlen:	optional input data appended to the request.
 * odata:	output buffer; *odlen0 is its size on entry and the number
 *		of bytes actually copied on return (0 if the host returned
 *		no data).
 * min_odlen:	minimum number of bytes required to follow the query
 *		completion header in the completion message.
 *
 * Returns 0 on success, ENXIO if no transaction is available, EIO if the
 * exchange fails or the host reports a non-success status, and EINVAL if
 * the completion's information buffer does not lie inside the completion.
 */
static int
hn_rndis_query2(struct hn_softc *sc, uint32_t oid,
    const void *idata, size_t idlen, void *odata, size_t *odlen0,
    size_t min_odlen)
{
	struct rndis_query_req *req;
	const struct rndis_query_comp *comp;
	struct vmbus_xact *xact;
	size_t reqlen, odlen = *odlen0, comp_len, ofs;
	int error;
	uint32_t rid;

	reqlen = sizeof(*req) + idlen;
	xact = vmbus_xact_get(sc->hn_xact, reqlen);
	if (xact == NULL) {
		if_printf(sc->hn_ifp, "no xact for RNDIS query 0x%08x\n", oid);
		return (ENXIO);
	}
	rid = hn_rndis_rid(sc);
	req = vmbus_xact_req_data(xact);
	req->rm_type = REMOTE_NDIS_QUERY_MSG;
	req->rm_len = reqlen;
	req->rm_rid = rid;
	req->rm_oid = oid;
	/*
	 * XXX
	 * This is _not_ RNDIS Spec conforming:
	 * "This MUST be set to 0 when there is no input data
	 *  associated with the OID."
	 *
	 * If this field was set to 0 according to the RNDIS Spec,
	 * Hyper-V would set non-SUCCESS status in the query
	 * completion.
	 */
	req->rm_infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET;
	/*
	 * Always written, so that a query without input data carries a
	 * length of 0; this function does not otherwise clear the request
	 * buffer.
	 */
	req->rm_infobuflen = idlen;
	if (idlen > 0) {
		/* Input data immediately follows RNDIS query. */
		memcpy(req + 1, idata, idlen);
	}

	comp_len = sizeof(*comp) + min_odlen;
	comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len,
	    REMOTE_NDIS_QUERY_CMPLT);
	if (comp == NULL) {
		if_printf(sc->hn_ifp, "exec RNDIS query 0x%08x failed\n", oid);
		error = EIO;
		goto done;
	}

	if (comp->rm_status != RNDIS_STATUS_SUCCESS) {
		if_printf(sc->hn_ifp, "RNDIS query 0x%08x failed: "
		    "status 0x%08x\n", oid, comp->rm_status);
		error = EIO;
		goto done;
	}

	if (comp->rm_infobuflen == 0 || comp->rm_infobufoffset == 0) {
		/* No output data! */
		if_printf(sc->hn_ifp, "RNDIS query 0x%08x, no data\n", oid);
		*odlen0 = 0;
		error = 0;
		goto done;
	}

	/*
	 * Check output data length and offset.
	 *
	 * Both values come from the completion message.  The check is
	 * written as "length <= space left after the offset" so that no
	 * sum of the two is ever formed; adding them could wrap around and
	 * make an out-of-range buffer look valid.
	 */
	/* ofs is the offset from the beginning of comp. */
	ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->rm_infobufoffset);
	if (ofs < sizeof(*comp) || ofs > comp_len ||
	    comp->rm_infobuflen > comp_len - ofs) {
		if_printf(sc->hn_ifp, "RNDIS query invalid comp ib off/len, "
		    "%u/%u\n", comp->rm_infobufoffset, comp->rm_infobuflen);
		error = EINVAL;
		goto done;
	}

	/*
	 * Save output data, truncated to the caller's buffer size.
	 */
	if (comp->rm_infobuflen < odlen)
		odlen = comp->rm_infobuflen;
	memcpy(odata, ((const uint8_t *)comp) + ofs, odlen);
	*odlen0 = odlen;

	error = 0;
done:
	vmbus_xact_put(xact);
	return (error);
}
/*
 * Query the RSS capabilities of the host.
 *
 * On success the usable indirect table size and the hash capabilities are
 * stored in sc->hn_rss_ind_size / sc->hn_rss_hcap and the number of RX
 * rings (limited to the indirect table size) is returned through rxr_cnt0.
 * *rxr_cnt0 is 0 on every failure path.
 *
 * Returns EOPNOTSUPP when the NDIS version is below 6.20 or the reported
 * capabilities cannot be used, EINVAL when the reply is malformed, or the
 * error of the underlying query.
 */
int
hn_rndis_query_rsscaps(struct hn_softc *sc, int *rxr_cnt0)
{
	struct ndis_rss_caps in, caps;
	size_t caps_len;
	int error, indsz, rxr_cnt, hash_fnidx;
	uint32_t hash_func = 0, hash_types = 0;

	*rxr_cnt0 = 0;

	if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_20)
		return (EOPNOTSUPP);

	memset(&in, 0, sizeof(in));
	in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS;
	in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2;
	in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE;

	/*
	 * The query copies only as many bytes as the host returned, which
	 * may be none at all.  Start from a zeroed object so that nothing
	 * below ever looks at indeterminate stack contents.
	 */
	memset(&caps, 0, sizeof(caps));
	caps_len = NDIS_RSS_CAPS_SIZE;
	error = hn_rndis_query2(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES,
	    &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len, NDIS_RSS_CAPS_SIZE_6_0);
	if (error)
		return (error);

	/*
	 * A successful query may still have delivered less than the
	 * smallest valid object; reject that before parsing the header.
	 */
	if (caps_len < NDIS_RSS_CAPS_SIZE_6_0) {
		if_printf(sc->hn_ifp, "invalid RSS caps len %zu\n", caps_len);
		return (EINVAL);
	}

	/*
	 * Preliminary verification.
	 */
	if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) {
		if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n",
		    caps.ndis_hdr.ndis_type);
		return (EINVAL);
	}
	if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) {
		if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n",
		    caps.ndis_hdr.ndis_rev);
		return (EINVAL);
	}
	if (caps.ndis_hdr.ndis_size > caps_len) {
		if_printf(sc->hn_ifp, "invalid NDIS objsize %u, "
		    "data size %zu\n", caps.ndis_hdr.ndis_size, caps_len);
		return (EINVAL);
	} else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) {
		if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n",
		    caps.ndis_hdr.ndis_size);
		return (EINVAL);
	}

	/*
	 * Save information for later RSS configuration.
	 */
	if (caps.ndis_nrxr == 0) {
		if_printf(sc->hn_ifp, "0 RX rings!?\n");
		return (EINVAL);
	}
	if (bootverbose)
		if_printf(sc->hn_ifp, "%u RX rings\n", caps.ndis_nrxr);
	rxr_cnt = caps.ndis_nrxr;

	/*
	 * ndis_nind is only consulted for a full-size, revision 2+ object;
	 * otherwise the default indirect table size is assumed.
	 */
	if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE &&
	    caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) {
		if (caps.ndis_nind > NDIS_HASH_INDCNT) {
			if_printf(sc->hn_ifp,
			    "too many RSS indirect table entries %u\n",
			    caps.ndis_nind);
			return (EOPNOTSUPP);
		}
		if (!powerof2(caps.ndis_nind)) {
			/* Logged only; the size is still used as reported. */
			if_printf(sc->hn_ifp, "RSS indirect table size is not "
			    "power-of-2 %u\n", caps.ndis_nind);
		}

		if (bootverbose) {
			if_printf(sc->hn_ifp, "RSS indirect table size %u\n",
			    caps.ndis_nind);
		}
		indsz = caps.ndis_nind;
	} else {
		indsz = NDIS_HASH_INDCNT;
	}
	if (indsz < rxr_cnt) {
		if_printf(sc->hn_ifp, "# of RX rings (%d) > "
		    "RSS indirect table size %d\n", rxr_cnt, indsz);
		rxr_cnt = indsz;
	}

	/*
	 * NOTE:
	 * Toeplitz is at the lowest bit, and it is preferred; so ffs(),
	 * instead of fls(), is used here.
	 */
	hash_fnidx = ffs(caps.ndis_caps & NDIS_RSS_CAP_HASHFUNC_MASK);
	if (hash_fnidx == 0) {
		if_printf(sc->hn_ifp, "no hash functions, caps 0x%08x\n",
		    caps.ndis_caps);
		return (EOPNOTSUPP);
	}
	hash_func = 1 << (hash_fnidx - 1); /* ffs is 1-based */

	if (caps.ndis_caps & NDIS_RSS_CAP_IPV4)
		hash_types |= NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4;
	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6)
		hash_types |= NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6;
	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX)
		hash_types |= NDIS_HASH_IPV6_EX | NDIS_HASH_TCP_IPV6_EX;
	if (hash_types == 0) {
		if_printf(sc->hn_ifp, "no hash types, caps 0x%08x\n",
		    caps.ndis_caps);
		return (EOPNOTSUPP);
	}
	if (bootverbose)
		if_printf(sc->hn_ifp, "RSS caps %#x\n", caps.ndis_caps);

	/* Commit! */
	sc->hn_rss_ind_size = indsz;
	sc->hn_rss_hcap = hash_func | hash_types;
	if (sc->hn_caps & HN_CAP_UDPHASH) {
		/* UDP 4-tuple hash is unconditionally enabled. */
		sc->hn_rss_hcap |= NDIS_HASH_UDP_IPV4_X;
	}
	*rxr_cnt0 = rxr_cnt;
	return (0);
}
/*
 * Issue an RNDIS set request for 'oid' with 'dlen' bytes of 'data'.
 *
 * Returns 0 on success, ENXIO if no transaction is available, or EIO if
 * the exchange fails or the host reports a non-success status.
 */
static int
hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen)
{
	struct rndis_set_req *msg;
	const struct rndis_set_comp *cmplt;
	struct vmbus_xact *xact;
	size_t msglen, cmplt_len;
	uint32_t rid;
	int rc;

	KASSERT(dlen > 0, ("invalid dlen %zu", dlen));

	msglen = sizeof(*msg) + dlen;
	xact = vmbus_xact_get(sc->hn_xact, msglen);
	if (xact == NULL) {
		if_printf(sc->hn_ifp, "no xact for RNDIS set 0x%08x\n", oid);
		return (ENXIO);
	}

	/* Fill in the set message; the payload directly follows it. */
	rid = hn_rndis_rid(sc);
	msg = vmbus_xact_req_data(xact);
	msg->rm_type = REMOTE_NDIS_SET_MSG;
	msg->rm_len = msglen;
	msg->rm_rid = rid;
	msg->rm_oid = oid;
	msg->rm_infobuflen = dlen;
	msg->rm_infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET;
	memcpy(msg + 1, data, dlen);

	cmplt_len = sizeof(*cmplt);
	cmplt = hn_rndis_xact_execute(sc, xact, rid, msglen, &cmplt_len,
	    REMOTE_NDIS_SET_CMPLT);
	if (cmplt == NULL) {
		if_printf(sc->hn_ifp, "exec RNDIS set 0x%08x failed\n", oid);
		rc = EIO;
	} else if (cmplt->rm_status != RNDIS_STATUS_SUCCESS) {
		if_printf(sc->hn_ifp,
		    "RNDIS set 0x%08x failed: status 0x%08x\n",
		    oid, cmplt->rm_status);
		rc = EIO;
	} else {
		rc = 0;
	}

	/* The completion is only inspected above, before the xact is put. */
	vmbus_xact_put(xact);
	return (rc);
}
/*
 * Query the offload capabilities reported by the host and enable the
 * usable ones through OID_TCP_OFFLOAD_PARAMETERS.
 *
 * mtu is used to sanity check the TSO limits: TSO is disabled when the
 * maximum TSO size is smaller than (minimum segment count * mtu).
 *
 * sc->hn_ndis_tso_szmax/sc->hn_ndis_tso_sgmin are updated before the set
 * request is issued; the HN_CAP_* flags are ORed into sc->hn_caps only
 * after the host accepted the parameters.
 *
 * Returns 0 on success, or the error of the capability query or of the
 * set request.
 */
static int
hn_rndis_conf_offload(struct hn_softc *sc, int mtu)
{
	struct ndis_offload hwcaps;
	struct ndis_offload_params params;
	uint32_t caps = 0;
	size_t paramsz;
	int error, tso_maxsz, tso_minsg;

	/*
	 * The query fills in only what the host returned; start from a
	 * zeroed object so that no indeterminate stack contents are ever
	 * interpreted as capabilities.
	 */
	memset(&hwcaps, 0, sizeof(hwcaps));
	error = hn_rndis_query_hwcaps(sc, &hwcaps);
	if (error) {
		if_printf(sc->hn_ifp, "hwcaps query failed: %d\n", error);
		return (error);
	}

	/* NOTE: 0 means "no change" */
	memset(&params, 0, sizeof(params));

	params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT;
	if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_30) {
		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2;
		paramsz = NDIS_OFFLOAD_PARAMS_SIZE_6_1;
	} else {
		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3;
		paramsz = NDIS_OFFLOAD_PARAMS_SIZE;
	}
	params.ndis_hdr.ndis_size = paramsz;

	/*
	 * TSO4/TSO6 setup.
	 *
	 * tso_maxsz ends up as the smallest maximum size and tso_minsg as
	 * the largest minimum segment count among the enabled families.
	 */
	tso_maxsz = IP_MAXPACKET;
	tso_minsg = 2;
	if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) {
		caps |= HN_CAP_TSO4;
		params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON;

		if (hwcaps.ndis_lsov2.ndis_ip4_maxsz < tso_maxsz)
			tso_maxsz = hwcaps.ndis_lsov2.ndis_ip4_maxsz;
		if (hwcaps.ndis_lsov2.ndis_ip4_minsg > tso_minsg)
			tso_minsg = hwcaps.ndis_lsov2.ndis_ip4_minsg;
	}
	if ((hwcaps.ndis_lsov2.ndis_ip6_encap & NDIS_OFFLOAD_ENCAP_8023) &&
	    (hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6) ==
	    HN_NDIS_LSOV2_CAP_IP6) {
		caps |= HN_CAP_TSO6;
		params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON;

		if (hwcaps.ndis_lsov2.ndis_ip6_maxsz < tso_maxsz)
			tso_maxsz = hwcaps.ndis_lsov2.ndis_ip6_maxsz;
		if (hwcaps.ndis_lsov2.ndis_ip6_minsg > tso_minsg)
			tso_minsg = hwcaps.ndis_lsov2.ndis_ip6_minsg;
	}
	sc->hn_ndis_tso_szmax = 0;
	sc->hn_ndis_tso_sgmin = 0;
	if (caps & (HN_CAP_TSO4 | HN_CAP_TSO6)) {
		KASSERT(tso_maxsz <= IP_MAXPACKET,
		    ("invalid NDIS TSO maxsz %d", tso_maxsz));
		KASSERT(tso_minsg >= 2,
		    ("invalid NDIS TSO minsg %d", tso_minsg));
		if (tso_maxsz < tso_minsg * mtu) {
			if_printf(sc->hn_ifp, "invalid NDIS TSO config: "
			    "maxsz %d, minsg %d, mtu %d; "
			    "disable TSO4 and TSO6\n",
			    tso_maxsz, tso_minsg, mtu);
			caps &= ~(HN_CAP_TSO4 | HN_CAP_TSO6);
			params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_OFF;
			params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_OFF;
		} else {
			sc->hn_ndis_tso_szmax = tso_maxsz;
			sc->hn_ndis_tso_sgmin = tso_minsg;
			if (bootverbose) {
				if_printf(sc->hn_ifp, "NDIS TSO "
				    "szmax %d sgmin %d\n",
				    sc->hn_ndis_tso_szmax,
				    sc->hn_ndis_tso_sgmin);
			}
		}
	}

	/*
	 * Checksum offloads.  For each protocol the TX side is decided
	 * first; the RX side then upgrades the parameter to TXRX or sets
	 * it to RX only.  Only the TX side is reflected in 'caps'.
	 */

	/* IPv4 checksum */
	if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_IP4) ==
	    HN_NDIS_TXCSUM_CAP_IP4) {
		caps |= HN_CAP_IPCS;
		params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX;
	}
	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) {
		if (params.ndis_ip4csum == NDIS_OFFLOAD_PARAM_TX)
			params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX;
		else
			params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_RX;
	}

	/* TCP4 checksum */
	if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_TCP4) ==
	    HN_NDIS_TXCSUM_CAP_TCP4) {
		caps |= HN_CAP_TCP4CS;
		params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX;
	}
	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) {
		if (params.ndis_tcp4csum == NDIS_OFFLOAD_PARAM_TX)
			params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX;
		else
			params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_RX;
	}

	/* UDP4 checksum */
	if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
		caps |= HN_CAP_UDP4CS;
		params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX;
	}
	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) {
		if (params.ndis_udp4csum == NDIS_OFFLOAD_PARAM_TX)
			params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX;
		else
			params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_RX;
	}

	/* TCP6 checksum */
	if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_TCP6) ==
	    HN_NDIS_TXCSUM_CAP_TCP6) {
		caps |= HN_CAP_TCP6CS;
		params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX;
	}
	if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) {
		if (params.ndis_tcp6csum == NDIS_OFFLOAD_PARAM_TX)
			params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX;
		else
			params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_RX;
	}

	/* UDP6 checksum */
	if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_UDP6) ==
	    HN_NDIS_TXCSUM_CAP_UDP6) {
		caps |= HN_CAP_UDP6CS;
		params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX;
	}
	if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) {
		if (params.ndis_udp6csum == NDIS_OFFLOAD_PARAM_TX)
			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX;
		else
			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX;
	}

	/*
	 * RSC offload.
	 *
	 * hwcaps is a struct ndis_offload, so its revision is compared
	 * against NDIS_OFFLOAD_REV_3 (as hn_rndis_query_hwcaps() does).
	 * RSC is switched on only when both IPv4 and IPv6 are reported.
	 */
	if (hwcaps.ndis_hdr.ndis_rev >= NDIS_OFFLOAD_REV_3) {
		if (hwcaps.ndis_rsc.ndis_ip4 && hwcaps.ndis_rsc.ndis_ip6) {
			params.ndis_rsc_ip4 = NDIS_OFFLOAD_RSC_ON;
			params.ndis_rsc_ip6 = NDIS_OFFLOAD_RSC_ON;
		} else {
			params.ndis_rsc_ip4 = NDIS_OFFLOAD_RSC_OFF;
			params.ndis_rsc_ip6 = NDIS_OFFLOAD_RSC_OFF;
		}
	}

	if (bootverbose) {
		if_printf(sc->hn_ifp, "offload csum: "
		    "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n",
		    params.ndis_ip4csum,
		    params.ndis_tcp4csum,
		    params.ndis_udp4csum,
		    params.ndis_tcp6csum,
		    params.ndis_udp6csum);
		if_printf(sc->hn_ifp, "offload lsov2: ip4 %u, ip6 %u\n",
		    params.ndis_lsov2_ip4,
		    params.ndis_lsov2_ip6);
		if (hwcaps.ndis_hdr.ndis_rev >= NDIS_OFFLOAD_REV_3)
			if_printf(sc->hn_ifp, "offload rsc: ip4 %u, ip6 %u\n",
			    params.ndis_rsc_ip4,
			    params.ndis_rsc_ip6);
	}

	error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, &params, paramsz);
	if (error) {
		if_printf(sc->hn_ifp, "offload config failed: %d\n", error);
		return (error);
	}

	if (bootverbose)
		if_printf(sc->hn_ifp, "offload config done\n");
	sc->hn_caps |= caps;
	return (0);
}
/*
 * Send the RSS parameters kept in sc->hn_rss to the host through
 * OID_GEN_RECEIVE_SCALE_PARAMETERS.
 *
 * Only the parameter header part of sc->hn_rss is rebuilt here; the key
 * and the indirect table are sent as they were left by the caller.  flags
 * is stored in the ndis_flags field.  Returns the result of the set
 * request.
 */
int
hn_rndis_conf_rss(struct hn_softc *sc, uint16_t flags)
{
	struct ndis_rssprm_toeplitz *toeplitz = &sc->hn_rss;
	struct ndis_rss_params *params = &toeplitz->rss_params;
	int rc, total_size;

	/*
	 * NDIS 6.20+ is mandatory: the indirect table elements used here
	 * are 4 bytes wide, which was adopted in NDIS 6.20.
	 */
	KASSERT(sc->hn_ndis_ver >= HN_NDIS_VERSION_6_20,
	    ("NDIS 6.20+ is required, NDIS version 0x%08x", sc->hn_ndis_ver));

	/* XXX only one hash function may be specified though; popcnt? */
	KASSERT((sc->hn_rss_hash & NDIS_HASH_FUNCTION_MASK),
	    ("no hash func %08x", sc->hn_rss_hash));
	KASSERT((sc->hn_rss_hash & NDIS_HASH_STD),
	    ("no standard hash types %08x", sc->hn_rss_hash));
	KASSERT(sc->hn_rss_ind_size > 0, ("no indirect table size"));

	if (bootverbose) {
		if_printf(sc->hn_ifp,
		    "RSS indirect table size %d, hash 0x%08x\n",
		    sc->hn_rss_ind_size, sc->hn_rss_hash);
	}

	/*
	 * Clear and rebuild the parameter header only.  rss_key and
	 * rss_ind belong to the caller and must NOT be touched.
	 */
	memset(params, 0, sizeof(*params));
	total_size = NDIS_RSSPRM_TOEPLITZ_SIZE(sc->hn_rss_ind_size);

	params->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS;
	params->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2;
	params->ndis_hdr.ndis_size = total_size;

	params->ndis_flags = flags;
	params->ndis_hash = sc->hn_rss_hash &
	    (NDIS_HASH_FUNCTION_MASK | NDIS_HASH_STD);

	/* Location and size of the indirect table inside the object. */
	params->ndis_indoffset =
	    __offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]);
	params->ndis_indsize =
	    sizeof(toeplitz->rss_ind[0]) * sc->hn_rss_ind_size;

	/* Location and size of the hash key inside the object. */
	params->ndis_keyoffset =
	    __offsetof(struct ndis_rssprm_toeplitz, rss_key[0]);
	params->ndis_keysize = sizeof(toeplitz->rss_key);

	rc = hn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS,
	    toeplitz, total_size);
	if (rc != 0)
		if_printf(sc->hn_ifp, "RSS config failed: %d\n", rc);
	else if (bootverbose)
		if_printf(sc->hn_ifp, "RSS config done\n");

	return (rc);
}
/*
 * Program the RX packet filter through OID_GEN_CURRENT_PACKET_FILTER.
 *
 * Returns the result of the set request; failures are always logged,
 * success only when bootverbose is set.
 */
int
hn_rndis_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
	int rc;

	rc = hn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
	    &filter, sizeof(filter));
	if (rc != 0) {
		if_printf(sc->hn_ifp, "set RX filter 0x%08x failed: %d\n",
		    filter, rc);
	} else if (bootverbose) {
		if_printf(sc->hn_ifp, "set RX filter 0x%08x done\n", filter);
	}

	return (rc);
}
static int
hn_rndis_init(struct hn_softc *sc)
{
struct rndis_init_req *req;
const struct rndis_init_comp *comp;
struct vmbus_xact *xact;
size_t comp_len;
uint32_t rid;
int error;
xact = vmbus_xact_get(sc->hn_xact, sizeof(*req));
if (xact == NULL) {
if_printf(sc->hn_ifp, "no xact for RNDIS init\n");
return (ENXIO);
}
rid = hn_rndis_rid(sc);
req = vmbus_xact_req_data(xact);
req->rm_type = REMOTE_NDIS_INITIALIZE_MSG;
req->rm_len = sizeof(*req);
req->rm_rid = rid;
req->rm_ver_major = RNDIS_VERSION_MAJOR;
req->rm_ver_minor = RNDIS_VERSION_MINOR;
req->rm_max_xfersz = HN_RNDIS_XFER_SIZE;
comp_len = RNDIS_INIT_COMP_SIZE_MIN;
comp = hn_rndis_xact_execute(sc, xact, rid, sizeof(*req), &comp_len,
REMOTE_NDIS_INITIALIZE_CMPLT);
if (comp == NULL) {
if_printf(sc->hn_ifp, "exec RNDIS init failed\n");
error = EIO;
goto done;
}
if (comp->rm_status != RNDIS_STATUS_SUCCESS) {
if_printf(sc->hn_ifp, "RNDIS init failed: status 0x%08x\n",
comp->rm_status);
error = EIO;
goto done;
}
sc->hn_rndis_agg_size = comp->rm_pktmaxsz;
sc->hn_rndis_agg_pkts = comp->rm_pktmaxcnt;
sc->hn_rndis_agg_align = 1U << comp->rm_align;
if (sc->hn_rndis_agg_align < sizeof(uint32_t)) {
/*
* The RNDIS packet messsage encap assumes that the RNDIS
* packet message is at least 4 bytes aligned. Fix up the
* alignment here, if the remote side sets the alignment
* too low.
*/
if_printf(sc->hn_ifp, "fixup RNDIS aggpkt align: %u -> %zu\n",
sc->hn_rndis_agg_align, sizeof(uint32_t));
sc->hn_rndis_agg_align = sizeof(uint32_t);
}
if (bootverbose) {
if_printf(sc->hn_ifp, "RNDIS ver %u.%u, "
"aggpkt size %u, aggpkt cnt %u, aggpkt align %u\n",
comp->rm_ver_major, comp->rm_ver_minor,
sc->hn_rndis_agg_size, sc->hn_rndis_agg_pkts,
sc->hn_rndis_agg_align);
}
error = 0;
done:
vmbus_xact_put(xact);
return (error);
}
static int
hn_rndis_halt(struct hn_softc *sc)
{
struct vmbus_xact *xact;
struct rndis_halt_req *halt;
struct hn_nvs_sendctx sndc;
size_t comp_len;
xact = vmbus_xact_get(sc->hn_xact, sizeof(*halt));
if (xact == NULL) {
if_printf(sc->hn_ifp, "no xact for RNDIS halt\n");
return (ENXIO);
}
halt = vmbus_xact_req_data(xact);
halt->rm_type = REMOTE_NDIS_HALT_MSG;
halt->rm_len = sizeof(*halt);
halt->rm_rid = hn_rndis_rid(sc);
/* No RNDIS completion; rely on NVS message send completion */
hn_nvs_sendctx_init(&sndc, hn_nvs_sent_xact, xact);
hn_rndis_xact_exec1(sc, xact, sizeof(*halt), &sndc, &comp_len);
vmbus_xact_put(xact);
if (bootverbose)
if_printf(sc->hn_ifp, "RNDIS halt done\n");
return (0);
}
/*
 * Query OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES into *caps and validate the
 * header of the returned object.
 *
 * The revision and size requested depend on sc->hn_ndis_ver.  *caps is
 * cleared first (NDIS_OFFLOAD_SIZE bytes), so bytes the host did not
 * return read as zero.
 *
 * Returns 0 on success, EINVAL when the reply is too short or its header
 * is inconsistent, or the error of the underlying query.
 */
static int
hn_rndis_query_hwcaps(struct hn_softc *sc, struct ndis_offload *caps)
{
	struct ndis_offload in;
	size_t caps_len, size;
	int error;

	memset(&in, 0, sizeof(in));
	in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD;
	if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_30) {
		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3;
		size = NDIS_OFFLOAD_SIZE;
	} else if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_1) {
		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2;
		size = NDIS_OFFLOAD_SIZE_6_1;
	} else {
		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1;
		size = NDIS_OFFLOAD_SIZE_6_0;
	}
	in.ndis_hdr.ndis_size = size;

	/*
	 * The query copies only as many bytes as the host returned, which
	 * may be none at all; clear the output so the checks below never
	 * look at whatever the caller's buffer happened to contain.
	 */
	memset(caps, 0, NDIS_OFFLOAD_SIZE);
	caps_len = NDIS_OFFLOAD_SIZE;
	error = hn_rndis_query2(sc, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
	    &in, size, caps, &caps_len, NDIS_OFFLOAD_SIZE_6_0);
	if (error)
		return (error);

	/*
	 * A successful query may still have delivered less than the
	 * smallest valid object; reject that before parsing the header.
	 */
	if (caps_len < NDIS_OFFLOAD_SIZE_6_0) {
		if_printf(sc->hn_ifp, "invalid hwcaps len %zu\n", caps_len);
		return (EINVAL);
	}

	/*
	 * Preliminary verification.
	 */
	if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) {
		if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n",
		    caps->ndis_hdr.ndis_type);
		return (EINVAL);
	}
	if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) {
		if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n",
		    caps->ndis_hdr.ndis_rev);
		return (EINVAL);
	}
	if (caps->ndis_hdr.ndis_size > caps_len) {
		if_printf(sc->hn_ifp, "invalid NDIS objsize %u, "
		    "data size %zu\n", caps->ndis_hdr.ndis_size, caps_len);
		return (EINVAL);
	} else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) {
		if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n",
		    caps->ndis_hdr.ndis_size);
		return (EINVAL);
	} else if (caps->ndis_hdr.ndis_rev >= NDIS_OFFLOAD_REV_3 &&
	    caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE) {
		/* A rev 3 object must be full size. */
		if_printf(sc->hn_ifp, "invalid NDIS rev3 objsize %u\n",
		    caps->ndis_hdr.ndis_size);
		return (EINVAL);
	}

	if (bootverbose) {
		/*
		 * NOTE:
		 * caps->ndis_hdr.ndis_size MUST be checked before accessing
		 * NDIS 6.1+ specific fields.
		 */
		if_printf(sc->hn_ifp, "hwcaps rev %u\n",
		    caps->ndis_hdr.ndis_rev);

		if_printf(sc->hn_ifp, "hwcaps csum: "
		    "ip4 tx 0x%x/0x%x rx 0x%x/0x%x, "
		    "ip6 tx 0x%x/0x%x rx 0x%x/0x%x\n",
		    caps->ndis_csum.ndis_ip4_txcsum,
		    caps->ndis_csum.ndis_ip4_txenc,
		    caps->ndis_csum.ndis_ip4_rxcsum,
		    caps->ndis_csum.ndis_ip4_rxenc,
		    caps->ndis_csum.ndis_ip6_txcsum,
		    caps->ndis_csum.ndis_ip6_txenc,
		    caps->ndis_csum.ndis_ip6_rxcsum,
		    caps->ndis_csum.ndis_ip6_rxenc);
		if_printf(sc->hn_ifp, "hwcaps lsov2: "
		    "ip4 maxsz %u minsg %u encap 0x%x, "
		    "ip6 maxsz %u minsg %u encap 0x%x opts 0x%x\n",
		    caps->ndis_lsov2.ndis_ip4_maxsz,
		    caps->ndis_lsov2.ndis_ip4_minsg,
		    caps->ndis_lsov2.ndis_ip4_encap,
		    caps->ndis_lsov2.ndis_ip6_maxsz,
		    caps->ndis_lsov2.ndis_ip6_minsg,
		    caps->ndis_lsov2.ndis_ip6_encap,
		    caps->ndis_lsov2.ndis_ip6_opts);
		if (caps->ndis_hdr.ndis_rev >= NDIS_OFFLOAD_REV_3)
			if_printf(sc->hn_ifp, "hwcaps rsc: "
			    "ip4 %u ip6 %u\n",
			    caps->ndis_rsc.ndis_ip4,
			    caps->ndis_rsc.ndis_ip6);
	}
	return (0);
}
/*
 * Bring up RNDIS: initialize it, then configure the NDIS offloads.
 *
 * *init_done is set to 1 once RNDIS initialization succeeded and to 0
 * otherwise.  The result of the offload configuration is not propagated;
 * only an initialization failure is returned as an error.
 */
int
hn_rndis_attach(struct hn_softc *sc, int mtu, int *init_done)
{
	int rc;

	*init_done = 0;

	/* Step 1: RNDIS initialization. */
	rc = hn_rndis_init(sc);
	if (rc != 0)
		return (rc);
	*init_done = 1;

	/* Step 2: NDIS offload settings; the outcome is ignored here. */
	(void)hn_rndis_conf_offload(sc, mtu);

	return (0);
}
/*
 * Tear down RNDIS by sending the halt message; the result of the halt is
 * not reported to the caller.
 */
void
hn_rndis_detach(struct hn_softc *sc)
{
	(void)hn_rndis_halt(sc);
}