diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h index d57ece717ec..7ebf28d2058 100644 --- a/sys/dev/mlx5/mlx5_en/en.h +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -81,8 +81,19 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xe -/* freeBSD HW LRO is limited by 16KB - the size of max mbuf */ +#define MLX5E_MAX_RX_SEGS 7 + +#ifndef MLX5E_MAX_RX_BYTES +#define MLX5E_MAX_RX_BYTES MCLBYTES +#endif + +#if (MLX5E_MAX_RX_SEGS == 1) +/* FreeBSD HW LRO is limited by 16KB - the size of max mbuf */ #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ MJUM16BYTES +#else +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ \ + MIN(65535, MLX5E_MAX_RX_SEGS * MLX5E_MAX_RX_BYTES) +#endif #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 @@ -533,6 +544,7 @@ struct mlx5e_rq { struct mtx mtx; bus_dma_tag_t dma_tag; u32 wqe_sz; + u32 nsegs; struct mlx5e_rq_mbuf *mbuf; struct ifnet *ifp; struct mlx5e_rq_stats stats; @@ -806,9 +818,12 @@ struct mlx5e_tx_wqe { struct mlx5e_rx_wqe { struct mlx5_wqe_srq_next_seg next; - struct mlx5_wqe_data_seg data; + struct mlx5_wqe_data_seg data[]; }; +/* the size of the structure above must be power of two */ +CTASSERT(powerof2(sizeof(struct mlx5e_rx_wqe))); + struct mlx5e_eeprom { int lock_bit; int i2c_addr; diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index 74a9bef76b8..a049d6d6ebd 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -33,9 +33,12 @@ #ifndef ETH_DRIVER_VERSION #define ETH_DRIVER_VERSION "3.4.1" #endif + char mlx5e_version[] = "Mellanox Ethernet driver" " (" ETH_DRIVER_VERSION ")"; +static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs); + struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; @@ -810,6 +813,11 @@ mlx5e_create_rq(struct mlx5e_channel *c, int wq_sz; int err; int i; + u32 nsegs, wqe_sz; + + err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs); + if (err != 0) + goto done; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( @@ -819,9 +827,9 @@ mlx5e_create_rq(struct mlx5e_channel *c, BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ - MJUM16BYTES, /* maxsize */ - 1, /* nsegments */ - MJUM16BYTES, /* maxsegsize */ + nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */ + nsegs, /* nsegments */ + nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &rq->dma_tag))) @@ -834,23 +842,9 @@ mlx5e_create_rq(struct mlx5e_channel *c, rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; - if (priv->params.hw_lro_en) { - rq->wqe_sz = priv->params.lro_wqe_sz; - } else { - rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu); - } - if (rq->wqe_sz > MJUM16BYTES) { - err = -ENOMEM; + err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs); + if (err != 0) goto err_rq_wq_destroy; - } else if (rq->wqe_sz > MJUM9BYTES) { - rq->wqe_sz = MJUM16BYTES; - } else if (rq->wqe_sz > MJUMPAGESIZE) { - rq->wqe_sz = MJUM9BYTES; - } else if (rq->wqe_sz > MCLBYTES) { - rq->wqe_sz = MJUMPAGESIZE; - } else { - rq->wqe_sz = MCLBYTES; - } wq_sz = mlx5_wq_ll_get_size(&rq->wq); @@ -861,7 +855,11 @@ mlx5e_create_rq(struct mlx5e_channel *c, rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); for (i = 0; i != wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); +#if (MLX5E_MAX_RX_SEGS == 1) uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; +#else + int j; +#endif err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map); if (err != 0) { @@ -869,8 +867,15 @@ mlx5e_create_rq(struct mlx5e_channel *c, bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); goto err_rq_mbuf_free; } - wqe->data.lkey = c->mkey_be; - wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING); + + /* set value for constant fields */ +#if (MLX5E_MAX_RX_SEGS == 1) + wqe->data[0].lkey = c->mkey_be; + wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING); +#else + for (j = 0; j < rq->nsegs; j++) + wqe->data[j].lkey = c->mkey_be; +#endif } rq->ifp = c->ifp; @@ -1809,16 +1814,51 @@ mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp) free(c, M_MLX5EN); } +static int +mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs) +{ + u32 r, n; + + r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz : + MLX5E_SW2MB_MTU(priv->ifp->if_mtu); + if (r > MJUM16BYTES) + return (-ENOMEM); + + if (r > MJUM9BYTES) + r = MJUM16BYTES; + else if (r > MJUMPAGESIZE) + r = MJUM9BYTES; + else if (r > MCLBYTES) + r = MJUMPAGESIZE; + else + r = MCLBYTES; + + /* + * n + 1 must be a power of two, because stride size must be. + * Stride size is 16 * (n + 1), as the first segment is + * control. + */ + for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++) + ; + + *wqe_sz = r; + *nsegs = n; + return (0); +} + static void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 wqe_sz, nsegs; + mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) + + nsegs * sizeof(struct mlx5_wqe_data_seg))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); MLX5_SET(wq, wq, pd, priv->pdn); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c index fb14be43b32..346dd62e2b9 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c @@ -32,21 +32,47 @@ static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { - bus_dma_segment_t segs[1]; + bus_dma_segment_t segs[rq->nsegs]; struct mbuf *mb; int nsegs; int err; - +#if (MLX5E_MAX_RX_SEGS != 1) + struct mbuf *mb_head; + int i; +#endif if (rq->mbuf[ix].mbuf != NULL) return (0); +#if (MLX5E_MAX_RX_SEGS == 1) mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz); if (unlikely(!mb)) return (-ENOMEM); - /* set initial mbuf length */ mb->m_pkthdr.len = mb->m_len = rq->wqe_sz; +#else + mb_head = mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, + MLX5E_MAX_RX_BYTES); + if (unlikely(mb == NULL)) + return (-ENOMEM); + mb->m_len = MLX5E_MAX_RX_BYTES; + mb->m_pkthdr.len = MLX5E_MAX_RX_BYTES; + + for (i = 1; i < rq->nsegs; i++) { + if (mb_head->m_pkthdr.len >= rq->wqe_sz) + break; + mb = mb->m_next = m_getjcl(M_NOWAIT, MT_DATA, 0, + MLX5E_MAX_RX_BYTES); + if (unlikely(mb == NULL)) { + m_freem(mb_head); + return (-ENOMEM); + } + mb->m_len = MLX5E_MAX_RX_BYTES; + mb_head->m_pkthdr.len += MLX5E_MAX_RX_BYTES; + } + /* rewind to first mbuf in chain */ + mb = mb_head; +#endif /* get IP header aligned */ m_adj(mb, MLX5E_NET_IP_ALIGN); @@ -54,12 +80,26 @@ mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (err != 0) goto err_free_mbuf; - if (unlikely(nsegs != 1)) { + if (unlikely(nsegs == 0)) { bus_dmamap_unload(rq->dma_tag, rq->mbuf[ix].dma_map); err = -ENOMEM; goto err_free_mbuf; } - wqe->data.addr = cpu_to_be64(segs[0].ds_addr); +#if (MLX5E_MAX_RX_SEGS == 1) + wqe->data[0].addr = cpu_to_be64(segs[0].ds_addr); +#else + wqe->data[0].addr = cpu_to_be64(segs[0].ds_addr); + wqe->data[0].byte_count = cpu_to_be32(segs[0].ds_len | + MLX5_HW_START_PADDING); + for (i = 1; i != nsegs; i++) { + wqe->data[i].addr = cpu_to_be64(segs[i].ds_addr); + wqe->data[i].byte_count = cpu_to_be32(segs[i].ds_len); + } + for (; i < rq->nsegs; i++) { + wqe->data[i].addr = 0; + wqe->data[i].byte_count = 0; + } +#endif rq->mbuf[ix].mbuf = mb; rq->mbuf[ix].data = mb->m_data; @@ -214,6 +254,9 @@ mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, { struct ifnet *ifp = rq->ifp; struct mlx5e_channel *c; +#if (MLX5E_MAX_RX_SEGS != 1) + struct mbuf *mb_head; +#endif int lro_num_seg; /* HW LRO session aggregated packets counter */ uint64_t tstmp; @@ -224,7 +267,26 @@ mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, rq->stats.lro_bytes += cqe_bcnt; } +#if (MLX5E_MAX_RX_SEGS == 1) mb->m_pkthdr.len = mb->m_len = cqe_bcnt; +#else + mb->m_pkthdr.len = cqe_bcnt; + for (mb_head = mb; mb != NULL; mb = mb->m_next) { + if (mb->m_len > cqe_bcnt) + mb->m_len = cqe_bcnt; + cqe_bcnt -= mb->m_len; + if (likely(cqe_bcnt == 0)) { + if (likely(mb->m_next != NULL)) { + /* trim off empty mbufs */ + m_freem(mb->m_next); + mb->m_next = NULL; + } + break; + } + } + /* rewind to first mbuf in chain */ + mb = mb_head; +#endif /* check if a Toeplitz hash was computed */ if (cqe->rss_hash_type != 0) { mb->m_pkthdr.flowid = be32_to_cpu(cqe->rss_hash_result); @@ -402,6 +464,10 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) } if ((MHLEN - MLX5E_NET_IP_ALIGN) >= byte_cnt && (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) { +#if (MLX5E_MAX_RX_SEGS != 1) + /* set maximum mbuf length */ + mb->m_len = MHLEN - MLX5E_NET_IP_ALIGN; +#endif /* get IP header aligned */ mb->m_data += MLX5E_NET_IP_ALIGN;