mana: refill the rx mbuf in batch

Set the default refill threshold to one quarter of the rx queue
length. Users can change this value with hw.mana.rx_refill_thresh
in loader.conf. This improves rx completion handling, saving
10% to 15% of overall time.

Tested by:	whu
MFC after:	2 weeks
Sponsored by:	Microsoft
This commit is contained in:
Wei Hu 2025-02-27 08:02:46 +00:00
parent 8a85584785
commit 9b8701b81f
3 changed files with 114 additions and 30 deletions

View file

@ -149,6 +149,7 @@ struct mana_stats {
counter_u64_t collapse_err; /* tx */ counter_u64_t collapse_err; /* tx */
counter_u64_t dma_mapping_err; /* rx, tx */ counter_u64_t dma_mapping_err; /* rx, tx */
counter_u64_t mbuf_alloc_fail; /* rx */ counter_u64_t mbuf_alloc_fail; /* rx */
counter_u64_t partial_refill; /* rx */
counter_u64_t alt_chg; /* tx */ counter_u64_t alt_chg; /* tx */
counter_u64_t alt_reset; /* tx */ counter_u64_t alt_reset; /* tx */
counter_u64_t cqe_err; /* tx */ counter_u64_t cqe_err; /* tx */
@ -441,6 +442,8 @@ struct mana_rxq {
uint32_t num_rx_buf; uint32_t num_rx_buf;
uint32_t buf_index; uint32_t buf_index;
uint32_t next_to_refill;
uint32_t refill_thresh;
uint64_t lro_tried; uint64_t lro_tried;
uint64_t lro_failed; uint64_t lro_failed;
@ -711,6 +714,13 @@ struct mana_cfg_rx_steer_resp {
#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) #define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
/*
 * Advance a ring index by one slot, wrapping through a bit mask.
 * The mask only wraps correctly when 'size' is a power of two.
 */
#define MANA_IDX_NEXT(idx, size)	(((idx) + 1) & ((size) - 1))

/*
 * Number of slots from start_idx forward to end_idx in a ring of 'size'
 * slots, taking wrap-around into account. The arguments are evaluated
 * more than once, so they must be free of side effects.
 */
#define MANA_GET_SPACE(start_idx, end_idx, size)			\
	(((start_idx) > (end_idx)) ?					\
	((size) - (start_idx) + (end_idx)) :				\
	((end_idx) - (start_idx)))

/* Default rx refill threshold, in rx slots. */
#define MANA_RX_REFILL_THRESH	256
struct mana_tx_package { struct mana_tx_package {
struct gdma_wqe_request wqe_req; struct gdma_wqe_request wqe_req;
struct gdma_sge sgl_array[MAX_MBUF_FRAGS]; struct gdma_sge sgl_array[MAX_MBUF_FRAGS];

View file

@ -69,6 +69,7 @@ static int mana_down(struct mana_port_context *apc);
extern unsigned int mana_tx_req_size; extern unsigned int mana_tx_req_size;
extern unsigned int mana_rx_req_size; extern unsigned int mana_rx_req_size;
extern unsigned int mana_rx_refill_threshold;
static void static void
mana_rss_key_fill(void *k, size_t size) mana_rss_key_fill(void *k, size_t size)
@ -638,8 +639,7 @@ mana_xmit(struct mana_txq *txq)
continue; continue;
} }
next_to_use = next_to_use = MANA_IDX_NEXT(next_to_use, tx_queue_size);
(next_to_use + 1) % tx_queue_size;
(void)atomic_inc_return(&txq->pending_sends); (void)atomic_inc_return(&txq->pending_sends);
@ -1527,7 +1527,7 @@ mana_poll_tx_cq(struct mana_cq *cq)
mb(); mb();
next_to_complete = next_to_complete =
(next_to_complete + 1) % tx_queue_size; MANA_IDX_NEXT(next_to_complete, tx_queue_size);
pkt_transmitted++; pkt_transmitted++;
} }
@ -1592,18 +1592,11 @@ mana_poll_tx_cq(struct mana_cq *cq)
} }
static void static void
mana_post_pkt_rxq(struct mana_rxq *rxq) mana_post_pkt_rxq(struct mana_rxq *rxq,
struct mana_recv_buf_oob *recv_buf_oob)
{ {
struct mana_recv_buf_oob *recv_buf_oob;
uint32_t curr_index;
int err; int err;
curr_index = rxq->buf_index++;
if (rxq->buf_index == rxq->num_rx_buf)
rxq->buf_index = 0;
recv_buf_oob = &rxq->rx_oobs[curr_index];
err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req, err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req,
&recv_buf_oob->wqe_inf); &recv_buf_oob->wqe_inf);
if (err) { if (err) {
@ -1722,6 +1715,68 @@ mana_rx_mbuf(struct mbuf *mbuf, struct mana_rxcomp_oob *cqe,
counter_exit(); counter_exit();
} }
/*
 * Refill up to 'num' consecutive rx slots with mbufs, starting at
 * rxq->next_to_refill, and post each refilled slot to the receive queue.
 *
 * The batch ends early when the refill position catches up with
 * rxq->buf_index or when loading an mbuf fails. If at least one slot was
 * posted the doorbell is rung once for the whole batch. A batch shorter
 * than requested is counted in the partial_refill statistic.
 * rxq->next_to_refill is advanced past the slots that were posted.
 *
 * Returns the number of slots actually refilled.
 */
static int
mana_refill_rx_mbufs(struct mana_port_context *apc,
    struct mana_rxq *rxq, uint32_t num)
{
	struct mana_recv_buf_oob *oob;
	uint32_t slot = rxq->next_to_refill;
	uint32_t filled = 0;
	int rc;

	while (filled < num) {
		/* Do not refill past the current receive buffer index. */
		if (slot == rxq->buf_index) {
			mana_warn(NULL, "refilling index reached current, "
			    "aborted! rxq %u, oob idx %u\n",
			    rxq->rxq_idx, slot);
			break;
		}

		oob = &rxq->rx_oobs[slot];

		if (unlikely(oob->mbuf != NULL)) {
			/* Slot still holds an mbuf: reload it, no new alloc. */
			mana_warn(NULL, "mbuf not null when refilling, "
			    "rxq %u, oob idx %u, reusing\n",
			    rxq->rxq_idx, slot);
			rc = mana_load_rx_mbuf(apc, rxq, oob, false);
		} else {
			rc = mana_load_rx_mbuf(apc, rxq, oob, true);
		}

		if (unlikely(rc != 0)) {
			mana_dbg(NULL,
			    "failed to load rx mbuf, err = %d, rxq = %u\n",
			    rc, rxq->rxq_idx);
			counter_u64_add(rxq->stats.mbuf_alloc_fail, 1);
			break;
		}

		mana_post_pkt_rxq(rxq, oob);

		slot = MANA_IDX_NEXT(slot, rxq->num_rx_buf);
		filled++;
	}

	/* One doorbell covers every work request posted above. */
	if (likely(filled != 0)) {
		struct gdma_context *gc =
		    rxq->gdma_rq->gdma_dev->gdma_context;

		mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq);
	}

	if (unlikely(filled < num)) {
		counter_u64_add(rxq->stats.partial_refill, 1);
		mana_dbg(NULL,
		    "refilled rxq %u with only %u mbufs (%u requested)\n",
		    rxq->rxq_idx, filled, num);
	}

	rxq->next_to_refill = slot;

	return (filled);
}
static void static void
mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct gdma_comp *cqe) struct gdma_comp *cqe)
@ -1731,8 +1786,8 @@ mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
if_t ndev = rxq->ndev; if_t ndev = rxq->ndev;
struct mana_port_context *apc; struct mana_port_context *apc;
struct mbuf *old_mbuf; struct mbuf *old_mbuf;
uint32_t refill_required;
uint32_t curr, pktlen; uint32_t curr, pktlen;
int err;
switch (oob->cqe_hdr.cqe_type) { switch (oob->cqe_hdr.cqe_type) {
case CQE_RX_OKAY: case CQE_RX_OKAY:
@ -1785,29 +1840,24 @@ mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
/* Unload DMA map for the old mbuf */ /* Unload DMA map for the old mbuf */
mana_unload_rx_mbuf(apc, rxq, rxbuf_oob, false); mana_unload_rx_mbuf(apc, rxq, rxbuf_oob, false);
/* Clear the mbuf pointer to avoid reuse */
/* Load a new mbuf to replace the old one */ rxbuf_oob->mbuf = NULL;
err = mana_load_rx_mbuf(apc, rxq, rxbuf_oob, true);
if (err) {
mana_dbg(NULL,
"failed to load rx mbuf, err = %d, packet dropped.\n",
err);
counter_u64_add(rxq->stats.mbuf_alloc_fail, 1);
/*
* Failed to load new mbuf, rxbuf_oob->mbuf is still
* pointing to the old one. Drop the packet.
*/
old_mbuf = NULL;
/* Reload the existing mbuf */
mana_load_rx_mbuf(apc, rxq, rxbuf_oob, false);
}
mana_rx_mbuf(old_mbuf, oob, rxq); mana_rx_mbuf(old_mbuf, oob, rxq);
drop: drop:
mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
mana_post_pkt_rxq(rxq); rxq->buf_index = MANA_IDX_NEXT(rxq->buf_index, rxq->num_rx_buf);
/* Check if refill is needed */
refill_required = MANA_GET_SPACE(rxq->next_to_refill,
rxq->buf_index, rxq->num_rx_buf);
if (refill_required >= rxq->refill_thresh) {
/* Refill empty rx_oobs with new mbufs */
mana_refill_rx_mbufs(apc, rxq, refill_required);
}
} }
static void static void
@ -2349,6 +2399,23 @@ mana_create_rxq(struct mana_port_context *apc, uint32_t rxq_idx,
mana_dbg(NULL, "Setting rxq %d datasize %d\n", mana_dbg(NULL, "Setting rxq %d datasize %d\n",
rxq_idx, rxq->datasize); rxq_idx, rxq->datasize);
/*
* Two steps to set the mbuf refill_thresh.
* 1) If mana_rx_refill_threshold is set, honor it.
* Set to default value otherwise.
* 2) Select the smaller of 1) above and 1/4 of the
* rx buffer size.
*/
if (mana_rx_refill_threshold != 0)
rxq->refill_thresh = mana_rx_refill_threshold;
else
rxq->refill_thresh = MANA_RX_REFILL_THRESH;
rxq->refill_thresh = min_t(uint32_t,
rxq->num_rx_buf / 4, rxq->refill_thresh);
mana_dbg(NULL, "Setting rxq %d refill thresh %u\n",
rxq_idx, rxq->refill_thresh);
rxq->rxobj = INVALID_MANA_HANDLE; rxq->rxobj = INVALID_MANA_HANDLE;
err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);

View file

@ -36,6 +36,7 @@ int mana_log_level = MANA_ALERT | MANA_WARNING | MANA_INFO;
unsigned int mana_tx_req_size; unsigned int mana_tx_req_size;
unsigned int mana_rx_req_size; unsigned int mana_rx_req_size;
unsigned int mana_rx_refill_threshold;
SYSCTL_NODE(_hw, OID_AUTO, mana, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, SYSCTL_NODE(_hw, OID_AUTO, mana, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"MANA driver parameters"); "MANA driver parameters");
@ -44,6 +45,9 @@ SYSCTL_UINT(_hw_mana, OID_AUTO, tx_req_size, CTLFLAG_RWTUN,
&mana_tx_req_size, 0, "requested number of unit of tx queue"); &mana_tx_req_size, 0, "requested number of unit of tx queue");
SYSCTL_UINT(_hw_mana, OID_AUTO, rx_req_size, CTLFLAG_RWTUN, SYSCTL_UINT(_hw_mana, OID_AUTO, rx_req_size, CTLFLAG_RWTUN,
&mana_rx_req_size, 0, "requested number of unit of rx queue"); &mana_rx_req_size, 0, "requested number of unit of rx queue");
SYSCTL_UINT(_hw_mana, OID_AUTO, rx_refill_thresh, CTLFLAG_RWTUN,
&mana_rx_refill_threshold, 0,
"number of rx slots before starting the refill");
/* /*
* Logging level for changing verbosity of the output * Logging level for changing verbosity of the output
@ -329,6 +333,9 @@ mana_sysctl_add_queues(struct mana_port_context *apc)
SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
"mbuf_alloc_fail", CTLFLAG_RD, "mbuf_alloc_fail", CTLFLAG_RD,
&rx_stats->mbuf_alloc_fail, "Failed mbuf allocs"); &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs");
SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
"partial_refill", CTLFLAG_RD,
&rx_stats->partial_refill, "Partially refilled mbuf");
SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
"dma_mapping_err", CTLFLAG_RD, "dma_mapping_err", CTLFLAG_RD,
&rx_stats->dma_mapping_err, "DMA mapping errors"); &rx_stats->dma_mapping_err, "DMA mapping errors");