From efe6ab18dad8ff8d3188346845d7df8d0f37276a Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Thu, 9 Nov 2017 11:48:22 +0000 Subject: [PATCH] Check for Rx ring state to prevent from stall in the ENA driver In case when Rx ring is full and driver will fail to allocate Rx mbufs, the ring could be stalled. Keep alive is checking every second for Rx ring state, and if it is full for two cycles, then trigger rx_cleanup routine in another thread. Submitted by: Michal Krawczyk Reviewed by: byenduri_gmail.com Obtained from: Semihalf Sponsored by: Amazon, Inc. Differential Revision: https://reviews.freebsd.org/D12856 --- sys/dev/ena/ena.c | 105 ++++++++++++++++++++++++++++++++++++++- sys/dev/ena/ena.h | 16 ++++-- sys/dev/ena/ena_sysctl.c | 3 ++ 3 files changed, 119 insertions(+), 5 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index bb98e457476..33ef61de62d 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -122,6 +122,7 @@ static void ena_destroy_all_rx_queues(struct ena_adapter *); static void ena_destroy_all_io_queues(struct ena_adapter *); static int ena_create_io_queues(struct ena_adapter *); static int ena_tx_cleanup(struct ena_ring *); +static void ena_deferred_rx_cleanup(void *, int); static int ena_rx_cleanup(struct ena_ring *); static inline int validate_tx_req_id(struct ena_ring *, uint16_t); static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *, @@ -471,6 +472,7 @@ ena_init_io_rings(struct ena_adapter *adapter) device_get_nameunit(adapter->pdev), i); mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF); + mtx_init(&rxr->ring_mtx, rxr->mtx_name, NULL, MTX_DEF); que = &adapter->que[i]; que->adapter = adapter; @@ -480,6 +482,8 @@ ena_init_io_rings(struct ena_adapter *adapter) txr->que = que; rxr->que = que; + + rxr->empty_rx_queue = 0; } } @@ -495,6 +499,7 @@ ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid) sizeof(rxr->rx_stats)); mtx_destroy(&txr->ring_mtx); + mtx_destroy(&rxr->ring_mtx); drbr_free(txr->br, M_DEVBUF); @@ -847,6 +852,22 @@ ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid) } } + /* Allocate taskqueues */ + TASK_INIT(&rx_ring->cmpl_task, 0, ena_deferred_rx_cleanup, rx_ring); + rx_ring->cmpl_tq = taskqueue_create_fast("ena RX completion", M_WAITOK, + taskqueue_thread_enqueue, &rx_ring->cmpl_tq); + + /* RSS set cpu for thread */ +#ifdef RSS + CPU_SETOF(que->cpu, &cpu_mask); + taskqueue_start_threads_cpuset(&rx_ring->cmpl_tq, 1, PI_NET, &cpu_mask, + "%s rx_ring cmpl (bucket %d)", + device_get_nameunit(adapter->pdev), que->cpu); +#else + taskqueue_start_threads(&rx_ring->cmpl_tq, 1, PI_NET, + "%s rx_ring cmpl %d", device_get_nameunit(adapter->pdev), que->cpu); +#endif + return (0); err_rx_dma: @@ -877,6 +898,11 @@ ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid) ena_trace(ENA_INFO, "%s qid %d\n", __func__, qid); + while (taskqueue_cancel(rx_ring->cmpl_tq, &rx_ring->cmpl_task, NULL) != 0) + taskqueue_drain(rx_ring->cmpl_tq, &rx_ring->cmpl_task); + + taskqueue_free(rx_ring->cmpl_tq); + /* Free buffer DMA maps, */ for (int i = 0; i < rx_ring->ring_size; i++) { m_freem(rx_ring->rx_buffer_info[i].mbuf); @@ -1565,6 +1591,24 @@ ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, return; } +static void +ena_deferred_rx_cleanup(void *arg, int pending) +{ + struct ena_ring *rx_ring = arg; + int budget = CLEAN_BUDGET; + + ENA_RING_MTX_LOCK(rx_ring); + /* + * If deferred task was executed, perform cleanup of all awaiting + * descs (or until given budget is depleted to avoid infinite loop). + */ + while (budget--) { + if (ena_rx_cleanup(rx_ring) == 0) + break; + } + ENA_RING_MTX_UNLOCK(rx_ring); +} + /** * ena_rx_cleanup - handle rx irq * @arg: ring for which irq is being handled @@ -1736,7 +1780,17 @@ ena_handle_msix(void *arg) io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; for (i = 0; i < CLEAN_BUDGET; ++i) { - rxc = ena_rx_cleanup(rx_ring); + /* + * If lock cannot be acquired, then deferred cleanup task was + * being executed and rx ring is being cleaned up in + * another thread. + */ + if (ENA_RING_MTX_TRYLOCK(rx_ring)) { + rxc = ena_rx_cleanup(rx_ring); + ENA_RING_MTX_UNLOCK(rx_ring); + } else { + rxc = 0; + } /* Protection from calling ena_tx_cleanup from ena_start_xmit */ ENA_RING_MTX_LOCK(tx_ring); @@ -3374,6 +3428,53 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter) adapter->next_monitored_tx_qid = i % adapter->num_queues; } +/* trigger deferred rx cleanup after 2 consecutive detections */ +#define EMPTY_RX_REFILL 2 +/* For the rare case where the device runs out of Rx descriptors and the + * msix handler failed to refill new Rx descriptors (due to a lack of memory + * for example). + * This case will lead to a deadlock: + * The device won't send interrupts since all the new Rx packets will be dropped + * The msix handler won't allocate new Rx descriptors so the device won't be + * able to send new packets. + * + * When such a situation is detected - execute rx cleanup task in another thread + */ +static void +check_for_empty_rx_ring(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, refill_required; + + if (!adapter->up) + return; + + if (adapter->trigger_reset) + return; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + + refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq); + if (unlikely(refill_required == (rx_ring->ring_size - 1))) { + rx_ring->empty_rx_queue++; + + if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { + counter_u64_add(rx_ring->rx_stats.empty_rx_ring, + 1); + + device_printf(adapter->pdev, + "trigger refill for ring %d\n", i); + + taskqueue_enqueue(rx_ring->cmpl_tq, + &rx_ring->cmpl_task); + rx_ring->empty_rx_queue = 0; + } + } else { + rx_ring->empty_rx_queue = 0; + } + } +} static void ena_timer_service(void *data) @@ -3388,6 +3489,8 @@ ena_timer_service(void *data) check_for_missing_tx_completions(adapter); + check_for_empty_rx_ring(adapter); + if (host_info) ena_update_host_info(host_info, adapter->ifp); diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h index b3b8b0cbdee..43a67a51740 100644 --- a/sys/dev/ena/ena.h +++ b/sys/dev/ena/ena.h @@ -284,16 +284,24 @@ struct ena_ring { struct buf_ring *br; /* only for TX */ struct mtx ring_mtx; char mtx_name[16]; - struct task enqueue_task; - struct taskqueue *enqueue_tq; - struct task cmpl_task; - struct taskqueue *cmpl_tq; + union { + struct { + struct task enqueue_task; + struct taskqueue *enqueue_tq; + }; + struct { + struct task cmpl_task; + struct taskqueue *cmpl_tq; + }; + }; union { struct ena_stats_tx tx_stats; struct ena_stats_rx rx_stats; }; + int empty_rx_queue; + } __aligned(CACHE_LINE_SIZE); struct ena_stats_dev { diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c index 9199a28538a..3afd25b3744 100644 --- a/sys/dev/ena/ena_sysctl.c +++ b/sys/dev/ena/ena_sysctl.c @@ -200,6 +200,9 @@ ena_sysctl_add_stats(struct ena_adapter *adapter) SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id", CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count"); + SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, + "empty_rx_ring", CTLFLAG_RD, + &rx_stats->empty_rx_ring, "RX descriptors depletion count"); } /* Stats read from device */