mirror of
https://github.com/opnsense/src.git
synced 2026-06-09 08:43:19 -04:00
ena: Add differentiation for missing TX completions reset
This commit differentiates between the possible causes of a reset that is
triggered by missing TX completions, by checking whether the driver failed
to receive the TX completions because of missing interrupts.
The cleanup_running field was added to ena_ring because
cleanup_task.ta_pending is zeroed before ena_cleanup() runs.
Also, the ena_increment_reset_counter() API was added in order to support
incrementing the reset counters without triggering a reset.
Approved by: cperciva (mentor)
Sponsored by: Amazon, Inc.
(cherry picked from commit a33ec635d1)
This commit is contained in:
parent
a20c06c6f1
commit
db0c751ed7
3 changed files with 77 additions and 18 deletions
|
|
@ -169,6 +169,9 @@ static int ena_copy_eni_metrics(struct ena_adapter *);
|
|||
static int ena_copy_srd_metrics(struct ena_adapter *);
|
||||
static int ena_copy_customer_metrics(struct ena_adapter *);
|
||||
static void ena_timer_service(void *);
|
||||
static enum ena_regs_reset_reason_types check_cdesc_in_tx_cq(struct ena_adapter *,
|
||||
struct ena_ring *);
|
||||
|
||||
|
||||
static char ena_version[] = ENA_DEVICE_NAME ENA_DRV_MODULE_NAME
|
||||
" v" ENA_DRV_MODULE_VERSION;
|
||||
|
|
@ -3088,6 +3091,31 @@ check_for_rx_interrupt_queue(struct ena_adapter *adapter,
|
|||
return (0);
|
||||
}
|
||||
|
||||
static enum ena_regs_reset_reason_types
|
||||
check_cdesc_in_tx_cq(struct ena_adapter *adapter,
|
||||
struct ena_ring *tx_ring)
|
||||
{
|
||||
device_t pdev = adapter->pdev;
|
||||
int rc;
|
||||
u16 req_id;
|
||||
|
||||
rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id);
|
||||
/* TX CQ is empty */
|
||||
if (rc == ENA_COM_TRY_AGAIN) {
|
||||
ena_log(pdev, ERR,
|
||||
"No completion descriptors found in CQ %d\n",
|
||||
tx_ring->qid);
|
||||
return ENA_REGS_RESET_MISS_TX_CMPL;
|
||||
}
|
||||
|
||||
/* TX CQ has cdescs */
|
||||
ena_log(pdev, ERR,
|
||||
"Completion descriptors found in CQ %d",
|
||||
tx_ring->qid);
|
||||
|
||||
return ENA_REGS_RESET_MISS_INTERRUPT;
|
||||
}
|
||||
|
||||
static int
|
||||
check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
|
||||
struct ena_ring *tx_ring)
|
||||
|
|
@ -3100,6 +3128,8 @@ check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
|
|||
int missing_tx_comp_to;
|
||||
sbintime_t time_offset;
|
||||
int i, rc = 0;
|
||||
enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
|
||||
bool cleanup_scheduled, cleanup_running;
|
||||
|
||||
getbinuptime(&curtime);
|
||||
|
||||
|
|
@ -3155,7 +3185,19 @@ check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
|
|||
"The number of lost tx completion is above the threshold "
|
||||
"(%d > %d). Reset the device\n",
|
||||
missed_tx, adapter->missing_tx_threshold);
|
||||
ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
|
||||
/* Set the reset flag to prevent ena_cleanup() from running */
|
||||
ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
|
||||
/* Need to make sure that ENA_FLAG_TRIGGER_RESET is visible to ena_cleanup() and
|
||||
* that cleanup_running is visible to check_missing_comp_in_tx_queue() to
|
||||
* prevent the case of accessing CQ concurrently with check_cdesc_in_tx_cq()
|
||||
*/
|
||||
mb();
|
||||
cleanup_scheduled = !!(atomic_load_16(&tx_ring->que->cleanup_task.ta_pending));
|
||||
cleanup_running = !!(atomic_load_8((&tx_ring->cleanup_running)));
|
||||
if (!(cleanup_scheduled || cleanup_running))
|
||||
reset_reason = check_cdesc_in_tx_cq(adapter, tx_ring);
|
||||
|
||||
adapter->reset_reason = reset_reason;
|
||||
rc = EIO;
|
||||
}
|
||||
/* Add the newly discovered missing TX completions */
|
||||
|
|
@ -3618,6 +3660,7 @@ ena_reset_task(void *arg, int pending)
|
|||
|
||||
ENA_LOCK_LOCK();
|
||||
if (likely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
|
||||
ena_increment_reset_counter(adapter);
|
||||
ena_destroy_device(adapter, false);
|
||||
ena_restore_device(adapter);
|
||||
|
||||
|
|
|
|||
|
|
@ -327,6 +327,7 @@ struct ena_ring {
|
|||
};
|
||||
|
||||
uint8_t first_interrupt;
|
||||
uint8_t cleanup_running;
|
||||
uint16_t no_interrupt_event_cnt;
|
||||
|
||||
struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
|
||||
|
|
@ -583,22 +584,28 @@ ena_mbuf_count(struct mbuf *mbuf)
|
|||
return count;
|
||||
}
|
||||
|
||||
static inline void
|
||||
ena_increment_reset_counter(struct ena_adapter *adapter)
|
||||
{
|
||||
enum ena_regs_reset_reason_types reset_reason = adapter->reset_reason;
|
||||
const struct ena_reset_stats_offset *ena_reset_stats_offset =
|
||||
&resets_to_stats_offset_map[reset_reason];
|
||||
|
||||
if (ena_reset_stats_offset->has_counter) {
|
||||
uint64_t *stat_ptr = (uint64_t *)&adapter->dev_stats +
|
||||
ena_reset_stats_offset->stat_offset;
|
||||
|
||||
counter_u64_add((counter_u64_t)(*stat_ptr), 1);
|
||||
}
|
||||
|
||||
counter_u64_add(adapter->dev_stats.total_resets, 1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
ena_trigger_reset(struct ena_adapter *adapter,
|
||||
enum ena_regs_reset_reason_types reset_reason)
|
||||
{
|
||||
if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
|
||||
const struct ena_reset_stats_offset *ena_reset_stats_offset =
|
||||
&resets_to_stats_offset_map[reset_reason];
|
||||
|
||||
if (ena_reset_stats_offset->has_counter) {
|
||||
uint64_t *stat_ptr = (uint64_t *)&adapter->dev_stats +
|
||||
ena_reset_stats_offset->stat_offset;
|
||||
|
||||
counter_u64_add((counter_u64_t)(*stat_ptr), 1);
|
||||
}
|
||||
|
||||
counter_u64_add(adapter->dev_stats.total_resets, 1);
|
||||
adapter->reset_reason = reset_reason;
|
||||
ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,17 +77,24 @@ ena_cleanup(void *arg, int pending)
|
|||
int qid, ena_qid;
|
||||
int txc, rxc, i;
|
||||
|
||||
if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
|
||||
return;
|
||||
|
||||
ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
|
||||
|
||||
tx_ring = que->tx_ring;
|
||||
rx_ring = que->rx_ring;
|
||||
qid = que->id;
|
||||
ena_qid = ENA_IO_TXQ_IDX(qid);
|
||||
io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
|
||||
|
||||
atomic_store_8(&tx_ring->cleanup_running, 1);
|
||||
/* Need to make sure that ENA_FLAG_TRIGGER_RESET is visible to ena_cleanup() and
|
||||
* that cleanup_running is visible to check_missing_comp_in_tx_queue() to
|
||||
* prevent the case of accessing CQ concurrently with check_cdesc_in_tx_cq()
|
||||
*/
|
||||
mb();
|
||||
if (unlikely(((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
|
||||
(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))))
|
||||
return;
|
||||
|
||||
ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
|
||||
|
||||
atomic_store_8(&tx_ring->first_interrupt, 1);
|
||||
atomic_store_8(&rx_ring->first_interrupt, 1);
|
||||
|
||||
|
|
@ -95,7 +102,8 @@ ena_cleanup(void *arg, int pending)
|
|||
rxc = ena_rx_cleanup(rx_ring);
|
||||
txc = ena_tx_cleanup(tx_ring);
|
||||
|
||||
if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
|
||||
if (unlikely(((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
|
||||
(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))))
|
||||
return;
|
||||
|
||||
if ((txc != ENA_TX_BUDGET) && (rxc != ENA_RX_BUDGET))
|
||||
|
|
@ -107,6 +115,7 @@ ena_cleanup(void *arg, int pending)
|
|||
ENA_TX_IRQ_INTERVAL, true, false);
|
||||
counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
|
||||
ena_com_unmask_intr(io_cq, &intr_reg);
|
||||
atomic_store_8(&tx_ring->cleanup_running, 0);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
Loading…
Reference in a new issue