ena: Add reset reason for missing admin interrupt

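The driver can trigger a device reset when an admin interrupt is
missing.
To make this case identifiable on its own, this commit adds a dedicated
reset reason, ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT, together with the
missing_admin_interrupt and admin_to statistics counters exposed via sysctl.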

Approved by: cperciva (mentor)
Sponsored by: Amazon, Inc.

(cherry picked from commit 274319acb48424958242d55e1b0c7d4528da7f70)
This commit is contained in:
Osama Abboud 2024-08-07 06:24:19 +00:00 committed by Osama Abboud
parent e445e3afde
commit a0594d1f65
3 changed files with 19 additions and 3 deletions

View file

@ -3029,6 +3029,7 @@ static void
check_for_missing_keep_alive(struct ena_adapter *adapter)
{
sbintime_t timestamp, time;
enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
if (adapter->wd_active == 0)
return;
@ -3040,7 +3041,10 @@ check_for_missing_keep_alive(struct ena_adapter *adapter)
time = getsbinuptime() - timestamp;
if (unlikely(time > adapter->keep_alive_timeout)) {
ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n");
ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
if (ena_com_aenq_has_keep_alive(adapter->ena_dev))
reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT;
ena_trigger_reset(adapter, reset_reason);
}
}
@ -3048,10 +3052,15 @@ check_for_missing_keep_alive(struct ena_adapter *adapter)
static void
check_for_admin_com_state(struct ena_adapter *adapter)
{
enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_ADMIN_TO;
if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) {
ena_log(adapter->pdev, ERR,
"ENA admin queue is not in running state!\n");
ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
if (ena_com_get_missing_admin_interrupt(adapter->ena_dev))
reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT;
ena_trigger_reset(adapter, reset_reason);
}
}

View file

@ -391,6 +391,8 @@ struct ena_stats_dev {
counter_u64_t missing_intr;
counter_u64_t tx_desc_malformed;
counter_u64_t rx_desc_malformed;
counter_u64_t missing_admin_interrupt;
counter_u64_t admin_to;
};
struct ena_hw_stats {
@ -542,7 +544,7 @@ struct ena_reset_stats_offset {
static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_RESET_LAST] = {
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_KEEP_ALIVE_TO, wd_expired),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_ADMIN_TO, admin_q_pause),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_ADMIN_TO, admin_to),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_OS_TRIGGER, os_trigger),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISS_TX_CMPL, missing_tx_cmpl),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_INV_RX_REQ_ID, bad_rx_req_id),
@ -552,6 +554,7 @@ static const struct ena_reset_stats_offset resets_to_stats_offset_map[ENA_REGS_R
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISS_INTERRUPT, missing_intr),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED, tx_desc_malformed),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED, rx_desc_malformed),
ENA_RESET_STATS_ENTRY(ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT, missing_admin_interrupt),
};
int ena_up(struct ena_adapter *adapter);

View file

@ -298,6 +298,10 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
&dev_stats->tx_desc_malformed, "TX descriptors malformed count");
SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "rx_desc_malformed", CTLFLAG_RD,
&dev_stats->rx_desc_malformed, "RX descriptors malformed count");
SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_admin_interrupt", CTLFLAG_RD,
&dev_stats->missing_admin_interrupt, "Missing admin interrupts count");
SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD,
&dev_stats->admin_to, "Admin queue timeouts count");
SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD,
&dev_stats->total_resets, "Total resets count");