mirror of
https://github.com/opnsense/src.git
synced 2026-06-11 09:41:03 -04:00
gve: Add callout to detect and handle TX timeouts
A TX timeout occurs when the driver allocates resources on a TX queue for a packet to be sent, prompts the hardware to send the packet, but does not receive a completion for the packet within a given timeout period. An accumulation of TX timeouts can cause one or more queues to run out of space and cause the entire driver to become stuck. This commit adds a lockless timer service that runs periodically and checks queues for timed out packets. In the event we detect a timeout, we prompt the completion phase taskqueue to process completions. Upon the next inspection of the queue we still detect timed out packets, if the last "kick" occurred within a fixed cooldown window, we opt to reset the driver, even if the prior kick successfully freed timed out packets. Signed-off-by: Jasper Tran O'Leary <jtranoleary@google.com> Reviewed by: markj, ziaee MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D50385
This commit is contained in:
parent
b044f12537
commit
3d2957336c
7 changed files with 241 additions and 5 deletions
|
|
@ -26,7 +26,7 @@
|
|||
.\" ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
.\" SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
.Dd October 14, 2024
|
||||
.Dd May 20, 2025
|
||||
.Dt GVE 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
|
|
@ -180,6 +180,12 @@ These messages are seen if any admin queue command fails:
|
|||
.It "Unknown AQ command opcode %d"
|
||||
.El
|
||||
.Pp
|
||||
These messages appear if a TX timeout is detected:
|
||||
.Bl -diag
|
||||
.It "Found %d timed out packet(s) on txq%d, kicking it for completions"
|
||||
.It "Found %d timed out packet(s) on txq%d with its last kick %ld sec ago which is less than the cooldown period %d. Resetting device"
|
||||
.El
|
||||
.Pp
|
||||
These messages are recorded when the device is being reset due to an error:
|
||||
.Bl -diag
|
||||
.It "Scheduling reset task!"
|
||||
|
|
|
|||
|
|
@ -47,6 +47,21 @@
|
|||
#define GVE_TX_MAX_DESCS 4
|
||||
#define GVE_TX_BUFRING_ENTRIES 4096
|
||||
|
||||
#define GVE_TX_TIMEOUT_PKT_SEC 5
|
||||
#define GVE_TX_TIMEOUT_CHECK_CADENCE_SEC 5
|
||||
/*
|
||||
* If the driver finds timed out packets on a tx queue it first kicks it and
|
||||
* records the time. If the driver again finds a timeout on the same queue
|
||||
* before the end of the cooldown period, only then will it reset. Thus, for a
|
||||
* reset to be able to occur at all, the cooldown must be at least as long
|
||||
* as the tx timeout checking cadence multiplied by the number of queues.
|
||||
*/
|
||||
#define GVE_TX_TIMEOUT_MAX_TX_QUEUES 16
|
||||
#define GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC \
|
||||
(2 * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC * GVE_TX_TIMEOUT_MAX_TX_QUEUES)
|
||||
|
||||
#define GVE_TIMESTAMP_INVALID -1
|
||||
|
||||
#define ADMINQ_SIZE PAGE_SIZE
|
||||
|
||||
#define GVE_DEFAULT_RX_BUFFER_SIZE 2048
|
||||
|
|
@ -337,6 +352,14 @@ struct gve_tx_fifo {
|
|||
|
||||
struct gve_tx_buffer_state {
|
||||
struct mbuf *mbuf;
|
||||
|
||||
/*
|
||||
* Time at which the xmit tq places descriptors for mbuf's payload on a
|
||||
* tx queue. This timestamp is invalidated when the mbuf is freed and
|
||||
* must be checked for validity when read.
|
||||
*/
|
||||
int64_t enqueue_time_sec;
|
||||
|
||||
struct gve_tx_iovec iov[GVE_TX_MAX_DESCS];
|
||||
};
|
||||
|
||||
|
|
@ -357,12 +380,21 @@ struct gve_txq_stats {
|
|||
counter_u64_t tx_mbuf_defrag_err;
|
||||
counter_u64_t tx_mbuf_dmamap_enomem_err;
|
||||
counter_u64_t tx_mbuf_dmamap_err;
|
||||
counter_u64_t tx_timeout;
|
||||
};
|
||||
|
||||
#define NUM_TX_STATS (sizeof(struct gve_txq_stats) / sizeof(counter_u64_t))
|
||||
|
||||
struct gve_tx_pending_pkt_dqo {
|
||||
struct mbuf *mbuf;
|
||||
|
||||
/*
|
||||
* Time at which the xmit tq places descriptors for mbuf's payload on a
|
||||
* tx queue. This timestamp is invalidated when the mbuf is freed and
|
||||
* must be checked for validity when read.
|
||||
*/
|
||||
int64_t enqueue_time_sec;
|
||||
|
||||
union {
|
||||
/* RDA */
|
||||
bus_dmamap_t dmamap;
|
||||
|
|
@ -396,6 +428,8 @@ struct gve_tx_ring {
|
|||
uint32_t req; /* free-running total number of packets written to the nic */
|
||||
uint32_t done; /* free-running total number of completed packets */
|
||||
|
||||
int64_t last_kicked; /* always-valid timestamp in seconds for the last queue kick */
|
||||
|
||||
union {
|
||||
/* GQI specific stuff */
|
||||
struct {
|
||||
|
|
@ -594,6 +628,11 @@ struct gve_priv {
|
|||
|
||||
struct gve_state_flags state_flags;
|
||||
struct sx gve_iface_lock;
|
||||
|
||||
struct callout tx_timeout_service;
|
||||
/* The index of tx queue that the timer service will check on its next invocation */
|
||||
uint16_t check_tx_queue_idx;
|
||||
|
||||
};
|
||||
|
||||
static inline bool
|
||||
|
|
@ -652,6 +691,7 @@ int gve_alloc_tx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_
|
|||
void gve_free_tx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx);
|
||||
int gve_create_tx_rings(struct gve_priv *priv);
|
||||
int gve_destroy_tx_rings(struct gve_priv *priv);
|
||||
int gve_check_tx_timeout_gqi(struct gve_priv *priv, struct gve_tx_ring *tx);
|
||||
int gve_tx_intr(void *arg);
|
||||
int gve_xmit_ifp(if_t ifp, struct mbuf *mbuf);
|
||||
void gve_qflush(if_t ifp);
|
||||
|
|
@ -662,6 +702,7 @@ void gve_tx_cleanup_tq(void *arg, int pending);
|
|||
int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i);
|
||||
void gve_tx_free_ring_dqo(struct gve_priv *priv, int i);
|
||||
void gve_clear_tx_ring_dqo(struct gve_priv *priv, int i);
|
||||
int gve_check_tx_timeout_dqo(struct gve_priv *priv, struct gve_tx_ring *tx);
|
||||
int gve_tx_intr_dqo(void *arg);
|
||||
int gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr);
|
||||
int gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf);
|
||||
|
|
@ -697,6 +738,12 @@ int gve_alloc_irqs(struct gve_priv *priv);
|
|||
void gve_unmask_all_queue_irqs(struct gve_priv *priv);
|
||||
void gve_mask_all_queue_irqs(struct gve_priv *priv);
|
||||
|
||||
/* Miscellaneous functions defined in gve_utils.c */
|
||||
void gve_invalidate_timestamp(int64_t *timestamp_sec);
|
||||
int64_t gve_seconds_since(int64_t *timestamp_sec);
|
||||
void gve_set_timestamp(int64_t *timestamp_sec);
|
||||
bool gve_timestamp_valid(int64_t *timestamp_sec);
|
||||
|
||||
/* Systcl functions defined in gve_sysctl.c */
|
||||
extern bool gve_disable_hw_lro;
|
||||
extern char gve_queue_format[8];
|
||||
|
|
|
|||
|
|
@ -32,10 +32,10 @@
|
|||
#include "gve_adminq.h"
|
||||
#include "gve_dqo.h"
|
||||
|
||||
#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.3\n"
|
||||
#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.4\n"
|
||||
#define GVE_VERSION_MAJOR 1
|
||||
#define GVE_VERSION_MINOR 3
|
||||
#define GVE_VERSION_SUB 3
|
||||
#define GVE_VERSION_SUB 4
|
||||
|
||||
#define GVE_DEFAULT_RX_COPYBREAK 256
|
||||
|
||||
|
|
@ -50,6 +50,9 @@ static struct gve_dev {
|
|||
|
||||
struct sx gve_global_lock;
|
||||
|
||||
static void gve_start_tx_timeout_service(struct gve_priv *priv);
|
||||
static void gve_stop_tx_timeout_service(struct gve_priv *priv);
|
||||
|
||||
static int
|
||||
gve_verify_driver_compatibility(struct gve_priv *priv)
|
||||
{
|
||||
|
|
@ -99,6 +102,72 @@ gve_verify_driver_compatibility(struct gve_priv *priv)
|
|||
return (err);
|
||||
}
|
||||
|
||||
static void
|
||||
gve_handle_tx_timeout(struct gve_priv *priv, struct gve_tx_ring *tx,
|
||||
int num_timeout_pkts)
|
||||
{
|
||||
int64_t time_since_last_kick;
|
||||
|
||||
counter_u64_add_protected(tx->stats.tx_timeout, 1);
|
||||
|
||||
/* last_kicked is never GVE_TIMESTAMP_INVALID so we can skip checking */
|
||||
time_since_last_kick = gve_seconds_since(&tx->last_kicked);
|
||||
|
||||
/* Try kicking first in case the timeout is due to a missed interrupt */
|
||||
if (time_since_last_kick > GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC) {
|
||||
device_printf(priv->dev,
|
||||
"Found %d timed out packet(s) on txq%d, kicking it for completions\n",
|
||||
num_timeout_pkts, tx->com.id);
|
||||
gve_set_timestamp(&tx->last_kicked);
|
||||
taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
|
||||
} else {
|
||||
device_printf(priv->dev,
|
||||
"Found %d timed out packet(s) on txq%d with its last kick %jd sec ago which is less than the cooldown period %d. Resetting device\n",
|
||||
num_timeout_pkts, tx->com.id,
|
||||
(intmax_t)time_since_last_kick,
|
||||
GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC);
|
||||
gve_schedule_reset(priv);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gve_tx_timeout_service_callback(void *data)
|
||||
{
|
||||
struct gve_priv *priv = (struct gve_priv *)data;
|
||||
struct gve_tx_ring *tx;
|
||||
uint16_t num_timeout_pkts;
|
||||
|
||||
tx = &priv->tx[priv->check_tx_queue_idx];
|
||||
|
||||
num_timeout_pkts = gve_is_gqi(priv) ?
|
||||
gve_check_tx_timeout_gqi(priv, tx) :
|
||||
gve_check_tx_timeout_dqo(priv, tx);
|
||||
if (num_timeout_pkts)
|
||||
gve_handle_tx_timeout(priv, tx, num_timeout_pkts);
|
||||
|
||||
priv->check_tx_queue_idx = (priv->check_tx_queue_idx + 1) %
|
||||
priv->tx_cfg.num_queues;
|
||||
callout_reset_sbt(&priv->tx_timeout_service,
|
||||
SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
|
||||
gve_tx_timeout_service_callback, (void *)priv, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
gve_start_tx_timeout_service(struct gve_priv *priv)
|
||||
{
|
||||
priv->check_tx_queue_idx = 0;
|
||||
callout_init(&priv->tx_timeout_service, true);
|
||||
callout_reset_sbt(&priv->tx_timeout_service,
|
||||
SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
|
||||
gve_tx_timeout_service_callback, (void *)priv, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
gve_stop_tx_timeout_service(struct gve_priv *priv)
|
||||
{
|
||||
callout_drain(&priv->tx_timeout_service);
|
||||
}
|
||||
|
||||
static int
|
||||
gve_up(struct gve_priv *priv)
|
||||
{
|
||||
|
|
@ -149,6 +218,9 @@ gve_up(struct gve_priv *priv)
|
|||
gve_unmask_all_queue_irqs(priv);
|
||||
gve_set_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
|
||||
priv->interface_up_cnt++;
|
||||
|
||||
gve_start_tx_timeout_service(priv);
|
||||
|
||||
return (0);
|
||||
|
||||
reset:
|
||||
|
|
@ -164,6 +236,8 @@ gve_down(struct gve_priv *priv)
|
|||
if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP))
|
||||
return;
|
||||
|
||||
gve_stop_tx_timeout_service(priv);
|
||||
|
||||
if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
|
||||
if_link_state_change(priv->ifp, LINK_STATE_DOWN);
|
||||
gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ gve_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
|
|||
&stats->tx_delayed_pkt_tsoerr,
|
||||
"TSO packets delayed due to err in prep errors");
|
||||
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
|
||||
"tx_mbuf_collpase", CTLFLAG_RD,
|
||||
"tx_mbuf_collapse", CTLFLAG_RD,
|
||||
&stats->tx_mbuf_collapse,
|
||||
"tx mbufs that had to be collapsed");
|
||||
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
|
||||
|
|
@ -187,6 +187,10 @@ gve_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
|
|||
"tx_mbuf_dmamap_err", CTLFLAG_RD,
|
||||
&stats->tx_mbuf_dmamap_err,
|
||||
"tx mbufs that could not be dma-mapped");
|
||||
SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
|
||||
"tx_timeout", CTLFLAG_RD,
|
||||
&stats->tx_timeout,
|
||||
"detections of timed out packets on tx queues");
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -173,6 +173,8 @@ gve_tx_alloc_ring(struct gve_priv *priv, int i)
|
|||
}
|
||||
com->q_resources = com->q_resources_mem.cpu_addr;
|
||||
|
||||
tx->last_kicked = 0;
|
||||
|
||||
return (0);
|
||||
|
||||
abort:
|
||||
|
|
@ -218,6 +220,7 @@ gve_tx_clear_desc_ring(struct gve_tx_ring *tx)
|
|||
for (i = 0; i < com->priv->tx_desc_cnt; i++) {
|
||||
tx->desc_ring[i] = (union gve_tx_desc){};
|
||||
tx->info[i] = (struct gve_tx_buffer_state){};
|
||||
gve_invalidate_timestamp(&tx->info[i].enqueue_time_sec);
|
||||
}
|
||||
|
||||
bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
|
||||
|
|
@ -344,6 +347,30 @@ gve_destroy_tx_rings(struct gve_priv *priv)
|
|||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
gve_check_tx_timeout_gqi(struct gve_priv *priv, struct gve_tx_ring *tx)
|
||||
{
|
||||
struct gve_tx_buffer_state *info;
|
||||
uint32_t pkt_idx;
|
||||
int num_timeouts;
|
||||
|
||||
num_timeouts = 0;
|
||||
|
||||
for (pkt_idx = 0; pkt_idx < priv->tx_desc_cnt; pkt_idx++) {
|
||||
info = &tx->info[pkt_idx];
|
||||
|
||||
if (!gve_timestamp_valid(&info->enqueue_time_sec))
|
||||
continue;
|
||||
|
||||
if (__predict_false(
|
||||
gve_seconds_since(&info->enqueue_time_sec) >
|
||||
GVE_TX_TIMEOUT_PKT_SEC))
|
||||
num_timeouts += 1;
|
||||
}
|
||||
|
||||
return (num_timeouts);
|
||||
}
|
||||
|
||||
int
|
||||
gve_tx_intr(void *arg)
|
||||
{
|
||||
|
|
@ -396,7 +423,10 @@ gve_tx_cleanup_tq(void *arg, int pending)
|
|||
if (mbuf == NULL)
|
||||
continue;
|
||||
|
||||
gve_invalidate_timestamp(&info->enqueue_time_sec);
|
||||
|
||||
info->mbuf = NULL;
|
||||
|
||||
counter_enter();
|
||||
counter_u64_add_protected(tx->stats.tbytes, mbuf->m_pkthdr.len);
|
||||
counter_u64_add_protected(tx->stats.tpackets, 1);
|
||||
|
|
@ -685,6 +715,8 @@ gve_xmit(struct gve_tx_ring *tx, struct mbuf *mbuf)
|
|||
/* So that the cleanup taskqueue can free the mbuf eventually. */
|
||||
info->mbuf = mbuf;
|
||||
|
||||
gve_set_timestamp(&info->enqueue_time_sec);
|
||||
|
||||
/*
|
||||
* We don't want to split the header, so if necessary, pad to the end
|
||||
* of the fifo and then put the header at the beginning of the fifo.
|
||||
|
|
|
|||
|
|
@ -527,6 +527,8 @@ gve_alloc_pending_packet(struct gve_tx_ring *tx)
|
|||
tx->dqo.free_pending_pkts_csm = pending_pkt->next;
|
||||
pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;
|
||||
|
||||
gve_set_timestamp(&pending_pkt->enqueue_time_sec);
|
||||
|
||||
return (pending_pkt);
|
||||
}
|
||||
|
||||
|
|
@ -539,6 +541,8 @@ gve_free_pending_packet(struct gve_tx_ring *tx,
|
|||
|
||||
pending_pkt->state = GVE_PACKET_STATE_FREE;
|
||||
|
||||
gve_invalidate_timestamp(&pending_pkt->enqueue_time_sec);
|
||||
|
||||
/* Add pending_pkt to the producer list */
|
||||
while (true) {
|
||||
old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd);
|
||||
|
|
@ -939,6 +943,29 @@ gve_handle_packet_completion(struct gve_priv *priv,
|
|||
return (pkt_len);
|
||||
}
|
||||
|
||||
int
|
||||
gve_check_tx_timeout_dqo(struct gve_priv *priv, struct gve_tx_ring *tx)
|
||||
{
|
||||
struct gve_tx_pending_pkt_dqo *pending_pkt;
|
||||
int num_timeouts;
|
||||
uint16_t pkt_idx;
|
||||
|
||||
num_timeouts = 0;
|
||||
for (pkt_idx = 0; pkt_idx < tx->dqo.num_pending_pkts; pkt_idx++) {
|
||||
pending_pkt = &tx->dqo.pending_pkts[pkt_idx];
|
||||
|
||||
if (!gve_timestamp_valid(&pending_pkt->enqueue_time_sec))
|
||||
continue;
|
||||
|
||||
if (__predict_false(
|
||||
gve_seconds_since(&pending_pkt->enqueue_time_sec) >
|
||||
GVE_TX_TIMEOUT_PKT_SEC))
|
||||
num_timeouts += 1;
|
||||
}
|
||||
|
||||
return (num_timeouts);
|
||||
}
|
||||
|
||||
int
|
||||
gve_tx_intr_dqo(void *arg)
|
||||
{
|
||||
|
|
@ -960,8 +987,11 @@ gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx)
|
|||
struct gve_ring_com *com = &tx->com;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < com->priv->tx_desc_cnt; i++)
|
||||
for (i = 0; i < com->priv->tx_desc_cnt; i++) {
|
||||
tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){};
|
||||
gve_invalidate_timestamp(
|
||||
&tx->dqo.pending_pkts[i].enqueue_time_sec);
|
||||
}
|
||||
|
||||
bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
|
||||
BUS_DMASYNC_PREWRITE);
|
||||
|
|
|
|||
|
|
@ -439,3 +439,46 @@ gve_mask_all_queue_irqs(struct gve_priv *priv)
|
|||
gve_db_bar_write_4(priv, rx->com.irq_db_offset, GVE_IRQ_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* In some cases, such as tracking timeout events, we must mark a timestamp as
|
||||
* invalid when we do not want to consider its value. Such timestamps must be
|
||||
* checked for validity before reading them.
|
||||
*/
|
||||
void
|
||||
gve_invalidate_timestamp(int64_t *timestamp_sec)
|
||||
{
|
||||
atomic_store_64(timestamp_sec, GVE_TIMESTAMP_INVALID);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns 0 if the timestamp is invalid, otherwise returns the elapsed seconds
|
||||
* since the timestamp was set.
|
||||
*/
|
||||
int64_t
|
||||
gve_seconds_since(int64_t *timestamp_sec)
|
||||
{
|
||||
struct bintime curr_time;
|
||||
int64_t enqueued_time;
|
||||
|
||||
getbintime(&curr_time);
|
||||
enqueued_time = atomic_load_64(timestamp_sec);
|
||||
if (enqueued_time == GVE_TIMESTAMP_INVALID)
|
||||
return (0);
|
||||
return ((int64_t)(curr_time.sec - enqueued_time));
|
||||
}
|
||||
|
||||
void
|
||||
gve_set_timestamp(int64_t *timestamp_sec)
|
||||
{
|
||||
struct bintime curr_time;
|
||||
|
||||
getbintime(&curr_time);
|
||||
atomic_store_64(timestamp_sec, curr_time.sec);
|
||||
}
|
||||
|
||||
bool
|
||||
gve_timestamp_valid(int64_t *timestamp_sec)
|
||||
{
|
||||
return (atomic_load_64(timestamp_sec) != GVE_TIMESTAMP_INVALID);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue