diff --git a/share/man/man4/sfxge.4 b/share/man/man4/sfxge.4 index e4b0b948826..9b6025dd30e 100644 --- a/share/man/man4/sfxge.4 +++ b/share/man/man4/sfxge.4 @@ -93,10 +93,18 @@ Supported values are: 512, 1024, 2048 and 4096. .It Va hw.sfxge.tx_dpl_get_max The maximum length of the deferred packet .Dq get-list -for queued transmit -packets, used only if the transmit queue lock can be acquired. +for queued transmit packets (TCP and non-TCP), used only if the transmit +queue lock can be acquired. If a packet is dropped, the -.Va tx_early_drops +.Va tx_get_overflow +counter is incremented and the local sender receives ENOBUFS. +The value must be greater than 0. +.It Va hw.sfxge.tx_dpl_get_non_tcp_max +The maximum number of non-TCP packets in the deferred packet +.Dq get-list , +used only if the transmit queue lock can be acquired. +If a packet is dropped, the +.Va tx_get_non_tcp_overflow counter is incremented and the local sender receives ENOBUFS. The value must be greater than 0. .It Va hw.sfxge.tx_dpl_put_max @@ -105,7 +113,7 @@ The maximum length of the deferred packet for queued transmit packets, used if the transmit queue lock cannot be acquired. If a packet is dropped, the -.Va tx_early_drops +.Va tx_put_overflow counter is incremented and the local sender receives ENOBUFS. The value must be greater than or equal to 0. 
.It Va hw.sfxge.N.max_rss_channels diff --git a/sys/dev/sfxge/sfxge_tx.c b/sys/dev/sfxge/sfxge_tx.c index b69e922eadb..0a9218c7622 100644 --- a/sys/dev/sfxge/sfxge_tx.c +++ b/sys/dev/sfxge/sfxge_tx.c @@ -85,14 +85,23 @@ static int sfxge_tx_dpl_get_max = SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT; TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_MAX, &sfxge_tx_dpl_get_max); SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_max, CTLFLAG_RDTUN, &sfxge_tx_dpl_get_max, 0, - "Maximum number of packets in deferred packet get-list"); + "Maximum number of any packets in deferred packet get-list"); + +#define SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX \ + SFXGE_PARAM(tx_dpl_get_non_tcp_max) +static int sfxge_tx_dpl_get_non_tcp_max = + SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT; +TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, &sfxge_tx_dpl_get_non_tcp_max); +SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_non_tcp_max, CTLFLAG_RDTUN, + &sfxge_tx_dpl_get_non_tcp_max, 0, + "Maximum number of non-TCP packets in deferred packet get-list"); #define SFXGE_PARAM_TX_DPL_PUT_MAX SFXGE_PARAM(tx_dpl_put_max) static int sfxge_tx_dpl_put_max = SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT; TUNABLE_INT(SFXGE_PARAM_TX_DPL_PUT_MAX, &sfxge_tx_dpl_put_max); SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN, &sfxge_tx_dpl_put_max, 0, - "Maximum number of packets in deferred packet put-list"); + "Maximum number of any packets in deferred packet put-list"); #endif @@ -147,6 +156,15 @@ sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq) #ifdef SFXGE_HAVE_MQ +static inline unsigned int +sfxge_is_mbuf_non_tcp(struct mbuf *mbuf) +{ + /* Absence of TCP checksum flags does not mean that it is non-TCP + * but it should be true if user wants to achieve high throughput. + */ + return (!(mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))); +} + /* * Reorder the put list and append it to the get list. 
*/ @@ -158,6 +176,7 @@ sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq) volatile uintptr_t *putp; uintptr_t put; unsigned int count; + unsigned int non_tcp_count; mtx_assert(&txq->lock, MA_OWNED); @@ -176,9 +195,11 @@ sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq) get_next = NULL; count = 0; + non_tcp_count = 0; do { struct mbuf *put_next; + non_tcp_count += sfxge_is_mbuf_non_tcp(mbuf); put_next = mbuf->m_nextpkt; mbuf->m_nextpkt = get_next; get_next = mbuf; @@ -192,6 +213,7 @@ sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq) *stdp->std_getp = get_next; stdp->std_getp = get_tailp; stdp->std_get_count += count; + stdp->std_get_non_tcp_count += non_tcp_count; } #endif /* SFXGE_HAVE_MQ */ @@ -382,6 +404,7 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq) struct sfxge_tx_dpl *stdp; struct mbuf *mbuf, *next; unsigned int count; + unsigned int non_tcp_count; unsigned int pushed; int rc; @@ -396,6 +419,10 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq) mbuf = stdp->std_get; count = stdp->std_get_count; + non_tcp_count = stdp->std_get_non_tcp_count; + + if (count > stdp->std_get_hiwat) + stdp->std_get_hiwat = count; while (count != 0) { KASSERT(mbuf != NULL, ("mbuf == NULL")); @@ -410,6 +437,7 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq) rc = sfxge_tx_queue_mbuf(txq, mbuf); --count; + non_tcp_count -= sfxge_is_mbuf_non_tcp(mbuf); mbuf = next; if (rc != 0) continue; @@ -426,12 +454,16 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq) if (count == 0) { KASSERT(mbuf == NULL, ("mbuf != NULL")); + KASSERT(non_tcp_count == 0, + ("inconsistent TCP/non-TCP detection")); stdp->std_get = NULL; stdp->std_get_count = 0; + stdp->std_get_non_tcp_count = 0; stdp->std_getp = &stdp->std_get; } else { stdp->std_get = mbuf; stdp->std_get_count = count; + stdp->std_get_non_tcp_count = non_tcp_count; } if (txq->added != pushed) @@ -491,8 +523,18 @@ sfxge_tx_qdpl_put(struct sfxge_txq *txq, struct mbuf *mbuf, int locked) sfxge_tx_qdpl_swizzle(txq); - if (stdp->std_get_count >= stdp->std_get_max) + if 
(stdp->std_get_count >= stdp->std_get_max) { + txq->get_overflow++; return (ENOBUFS); + } + if (sfxge_is_mbuf_non_tcp(mbuf)) { + if (stdp->std_get_non_tcp_count >= + stdp->std_get_non_tcp_max) { + txq->get_non_tcp_overflow++; + return (ENOBUFS); + } + stdp->std_get_non_tcp_count++; + } *(stdp->std_getp) = mbuf; stdp->std_getp = &mbuf->m_nextpkt; @@ -513,8 +555,10 @@ sfxge_tx_qdpl_put(struct sfxge_txq *txq, struct mbuf *mbuf, int locked) old_len = mp->m_pkthdr.csum_data; } else old_len = 0; - if (old_len >= stdp->std_put_max) + if (old_len >= stdp->std_put_max) { + atomic_add_long(&txq->put_overflow, 1); return (ENOBUFS); + } mbuf->m_pkthdr.csum_data = old_len + 1; mbuf->m_nextpkt = (void *)old; } while (atomic_cmpset_ptr(putp, old, new) == 0); @@ -535,6 +579,7 @@ sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m) if (!SFXGE_LINK_UP(txq->sc)) { rc = ENETDOWN; + atomic_add_long(&txq->netdown_drops, 1); goto fail; } @@ -572,7 +617,6 @@ sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m) fail: m_freem(m); - atomic_add_long(&txq->early_drops, 1); return (rc); } @@ -591,6 +635,7 @@ sfxge_tx_qdpl_flush(struct sfxge_txq *txq) } stdp->std_get = NULL; stdp->std_get_count = 0; + stdp->std_get_non_tcp_count = 0; stdp->std_getp = &stdp->std_get; mtx_unlock(&txq->lock); @@ -1402,6 +1447,13 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index, rc = EINVAL; goto fail_tx_dpl_get_max; } + if (sfxge_tx_dpl_get_non_tcp_max <= 0) { + log(LOG_ERR, "%s=%d must be greater than 0", + SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, + sfxge_tx_dpl_get_non_tcp_max); + rc = EINVAL; + goto fail_tx_dpl_get_max; + } if (sfxge_tx_dpl_put_max < 0) { log(LOG_ERR, "%s=%d must be greater or equal to 0", SFXGE_PARAM_TX_DPL_PUT_MAX, sfxge_tx_dpl_put_max); @@ -1413,6 +1465,7 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index, stdp = &txq->dpl; stdp->std_put_max = sfxge_tx_dpl_put_max; stdp->std_get_max = sfxge_tx_dpl_get_max; + stdp->std_get_non_tcp_max = 
sfxge_tx_dpl_get_non_tcp_max; stdp->std_getp = &stdp->std_get; mtx_init(&txq->lock, "txq", NULL, MTX_DEF); @@ -1421,6 +1474,14 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index, SYSCTL_CHILDREN(txq_node), OID_AUTO, "dpl_get_count", CTLFLAG_RD | CTLFLAG_STATS, &stdp->std_get_count, 0, ""); + SYSCTL_ADD_UINT(device_get_sysctl_ctx(sc->dev), + SYSCTL_CHILDREN(txq_node), OID_AUTO, + "dpl_get_non_tcp_count", CTLFLAG_RD | CTLFLAG_STATS, + &stdp->std_get_non_tcp_count, 0, ""); + SYSCTL_ADD_UINT(device_get_sysctl_ctx(sc->dev), + SYSCTL_CHILDREN(txq_node), OID_AUTO, + "dpl_get_hiwat", CTLFLAG_RD | CTLFLAG_STATS, + &stdp->std_get_hiwat, 0, ""); #endif txq->type = type; @@ -1458,7 +1519,10 @@ static const struct { SFXGE_TX_STAT(tso_long_headers, tso_long_headers), SFXGE_TX_STAT(tx_collapses, collapses), SFXGE_TX_STAT(tx_drops, drops), - SFXGE_TX_STAT(tx_early_drops, early_drops), + SFXGE_TX_STAT(tx_get_overflow, get_overflow), + SFXGE_TX_STAT(tx_get_non_tcp_overflow, get_non_tcp_overflow), + SFXGE_TX_STAT(tx_put_overflow, put_overflow), + SFXGE_TX_STAT(tx_netdown_drops, netdown_drops), }; static int diff --git a/sys/dev/sfxge/sfxge_tx.h b/sys/dev/sfxge/sfxge_tx.h index 2227ae243b9..958dbc3217d 100644 --- a/sys/dev/sfxge/sfxge_tx.h +++ b/sys/dev/sfxge/sfxge_tx.h @@ -75,21 +75,29 @@ struct sfxge_tx_mapping { enum sfxge_tx_buf_flags flags; }; -#define SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT 1024 -#define SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT 64 +#define SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT (64 * 1024) +#define SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT 1024 +#define SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT 64 /* * Deferred packet list. 
*/ struct sfxge_tx_dpl { - unsigned int std_get_max; /* Maximum number of packets + unsigned int std_get_max; /* Maximum number of packets * in get list */ - unsigned int std_put_max; /* Maximum number of packets + unsigned int std_get_non_tcp_max; /* Maximum number + * of non-TCP packets + * in get list */ + unsigned int std_put_max; /* Maximum number of packets * in put list */ - uintptr_t std_put; /* Head of put list. */ - struct mbuf *std_get; /* Head of get list. */ - struct mbuf **std_getp; /* Tail of get list. */ - unsigned int std_get_count; /* Packets in get list. */ + uintptr_t std_put; /* Head of put list. */ + struct mbuf *std_get; /* Head of get list. */ + struct mbuf **std_getp; /* Tail of get list. */ + unsigned int std_get_count; /* Packets in get list. */ + unsigned int std_get_non_tcp_count; /* Non-TCP packets + * in get list */ + unsigned int std_get_hiwat; /* Packets in get list + * high watermark */ }; @@ -165,7 +173,10 @@ struct sfxge_txq { unsigned long tso_long_headers; unsigned long collapses; unsigned long drops; - unsigned long early_drops; + unsigned long get_overflow; + unsigned long get_non_tcp_overflow; + unsigned long put_overflow; + unsigned long netdown_drops; /* The following fields change more often, and are used mostly * on the completion path