From 063efed28c45a81e9f92c69b2fbe1f4f278c8f1a Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Fri, 28 Sep 2012 18:28:27 +0000 Subject: [PATCH] The drbr(9) API appeared to be so unclear, that most drivers in tree used it incorrectly, which lead to inaccurate overrated if_obytes accounting. The drbr(9) used to update ifnet stats on drbr_enqueue(), which is not accurate since enqueuing doesn't imply successful processing by driver. Dequeuing neither mean that. Most drivers also called drbr_stats_update() which did accounting again, leading to doubled if_obytes statistics. And in case of severe transmitting, when a packet could be several times enqueued and dequeued it could have been accounted several times. o Thus, make drbr(9) API thinner. Now drbr(9) merely chooses between ALTQ queueing or buf_ring(9) queueing. - It doesn't touch the buf_ring stats any more. - It doesn't touch ifnet stats anymore. - drbr_stats_update() no longer exists. o buf_ring(9) handles its stats itself: - It handles br_drops itself. - br_prod_bytes stats are dropped. Rationale: no one ever reads them but update of a common counter on every packet negatively affects performance due to excessive cache invalidation. - buf_ring_enqueue_bytes() reduced to buf_ring_enqueue(), since we no longer account bytes. o Drivers handle their stats theirselves: if_obytes, if_omcasts. o mlx4(4), igb(4), em(4), vxge(4), oce(4) and ixv(4) no longer use drbr_stats_update(), and update ifnet stats theirselves. o bxe(4) was the most correct driver, it didn't call drbr_stats_update(), thus it was the only driver accurate under moderate load. Now it also maintains stats itself. o ixgbe(4) had already taken stats from hardware, so just - drop software stats updating. - take multicast packet count from hardware as well. o mxge(4) just no longer needs NO_SLOW_STATS define. o cxgb(4), cxgbe(4) need no change, since they obtain stats from hardware. Reviewed by: jfv, gnn --- share/man/man9/buf_ring.9 | 15 ++------------- share/man/man9/drbr.9 | 8 +------- sys/dev/bxe/if_bxe.c | 5 +++++ sys/dev/e1000/if_em.c | 4 +++- sys/dev/e1000/if_igb.c | 4 +++- sys/dev/ixgbe/ixgbe.c | 2 +- sys/dev/ixgbe/ixv.c | 4 +++- sys/dev/mxge/if_mxge.c | 2 -- sys/dev/oce/oce_if.c | 4 +++- sys/dev/vxge/vxge.c | 4 +++- sys/net/if_var.h | 20 +++----------------- sys/ofed/drivers/net/mlx4/en_tx.c | 4 +++- sys/sys/buf_ring.h | 12 ++---------- 13 files changed, 32 insertions(+), 56 deletions(-) diff --git a/share/man/man9/buf_ring.9 b/share/man/man9/buf_ring.9 index ffade9444d8..95b3a3de94a 100644 --- a/share/man/man9/buf_ring.9 +++ b/share/man/man9/buf_ring.9 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 30, 2012 +.Dd September 27, 2012 .Dt BUF_RING 9 .Os .Sh NAME @@ -33,7 +33,6 @@ .Nm buf_ring_alloc , .Nm buf_ring_free , .Nm buf_ring_enqueue , -.Nm buf_ring_enqueue_bytes , .Nm buf_ring_dequeue_mc , .Nm buf_ring_dequeue_sc , .Nm buf_ring_count , @@ -50,8 +49,6 @@ .Fn buf_ring_free "struct buf_ring *br" "struct malloc_type *type" .Ft int .Fn buf_ring_enqueue "struct buf_ring *br" "void *buf" -.Ft int -.Fn buf_ring_enqueue_bytes "struct buf_ring *br" "void *buf" "int bytes" .Ft void * .Fn buf_ring_dequeue_mc "struct buf_ring *br" .Ft void * @@ -91,12 +88,6 @@ The function is used to enqueue a buffer to a buf_ring. .Pp The -.Fn buf_ring_enqueue_bytes -function is used to enqueue a buffer to a buf_ring and increment the -number of bytes enqueued by -.Fa bytes . -.Pp -The .Fn buf_ring_dequeue_mc function is a multi-consumer safe way of dequeueing elements from a buf_ring. .Pp @@ -134,9 +125,7 @@ otherwise. .Sh RETURN VALUES The .Fn buf_ring_enqueue -and -.Fn buf_ring_enqueue_bytes -functions return +function return .Er ENOBUFS if there are no available slots in the buf_ring. .Sh HISTORY diff --git a/share/man/man9/drbr.9 b/share/man/man9/drbr.9 index 168ce1047a9..3d9f881dccd 100644 --- a/share/man/man9/drbr.9 +++ b/share/man/man9/drbr.9 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 30, 2012 +.Dd September 27, 2012 .Dt DRBR 9 .Os .Sh NAME @@ -37,7 +37,6 @@ .Nm drbr_flush , .Nm drbr_empty , .Nm drbr_inuse , -.Nm drbr_stats_update , .Nd network driver interface to buf_ring .Sh SYNOPSIS .In sys/param.h @@ -57,8 +56,6 @@ .Fn drbr_empty "struct ifnet *ifp" "struct buf_ring *br" .Ft int .Fn drbr_inuse "struct ifnet *ifp" "struct buf_ring *br" -.Ft void -.Fn drbr_stats_update "struct ifnet *ifp" "int len" "int mflags" .Sh DESCRIPTION The .Nm @@ -123,9 +120,6 @@ there will not be more mbufs when is actually called. Provided the tx queue lock is held there will not be less. .Pp -The -.Fn drbr_stats_update -function updates the number of bytes and the number of multicast packets sent. .Sh RETURN VALUES The .Fn drbr_enqueue diff --git a/sys/dev/bxe/if_bxe.c b/sys/dev/bxe/if_bxe.c index c51480c06fa..fe6bd8bcad9 100644 --- a/sys/dev/bxe/if_bxe.c +++ b/sys/dev/bxe/if_bxe.c @@ -9557,6 +9557,11 @@ bxe_tx_mq_start_locked(struct ifnet *ifp, /* The transmit frame was enqueued successfully. */ tx_count++; + /* Update stats */ + ifp->if_obytes += next->m_pkthdr.len; + if (next->m_flags & M_MCAST) + ifp->if_omcasts++; + /* Send a copy of the frame to any BPF listeners. */ BPF_MTAP(ifp, next); diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 63f399f130e..112b538f558 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -922,7 +922,9 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) break; } enq++; - drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); + ifp->if_obytes += next->m_pkthdr.len; + if (next->m_flags & M_MCAST) + ifp->if_omcasts++; ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index cd91ad4e1ad..13189108c06 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -1014,7 +1014,9 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) break; } enq++; - drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); + ifp->if_obytes += next->m_pkthdr.len; + if (next->m_flags & M_MCAST) + ifp->if_omcasts++; ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c index acf4ba2546e..19e3c407d7a 100644 --- a/sys/dev/ixgbe/ixgbe.c +++ b/sys/dev/ixgbe/ixgbe.c @@ -853,7 +853,6 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) break; } enqueued++; - drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) @@ -5335,6 +5334,7 @@ ixgbe_update_stats_counters(struct adapter *adapter) ifp->if_ibytes = adapter->stats.gorc; ifp->if_obytes = adapter->stats.gotc; ifp->if_imcasts = adapter->stats.mprc; + ifp->if_omcasts = adapter->stats.mptc; ifp->if_collisions = 0; /* Rx Errors */ diff --git a/sys/dev/ixgbe/ixv.c b/sys/dev/ixgbe/ixv.c index b429560e640..1acb3f0bf96 100644 --- a/sys/dev/ixgbe/ixv.c +++ b/sys/dev/ixgbe/ixv.c @@ -641,7 +641,9 @@ ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) break; } enqueued++; - drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); + ifp->if_obytes += next->m_pkthdr.len; + if (next->m_flags & M_MCAST) + ifp->if_omcasts++; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c index c61d3601414..1e51d1bc3bb 100644 --- a/sys/dev/mxge/if_mxge.c +++ b/sys/dev/mxge/if_mxge.c @@ -47,8 +47,6 @@ __FBSDID("$FreeBSD$"); #include #include -/* count xmits ourselves, rather than via drbr */ -#define NO_SLOW_STATS #include #include #include diff --git a/sys/dev/oce/oce_if.c b/sys/dev/oce/oce_if.c index fdeb339eea2..5a47424257a 100644 --- a/sys/dev/oce/oce_if.c +++ b/sys/dev/oce/oce_if.c @@ -1183,7 +1183,9 @@ oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m, struct oce_wq *wq) } break; } - drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); + ifp->if_obytes += next->m_pkthdr.len; + if (next->m_flags & M_MCAST) + ifp->if_omcasts++; ETHER_BPF_MTAP(ifp, next); next = drbr_dequeue(ifp, br); } diff --git a/sys/dev/vxge/vxge.c b/sys/dev/vxge/vxge.c index 0750e3594b4..9948801a544 100644 --- a/sys/dev/vxge/vxge.c +++ b/sys/dev/vxge/vxge.c @@ -709,7 +709,9 @@ vxge_mq_send_locked(ifnet_t ifp, vxge_vpath_t *vpath, mbuf_t m_head) VXGE_DRV_STATS(vpath, tx_again); break; } - drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); + ifp->if_obytes += next->m_pkthdr.len; + if (next->m_flags & M_MCAST) + ifp->if_omcasts++; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 01ec2b602dd..8a1d5cc560f 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -590,22 +590,10 @@ do { \ } while (0) #ifdef _KERNEL -static __inline void -drbr_stats_update(struct ifnet *ifp, int len, int mflags) -{ -#ifndef NO_SLOW_STATS - ifp->if_obytes += len; - if (mflags & M_MCAST) - ifp->if_omcasts++; -#endif -} - static __inline int drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m) { int error = 0; - int len = m->m_pkthdr.len; - int mflags = m->m_flags; #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) { @@ -613,12 +601,10 @@ drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m) return (error); } #endif - if ((error = buf_ring_enqueue_bytes(br, m, len)) == ENOBUFS) { - br->br_drops++; + error = buf_ring_enqueue(br, m); + if (error) m_freem(m); - } else - drbr_stats_update(ifp, len, mflags); - + return (error); } diff --git a/sys/ofed/drivers/net/mlx4/en_tx.c b/sys/ofed/drivers/net/mlx4/en_tx.c index cd45f9dc0ac..4d1690f8da0 100644 --- a/sys/ofed/drivers/net/mlx4/en_tx.c +++ b/sys/ofed/drivers/net/mlx4/en_tx.c @@ -948,7 +948,9 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m) break; } enqueued++; - drbr_stats_update(dev, next->m_pkthdr.len, next->m_flags); + dev->if_obytes += next->m_pkthdr.len; + if (next->m_flags & M_MCAST) + dev->if_omcasts++; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(dev, next); if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) diff --git a/sys/sys/buf_ring.h b/sys/sys/buf_ring.h index 57e42e5c189..9f99fa20a66 100644 --- a/sys/sys/buf_ring.h +++ b/sys/sys/buf_ring.h @@ -48,7 +48,6 @@ struct buf_ring { int br_prod_mask; uint64_t br_drops; uint64_t br_prod_bufs; - uint64_t br_prod_bytes; /* * Pad out to next L2 cache line */ @@ -74,7 +73,7 @@ struct buf_ring { * */ static __inline int -buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes) +buf_ring_enqueue(struct buf_ring *br, void *buf) { uint32_t prod_head, prod_next; uint32_t cons_tail; @@ -95,6 +94,7 @@ buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes) prod_next = (prod_head + 1) & br->br_prod_mask; if (prod_next == cons_tail) { + br->br_drops++; critical_exit(); return (ENOBUFS); } @@ -117,19 +117,11 @@ buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes) while (br->br_prod_tail != prod_head) cpu_spinwait(); br->br_prod_bufs++; - br->br_prod_bytes += nbytes; br->br_prod_tail = prod_next; critical_exit(); return (0); } -static __inline int -buf_ring_enqueue(struct buf_ring *br, void *buf) -{ - - return (buf_ring_enqueue_bytes(br, buf, 0)); -} - /* * multi-consumer safe dequeue *