cxgbe(4): minor optimizations in ingress queue processing.

Reorganize struct sge_iq.  Make the iq entry size a compile time
constant.  While here, eliminate RX_FL_ESIZE and use EQ_ESIZE directly.

MFC after:	2 weeks
This commit is contained in:
Navdeep Parhar 2014-08-02 00:56:34 +00:00
parent 814f4c5896
commit b2daa9a9cd
3 changed files with 94 additions and 116 deletions

View file

@ -125,16 +125,24 @@ struct adapter;
typedef struct adapter adapter_t;
enum {
/*
* All ingress queues use this entry size. Note that the firmware event
* queue and any iq expecting CPL_RX_PKT in the descriptor needs this to
* be at least 64.
*/
IQ_ESIZE = 64,
/* Default queue sizes for all kinds of ingress queues */
FW_IQ_QSIZE = 256,
FW_IQ_ESIZE = 64, /* At least 64 mandated by the firmware spec */
RX_IQ_QSIZE = 1024,
RX_IQ_ESIZE = 64, /* At least 64 so CPL_RX_PKT will fit */
EQ_ESIZE = 64, /* All egress queues use this entry size */
SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */
/* All egress queues use this entry size */
EQ_ESIZE = 64,
/* Default queue sizes for all kinds of egress queues */
CTRL_EQ_QSIZE = 128,
TX_EQ_QSIZE = 1024,
RX_FL_ESIZE = EQ_ESIZE, /* 8 64bit addresses */
#if MJUMPAGESIZE != MCLBYTES
SW_ZONE_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */
#else
@ -142,9 +150,7 @@ enum {
#endif
CL_METADATA_SIZE = CACHE_LINE_SIZE,
CTRL_EQ_QSIZE = 128,
TX_EQ_QSIZE = 1024,
SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */
TX_SGL_SEGS = 36,
TX_WR_FLITS = SGE_MAX_WR_LEN / 8
};
@ -317,6 +323,16 @@ struct tx_sdesc {
uint8_t credits; /* NIC txq: # of frames sent out in the WR */
};
/*
 * Layout of a single ingress queue entry.  The entry starts with the RSS
 * header and ends with the response control block; whatever space is left in
 * between is exposed as an opaque byte array for the message carried by the
 * entry.
 */
/* Bytes left in an IQ_ESIZE entry once the header and trailer are removed. */
#define IQ_PAD (IQ_ESIZE - sizeof(struct rsp_ctrl) - sizeof(struct rss_header))
struct iq_desc {
struct rss_header rss;
/* payload bytes that sit between the RSS header and the trailer */
uint8_t cpl[IQ_PAD];
/* trailer; rsp.u.type_gen is what consumers test for the generation bit */
struct rsp_ctrl rsp;
};
/* IQ_PAD is only needed to size the cpl[] array above. */
#undef IQ_PAD
/* Fail the build if the struct does not occupy exactly one queue entry. */
CTASSERT(sizeof(struct iq_desc) == IQ_ESIZE);
enum {
/* iq flags */
IQ_ALLOCATED = (1 << 0), /* firmware resources allocated */
@ -334,27 +350,25 @@ enum {
* Ingress Queue: T4 is producer, driver is consumer.
*/
struct sge_iq {
bus_dma_tag_t desc_tag;
bus_dmamap_t desc_map;
bus_addr_t ba; /* bus address of descriptor ring */
uint32_t flags;
uint16_t abs_id; /* absolute SGE id for the iq */
int8_t intr_pktc_idx; /* packet count threshold index */
int8_t pad0;
__be64 *desc; /* KVA of descriptor ring */
volatile int state;
struct adapter *adapter;
const __be64 *cdesc; /* current descriptor */
struct iq_desc *desc; /* KVA of descriptor ring */
int8_t intr_pktc_idx; /* packet count threshold index */
uint8_t gen; /* generation bit */
uint8_t intr_params; /* interrupt holdoff parameters */
uint8_t intr_next; /* XXX: holdoff for next interrupt */
uint8_t esize; /* size (bytes) of each entry in the queue */
uint16_t qsize; /* size (# of entries) of the queue */
uint16_t sidx; /* index of the entry with the status page */
uint16_t cidx; /* consumer index */
uint16_t cntxt_id; /* SGE context id for the iq */
uint16_t abs_id; /* absolute SGE id for the iq */
STAILQ_ENTRY(sge_iq) link;
bus_dma_tag_t desc_tag;
bus_dmamap_t desc_map;
bus_addr_t ba; /* bus address of descriptor ring */
};
enum {
@ -570,23 +584,10 @@ struct sge_wrq {
#ifdef DEV_NETMAP
#define CPL_PAD (RX_IQ_ESIZE - sizeof(struct rsp_ctrl) - \
sizeof(struct rss_header))
struct nm_iq_desc {
struct rss_header rss;
union {
uint8_t cpl[CPL_PAD];
struct cpl_fw6_msg fw6_msg;
struct cpl_rx_pkt rx_pkt;
} u;
struct rsp_ctrl rsp;
};
CTASSERT(sizeof(struct nm_iq_desc) == RX_IQ_ESIZE);
struct sge_nm_rxq {
struct port_info *pi;
struct nm_iq_desc *iq_desc;
struct iq_desc *iq_desc;
uint16_t iq_abs_id;
uint16_t iq_cntxt_id;
uint16_t iq_cidx;
@ -847,6 +848,12 @@ struct adapter {
for (q = &pi->adapter->sge.nm_rxq[pi->first_nm_rxq], iter = 0; \
iter < pi->nnmrxq; ++iter, ++q)
/*
 * Ring index arithmetic for a ring with 'wrap' entries.
 *
 * IDXINCR(head, incr, wrap) advances the lvalue 'head' by 'incr' entries,
 * wrapping back to the start of the ring when the end is reached.  The result
 * stays within [0, wrap) provided head < wrap and incr <= wrap on entry;
 * neither condition is checked here.
 *
 * IDXDIFF(head, tail, wrap) evaluates to the number of entries from 'tail'
 * forward to 'head', accounting for wrap-around.  Both indices are expected
 * to be smaller than 'wrap'.
 *
 * Every parameter is parenthesized so that expression arguments expand with
 * the intended precedence.  Arguments are still evaluated more than once, so
 * do not pass expressions with side effects.
 */
#define IDXINCR(head, incr, wrap) do { \
	(head) = (wrap) - (head) > (incr) ? (head) + (incr) : \
	    (incr) - ((wrap) - (head)); \
} while (0)
#define IDXDIFF(head, tail, wrap) \
	((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))
/* One for errors, one for firmware events */
#define T4_EXTRA_INTR 2

View file

@ -238,8 +238,8 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq)
MPASS(nm_rxq->iq_desc != NULL);
MPASS(nm_rxq->fl_desc != NULL);
bzero(nm_rxq->iq_desc, pi->qsize_rxq * RX_IQ_ESIZE);
bzero(nm_rxq->fl_desc, na->num_rx_desc * RX_FL_ESIZE + spg_len);
bzero(nm_rxq->iq_desc, pi->qsize_rxq * IQ_ESIZE);
bzero(nm_rxq->fl_desc, na->num_rx_desc * EQ_ESIZE + spg_len);
bzero(&c, sizeof(c));
c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
@ -264,7 +264,7 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq)
c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
F_FW_IQ_CMD_IQGTSMODE |
V_FW_IQ_CMD_IQINTCNTTHRESH(0) |
V_FW_IQ_CMD_IQESIZE(ilog2(RX_IQ_ESIZE) - 4));
V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
c.iqsize = htobe16(pi->qsize_rxq);
c.iqaddr = htobe64(nm_rxq->iq_ba);
c.iqns_to_fl0congen |=
@ -274,7 +274,7 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq)
c.fl0dcaen_to_fl0cidxfthresh =
htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
c.fl0size = htobe16(na->num_rx_desc + spg_len / RX_FL_ESIZE);
c.fl0size = htobe16(na->num_rx_desc + spg_len / EQ_ESIZE);
c.fl0addr = htobe64(nm_rxq->fl_ba);
rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
@ -285,7 +285,7 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq)
}
nm_rxq->iq_cidx = 0;
MPASS(nm_rxq->iq_sidx == pi->qsize_rxq - spg_len / RX_IQ_ESIZE);
MPASS(nm_rxq->iq_sidx == pi->qsize_rxq - spg_len / IQ_ESIZE);
nm_rxq->iq_gen = F_RSPD_GEN;
nm_rxq->iq_cntxt_id = be16toh(c.iqid);
nm_rxq->iq_abs_id = be16toh(c.physiqid);
@ -581,18 +581,7 @@ npkt_to_len16(const int n)
return (n * 2 + 1);
}
static inline uint16_t
idxdiff(uint16_t head, uint16_t tail, uint16_t wrap)
{
MPASS(wrap > head);
MPASS(wrap > tail);
if (head >= tail)
return (head - tail);
else
return (wrap - tail + head);
}
#define IDXDIFF(q, idx) idxdiff((q)->pidx, (q)->idx, (q)->sidx)
#define NMIDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->sidx)
static void
ring_nm_txq_db(struct adapter *sc, struct sge_nm_txq *nm_txq)
@ -602,7 +591,7 @@ ring_nm_txq_db(struct adapter *sc, struct sge_nm_txq *nm_txq)
MPASS(nm_txq->pidx != nm_txq->dbidx);
n = IDXDIFF(nm_txq, dbidx);
n = NMIDXDIFF(nm_txq, dbidx);
if (n > 1)
clrbit(&db, DOORBELL_WCWR);
wmb();
@ -733,16 +722,16 @@ cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq,
return;
}
if (IDXDIFF(nm_txq, equiqidx) >= nm_txq->sidx / 2) {
if (NMIDXDIFF(nm_txq, equiqidx) >= nm_txq->sidx / 2) {
wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ |
F_FW_WR_EQUIQ);
nm_txq->equeqidx = nm_txq->pidx;
nm_txq->equiqidx = nm_txq->pidx;
} else if (IDXDIFF(nm_txq, equeqidx) >= 64) {
} else if (NMIDXDIFF(nm_txq, equeqidx) >= 64) {
wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
nm_txq->equeqidx = nm_txq->pidx;
}
if (IDXDIFF(nm_txq, dbidx) >= 2 * SGE_MAX_WR_NDESC)
if (NMIDXDIFF(nm_txq, dbidx) >= 2 * SGE_MAX_WR_NDESC)
ring_nm_txq_db(sc, nm_txq);
}
@ -782,8 +771,15 @@ reclaim_nm_tx_desc(struct sge_nm_txq *nm_txq)
n += wr->npkt;
nm_txq->cidx += npkt_to_ndesc(wr->npkt);
if (__predict_false(nm_txq->cidx >= nm_txq->sidx))
nm_txq->cidx -= nm_txq->sidx;
/*
* We never sent a WR that wrapped around so the credits coming
* back, WR by WR, should never cause the cidx to wrap around
* either.
*/
MPASS(nm_txq->cidx <= nm_txq->sidx);
if (__predict_false(nm_txq->cidx == nm_txq->sidx))
nm_txq->cidx = 0;
}
return (n);
@ -890,13 +886,8 @@ cxgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
MPASS((fl_pidx & 7) == 0);
MPASS((n & 7) == 0);
kring->nr_hwcur += n;
if (kring->nr_hwcur >= kring->nkr_num_slots)
kring->nr_hwcur -= kring->nkr_num_slots;
nm_rxq->fl_pidx += n;
if (nm_rxq->fl_pidx >= nm_rxq->fl_sidx)
nm_rxq->fl_pidx -= nm_rxq->fl_sidx;
IDXINCR(kring->nr_hwcur, n, kring->nkr_num_slots);
IDXINCR(nm_rxq->fl_pidx, n, nm_rxq->fl_sidx);
while (n > 0) {
for (i = 0; i < 8; i++, fl_pidx++, slot++) {
@ -1073,7 +1064,7 @@ t4_nm_intr(void *arg)
struct netmap_adapter *na = NA(ifp);
struct netmap_kring *kring = &na->rx_rings[nm_rxq->nid];
struct netmap_ring *ring = kring->ring;
struct nm_iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx];
struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx];
uint32_t lq;
u_int n = 0;
int processed = 0;
@ -1100,7 +1091,8 @@ t4_nm_intr(void *arg)
switch (opcode) {
case CPL_FW4_MSG:
case CPL_FW6_MSG:
handle_nm_fw6_msg(sc, ifp, &d->u.fw6_msg);
handle_nm_fw6_msg(sc, ifp,
(const void *)&d->cpl[0]);
break;
case CPL_RX_PKT:
ring->slot[fl_cidx].len = G_RSPD_LEN(lq) - fl_pktshift;

View file

@ -175,8 +175,7 @@ static int service_iq(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
int *);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
int);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int);
static inline void init_fl(struct adapter *, struct sge_fl *, int, int, int,
char *);
static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
@ -224,8 +223,6 @@ static int alloc_txq(struct port_info *, struct sge_txq *, int,
struct sysctl_oid *);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
@ -1005,8 +1002,7 @@ t4_setup_port_queues(struct port_info *pi)
}
for_each_rxq(pi, i, rxq) {
init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
RX_IQ_ESIZE);
init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq);
snprintf(name, sizeof(name), "%s rxq%d-fl",
device_get_nameunit(pi->dev), i);
@ -1030,7 +1026,7 @@ t4_setup_port_queues(struct port_info *pi)
for_each_ofld_rxq(pi, i, ofld_rxq) {
init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
pi->qsize_rxq, RX_IQ_ESIZE);
pi->qsize_rxq);
snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
device_get_nameunit(pi->dev), i);
@ -1319,8 +1315,7 @@ service_iq(struct sge_iq *iq, int budget)
struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */
struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */
struct adapter *sc = iq->adapter;
struct rsp_ctrl *ctrl;
const struct rss_header *rss;
struct iq_desc *d = &iq->desc[iq->cidx];
int ndescs = 0, limit, fl_bufs_used = 0;
int rsp_type;
uint32_t lq;
@ -1339,14 +1334,13 @@ service_iq(struct sge_iq *iq, int budget)
* interrupts and other responses after running a single handler.
*/
for (;;) {
while (is_new_response(iq, &ctrl)) {
while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {
rmb();
m0 = NULL;
rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
lq = be32toh(ctrl->pldbuflen_qid);
rss = (const void *)iq->cdesc;
rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
lq = be32toh(d->rsp.pldbuflen_qid);
switch (rsp_type) {
case X_RSPD_TYPE_FLBUF:
@ -1376,10 +1370,10 @@ service_iq(struct sge_iq *iq, int budget)
/* fall through */
case X_RSPD_TYPE_CPL:
KASSERT(rss->opcode < NUM_CPL_CMDS,
KASSERT(d->rss.opcode < NUM_CPL_CMDS,
("%s: bad opcode %02x.", __func__,
rss->opcode));
sc->cpl_handler[rss->opcode](iq, rss, m0);
d->rss.opcode));
sc->cpl_handler[d->rss.opcode](iq, &d->rss, m0);
break;
case X_RSPD_TYPE_INTR:
@ -1401,7 +1395,7 @@ service_iq(struct sge_iq *iq, int budget)
* iWARP async notification.
*/
if (lq >= 1024) {
sc->an_handler(iq, ctrl);
sc->an_handler(iq, &d->rsp);
break;
}
@ -1436,8 +1430,13 @@ service_iq(struct sge_iq *iq, int budget)
fl_bufs_used = 0;
}
iq_next(iq);
if (++ndescs == limit) {
d++;
if (__predict_false(++iq->cidx == iq->sidx)) {
iq->cidx = 0;
iq->gen ^= F_RSPD_GEN;
d = &iq->desc[0];
}
if (__predict_false(++ndescs == limit)) {
t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
V_CIDXINC(ndescs) |
V_INGRESSQID(iq->cntxt_id) |
@ -2101,8 +2100,9 @@ can_resume_tx(struct sge_eq *eq)
static inline void
init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
int qsize, int esize)
int qsize)
{
KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
("%s: bad tmr_idx %d", __func__, tmr_idx));
KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */
@ -2117,7 +2117,7 @@ init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
iq->intr_pktc_idx = pktc_idx;
}
iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */
iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */
iq->sidx = iq->qsize - spg_len / IQ_ESIZE;
}
static inline void
@ -2218,7 +2218,7 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
struct adapter *sc = iq->adapter;
__be32 v = 0;
len = iq->qsize * iq->esize;
len = iq->qsize * IQ_ESIZE;
rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
(void **)&iq->desc);
if (rc != 0)
@ -2250,7 +2250,7 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
F_FW_IQ_CMD_IQGTSMODE |
V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
c.iqsize = htobe16(iq->qsize);
c.iqaddr = htobe64(iq->ba);
if (cong >= 0)
@ -2259,14 +2259,14 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
if (fl) {
mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
len = fl->qsize * RX_FL_ESIZE;
len = fl->qsize * EQ_ESIZE;
rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
&fl->ba, (void **)&fl->desc);
if (rc)
return (rc);
/* Allocate space for one software descriptor per buffer. */
fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8;
fl->cap = (fl->qsize - spg_len / EQ_ESIZE) * 8;
rc = alloc_fl_sdesc(fl);
if (rc != 0) {
device_printf(sc->dev,
@ -2305,9 +2305,8 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
return (rc);
}
iq->cdesc = iq->desc;
iq->cidx = 0;
iq->gen = 1;
iq->gen = F_RSPD_GEN;
iq->intr_next = iq->intr_params;
iq->cntxt_id = be16toh(c.iqid);
iq->abs_id = be16toh(c.physiqid);
@ -2457,7 +2456,7 @@ alloc_fwq(struct adapter *sc)
struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE);
init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE);
fwq->flags |= IQ_INTR; /* always */
intr_idx = sc->intr_count > 1 ? 1 : 0;
rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
@ -2677,13 +2676,13 @@ alloc_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int intr_idx,
MPASS(na != NULL);
len = pi->qsize_rxq * RX_IQ_ESIZE;
len = pi->qsize_rxq * IQ_ESIZE;
rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map,
&nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc);
if (rc != 0)
return (rc);
len = na->num_rx_desc * RX_FL_ESIZE + spg_len;
len = na->num_rx_desc * EQ_ESIZE + spg_len;
rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map,
&nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc);
if (rc != 0)
@ -2692,7 +2691,7 @@ alloc_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int intr_idx,
nm_rxq->pi = pi;
nm_rxq->nid = idx;
nm_rxq->iq_cidx = 0;
nm_rxq->iq_sidx = pi->qsize_rxq - spg_len / RX_IQ_ESIZE;
nm_rxq->iq_sidx = pi->qsize_rxq - spg_len / IQ_ESIZE;
nm_rxq->iq_gen = F_RSPD_GEN;
nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0;
nm_rxq->fl_sidx = na->num_rx_desc;
@ -3214,26 +3213,6 @@ oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
*ba = error ? 0 : segs->ds_addr;
}
static inline bool
is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
{
*ctrl = (void *)((uintptr_t)iq->cdesc +
(iq->esize - sizeof(struct rsp_ctrl)));
return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
}
static inline void
iq_next(struct sge_iq *iq)
{
iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
if (__predict_false(++iq->cidx == iq->qsize - spg_len / iq->esize)) {
iq->cidx = 0;
iq->gen ^= 1;
iq->cdesc = iq->desc;
}
}
#define FL_HW_IDX(x) ((x) >> 3)
static inline void
ring_fl_db(struct adapter *sc, struct sge_fl *fl)