From 0a0a697c73b3dca1e0e52884e4da993f351c0f9f Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Mon, 14 Jan 2013 20:36:22 +0000 Subject: [PATCH] cxgbe(4): Updates to the hardware L2 table management code. - Add full support for IPv6 addresses. - Read the size of the L2 table during attach. Do not assume that PCIe physical function 4 of the card has all of the table to itself. - Use FNV instead of Jenkins to hash L3 addresses and drop the private copy of jhash.h from the driver. MFC after: 1 week --- sys/dev/cxgbe/common/jhash.h | 140 --------------------------------- sys/dev/cxgbe/offload.h | 5 +- sys/dev/cxgbe/t4_l2t.c | 44 +++++++---- sys/dev/cxgbe/t4_l2t.h | 6 +- sys/dev/cxgbe/t4_main.c | 9 ++- sys/dev/cxgbe/tom/t4_tom_l2t.c | 124 +++++++++++++++++++++++------ sys/modules/cxgbe/tom/Makefile | 11 ++- 7 files changed, 149 insertions(+), 190 deletions(-) delete mode 100644 sys/dev/cxgbe/common/jhash.h diff --git a/sys/dev/cxgbe/common/jhash.h b/sys/dev/cxgbe/common/jhash.h deleted file mode 100644 index 4546b7b3d5b..00000000000 --- a/sys/dev/cxgbe/common/jhash.h +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef _JHASH_H -#define _JHASH_H - -/* jhash.h: Jenkins hash support. - * - * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) - * - * http://burtleburtle.net/bob/hash/ - * - * These are the credits from Bob's sources: - * - * lookup2.c, by Bob Jenkins, December 1996, Public Domain. - * hash(), hash2(), hash3, and mix() are externally useful functions. - * Routines to test the hash are included if SELF_TEST is defined. - * You can use this free for any purpose. It has no warranty. - * - * $FreeBSD$ - */ - -/* NOTE: Arguments are modified. */ -#define __jhash_mix(a, b, c) \ -{ \ - a -= b; a -= c; a ^= (c>>13); \ - b -= c; b -= a; b ^= (a<<8); \ - c -= a; c -= b; c ^= (b>>13); \ - a -= b; a -= c; a ^= (c>>12); \ - b -= c; b -= a; b ^= (a<<16); \ - c -= a; c -= b; c ^= (b>>5); \ - a -= b; a -= c; a ^= (c>>3); \ - b -= c; b -= a; b ^= (a<<10); \ - c -= a; c -= b; c ^= (b>>15); \ -} - -/* The golden ration: an arbitrary value */ -#define JHASH_GOLDEN_RATIO 0x9e3779b9 - -/* The most generic version, hashes an arbitrary sequence - * of bytes. No alignment or length assumptions are made about - * the input key. - */ -static inline u32 jhash(const void *key, u32 length, u32 initval) -{ - u32 a, b, c, len; - const u8 *k = key; - - len = length; - a = b = JHASH_GOLDEN_RATIO; - c = initval; - - while (len >= 12) { - a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24)); - b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24)); - c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24)); - - __jhash_mix(a,b,c); - - k += 12; - len -= 12; - } - - c += length; - switch (len) { - case 11: c += ((u32)k[10]<<24); - case 10: c += ((u32)k[9]<<16); - case 9 : c += ((u32)k[8]<<8); - case 8 : b += ((u32)k[7]<<24); - case 7 : b += ((u32)k[6]<<16); - case 6 : b += ((u32)k[5]<<8); - case 5 : b += k[4]; - case 4 : a += ((u32)k[3]<<24); - case 3 : a += ((u32)k[2]<<16); - case 2 : a += ((u32)k[1]<<8); - case 1 : a += k[0]; - }; - - __jhash_mix(a,b,c); - - return c; -} - -/* A special optimized version that handles 1 or more of u32s. - * The length parameter here is the number of u32s in the key. - */ -static inline u32 jhash2(u32 *k, u32 length, u32 initval) -{ - u32 a, b, c, len; - - a = b = JHASH_GOLDEN_RATIO; - c = initval; - len = length; - - while (len >= 3) { - a += k[0]; - b += k[1]; - c += k[2]; - __jhash_mix(a, b, c); - k += 3; len -= 3; - } - - c += length * 4; - - switch (len) { - case 2 : b += k[1]; - case 1 : a += k[0]; - }; - - __jhash_mix(a,b,c); - - return c; -} - - -/* A special ultra-optimized versions that knows they are hashing exactly - * 3, 2 or 1 word(s). - * - * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally - * done at the end is not done here. - */ -static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) -{ - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += initval; - - __jhash_mix(a, b, c); - - return c; -} - -static inline u32 jhash_2words(u32 a, u32 b, u32 initval) -{ - return jhash_3words(a, b, 0, initval); -} - -static inline u32 jhash_1word(u32 a, u32 initval) -{ - return jhash_3words(a, 0, 0, initval); -} - -#endif /* _JHASH_H */ diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h index 091c67aa112..55ac71b54ef 100644 --- a/sys/dev/cxgbe/offload.h +++ b/sys/dev/cxgbe/offload.h @@ -104,8 +104,8 @@ struct tid_info { }; struct t4_range { - unsigned int start; - unsigned int size; + u_int start; + u_int size; }; struct t4_virt_res { /* virtualized HW resources */ @@ -117,6 +117,7 @@ struct t4_virt_res { /* virtualized HW resources */ struct t4_range qp; struct t4_range cq; struct t4_range ocq; + struct t4_range l2t; }; #ifdef TCP_OFFLOAD diff --git a/sys/dev/cxgbe/t4_l2t.c b/sys/dev/cxgbe/t4_l2t.c index dd8748edbac..dcff5e8a6a4 100644 --- a/sys/dev/cxgbe/t4_l2t.c +++ b/sys/dev/cxgbe/t4_l2t.c @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include #include "common/common.h" -#include "common/jhash.h" #include "common/t4_msg.h" #include "t4_l2t.h" @@ -78,7 +77,7 @@ t4_alloc_l2e(struct l2t_data *d) return (NULL); /* there's definitely a free entry */ - for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e) + for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e) if (atomic_load_acq_int(&e->refcnt) == 0) goto found; @@ -115,6 +114,7 @@ t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync) { struct wrqe *wr; struct cpl_l2t_write_req *req; + int idx = e->idx + sc->vres.l2t.start; mtx_assert(&e->lock, MA_OWNED); @@ -124,10 +124,10 @@ t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync) req = wrtod(wr); INIT_TP_WR(req, 0); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx | + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx | V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id))); req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync)); - req->l2t_idx = htons(e->idx); + req->l2t_idx = htons(idx); req->vlan = htons(e->vlan); memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); @@ -183,18 +183,24 @@ t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan, int t4_init_l2t(struct adapter *sc, int flags) { - int i; + int i, l2t_size; struct l2t_data *d; - d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags); + l2t_size = sc->vres.l2t.size; + if (l2t_size < 2) /* At least 1 bucket for IP and 1 for IPv6 */ + return (EINVAL); + + d = malloc(sizeof(*d) + l2t_size * sizeof (struct l2t_entry), M_CXGBE, + M_ZERO | flags); if (!d) return (ENOMEM); + d->l2t_size = l2t_size; d->rover = d->l2tab; - atomic_store_rel_int(&d->nfree, L2T_SIZE); + atomic_store_rel_int(&d->nfree, l2t_size); rw_init(&d->lock, "L2T"); - for (i = 0; i < L2T_SIZE; i++) { + for (i = 0; i < l2t_size; i++) { struct l2t_entry *e = &d->l2tab[i]; e->idx = i; @@ -215,7 +221,7 @@ t4_free_l2t(struct l2t_data *d) { int i; - for (i = 0; i < L2T_SIZE; i++) + for (i = 0; i < d->l2t_size; i++) mtx_destroy(&d->l2tab[i].lock); rw_destroy(&d->lock); free(d, M_CXGBE); @@ -229,11 +235,11 @@ do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, { const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); unsigned int tid = GET_TID(rpl); - unsigned int idx = tid & (L2T_SIZE - 1); + unsigned int idx = tid % L2T_SIZE; if (__predict_false(rpl->status != CPL_ERR_NONE)) { log(LOG_ERR, - "Unexpected L2T_WRITE_RPL status %u for entry %u\n", + "Unexpected L2T_WRITE_RPL (%u) for entry at hw_idx %u\n", rpl->status, idx); return (EINVAL); } @@ -269,7 +275,7 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS) struct l2t_entry *e; struct sbuf *sb; int rc, i, header = 0; - char ip[60]; + char ip[INET6_ADDRSTRLEN]; if (l2t == NULL) return (ENXIO); @@ -283,7 +289,7 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS) return (ENOMEM); e = &l2t->l2tab[0]; - for (i = 0; i < L2T_SIZE; i++, e++) { + for (i = 0; i < l2t->l2t_size; i++, e++) { mtx_lock(&e->lock); if (e->state == L2T_STATE_UNUSED) goto skip; @@ -295,11 +301,15 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS) } if (e->state == L2T_STATE_SWITCHING) ip[0] = 0; - else - snprintf(ip, sizeof(ip), "%s", - inet_ntoa(*(struct in_addr *)&e->addr)); + else { + inet_ntop(e->ipv6 ? AF_INET6 : AF_INET, &e->addr[0], + &ip[0], sizeof(ip)); + } - /* XXX: e->ifp may not be around */ + /* + * XXX: e->ifp may not be around. + * XXX: IPv6 addresses may not align properly in the output. + */ sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d" " %u %2u %c %5u %s", e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2], diff --git a/sys/dev/cxgbe/t4_l2t.h b/sys/dev/cxgbe/t4_l2t.h index 6927b810631..c60eef1367b 100644 --- a/sys/dev/cxgbe/t4_l2t.h +++ b/sys/dev/cxgbe/t4_l2t.h @@ -60,7 +60,7 @@ enum { struct l2t_entry { uint16_t state; /* entry state */ uint16_t idx; /* entry index */ - uint32_t addr; /* next hop IP address */ + uint32_t addr[4]; /* next hop IP or IPv6 address */ struct ifnet *ifp; /* outgoing interface */ uint16_t smt_idx; /* SMT index */ uint16_t vlan; /* VLAN TCI (id: 0-11, prio: 13-15) */ @@ -70,15 +70,17 @@ struct l2t_entry { struct mtx lock; volatile int refcnt; /* entry reference count */ uint16_t hash; /* hash bucket the entry is on */ + uint8_t ipv6; /* entry is for an IPv6 address */ uint8_t lport; /* associated offload logical port */ uint8_t dmac[ETHER_ADDR_LEN]; /* next hop's MAC address */ }; struct l2t_data { struct rwlock lock; + u_int l2t_size; volatile int nfree; /* number of free entries */ struct l2t_entry *rover;/* starting point for next allocation */ - struct l2t_entry l2tab[L2T_SIZE]; + struct l2t_entry l2tab[]; }; diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 35553544233..726de9fcf15 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -1866,7 +1866,9 @@ get_params__post_init(struct adapter *sc) param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); - rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val); + param[4] = FW_PARAM_PFVF(L2T_START); + param[5] = FW_PARAM_PFVF(L2T_END); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); @@ -1877,6 +1879,11 @@ get_params__post_init(struct adapter *sc) sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; + sc->vres.l2t.start = val[4]; + sc->vres.l2t.size = val[5] - val[4] + 1; + KASSERT(sc->vres.l2t.size <= L2T_SIZE, + ("%s: L2 table size (%u) larger than expected (%u)", + __func__, sc->vres.l2t.size, L2T_SIZE)); /* get capabilites */ bzero(&caps, sizeof(caps)); diff --git a/sys/dev/cxgbe/tom/t4_tom_l2t.c b/sys/dev/cxgbe/tom/t4_tom_l2t.c index ffe64c5be6e..7a75394566d 100644 --- a/sys/dev/cxgbe/tom/t4_tom_l2t.c +++ b/sys/dev/cxgbe/tom/t4_tom_l2t.c @@ -27,6 +27,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #ifdef TCP_OFFLOAD #include @@ -34,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -48,28 +50,89 @@ __FBSDID("$FreeBSD$"); #include #include "common/common.h" -#include "common/jhash.h" #include "common/t4_msg.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" #define VLAN_NONE 0xfff -#define SA(x) ((struct sockaddr *)(x)) -#define SIN(x) ((struct sockaddr_in *)(x)) -#define SINADDR(x) (SIN(x)->sin_addr.s_addr) - static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e) { + if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ atomic_subtract_int(&d->nfree, 1); } -static inline unsigned int -arp_hash(const uint32_t key, int ifindex) +static inline u_int +l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex) { - return jhash_2words(key, ifindex, 0) & (L2T_SIZE - 1); + u_int hash, half = d->l2t_size / 2, start = 0; + const void *key; + size_t len; + + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); + + if (sa->sa_family == AF_INET) { + const struct sockaddr_in *sin = (const void *)sa; + + key = &sin->sin_addr; + len = sizeof(sin->sin_addr); + } else { + const struct sockaddr_in6 *sin6 = (const void *)sa; + + key = &sin6->sin6_addr; + len = sizeof(sin6->sin6_addr); + start = half; + } + + hash = fnv_32_buf(key, len, FNV1_32_INIT); + hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash); + hash %= half; + + return (hash + start); +} + +static inline int +l2_cmp(const struct sockaddr *sa, struct l2t_entry *e) +{ + + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); + + if (sa->sa_family == AF_INET) { + const struct sockaddr_in *sin = (const void *)sa; + + return (e->addr[0] != sin->sin_addr.s_addr); + } else { + const struct sockaddr_in6 *sin6 = (const void *)sa; + + return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr))); + } +} + +static inline void +l2_store(const struct sockaddr *sa, struct l2t_entry *e) +{ + + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); + + if (sa->sa_family == AF_INET) { + const struct sockaddr_in *sin = (const void *)sa; + + e->addr[0] = sin->sin_addr.s_addr; + e->ipv6 = 0; + } else { + const struct sockaddr_in6 *sin6 = (const void *)sa; + + memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)); + e->ipv6 = 1; + } } /* @@ -100,7 +163,7 @@ send_pending(struct adapter *sc, struct l2t_entry *e) static void resolution_failed_for_wr(struct wrqe *wr) { - log(LOG_ERR, "%s: leaked work request %p, wr_len %d", __func__, wr, + log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr, wr->wr_len); /* free(wr, M_CXGBE); */ @@ -175,15 +238,25 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e) struct tom_data *td = sc->tom_softc; struct toedev *tod = &td->tod; struct sockaddr_in sin = {0}; + struct sockaddr_in6 sin6 = {0}; + struct sockaddr *sa; uint8_t dmac[ETHER_ADDR_LEN]; uint16_t vtag = VLAN_NONE; int rc; - sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); - SINADDR(&sin) = e->addr; + if (e->ipv6 == 0) { + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_addr.s_addr = e->addr[0]; + sa = (void *)&sin; + } else { + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr)); + sa = (void *)&sin6; + } - rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag); + rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag); if (rc == EWOULDBLOCK) return (rc); @@ -263,7 +336,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, struct adapter *sc = iq->adapter; const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); unsigned int tid = GET_TID(rpl); - unsigned int idx = tid & (L2T_SIZE - 1); + unsigned int idx = tid % L2T_SIZE; int rc; rc = do_l2t_write_rpl(iq, rss, m); @@ -271,7 +344,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, return (rc); if (tid & F_SYNC_WR) { - struct l2t_entry *e = &sc->l2t->l2tab[idx]; + struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start]; mtx_lock(&e->lock); if (e->state != L2T_STATE_SWITCHING) { @@ -310,21 +383,22 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) { struct l2t_entry *e; struct l2t_data *d = pi->adapter->l2t; - uint32_t addr = SINADDR(sa); - int hash = arp_hash(addr, ifp->if_index); - unsigned int smt_idx = pi->port_id; + u_int hash, smt_idx = pi->port_id; - if (sa->sa_family != AF_INET) - return (NULL); /* XXX: no IPv6 support right now */ + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); #ifndef VLAN_TAG if (ifp->if_type == IFT_L2VLAN) return (NULL); #endif + hash = l2_hash(d, sa, ifp->if_index); rw_wlock(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) { - if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) { + if (l2_cmp(sa, e) == 0 && e->ifp == ifp && + e->smt_idx == smt_idx) { l2t_hold(d, e); goto done; } @@ -338,7 +412,7 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) d->l2tab[hash].first = e; e->state = L2T_STATE_RESOLVING; - e->addr = addr; + l2_store(sa, e); e->ifp = ifp; e->smt_idx = smt_idx; e->hash = hash; @@ -368,14 +442,14 @@ t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, struct adapter *sc = tod->tod_softc; struct l2t_entry *e; struct l2t_data *d = sc->l2t; - uint32_t addr = SINADDR(sa); - int hash = arp_hash(addr, ifp->if_index); + u_int hash; KASSERT(d != NULL, ("%s: no L2 table", __func__)); + hash = l2_hash(d, sa, ifp->if_index); rw_rlock(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) { - if (e->addr == addr && e->ifp == ifp) { + if (l2_cmp(sa, e) == 0 && e->ifp == ifp) { mtx_lock(&e->lock); if (atomic_load_acq_int(&e->refcnt)) goto found; diff --git a/sys/modules/cxgbe/tom/Makefile b/sys/modules/cxgbe/tom/Makefile index 72721be6412..d02afd4cc3f 100644 --- a/sys/modules/cxgbe/tom/Makefile +++ b/sys/modules/cxgbe/tom/Makefile @@ -10,15 +10,20 @@ CXGBE = ${.CURDIR}/../../../dev/cxgbe KMOD = t4_tom SRCS = t4_tom.c t4_connect.c t4_listen.c t4_cpl_io.c t4_tom_l2t.c t4_ddp.c SRCS+= device_if.h bus_if.h pci_if.h -SRCS+= opt_inet.h +SRCS+= opt_inet.h opt_inet6.h CFLAGS+= -I${CXGBE} .if !defined(KERNBUILDDIR) .if ${MK_INET_SUPPORT} != "no" opt_inet.h: - echo "#define INET 1" > ${.TARGET} - echo "#define TCP_OFFLOAD 1" >> ${.TARGET} + @echo "#define INET 1" > ${.TARGET} + @echo "#define TCP_OFFLOAD 1" >> ${.TARGET} +.endif + +.if ${MK_INET6_SUPPORT} != "no" +opt_inet6.h: + @echo "#define INET6 1" > ${.TARGET} .endif .endif