diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c index f5a911b6d3f..00cc93298f3 100644 --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -449,7 +449,6 @@ mb_dtor_mbuf(void *mem, int size, void *arg) if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags)) m_tag_delete_chain(m, NULL); - KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); #ifdef INVARIANTS trash_dtor(mem, size, arg); diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index 8a56227579d..36432e0e90f 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -287,19 +287,31 @@ m_extadd(struct mbuf *mb, caddr_t buf, u_int size, void mb_free_ext(struct mbuf *m) { - int skipmbuf; + int freembuf; - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ext_cnt != NULL, ("%s: ext_cnt not set", __func__)); + KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); /* - * check if the header is embedded in the cluster + * Check if the header is embedded in the cluster. */ - skipmbuf = (m->m_flags & M_NOFREE); + freembuf = (m->m_flags & M_NOFREE) ? 0 : 1; + + switch (m->m_ext.ext_type) { + case EXT_SFBUF: + sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2); + break; + default: + KASSERT(m->m_ext.ext_cnt != NULL, + ("%s: no refcounting pointer on %p", __func__, m)); + /* + * Free attached storage if this mbuf is the only + * reference to it. + */ + if (*(m->m_ext.ext_cnt) != 1) { + if (atomic_fetchadd_int(m->m_ext.ext_cnt, -1) != 1) + break; + } - /* Free attached storage if this mbuf is the only reference to it. */ - if (*(m->m_ext.ext_cnt) == 1 || - atomic_fetchadd_int(m->m_ext.ext_cnt, -1) == 1) { switch (m->m_ext.ext_type) { case EXT_PACKET: /* The packet zone is special. */ if (*(m->m_ext.ext_cnt) == 0) @@ -318,7 +330,6 @@ mb_free_ext(struct mbuf *m) case EXT_JUMBO16: uma_zfree(zone_jumbo16, m->m_ext.ext_buf); break; - case EXT_SFBUF: case EXT_NET_DRV: case EXT_MOD_TYPE: case EXT_DISPOSABLE: @@ -337,23 +348,9 @@ mb_free_ext(struct mbuf *m) ("%s: unknown ext_type", __func__)); } } - if (skipmbuf) - return; - /* - * Free this mbuf back to the mbuf zone with all m_ext - * information purged. - */ - m->m_ext.ext_buf = NULL; - m->m_ext.ext_free = NULL; - m->m_ext.ext_arg1 = NULL; - m->m_ext.ext_arg2 = NULL; - m->m_ext.ext_cnt = NULL; - m->m_ext.ext_size = 0; - m->m_ext.ext_type = 0; - m->m_ext.ext_flags = 0; - m->m_flags &= ~M_EXT; - uma_zfree(zone_mbuf, m); + if (freembuf) + uma_zfree(zone_mbuf, m); } /* @@ -363,22 +360,24 @@ mb_free_ext(struct mbuf *m) static void mb_dupcl(struct mbuf *n, struct mbuf *m) { - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ext_cnt != NULL, ("%s: ext_cnt not set", __func__)); - KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); - if (*(m->m_ext.ext_cnt) == 1) - *(m->m_ext.ext_cnt) += 1; - else - atomic_add_int(m->m_ext.ext_cnt, 1); - n->m_ext.ext_buf = m->m_ext.ext_buf; - n->m_ext.ext_free = m->m_ext.ext_free; - n->m_ext.ext_arg1 = m->m_ext.ext_arg1; - n->m_ext.ext_arg2 = m->m_ext.ext_arg2; - n->m_ext.ext_size = m->m_ext.ext_size; - n->m_ext.ext_cnt = m->m_ext.ext_cnt; - n->m_ext.ext_type = m->m_ext.ext_type; - n->m_ext.ext_flags = m->m_ext.ext_flags; + KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); + KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n)); + + switch (m->m_ext.ext_type) { + case EXT_SFBUF: + sf_ext_ref(m->m_ext.ext_arg1, m->m_ext.ext_arg2); + break; + default: + KASSERT(m->m_ext.ext_cnt != NULL, + ("%s: no refcounting pointer on %p", __func__, m)); + if (*(m->m_ext.ext_cnt) == 1) + *(m->m_ext.ext_cnt) += 1; + else + atomic_add_int(m->m_ext.ext_cnt, 1); + } + + bcopy(&m->m_ext, &n->m_ext, sizeof(m->m_ext)); n->m_flags |= M_EXT; n->m_flags |= m->m_flags & M_RDONLY; } diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 2581d2c8399..4675f19fb27 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1983,32 +1983,56 @@ filt_sfsync(struct knote *kn, long hint) return (ret); } +/* + * Add more references to a vm_page + sf_buf + sendfile_sync. + */ +void +sf_ext_ref(void *arg1, void *arg2) +{ + struct sf_buf *sf = arg1; + struct sendfile_sync *sfs = arg2; + vm_page_t pg = sf_buf_page(sf); + + /* XXXGL: there should be sf_buf_ref() */ + sf_buf_alloc(sf_buf_page(sf), SFB_NOWAIT); + + vm_page_lock(pg); + vm_page_wire(pg); + vm_page_unlock(pg); + + if (sfs != NULL) { + mtx_lock(&sfs->mtx); + KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); + sfs->count++; + mtx_unlock(&sfs->mtx); + } +} /* * Detach mapped page and release resources back to the system. */ void -sf_buf_mext(struct mbuf *mb, void *addr, void *args) +sf_ext_free(void *arg1, void *arg2) { - vm_page_t m; - struct sendfile_sync *sfs; + struct sf_buf *sf = arg1; + struct sendfile_sync *sfs = arg2; + vm_page_t pg = sf_buf_page(sf); - m = sf_buf_page(args); - sf_buf_free(args); - vm_page_lock(m); - vm_page_unwire(m, PQ_INACTIVE); + sf_buf_free(sf); + + vm_page_lock(pg); + vm_page_unwire(pg, PQ_INACTIVE); /* * Check for the object going away on us. This can * happen since we don't hold a reference to it. * If so, we're responsible for freeing the page. */ - if (m->wire_count == 0 && m->object == NULL) - vm_page_free(m); - vm_page_unlock(m); - if (addr != NULL) { - sfs = addr; + if (pg->wire_count == 0 && pg->object == NULL) + vm_page_free(pg); + vm_page_unlock(pg); + + if (sfs != NULL) sf_sync_deref(sfs); - } } /* @@ -2124,7 +2148,7 @@ sf_sync_alloc(uint32_t flags) /* * Take a reference to a sfsync instance. * - * This has to map 1:1 to free calls coming in via sf_buf_mext(), + * This has to map 1:1 to free calls coming in via sf_ext_free(), * so typically this will be referenced once for each mbuf allocated. */ void @@ -3062,17 +3086,19 @@ retry_space: m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); if (m0 == NULL) { error = (mnw ? EAGAIN : ENOBUFS); - sf_buf_mext(NULL, NULL, sf); - break; - } - if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE, - sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF, - (mnw ? M_NOWAIT : M_WAITOK)) != 0) { - error = (mnw ? EAGAIN : ENOBUFS); - sf_buf_mext(NULL, NULL, sf); - m_freem(m0); + sf_ext_free(sf, NULL); break; } + /* + * Attach EXT_SFBUF external storage. + */ + m0->m_ext.ext_buf = (caddr_t )sf_buf_kva(sf); + m0->m_ext.ext_size = PAGE_SIZE; + m0->m_ext.ext_arg1 = sf; + m0->m_ext.ext_arg2 = sfs; + m0->m_ext.ext_type = EXT_SFBUF; + m0->m_ext.ext_flags = 0; + m0->m_flags |= (M_EXT|M_RDONLY); m0->m_data = (char *)sf_buf_kva(sf) + pgoff; m0->m_len = xfsize; diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index b092c0ccf7f..83ad3c3dafc 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -373,6 +373,12 @@ struct mbuf { "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \ "\30EXT_FLAG_EXP4" +/* + * External reference/free functions. + */ +void sf_ext_ref(void *, void *); +void sf_ext_free(void *, void *); + /* * Flags indicating checksum, segmentation and other offload work to be * done, or already done, by hardware or lower layers. It is split into diff --git a/sys/sys/sf_buf.h b/sys/sys/sf_buf.h index 58e3bcbd10b..07871dd2999 100644 --- a/sys/sys/sf_buf.h +++ b/sys/sys/sf_buf.h @@ -52,7 +52,6 @@ struct sfstat { /* sendfile statistics */ #include #include #include -struct mbuf; /* for sf_buf_mext() */ extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; #define SFSTAT_ADD(name, val) \ @@ -60,7 +59,4 @@ extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; (val)) #define SFSTAT_INC(name) SFSTAT_ADD(name, 1) #endif /* _KERNEL */ - -void sf_buf_mext(struct mbuf *mb, void *addr, void *args); - #endif /* !_SYS_SF_BUF_H_ */