From 3cf3b4e6419d14ed25c0b67b84763b3be937c8b5 Mon Sep 17 00:00:00 2001 From: Jeff Roberson Date: Sun, 22 Dec 2019 06:56:44 +0000 Subject: [PATCH] Make page busy state deterministic on free. Pages must be xbusy when removed from objects including calls to free. Pages must not be xbusy when freed and not on an object. Strengthen assertions to match these expectations. In practice very little code had to change busy handling to meet these rules but we can now make stronger guarantees to busy holders and avoid conditionally dropping busy in free. Refine vm_page_remove() and vm_page_replace() semantics now that we have stronger guarantees about busy state. This removes redundant and potentially problematic code that has proliferated. Discussed with: markj Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D22822 --- sys/compat/linuxkpi/common/src/linux_compat.c | 5 +- sys/dev/drm2/ttm/ttm_bo_vm.c | 2 + sys/dev/netmap/netmap_freebsd.c | 5 +- sys/dev/xen/gntdev/gntdev.c | 13 +-- sys/dev/xen/privcmd/privcmd.c | 14 ++- sys/vm/device_pager.c | 10 +- sys/vm/sg_pager.c | 7 +- sys/vm/vm_fault.c | 12 ++- sys/vm/vm_kern.c | 1 + sys/vm/vm_object.c | 8 -- sys/vm/vm_page.c | 101 ++++++++++++------ sys/vm/vm_page.h | 22 +--- 12 files changed, 106 insertions(+), 94 deletions(-) diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c index 10d0866c254..6d9cff99be3 100644 --- a/sys/compat/linuxkpi/common/src/linux_compat.c +++ b/sys/compat/linuxkpi/common/src/linux_compat.c @@ -508,10 +508,7 @@ linux_cdev_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, page = vm_page_getfake(paddr, vm_obj->memattr); VM_OBJECT_WLOCK(vm_obj); - vm_page_replace_checked(page, vm_obj, - (*mres)->pindex, *mres); - - vm_page_free(*mres); + vm_page_replace(page, vm_obj, (*mres)->pindex, *mres); *mres = page; } vm_page_valid(page); diff --git a/sys/dev/drm2/ttm/ttm_bo_vm.c b/sys/dev/drm2/ttm/ttm_bo_vm.c index 0f35dac3a1d..c87eff0f6c9 100644 --- a/sys/dev/drm2/ttm/ttm_bo_vm.c +++ b/sys/dev/drm2/ttm/ttm_bo_vm.c @@ -237,6 +237,7 @@ reserve: goto retry; } m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); + /* XXX This looks like it should just be vm_page_replace? */ if (m1 == NULL) { if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) { vm_page_xunbusy(m); @@ -255,6 +256,7 @@ reserve: vm_page_valid(m); if (*mres != NULL) { KASSERT(*mres != m, ("losing %p %p", *mres, m)); + vm_page_xunbusy(*mres); vm_page_free(*mres); } *mres = m; diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 2580144ab7f..e37815dc88d 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -1022,12 +1022,10 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, vm_paddr_t paddr; vm_page_t page; vm_memattr_t memattr; - vm_pindex_t pidx; nm_prdis("object %p offset %jd prot %d mres %p", object, (intmax_t)offset, prot, mres); memattr = object->memattr; - pidx = OFF_TO_IDX(offset); paddr = netmap_mem_ofstophys(na->nm_mem, offset); if (paddr == 0) return VM_PAGER_FAIL; @@ -1052,9 +1050,8 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, VM_OBJECT_WUNLOCK(object); page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); - vm_page_free(*mres); + vm_page_replace(page, object, (*mres)->pindex, *mres); *mres = page; - vm_page_insert(page, object, pidx); } vm_page_valid(page); return (VM_PAGER_OK); diff --git a/sys/dev/xen/gntdev/gntdev.c b/sys/dev/xen/gntdev/gntdev.c index eb5275771a8..d2f3d979f8d 100644 --- a/sys/dev/xen/gntdev/gntdev.c +++ b/sys/dev/xen/gntdev/gntdev.c @@ -806,7 +806,7 @@ gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, { struct gntdev_gmap *gmap = object->handle; vm_pindex_t pidx, ridx; - vm_page_t page, oldm; + vm_page_t page; vm_ooffset_t relative_offset; if (gmap->map == NULL) @@ -829,15 +829,12 @@ gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, KASSERT(vm_page_wired(page), ("page %p is not wired", page)); KASSERT(!vm_page_busied(page), ("page %p is busy", page)); - if (*mres != NULL) { - oldm = *mres; - vm_page_free(oldm); - *mres = NULL; - } - vm_page_busy_acquire(page, 0); vm_page_valid(page); - vm_page_insert(page, object, pidx); + if (*mres != NULL) + vm_page_replace(page, object, pidx, *mres); + else + vm_page_insert(page, object, pidx); *mres = page; return (VM_PAGER_OK); } diff --git a/sys/dev/xen/privcmd/privcmd.c b/sys/dev/xen/privcmd/privcmd.c index 75e87df9dc5..04dc9e88307 100644 --- a/sys/dev/xen/privcmd/privcmd.c +++ b/sys/dev/xen/privcmd/privcmd.c @@ -154,7 +154,7 @@ privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset, { struct privcmd_map *map = object->handle; vm_pindex_t pidx; - vm_page_t page, oldm; + vm_page_t page; if (map->mapped != true) return (VM_PAGER_FAIL); @@ -172,15 +172,13 @@ privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset, KASSERT(vm_page_wired(page), ("page %p not wired", page)); KASSERT(!vm_page_busied(page), ("page %p is busy", page)); - if (*mres != NULL) { - oldm = *mres; - vm_page_free(oldm); - *mres = NULL; - } - vm_page_busy_acquire(page, 0); vm_page_valid(page); - vm_page_insert(page, object, pidx); + + if (*mres != NULL) + vm_page_replace(page, object, pidx, *mres); + else + vm_page_insert(page, object, pidx); *mres = page; return (VM_PAGER_OK); } diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index f7afe67911f..a38f11277c3 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -236,7 +236,6 @@ cdev_pager_free_page(vm_object_t object, vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m)); pmap_remove_all(m); (void)vm_page_remove(m); - vm_page_xunbusy(m); } else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); } @@ -271,8 +270,12 @@ dev_pager_dealloc(vm_object_t object) * Free up our fake pages. */ while ((m = TAILQ_FIRST(&object->un_pager.devp.devp_pglist)) - != NULL) + != NULL) { + if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) + continue; + dev_pager_free_page(object, m); + } } object->handle = NULL; object->type = OBJT_DEAD; @@ -391,8 +394,7 @@ old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); - vm_page_replace_checked(page, object, (*mres)->pindex, *mres); - vm_page_free(*mres); + vm_page_replace(page, object, (*mres)->pindex, *mres); *mres = page; } vm_page_valid(page); diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c index 520476a4b33..f5e211ab9bc 100644 --- a/sys/vm/sg_pager.c +++ b/sys/vm/sg_pager.c @@ -129,6 +129,8 @@ sg_pager_dealloc(vm_object_t object) * Free up our fake pages. */ while ((m = TAILQ_FIRST(&object->un_pager.sgp.sgp_pglist)) != 0) { + if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) + continue; TAILQ_REMOVE(&object->un_pager.sgp.sgp_pglist, m, plinks.q); vm_page_putfake(m); } @@ -193,10 +195,7 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, plinks.q); - vm_page_replace_checked(page, object, offset, m[0]); - vm_page_lock(m[0]); - vm_page_free(m[0]); - vm_page_unlock(m[0]); + vm_page_replace(page, object, offset, m[0]); m[0] = page; vm_page_valid(page); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index fb86b2591a4..3ed135fd8b5 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -180,8 +180,6 @@ fault_page_free(vm_page_t *mp) VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_wired(m)) vm_page_free(m); - else - vm_page_xunbusy(m); *mp = NULL; } } @@ -1229,10 +1227,14 @@ readrest: */ fs.object == fs.first_object->backing_object) { - (void)vm_page_remove(fs.m); - vm_page_replace_checked(fs.m, fs.first_object, + /* + * Remove but keep xbusy for replace. fs.m is + * moved into fs.first_object and left busy + * while fs.first_m is conditionally freed. + */ + vm_page_remove_xbusy(fs.m); + vm_page_replace(fs.m, fs.first_object, fs.first_pindex, fs.first_m); - vm_page_free(fs.first_m); vm_page_dirty(fs.m); #if VM_NRESERVLEVEL > 0 /* diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 8b9b2578e96..8f9ca97f759 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -586,6 +586,7 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) #endif for (; offset < end; offset += PAGE_SIZE, m = next) { next = vm_page_next(m); + vm_page_busy_acquire(m, 0); vm_page_unwire_noq(m); vm_page_free(m); } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 90a605d9bd8..3b9d26749a3 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1484,8 +1484,6 @@ retry: if (vm_page_none_valid(m)) { if (vm_page_remove(m)) vm_page_free(m); - else - vm_page_xunbusy(m); continue; } @@ -1675,8 +1673,6 @@ vm_object_collapse_scan(vm_object_t object, int op) ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); - else - vm_page_xunbusy(p); continue; } @@ -1708,8 +1704,6 @@ vm_object_collapse_scan(vm_object_t object, int op) */ if (vm_page_remove(pp)) vm_page_free(pp); - else - vm_page_xunbusy(pp); pp = NULL; } @@ -1728,8 +1722,6 @@ vm_object_collapse_scan(vm_object_t object, int op) ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); - else - vm_page_xunbusy(p); if (pp != NULL) vm_page_xunbusy(pp); continue; diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index d5ed9ea5ba3..139703a4b49 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -181,6 +181,8 @@ static void _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_dequeue_complete(vm_page_t m); static void vm_page_enqueue(vm_page_t m, uint8_t queue); +static bool vm_page_free_prep(vm_page_t m); +static void vm_page_free_toq(vm_page_t m); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); @@ -1290,8 +1292,7 @@ vm_page_putfake(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m)); KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_putfake: bad page %p", m)); - if (vm_page_xbusied(m)) - vm_page_xunbusy(m); + vm_page_xunbusy(m); uma_zfree(fakepg_zone, m); } @@ -1579,6 +1580,7 @@ vm_page_object_remove(vm_page_t m) vm_object_t object; vm_page_t mrem; + vm_page_assert_xbusied(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((m->ref_count & VPRC_OBJREF) != 0, @@ -1615,10 +1617,30 @@ vm_page_object_remove(vm_page_t m) * invalidate any backing storage. Returns true if the object's reference * was the last reference to the page, and false otherwise. * - * The object must be locked. + * The object must be locked and the page must be exclusively busied. + * The exclusive busy will be released on return. If this is not the + * final ref and the caller does not hold a wire reference it may not + * continue to access the page. */ bool vm_page_remove(vm_page_t m) +{ + bool dropped; + + dropped = vm_page_remove_xbusy(m); + vm_page_xunbusy(m); + + return (dropped); +} + +/* + * vm_page_remove_xbusy + * + * Removes the page but leaves the xbusy held. Returns true if this + * removed the final ref and false otherwise. + */ +bool +vm_page_remove_xbusy(vm_page_t m) { vm_page_object_remove(m); @@ -1704,13 +1726,24 @@ vm_page_prev(vm_page_t m) /* * Uses the page mnew as a replacement for an existing page at index * pindex which must be already present in the object. + * + * Both pages must be exclusively busied on enter. The old page is + * unbusied on exit. + * + * A return value of true means mold is now free. If this is not the + * final ref and the caller does not hold a wire reference it may not + * continue to access the page. */ -vm_page_t -vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) +static bool +vm_page_replace_hold(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, + vm_page_t mold) { - vm_page_t mold; + vm_page_t mret; + bool dropped; VM_OBJECT_ASSERT_WLOCKED(object); + vm_page_assert_xbusied(mnew); + vm_page_assert_xbusied(mold); KASSERT(mnew->object == NULL && (mnew->ref_count & VPRC_OBJREF) == 0, ("vm_page_replace: page %p already in object", mnew)); @@ -1723,7 +1756,9 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) mnew->object = object; mnew->pindex = pindex; atomic_set_int(&mnew->ref_count, VPRC_OBJREF); - mold = vm_radix_replace(&object->rtree, mnew); + mret = vm_radix_replace(&object->rtree, mnew); + KASSERT(mret == mold, + ("invalid page replacement, mold=%p, mret=%p", mold, mret)); KASSERT((mold->oflags & VPO_UNMANAGED) == (mnew->oflags & VPO_UNMANAGED), ("vm_page_replace: mismatched VPO_UNMANAGED")); @@ -1731,10 +1766,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) /* Keep the resident page list in sorted order. */ TAILQ_INSERT_AFTER(&object->memq, mold, mnew, listq); TAILQ_REMOVE(&object->memq, mold, listq); - mold->object = NULL; - atomic_clear_int(&mold->ref_count, VPRC_OBJREF); - vm_page_xunbusy(mold); /* * The object's resident_page_count does not change because we have @@ -1743,7 +1775,19 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) */ if (pmap_page_is_write_mapped(mnew)) vm_object_set_writeable_dirty(object); - return (mold); + dropped = vm_page_drop(mold, VPRC_OBJREF) == VPRC_OBJREF; + vm_page_xunbusy(mold); + + return (dropped); +} + +void +vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, + vm_page_t mold) +{ + + if (vm_page_replace_hold(mnew, object, pindex, mold)) + vm_page_free(mold); } /* @@ -2761,9 +2805,9 @@ retry: m_new->dirty = m->dirty; m->flags &= ~PG_ZERO; vm_page_dequeue(m); - vm_page_replace_checked(m_new, object, - m->pindex, m); - if (vm_page_free_prep(m)) + if (vm_page_replace_hold(m_new, object, + m->pindex, m) && + vm_page_free_prep(m)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); @@ -3644,7 +3688,7 @@ vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq) * The object must be locked. The page must be locked if it is * managed. */ -bool +static bool vm_page_free_prep(vm_page_t m) { @@ -3654,6 +3698,9 @@ vm_page_free_prep(vm_page_t m) */ atomic_thread_fence_acq(); + if (vm_page_sbusied(m)) + panic("vm_page_free_prep: freeing shared busy page %p", m); + #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) { uint64_t *p; @@ -3675,9 +3722,6 @@ vm_page_free_prep(vm_page_t m) } VM_CNT_INC(v_tfree); - if (vm_page_sbusied(m)) - panic("vm_page_free_prep: freeing shared busy page %p", m); - if (m->object != NULL) { KASSERT(((m->oflags & VPO_UNMANAGED) != 0) == ((m->object->flags & OBJ_UNMANAGED) != 0), @@ -3695,10 +3739,11 @@ vm_page_free_prep(vm_page_t m) m, m->ref_count)); m->object = NULL; m->ref_count -= VPRC_OBJREF; + vm_page_xunbusy(m); } if (vm_page_xbusied(m)) - vm_page_xunbusy(m); + panic("vm_page_free_prep: freeing exclusive busy page %p", m); /* * If fictitious remove object association and @@ -3754,7 +3799,7 @@ vm_page_free_prep(vm_page_t m) * The object must be locked. The page must be locked if it is * managed. */ -void +static void vm_page_free_toq(vm_page_t m) { struct vm_domain *vmd; @@ -4099,19 +4144,15 @@ vm_page_release(vm_page_t m, int flags) if (object == NULL) break; /* Depends on type-stability. */ - if (vm_page_busied(m) || !VM_OBJECT_TRYWLOCK(object)) { - object = NULL; + if (vm_page_busied(m) || !VM_OBJECT_TRYWLOCK(object)) break; + if (object == m->object) { + vm_page_release_locked(m, flags); + VM_OBJECT_WUNLOCK(object); + return; } - if (object == m->object) - break; VM_OBJECT_WUNLOCK(object); } - if (__predict_true(object != NULL)) { - vm_page_release_locked(m, flags); - VM_OBJECT_WUNLOCK(object); - return; - } } /* @@ -4158,7 +4199,7 @@ vm_page_release_locked(vm_page_t m, int flags) if (vm_page_unwire_noq(m)) { if ((flags & VPR_TRYFREE) != 0 && (m->object->ref_count == 0 || !pmap_page_is_mapped(m)) && - m->dirty == 0 && !vm_page_busied(m)) { + m->dirty == 0 && vm_page_tryxbusy(m)) { vm_page_free(m); } else { vm_page_lock(m); diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index a31b299fabb..79576f9d66d 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -622,7 +622,6 @@ void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); void vm_page_dequeue_deferred(vm_page_t m); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); -bool vm_page_free_prep(vm_page_t m); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); @@ -646,9 +645,10 @@ void vm_page_reference(vm_page_t m); void vm_page_release(vm_page_t m, int flags); void vm_page_release_locked(vm_page_t m, int flags); bool vm_page_remove(vm_page_t); +bool vm_page_remove_xbusy(vm_page_t); int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t); -vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, - vm_pindex_t pindex); +void vm_page_replace(vm_page_t mnew, vm_object_t object, + vm_pindex_t pindex, vm_page_t mold); void vm_page_requeue(vm_page_t m); int vm_page_sbusied(vm_page_t m); vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start, @@ -681,7 +681,6 @@ int vm_page_is_valid(vm_page_t, int, int); void vm_page_test_dirty(vm_page_t); vm_page_bits_t vm_page_bits(int base, int size); void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid); -void vm_page_free_toq(vm_page_t m); void vm_page_free_pages_toq(struct spglist *free, bool update_wire_count); void vm_page_dirty_KBI(vm_page_t m); @@ -902,21 +901,6 @@ vm_page_undirty(vm_page_t m) m->dirty = 0; } -static inline void -vm_page_replace_checked(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, - vm_page_t mold) -{ - vm_page_t mret; - - mret = vm_page_replace(mnew, object, pindex); - KASSERT(mret == mold, - ("invalid page replacement, mold=%p, mret=%p", mold, mret)); - - /* Unused if !INVARIANTS. */ - (void)mold; - (void)mret; -} - /* * vm_page_queue: *