From 6470c8d3db6d04a44682b6cc898d446dcccc0ef0 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 29 Aug 2019 07:50:25 +0000 Subject: [PATCH] Rework v_object lifecycle for vnodes. Current implementation of vnode_create_vobject() and vnode_destroy_vobject() is written so that it prepared to handle the vm object destruction for live vnode. Practically, no filesystems use this, except for some remnants that were present in UFS till today. One of the consequences of that model is that each filesystem must call vnode_destroy_vobject() in VOP_RECLAIM() or earlier, as result all of them get rid of the v_object in reclaim. Move the call to vnode_destroy_vobject() to vgonel() before VOP_RECLAIM(). This makes v_object stable: either the object is NULL, or it is valid vm object till the vnode reclamation. Remove code from vnode_create_vobject() to handle races with the parallel destruction. Reviewed by: markj Tested by: pho Sponsored by: The FreeBSD Foundation Differential revision: https://reviews.freebsd.org/D21412 --- .../opensolaris/uts/common/fs/zfs/zfs_vnops.c | 3 -- sys/fs/cd9660/cd9660_node.c | 4 -- sys/fs/devfs/devfs_vnops.c | 1 - sys/fs/ext2fs/ext2_inode.c | 1 - sys/fs/fuse/fuse_vnops.c | 1 - sys/fs/msdosfs/msdosfs_denode.c | 4 -- sys/fs/nfsclient/nfs_clnode.c | 5 -- sys/fs/smbfs/smbfs_node.c | 4 -- sys/fs/tmpfs/tmpfs_vnops.c | 2 - sys/fs/udf/udf_vnops.c | 5 -- sys/kern/vfs_subr.c | 26 +++++++--- sys/ufs/ufs/ufs_extern.h | 1 - sys/ufs/ufs/ufs_inode.c | 39 +++++--------- sys/vm/vm_object.h | 1 - sys/vm/vnode_pager.c | 51 +++++-------------- 15 files changed, 45 insertions(+), 103 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index fa3085d2def..30e312104bb 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -5375,9 +5375,6 @@ zfs_freebsd_reclaim(ap) ASSERT(zp != NULL); - /* Destroy the vm object and flush associated pages. */ - vnode_destroy_vobject(vp); - /* * z_teardown_inactive_lock protects from a race with * zfs_znode_dmu_fini in zfsvfs_teardown during diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c index 17d70d869b7..c74bf4a3005 100644 --- a/sys/fs/cd9660/cd9660_node.c +++ b/sys/fs/cd9660/cd9660_node.c @@ -91,10 +91,6 @@ cd9660_reclaim(ap) { struct vnode *vp = ap->a_vp; - /* - * Destroy the vm object and flush associated pages. - */ - vnode_destroy_vobject(vp); /* * Remove the inode from its hash chain. */ diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index 590334f1615..ab6e7e13b8b 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -1428,7 +1428,6 @@ devfs_reclaim(struct vop_reclaim_args *ap) vp->v_data = NULL; } mtx_unlock(&devfs_de_interlock); - vnode_destroy_vobject(vp); return (0); } diff --git a/sys/fs/ext2fs/ext2_inode.c b/sys/fs/ext2fs/ext2_inode.c index 4f0de4b6f5c..d5e05cbe474 100644 --- a/sys/fs/ext2fs/ext2_inode.c +++ b/sys/fs/ext2fs/ext2_inode.c @@ -639,6 +639,5 @@ ext2_reclaim(struct vop_reclaim_args *ap) vfs_hash_remove(vp); free(vp->v_data, M_EXT2NODE); vp->v_data = 0; - vnode_destroy_vobject(vp); return (0); } diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 537f1b734c2..c886527b480 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1537,7 +1537,6 @@ fuse_vnop_reclaim(struct vop_reclaim_args *ap) fuse_vnode_setparent(vp, NULL); cache_purge(vp); vfs_hash_remove(vp); - vnode_destroy_vobject(vp); fuse_vnode_destroy(vp); return 0; diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c index 19ba40fed97..858cf1b5890 100644 --- a/sys/fs/msdosfs/msdosfs_denode.c +++ b/sys/fs/msdosfs/msdosfs_denode.c @@ -551,10 +551,6 @@ msdosfs_reclaim(struct vop_reclaim_args *ap) dep, dep->de_Name, dep->de_refcnt); #endif - /* - * Destroy the vm object and flush associated pages. - */ - vnode_destroy_vobject(vp); /* * Remove the denode from its hash chain. */ diff --git a/sys/fs/nfsclient/nfs_clnode.c b/sys/fs/nfsclient/nfs_clnode.c index 9e54f6e9f23..0baf710d9cf 100644 --- a/sys/fs/nfsclient/nfs_clnode.c +++ b/sys/fs/nfsclient/nfs_clnode.c @@ -296,11 +296,6 @@ ncl_reclaim(struct vop_reclaim_args *ap) ncl_releasesillyrename(vp, ap->a_td); mtx_unlock(&np->n_mtx); - /* - * Destroy the vm object and flush associated pages. - */ - vnode_destroy_vobject(vp); - if (NFS_ISV4(vp) && vp->v_type == VREG) /* * We can now safely close any remaining NFSv4 Opens for diff --git a/sys/fs/smbfs/smbfs_node.c b/sys/fs/smbfs/smbfs_node.c index 299d07a0b4a..16b5de93846 100644 --- a/sys/fs/smbfs/smbfs_node.c +++ b/sys/fs/smbfs/smbfs_node.c @@ -272,10 +272,6 @@ smbfs_reclaim(ap) KASSERT((np->n_flag & NOPEN) == 0, ("file not closed before reclaim")); - /* - * Destroy the vm object and flush associated pages. - */ - vnode_destroy_vobject(vp); dvp = (np->n_parent && (np->n_flag & NREFPARENT)) ? np->n_parent : NULL; diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index 1b3aa6d2db9..1c181bc3a59 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -1317,8 +1317,6 @@ tmpfs_reclaim(struct vop_reclaim_args *v) if (vp->v_type == VREG) tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj); - else - vnode_destroy_vobject(vp); vp->v_object = NULL; if (tmpfs_use_nc(vp)) cache_purge(vp); diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c index b1b004f9516..3f0666054f9 100644 --- a/sys/fs/udf/udf_vnops.c +++ b/sys/fs/udf/udf_vnops.c @@ -1266,11 +1266,6 @@ udf_reclaim(struct vop_reclaim_args *a) vp = a->a_vp; unode = VTON(vp); - /* - * Destroy the vm object and flush associated pages. - */ - vnode_destroy_vobject(vp); - if (unode != NULL) { vfs_hash_remove(vp); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 1af4aa02883..b6ec815fca0 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -3481,9 +3481,9 @@ static void vgonel(struct vnode *vp) { struct thread *td; - int oweinact; - int active; struct mount *mp; + vm_object_t object; + bool active, oweinact; ASSERT_VOP_ELOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); @@ -3503,8 +3503,8 @@ vgonel(struct vnode *vp) * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ - active = vp->v_usecount; - oweinact = (vp->v_iflag & VI_OWEINACT); + active = vp->v_usecount > 0; + oweinact = (vp->v_iflag & VI_OWEINACT) != 0; VI_UNLOCK(vp); vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); @@ -3543,13 +3543,25 @@ vgonel(struct vnode *vp) ("vp %p bufobj not invalidated", vp)); /* - * For VMIO bufobj, BO_DEAD is set in vm_object_terminate() - * after the object's page queue is flushed. + * For VMIO bufobj, BO_DEAD is set later, or in + * vm_object_terminate() after the object's page queue is + * flushed. */ - if (vp->v_bufobj.bo_object == NULL) + object = vp->v_bufobj.bo_object; + if (object == NULL) vp->v_bufobj.bo_flag |= BO_DEAD; BO_UNLOCK(&vp->v_bufobj); + /* + * Handle the VM part. Tmpfs handles v_object on its own (the + * OBJT_VNODE check). Nullfs or other bypassing filesystems + * should not touch the object borrowed from the lower vnode + * (the handle check). + */ + if (object != NULL && object->type == OBJT_VNODE && + object->handle == vp) + vnode_destroy_vobject(vp); + /* * Reclaim the vnode. */ diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index db75d3a2f66..9a326a92eb3 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -79,7 +79,6 @@ int ufs_inactive(struct vop_inactive_args *); int ufs_init(struct vfsconf *); void ufs_itimes(struct vnode *vp); int ufs_lookup(struct vop_cachedlookup_args *); -void ufs_prepare_reclaim(struct vnode *vp); int ufs_readdir(struct vop_readdir_args *); int ufs_reclaim(struct vop_reclaim_args *); void ffs_snapgone(struct inode *); diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index 7ef5e6fb8ad..8fd54b384d3 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -179,31 +179,6 @@ out: return (error); } -void -ufs_prepare_reclaim(struct vnode *vp) -{ - struct inode *ip; -#ifdef QUOTA - int i; -#endif - - ip = VTOI(vp); - - vnode_destroy_vobject(vp); -#ifdef QUOTA - for (i = 0; i < MAXQUOTAS; i++) { - if (ip->i_dquot[i] != NODQUOT) { - dqrele(vp, ip->i_dquot[i]); - ip->i_dquot[i] = NODQUOT; - } - } -#endif -#ifdef UFS_DIRHASH - if (ip->i_dirhash != NULL) - ufsdirhash_free(ip); -#endif -} - /* * Reclaim an inode so that it can be used for other purposes. */ @@ -216,8 +191,20 @@ ufs_reclaim(ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); +#ifdef QUOTA + int i; - ufs_prepare_reclaim(vp); + for (i = 0; i < MAXQUOTAS; i++) { + if (ip->i_dquot[i] != NODQUOT) { + dqrele(vp, ip->i_dquot[i]); + ip->i_dquot[i] = NODQUOT; + } + } +#endif +#ifdef UFS_DIRHASH + if (ip->i_dirhash != NULL) + ufsdirhash_free(ip); +#endif if (ip->i_flag & IN_LAZYMOD) ip->i_flag |= IN_MODIFIED; diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 8625e1022ca..3910e3d1952 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -189,7 +189,6 @@ struct vm_object { #define OBJ_TMPFS_DIRTY 0x0400 /* dirty tmpfs obj */ #define OBJ_COLORED 0x1000 /* pg_color is defined */ #define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */ -#define OBJ_DISCONNECTWNT 0x4000 /* disconnect from vnode wanted */ #define OBJ_TMPFS 0x8000 /* has tmpfs vnode allocated */ /* diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index ed286e0024c..a12a4b80def 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -147,17 +147,9 @@ vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td) if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE) return (0); - while ((object = vp->v_object) != NULL) { - VM_OBJECT_WLOCK(object); - if (!(object->flags & OBJ_DEAD)) { - VM_OBJECT_WUNLOCK(object); - return (0); - } - VOP_UNLOCK(vp, 0); - vm_object_set_flag(object, OBJ_DISCONNECTWNT); - VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vodead", 0); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - } + object = vp->v_object; + if (object != NULL) + return (0); if (size == 0) { if (vn_isdisk(vp, NULL)) { @@ -190,10 +182,11 @@ vnode_destroy_vobject(struct vnode *vp) struct vm_object *obj; obj = vp->v_object; - if (obj == NULL) + if (obj == NULL || obj->handle != vp) return; ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject"); VM_OBJECT_WLOCK(obj); + MPASS(obj->type == OBJT_VNODE); umtx_shm_object_terminated(obj); if (obj->ref_count == 0) { /* @@ -223,9 +216,6 @@ vnode_destroy_vobject(struct vnode *vp) * prevented new waiters from referencing the dying * object. */ - KASSERT((obj->flags & OBJ_DISCONNECTWNT) == 0, - ("OBJ_DISCONNECTWNT set obj %p flags %x", - obj, obj->flags)); vp->v_object = NULL; VM_OBJECT_WUNLOCK(obj); } @@ -243,8 +233,6 @@ vnode_destroy_vobject(struct vnode *vp) /* * Allocate (or lookup) pager for a vnode. * Handle is a vnode pointer. - * - * MPSAFE */ vm_object_t vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, @@ -259,28 +247,18 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, if (handle == NULL) return (NULL); - vp = (struct vnode *) handle; - - /* - * If the object is being terminated, wait for it to - * go away. - */ -retry: - while ((object = vp->v_object) != NULL) { - VM_OBJECT_WLOCK(object); - if ((object->flags & OBJ_DEAD) == 0) - break; - vm_object_set_flag(object, OBJ_DISCONNECTWNT); - VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vadead", 0); - } - + vp = (struct vnode *)handle; + ASSERT_VOP_LOCKED(vp, "vnode_pager_alloc"); KASSERT(vp->v_usecount != 0, ("vnode_pager_alloc: no vnode reference")); +retry: + object = vp->v_object; if (object == NULL) { /* * Add an object of the appropriate size */ - object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size))); + object = vm_object_allocate(OBJT_VNODE, + OFF_TO_IDX(round_page(size))); object->un_pager.vnp.vnp_size = size; object->un_pager.vnp.writemappings = 0; @@ -290,7 +268,7 @@ retry: VI_LOCK(vp); if (vp->v_object != NULL) { /* - * Object has been created while we were sleeping + * Object has been created while we were allocating. */ VI_UNLOCK(vp); VM_OBJECT_WLOCK(object); @@ -305,6 +283,7 @@ retry: vp->v_object = object; VI_UNLOCK(vp); } else { + VM_OBJECT_WLOCK(object); object->ref_count++; #if VM_NRESERVLEVEL > 0 vm_object_color(object, 0); @@ -334,10 +313,6 @@ vnode_pager_dealloc(vm_object_t object) object->handle = NULL; object->type = OBJT_DEAD; - if (object->flags & OBJ_DISCONNECTWNT) { - vm_object_clear_flag(object, OBJ_DISCONNECTWNT); - wakeup(object); - } ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc"); if (object->un_pager.vnp.writemappings > 0) { object->un_pager.vnp.writemappings = 0;