From e838f09cd02ff3a4835d20e9f0bb227858f30afa Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 31 Jul 2012 18:25:00 +0000 Subject: [PATCH] Reorder the management of advisory locks on open files so that the advisory lock is obtained before the write count is increased during open() and the lock is released after the write count is decreased during close(). The first change closes a race where an open() that will block with O_SHLOCK or O_EXLOCK can increase the write count while it waits. If the process holding the current lock on the file then tries to call exec() on the file it has locked, it can fail with ETXTBSY even though the advisory lock is preventing other threads from successfully completing a writable open(). The second change closes a race where a read-only open() with O_SHLOCK or O_EXLOCK may return successfully while the write count is non-zero due to another descriptor that had the advisory lock and was blocking the open() still being in the process of closing. If the process that completed the open() then attempts to call exec() on the file it locked, it can fail with ETXTBSY even though the other process that held a write lock has closed the file and released the lock. 
Reviewed by: kib MFC after: 1 month --- sys/kern/vfs_syscalls.c | 44 +++++------------------------- sys/kern/vfs_vnops.c | 60 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 43 deletions(-) diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 66ab5a31975..cc6d93cb988 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1093,8 +1093,7 @@ kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct file *fp; struct vnode *vp; int cmode; - int type, indx = -1, error; - struct flock lf; + int indx = -1, error; struct nameidata nd; int vfslocked; cap_rights_t rights_needed = CAP_LOOKUP; @@ -1180,26 +1179,11 @@ kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, if (fp->f_ops == &badfileops) { KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); fp->f_seqcount = 1; - finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops); + finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, + vp, &vnops); } VOP_UNLOCK(vp, 0); - if (fp->f_type == DTYPE_VNODE && (flags & (O_EXLOCK | O_SHLOCK)) != 0) { - lf.l_whence = SEEK_SET; - lf.l_start = 0; - lf.l_len = 0; - if (flags & O_EXLOCK) - lf.l_type = F_WRLCK; - else - lf.l_type = F_RDLCK; - type = F_FLOCK; - if ((flags & FNONBLOCK) == 0) - type |= F_WAIT; - if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, - type)) != 0) - goto bad; - atomic_set_int(&fp->f_flag, FHASLOCK); - } if (flags & O_TRUNC) { error = fo_truncate(fp, 0, td->td_ucred, td); if (error) @@ -4483,9 +4467,8 @@ sys_fhopen(td, uap) struct mount *mp; struct vnode *vp; struct fhandle fhp; - struct flock lf; struct file *fp; - int fmode, error, type; + int fmode, error; int vfslocked; int indx; @@ -4542,24 +4525,9 @@ sys_fhopen(td, uap) #endif fp->f_vnode = vp; fp->f_seqcount = 1; - finit(fp, fmode & FMASK, DTYPE_VNODE, vp, &vnops); + finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, + &vnops); VOP_UNLOCK(vp, 0); - if (fmode & (O_EXLOCK | 
O_SHLOCK)) { - lf.l_whence = SEEK_SET; - lf.l_start = 0; - lf.l_len = 0; - if (fmode & O_EXLOCK) - lf.l_type = F_WRLCK; - else - lf.l_type = F_RDLCK; - type = F_FLOCK; - if ((fmode & FNONBLOCK) == 0) - type |= F_WAIT; - if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, - type)) != 0) - goto bad; - atomic_set_int(&fp->f_flag, FHASLOCK); - } if (fmode & O_TRUNC) { error = fo_truncate(fp, 0, td->td_ucred, td); if (error) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index b5dcadf2d34..0ad2db82388 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -229,8 +229,10 @@ int vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, struct thread *td, struct file *fp) { + struct mount *mp; accmode_t accmode; - int error; + struct flock lf; + int error, have_flock, lock_flags, type; VFS_ASSERT_GIANT(vp->v_mount); if (vp->v_type == VLNK) @@ -271,6 +273,51 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0) return (error); + if (fmode & (O_EXLOCK | O_SHLOCK)) { + KASSERT(fp != NULL, ("open with flock requires fp")); + lock_flags = VOP_ISLOCKED(vp); + VOP_UNLOCK(vp, 0); + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + if (fmode & O_EXLOCK) + lf.l_type = F_WRLCK; + else + lf.l_type = F_RDLCK; + type = F_FLOCK; + if ((fmode & FNONBLOCK) == 0) + type |= F_WAIT; + error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); + have_flock = (error == 0); + vn_lock(vp, lock_flags | LK_RETRY); + if (error == 0 && vp->v_iflag & VI_DOOMED) + error = ENOENT; + /* + * Another thread might have used this vnode as an + * executable while the vnode lock was dropped. + * Ensure the vnode is still able to be opened for + * writing after the lock has been obtained. 
+ */ + if (error == 0 && accmode & VWRITE) + error = vn_writechk(vp); + if (error) { + VOP_UNLOCK(vp, 0); + if (have_flock) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + lf.l_type = F_UNLCK; + (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, + F_FLOCK); + } + vn_start_write(vp, &mp, V_WAIT); + vn_lock(vp, lock_flags | LK_RETRY); + (void)VOP_CLOSE(vp, fmode, cred, td); + vn_finished_write(mp); + return (error); + } + fp->f_flag |= FHASLOCK; + } if (fmode & FWRITE) { vp->v_writecount++; CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", @@ -1400,19 +1447,22 @@ vn_closefile(fp, td) int error; vp = fp->f_vnode; + fp->f_ops = &badfileops; vfslocked = VFS_LOCK_GIANT(vp->v_mount); + if (fp->f_type == DTYPE_VNODE && fp->f_flag & FHASLOCK) + vref(vp); + + error = vn_close(vp, fp->f_flag, fp->f_cred, td); + if (fp->f_type == DTYPE_VNODE && fp->f_flag & FHASLOCK) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); + vrele(vp); } - - fp->f_ops = &badfileops; - - error = vn_close(vp, fp->f_flag, fp->f_cred, td); VFS_UNLOCK_GIANT(vfslocked); return (error); }