From c2d36fc5cd1e11754c0fe9df231a0056627d2edb Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Sat, 16 Apr 2016 07:35:53 +0000 Subject: [PATCH] zfs: enable vn_io_fault support Note that now we have to account for possible partial writes in dmu_write_uio_dbuf(). It seems that on illumos either all or none of the data are expected to be written. But the partial writes are quite expected when vn_io_fault support is enabled. Reviewed by: kib MFC after: 7 weeks Differential Revision: https://reviews.freebsd.org/D2790 --- .../opensolaris/uts/common/fs/zfs/dmu.c | 10 +++++ .../uts/common/fs/zfs/zfs_vfsops.c | 1 + .../opensolaris/uts/common/fs/zfs/zfs_vnops.c | 39 ++++++++++++++++++- 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c index 706ec94c97f..bb81879eae8 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1092,8 +1092,13 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) else XUIOSTAT_BUMP(xuiostat_rbuf_copied); } else { +#ifdef illumos err = uiomove((char *)db->db_data + bufoff, tocpy, UIO_READ, uio); +#else + err = vn_io_fault_uiomove((char *)db->db_data + bufoff, + tocpy, uio); +#endif } if (err) break; @@ -1187,6 +1192,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) else dmu_buf_will_dirty(db, tx); +#ifdef illumos /* * XXX uiomove could block forever (eg. nfs-backed * pages). There needs to be a uiolockdown() function @@ -1195,6 +1201,10 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) */ err = uiomove((char *)db->db_data + bufoff, tocpy, UIO_WRITE, uio); +#else + err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy, + uio); +#endif if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c index 78d3fdcdb74..932b61a31ee 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c @@ -1170,6 +1170,7 @@ zfs_domount(vfs_t *vfsp, char *osname) vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; + vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ /* * The fsid is 64 bits, composed of an 8-bit fs type, which diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 96b1cec1228..544067114f8 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -656,7 +656,11 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); +#ifdef illumos error = uiomove(va + off, bytes, UIO_READ, uio); +#else + error = vn_io_fault_uiomove(va + off, bytes, uio); +#endif zfs_unmap_page(sf); zfs_vmobject_wlock(obj); page_unhold(pp); @@ -1034,18 +1038,31 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) * holding up the transaction if the data copy hangs * up on a pagefault (e.g., from an NFS server mapping). */ +#ifdef illumos size_t cbytes; +#endif abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), max_blksz); ASSERT(abuf != NULL); ASSERT(arc_buf_size(abuf) == max_blksz); +#ifdef illumos if (error = uiocopy(abuf->b_data, max_blksz, UIO_WRITE, uio, &cbytes)) { dmu_return_arcbuf(abuf); break; } ASSERT(cbytes == max_blksz); +#else + ssize_t resid = uio->uio_resid; + error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio); + if (error != 0) { + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + dmu_return_arcbuf(abuf); + break; + } +#endif } /* @@ -1123,8 +1140,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } +#ifdef illumos ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); +#endif } if (tx_bytes && vn_has_cached_data(vp)) { update_pages(vp, woff, tx_bytes, zfsvfs->z_os, @@ -1178,7 +1197,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) while ((end_size = zp->z_size) < uio->uio_loffset) { (void) atomic_cas_64(&zp->z_size, end_size, uio->uio_loffset); +#ifdef illumos ASSERT(error == 0); +#else + ASSERT(error == 0 || error == EFAULT); +#endif } /* * If we are replaying and eof is non zero then force @@ -1188,7 +1211,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) zp->z_size = zfsvfs->z_replay_eof; - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + if (error == 0) + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + else + (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); dmu_tx_commit(tx); @@ -1215,6 +1241,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) return (error); } +#ifdef __FreeBSD__ + /* + * EFAULT means that at least one page of the source buffer was not + * available. VFS will re-try remaining I/O upon this error. + */ + if (error == EFAULT) { + ZFS_EXIT(zfsvfs); + return (error); + } +#endif + if (ioflag & (FSYNC | FDSYNC) || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, zp->z_id);