diff --git a/sbin/mount_fusefs/mount_fusefs.8 b/sbin/mount_fusefs/mount_fusefs.8 index 30eb7c0acb7..259101a3863 100644 --- a/sbin/mount_fusefs/mount_fusefs.8 +++ b/sbin/mount_fusefs/mount_fusefs.8 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 19, 2019 +.Dd June 14, 2019 .Dt MOUNT_FUSEFS 8 .Os .Sh NAME @@ -136,21 +136,24 @@ The following options are available (and also their negated versions, by prefixing them with .Dq no ) : .Bl -tag -width indent -.It Cm default_permissions -Enable traditional (file mode based) permission checking in kernel .It Cm allow_other Do not apply .Sx STRICT ACCESS POLICY . Only root can use this option +.It Cm async +I/O to the file system may be done asynchronously. +Writes may delayed and/or reordered. +.It Cm default_permissions +Enable traditional (file mode based) permission checking in kernel .It Cm max_read Ns = Ns Ar n Limit size of read requests to .Ar n +.It Cm neglect_shares +Do not refuse unmounting if there are secondary mounts .It Cm private Refuse shared mounting of the daemon. This is the default behaviour, to allow sharing, expicitly use .Fl o Cm noprivate -.It Cm neglect_shares -Do not refuse unmounting if there are secondary mounts .It Cm push_symlinks_in Prefix absolute symlinks with the mountpoint .It Cm subtype Ns = Ns Ar fsname diff --git a/sbin/mount_fusefs/mount_fusefs.c b/sbin/mount_fusefs/mount_fusefs.c index e6131ec8b06..b5dda5077dc 100644 --- a/sbin/mount_fusefs/mount_fusefs.c +++ b/sbin/mount_fusefs/mount_fusefs.c @@ -88,6 +88,8 @@ static struct mntopt mopts[] = { { "large_read", 0, 0x00, 1 }, /* "nonempty", just the first two chars are stripped off during parsing */ { "nempty", 0, 0x00, 1 }, + { "async", 0, MNT_ASYNC, 0}, + { "noasync", 1, MNT_ASYNC, 0}, MOPT_STDOPTS, MOPT_END }; diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index c373300953f..06cfb2a933b 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -565,11 +565,13 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio, daddr_t lbn; off_t filesize; int bcount; - int n, on, err = 0; + int n, on, seqcount, err = 0; bool last_page; const int biosize = fuse_iosize(vp); + seqcount = ioflag >> IO_SEQSHIFT; + KASSERT(uio->uio_rw == UIO_WRITE, ("fuse_write_biobackend mode")); if (vp->v_type != VREG) return (EIO); @@ -644,6 +646,7 @@ again: * with other readers */ err = fuse_vnode_setsize(vp, uio->uio_offset + n); + filesize = uio->uio_offset + n; fvdat->flag |= FN_SIZECHANGE; if (err) { brelse(bp); @@ -787,20 +790,22 @@ again: } else if (vm_page_count_severe() || buf_dirty_count_severe() || (ioflag & IO_ASYNC)) { - /* TODO: enable write clustering later */ + bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp); bawrite(bp); } else if (on == 0 && n == bcount) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { + bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 4, bp); - bdwrite(bp); + cluster_write(vp, bp, filesize, seqcount, 0); } else { SDT_PROBE2(fusefs, , io, write_biobackend_issue, 5, bp); bawrite(bp); } } else if (ioflag & IO_DIRECT) { + bp->b_flags |= B_CLUSTEROK; SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp); bawrite(bp); } else { diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c index 6a5adac8720..64d7ccc7600 100644 --- a/sys/fs/fuse/fuse_vfsops.c +++ b/sys/fs/fuse/fuse_vfsops.c @@ -433,6 +433,7 @@ fuse_vfsop_mount(struct mount *mp) } copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &len); bzero(mp->mnt_stat.f_mntfromname + len, MNAMELEN - len); + mp->mnt_iosize_max = MAXPHYS; /* Now handshaking with daemon */ fuse_internal_send_init(data, td); diff --git a/tests/sys/fs/fusefs/mockfs.cc b/tests/sys/fs/fusefs/mockfs.cc index c5cefe9560f..4d3f0f42150 100644 --- a/tests/sys/fs/fusefs/mockfs.cc +++ b/tests/sys/fs/fusefs/mockfs.cc @@ -336,7 +336,7 @@ void MockFS::debug_response(const mockfs_buf_out &out) { MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions, bool push_symlinks_in, bool ro, enum poll_method pm, uint32_t flags, - uint32_t kernel_minor_version) + uint32_t kernel_minor_version, uint32_t max_write, bool async) { struct sigaction sa; struct iovec *iov = NULL; @@ -347,6 +347,7 @@ MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions, m_daemon_id = NULL; m_kernel_minor_version = kernel_minor_version; m_maxreadahead = max_readahead; + m_maxwrite = max_write; m_nready = -1; m_pm = pm; m_quit = false; @@ -404,6 +405,10 @@ MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions, build_iovec(&iov, &iovlen, "ro", __DECONST(void*, &trueval), sizeof(bool)); } + if (async) { + build_iovec(&iov, &iovlen, "async", __DECONST(void*, &trueval), + sizeof(bool)); + } if (nmount(iov, iovlen, 0)) throw(std::system_error(errno, std::system_category(), "Couldn't mount filesystem")); @@ -449,15 +454,7 @@ void MockFS::init(uint32_t flags) { out->body.init.minor = m_kernel_minor_version;; out->body.init.flags = in->body.init.flags & flags; - /* - * The default max_write is set to this formula in libfuse, though - * individual filesystems can lower it. The "- 4096" was added in - * commit 154ffe2, with the commit message "fix". - */ - uint32_t default_max_write = 32 * getpagesize() + 0x1000 - 4096; - /* For testing purposes, it should be distinct from MAXPHYS */ - m_max_write = MIN(default_max_write, MAXPHYS / 2); - out->body.init.max_write = m_max_write; + out->body.init.max_write = m_maxwrite; out->body.init.max_readahead = m_maxreadahead; SET_OUT_HEADER_LEN(*out, init); diff --git a/tests/sys/fs/fusefs/mockfs.hh b/tests/sys/fs/fusefs/mockfs.hh index a6f0474cc05..e79976fe9a0 100644 --- a/tests/sys/fs/fusefs/mockfs.hh +++ b/tests/sys/fs/fusefs/mockfs.hh @@ -252,7 +252,7 @@ class MockFS { int m_kq; - /* The max_readahead filesystem option */ + /* The max_readahead file system option */ uint32_t m_maxreadahead; /* pid of the test process */ @@ -288,7 +288,7 @@ class MockFS { pid_t m_child_pid; /* Maximum size of a FUSE_WRITE write */ - uint32_t m_max_write; + uint32_t m_maxwrite; /* * Number of events that were available from /dev/fuse after the last @@ -303,7 +303,7 @@ class MockFS { MockFS(int max_readahead, bool allow_other, bool default_permissions, bool push_symlinks_in, bool ro, enum poll_method pm, uint32_t flags, - uint32_t kernel_minor_version); + uint32_t kernel_minor_version, uint32_t max_write, bool async); virtual ~MockFS(); diff --git a/tests/sys/fs/fusefs/utils.cc b/tests/sys/fs/fusefs/utils.cc index 51195ed6f49..fae956fe23d 100644 --- a/tests/sys/fs/fusefs/utils.cc +++ b/tests/sys/fs/fusefs/utils.cc @@ -50,6 +50,20 @@ extern "C" { using namespace testing; +/* + * The default max_write is set to this formula in libfuse, though + * individual filesystems can lower it. The "- 4096" was added in + * commit 154ffe2, with the commit message "fix". + */ +const uint32_t libfuse_max_write = 32 * getpagesize() + 0x1000 - 4096; + +/* + * Set the default max_write to a distinct value from MAXPHYS to catch bugs + * that confuse the two. + */ +const uint32_t default_max_write = MIN(libfuse_max_write, MAXPHYS / 2); + + /* Check that fusefs(4) is accessible and the current user can mount(2) */ void check_environment() { @@ -98,7 +112,8 @@ void FuseTest::SetUp() { try { m_mock = new MockFS(m_maxreadahead, m_allow_other, m_default_permissions, m_push_symlinks_in, m_ro, - m_pm, m_init_flags, m_kernel_minor_version); + m_pm, m_init_flags, m_kernel_minor_version, + m_maxwrite, m_async); /* * FUSE_ACCESS is called almost universally. Expecting it in * each test case would be super-annoying. Instead, set a diff --git a/tests/sys/fs/fusefs/utils.hh b/tests/sys/fs/fusefs/utils.hh index b746444e2cc..82b32bceafe 100644 --- a/tests/sys/fs/fusefs/utils.hh +++ b/tests/sys/fs/fusefs/utils.hh @@ -40,9 +40,12 @@ inline void nap() usleep(NAP_NS / 1000); } +extern const uint32_t libfuse_max_write; +extern const uint32_t default_max_write; class FuseTest : public ::testing::Test { protected: uint32_t m_maxreadahead; + uint32_t m_maxwrite; uint32_t m_init_flags; bool m_allow_other; bool m_default_permissions; @@ -50,6 +53,7 @@ class FuseTest : public ::testing::Test { enum poll_method m_pm; bool m_push_symlinks_in; bool m_ro; + bool m_async; MockFS *m_mock = NULL; const static uint64_t FH = 0xdeadbeef1a7ebabe; @@ -62,13 +66,15 @@ class FuseTest : public ::testing::Test { * be lowered */ m_maxreadahead(UINT_MAX), + m_maxwrite(default_max_write), m_init_flags(0), m_allow_other(false), m_default_permissions(false), m_kernel_minor_version(FUSE_KERNEL_MINOR_VERSION), m_pm(BLOCKING), m_push_symlinks_in(false), - m_ro(false) + m_ro(false), + m_async(false) {} virtual void SetUp(); diff --git a/tests/sys/fs/fusefs/write.cc b/tests/sys/fs/fusefs/write.cc index 31b7f90a585..407effab7db 100644 --- a/tests/sys/fs/fusefs/write.cc +++ b/tests/sys/fs/fusefs/write.cc @@ -29,7 +29,7 @@ */ extern "C" { -#include +#include #include #include #include @@ -179,6 +179,19 @@ void expect_write(uint64_t ino, uint64_t offset, uint64_t isize, } }; +/* Tests for clustered writes with WriteBack cacheing */ +class WriteCluster: public WriteBack { +public: +virtual void SetUp() { + if (MAXPHYS < 2 * DFLTPHYS) + GTEST_SKIP() << "MAXPHYS must be at least twice DFLTPHYS" + << "for this test"; + m_async = true; + m_maxwrite = MAXPHYS; + WriteBack::SetUp(); +} +}; + void sigxfsz_handler(int __unused sig) { Write::s_sigxfsz = 1; } @@ -617,7 +630,7 @@ TEST_F(Write, write_large) int fd; ssize_t halfbufsize, bufsize; - halfbufsize = m_mock->m_max_write; + halfbufsize = m_mock->m_maxwrite; bufsize = halfbufsize * 2; contents = (int*)malloc(bufsize); ASSERT_NE(NULL, contents); @@ -711,6 +724,89 @@ TEST_F(WriteBack, close) close(fd); } +/* In writeback mode, adjacent writes will be clustered together */ +TEST_F(WriteCluster, clustering) +{ + const char FULLPATH[] = "mountpoint/some_file.txt"; + const char RELPATH[] = "some_file.txt"; + uint64_t ino = 42; + int i, fd; + void *wbuf, *wbuf2x; + ssize_t bufsize = 65536; + off_t filesize = 327680; + + wbuf = malloc(bufsize); + ASSERT_NE(NULL, wbuf) << strerror(errno); + memset(wbuf, 'X', bufsize); + wbuf2x = malloc(2 * bufsize); + ASSERT_NE(NULL, wbuf2x) << strerror(errno); + memset(wbuf2x, 'X', 2 * bufsize); + + expect_lookup(RELPATH, ino, filesize); + expect_open(ino, 0, 1); + /* + * Writes of bufsize-bytes each should be clustered into greater sizes. + * The amount of clustering is adaptive, so the first write actually + * issued will be 2x bufsize and subsequent writes may be larger + */ + expect_write(ino, 0, 2 * bufsize, 2 * bufsize, wbuf2x); + expect_write(ino, 2 * bufsize, 2 * bufsize, 2 * bufsize, wbuf2x); + expect_flush(ino, 1, ReturnErrno(0)); + expect_release(ino, ReturnErrno(0)); + + fd = open(FULLPATH, O_RDWR); + ASSERT_LE(0, fd) << strerror(errno); + + for (i = 0; i < 4; i++) { + ASSERT_EQ(bufsize, write(fd, wbuf, bufsize)) + << strerror(errno); + } + close(fd); +} + +/* + * When clustering writes, an I/O error to any of the cluster's children should + * not panic the system on unmount + */ +/* + * Disabled because it panics. + * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=238565 + */ +TEST_F(WriteCluster, DISABLED_cluster_write_err) +{ + const char FULLPATH[] = "mountpoint/some_file.txt"; + const char RELPATH[] = "some_file.txt"; + uint64_t ino = 42; + int i, fd; + void *wbuf; + ssize_t bufsize = 65536; + off_t filesize = 262144; + + wbuf = malloc(bufsize); + ASSERT_NE(NULL, wbuf) << strerror(errno); + memset(wbuf, 'X', bufsize); + + expect_lookup(RELPATH, ino, filesize); + expect_open(ino, 0, 1); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_WRITE); + }, Eq(true)), + _) + ).WillRepeatedly(Invoke(ReturnErrno(EIO))); + expect_flush(ino, 1, ReturnErrno(0)); + expect_release(ino, ReturnErrno(0)); + + fd = open(FULLPATH, O_RDWR); + ASSERT_LE(0, fd) << strerror(errno); + + for (i = 0; i < 3; i++) { + ASSERT_EQ(bufsize, write(fd, wbuf, bufsize)) + << strerror(errno); + } + close(fd); +} + /* * In writeback mode, writes to an O_WRONLY file could trigger reads from the * server. The FUSE protocol explicitly allows that.