diff --git a/sbin/mdconfig/mdconfig.8 b/sbin/mdconfig/mdconfig.8
index cf05d8f43ef..b91ffb3cb91 100644
--- a/sbin/mdconfig/mdconfig.8
+++ b/sbin/mdconfig/mdconfig.8
@@ -37,7 +37,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd August 28, 2017
+.Dd December 21, 2018
.Dt MDCONFIG 8
.Os
.Sh NAME
@@ -206,6 +206,32 @@ backed devices: avoid
.Dv IO_SYNC
for increased performance but
at the risk of deadlocking the entire kernel.
+.It Oo Cm no Oc Ns Cm cache
+For
+.Cm vnode
+backed devices: enable/disable caching of data in system caches.
+The default is to not cache.
+.Pp
+Accesses via the device are converted to accesses via the vnode.
+The caching policy for the vnode is used initially.
+This is normally to cache.
+This caching policy is retained if the
+.Cm cache
+option is used.
+Otherwise, caching is limited
+by releasing data from caches soon after each access.
+The release has the same semantics as the
+.Dv POSIX_FADV_DONTNEED
+feature of
+.Xr posix_fadvise 2 .
+The result is that with normal (non-ZFS) caching,
+buffers are released from the buffer cache soon after they are constructed,
+but their data is kept in the page cache at lower priority.
+.Pp
+The
+.Cm cache
+option tends to waste memory by giving unwanted double caching,
+but it saves time if there is memory to spare.
.It Oo Cm no Oc Ns Cm reserve
Allocate and reserve all needed storage from the start, rather than as needed.
.It Oo Cm no Oc Ns Cm cluster
diff --git a/sbin/mdconfig/mdconfig.c b/sbin/mdconfig/mdconfig.c
index f6f642c0a02..852909aa903 100644
--- a/sbin/mdconfig/mdconfig.c
+++ b/sbin/mdconfig/mdconfig.c
@@ -88,7 +88,7 @@ usage(void)
" mdconfig -l [-v] [-n] [-f file] [-u unit]\n"
" mdconfig file\n");
fprintf(stderr, "\t\ttype = {malloc, vnode, swap}\n");
- fprintf(stderr, "\t\toption = {cluster, compress, force,\n");
+ fprintf(stderr, "\t\toption = {cache, cluster, compress, force,\n");
fprintf(stderr, "\t\t readonly, reserve, ro, verify}\n");
fprintf(stderr, "\t\tsize = %%d (512 byte blocks), %%db (B),\n");
fprintf(stderr, "\t\t %%dk (kB), %%dm (MB), %%dg (GB), \n");
@@ -178,6 +178,10 @@ main(int argc, char **argv)
mdio.md_options |= MD_ASYNC;
else if (!strcmp(optarg, "noasync"))
mdio.md_options &= ~MD_ASYNC;
+ else if (!strcmp(optarg, "cache"))
+ mdio.md_options |= MD_CACHE;
+ else if (!strcmp(optarg, "nocache"))
+ mdio.md_options &= ~MD_CACHE;
else if (!strcmp(optarg, "cluster"))
mdio.md_options |= MD_CLUSTER;
else if (!strcmp(optarg, "nocluster"))
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index de5cde68a7d..b2f6f912fa6 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -880,7 +880,7 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
struct buf *pb;
bus_dma_segment_t *vlist;
struct thread *td;
- off_t iolen, len, zerosize;
+ off_t iolen, iostart, len, zerosize;
int ma_offs, npages;
switch (bp->bio_cmd) {
@@ -983,13 +983,10 @@ unmapped_step:
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
}
- /*
- * When reading set IO_DIRECT to try to avoid double-caching
- * the data. When writing IO_DIRECT is not optimal.
- */
+ iostart = auio.uio_offset;
if (auio.uio_rw == UIO_READ) {
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred);
+ error = VOP_READ(vp, &auio, 0, sc->cred);
VOP_UNLOCK(vp, 0);
} else {
(void) vn_start_write(vp, &mp, V_WAIT);
@@ -1002,6 +999,11 @@ unmapped_step:
sc->flags &= ~MD_VERIFY;
}
+ /* Unless MD_CACHE is set, try to avoid double-caching the data. */
+ if (error == 0 && (sc->flags & MD_CACHE) == 0)
+ VOP_ADVISE(vp, iostart, auio.uio_offset - 1,
+ POSIX_FADV_DONTNEED);
+
if (pb != NULL) {
pmap_qremove((vm_offset_t)pb->b_data, npages);
if (error == 0) {
@@ -1464,7 +1466,8 @@ mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td)
sc->fwheads = mdr->md_fwheads;
snprintf(sc->ident, sizeof(sc->ident), "MD-DEV%ju-INO%ju",
(uintmax_t)vattr.va_fsid, (uintmax_t)vattr.va_fileid);
- sc->flags = mdr->md_options & (MD_FORCE | MD_ASYNC | MD_VERIFY);
+ sc->flags = mdr->md_options & (MD_ASYNC | MD_CACHE | MD_FORCE |
+ MD_VERIFY);
if (!(flags & FWRITE))
sc->flags |= MD_READONLY;
sc->vnode = nd.ni_vp;
@@ -2184,6 +2187,9 @@ g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
g_conf_printf_escaped(sb, "%s", mp->file);
sbuf_printf(sb, "\n");
}
+ if (mp->type == MD_VNODE)
+ sbuf_printf(sb, "%s%s\n", indent,
+ (mp->flags & MD_CACHE) == 0 ? "off": "on");
sbuf_printf(sb, "%s\n");
diff --git a/sys/sys/mdioctl.h b/sys/sys/mdioctl.h
index 37a3804fe79..eac81012bdf 100644
--- a/sys/sys/mdioctl.h
+++ b/sys/sys/mdioctl.h
@@ -92,5 +92,6 @@ struct md_ioctl {
#define MD_FORCE 0x20 /* Don't try to prevent foot-shooting */
#define MD_ASYNC 0x40 /* Asynchronous mode */
#define MD_VERIFY 0x80 /* Open file with O_VERIFY (vnode only) */
+#define MD_CACHE 0x100 /* Cache vnode data */
#endif /* _SYS_MDIOCTL_H_*/