vfs: Add support for file cloning to VOP_COPY_FILE_RANGE

NFSv4 has a CLONE operation that is separate from COPY and
has a couple of semantic differences. Unlike COPY, CLONE
must complete the "copy on write" and cannot return with
the range only partially copied. It is also required to use
offsets (and a length, if not cloning to EOF) that are aligned
to a buffer boundary.

Since VOP_COPY_FILE_RANGE() can already do "copy on write"
for file systems that support it, such as ZFS with block
cloning enabled, all this patch does is add a flag called
COPY_FILE_RANGE_CLONE. When the flag is set, the operation
must conform to the rule that the "copy on write" is done
to completion.

The patch also adds a new pathconf(2) name _PC_CLONE_BLKSIZE,
which reports the block size requirement for cloning and
returns 0 for file systems that do not support the
"copy on write" feature. (This is needed for the NFSv4.2
clone_blksize attribute.)

This patch will allow the implementation of CLONE
for NFSv4.2.

Reviewed by:	asomers
Differential Revision:	https://reviews.freebsd.org/D51808
This commit is contained in:
Rick Macklem 2025-08-07 17:52:23 -07:00
parent e4c0ecba44
commit 37b2cb5ecb
6 changed files with 24 additions and 5 deletions

View file

@ -877,6 +877,9 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
pid_t pid;
int err;
if ((ap->a_flags & COPY_FILE_RANGE_CLONE) != 0)
return (EXTERROR(ENOSYS, "Cannot clone"));
if (mp == NULL || mp != vnode_mount(outvp))
return (EXTERROR(ENOSYS, "Mount points do not match"));

View file

@ -457,6 +457,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap)
case _PC_NAMEDATTR_ENABLED:
case _PC_HAS_NAMEDATTR:
case _PC_HAS_HIDDENSYSTEM:
case _PC_CLONE_BLKSIZE:
*ap->a_retval = 0;
return (0);
default:

View file

@ -5058,7 +5058,7 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
error = 0;
retlen = 0;
if (flags != 0) {
if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) {
error = EINVAL;
goto out;
}

View file

@ -3443,6 +3443,11 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
interrupted = 0;
dat = NULL;
if ((flags & COPY_FILE_RANGE_CLONE) != 0) {
error = ENOSYS;
goto out;
}
error = vn_lock(invp, LK_SHARED);
if (error != 0)
goto out;

View file

@ -159,6 +159,7 @@
#define _PC_XATTR_ENABLED _PC_NAMEDATTR_ENABLED /* Solaris Compatible */
#define _PC_XATTR_EXISTS _PC_HAS_NAMEDATTR /* Solaris Compatible */
#define _PC_HAS_HIDDENSYSTEM 68
#define _PC_CLONE_BLKSIZE 69
#endif
/* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */

View file

@ -397,8 +397,21 @@ struct vattr {
*/
#define VLKTIMEOUT (hz / 20 + 1)
/* copy_file_range flags */
#define COPY_FILE_RANGE_KFLAGS 0xff000000
/*
* copy_file_range flags visible to user space.
* Allocate high bits first, to try and avoid conflicting with Linux.
*/
#define COPY_FILE_RANGE_CLONE 0x00800000 /* Require cloning. */
#define COPY_FILE_RANGE_USERFLAGS (COPY_FILE_RANGE_CLONE)
#ifdef _KERNEL
/* copy_file_range flags only usable in the kernel */
#define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after 1sec. */
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_VNODE);
#endif
@ -621,10 +634,6 @@ typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int);
#define VN_OPEN_INVFS 0x00000008
#define VN_OPEN_WANTIOCTLCAPS 0x00000010
/* copy_file_range kernel flags */
#define COPY_FILE_RANGE_KFLAGS 0xff000000
#define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after 1sec. */
/*
* Public vnode manipulation functions.
*/