From b53077a9e75133e245344f5d6f5805b16f4512d1 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Thu, 21 Sep 2023 08:36:26 -0700 Subject: [PATCH 1/7] Add zfs_prepare_disk script for disk firmware install Have libzfs call a special `zfs_prepare_disk` script before a disk is included into the pool. The user can edit this script to add things like a disk firmware update or a disk health check. Use of the script is totally optional. See the zfs_prepare_disk manpage for full details. Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Closes #15243 --- cmd/zed/agents/zfs_mod.c | 43 +++++- cmd/zpool/zpool_iter.c | 33 +--- cmd/zpool/zpool_util.h | 4 + cmd/zpool/zpool_vdev.c | 43 +++++- config/Rules.am | 1 + contrib/debian/openzfs-zfsutils.install | 2 + include/libzfs.h | 9 ++ lib/libzfs/libzfs.abi | 4 + lib/libzfs/libzfs_util.c | 193 ++++++++++++++++++++++++ man/Makefile.am | 1 + man/man8/.gitignore | 1 + man/man8/zfs_prepare_disk.8.in | 70 +++++++++ scripts/Makefile.am | 2 + scripts/zfs_prepare_disk | 17 +++ 14 files changed, 388 insertions(+), 35 deletions(-) create mode 100644 man/man8/zfs_prepare_disk.8.in create mode 100755 scripts/zfs_prepare_disk diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 2f040ff7582..b2c008ad1d0 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -146,6 +146,17 @@ zfs_unavail_pool(zpool_handle_t *zhp, void *data) return (0); } +/* + * Write an array of strings to the zed log + */ +static void lines_to_zed_log_msg(char **lines, int lines_cnt) +{ + int i; + for (i = 0; i < lines_cnt; i++) { + zed_log_msg(LOG_INFO, "%s", lines[i]); + } +} + /* * Two stage replace on Linux * since we get disk notifications @@ -200,6 +211,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) boolean_t is_mpath_wholedisk = B_FALSE; uint_t c; vdev_stat_t *vs; + char **lines = NULL; + int lines_cnt = 0; if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0) return; @@ -383,6 +396,22 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) if (is_mpath_wholedisk) { /* Don't label device mapper or multipath disks. */ + zed_log_msg(LOG_INFO, + " it's a multipath wholedisk, don't label"); + if (zpool_prepare_disk(zhp, vdev, "autoreplace", &lines, + &lines_cnt) != 0) { + zed_log_msg(LOG_INFO, + " zpool_prepare_disk: could not " + "prepare '%s' (%s)", fullpath, + libzfs_error_description(g_zfshdl)); + if (lines_cnt > 0) { + zed_log_msg(LOG_INFO, + " zfs_prepare_disk output:"); + lines_to_zed_log_msg(lines, lines_cnt); + } + libzfs_free_str_array(lines, lines_cnt); + return; + } } else if (!labeled) { /* * we're auto-replacing a raw disk, so label it first @@ -405,10 +434,18 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) * If this is a request to label a whole disk, then attempt to * write out the label. */ - if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) { - zed_log_msg(LOG_INFO, " zpool_label_disk: could not " + if (zpool_prepare_and_label_disk(g_zfshdl, zhp, leafname, + vdev, "autoreplace", &lines, &lines_cnt) != 0) { + zed_log_msg(LOG_INFO, + " zpool_prepare_and_label_disk: could not " "label '%s' (%s)", leafname, libzfs_error_description(g_zfshdl)); + if (lines_cnt > 0) { + zed_log_msg(LOG_INFO, + " zfs_prepare_disk output:"); + lines_to_zed_log_msg(lines, lines_cnt); + } + libzfs_free_str_array(lines, lines_cnt); (void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT, &newstate); @@ -468,6 +505,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) DEV_BYID_PATH, new_devid); } + libzfs_free_str_array(lines, lines_cnt); + /* * Construct the root vdev to pass to zpool_vdev_attach(). While adding * the entire vdev structure is harmless, we construct a reduced set of diff --git a/cmd/zpool/zpool_iter.c b/cmd/zpool/zpool_iter.c index 7c6549b0ae5..506b529dce4 100644 --- a/cmd/zpool/zpool_iter.c +++ b/cmd/zpool/zpool_iter.c @@ -443,37 +443,22 @@ vdev_run_cmd(vdev_cmd_data_t *data, char *cmd) { int rc; char *argv[2] = {cmd}; - char *env[5] = {(char *)"PATH=/bin:/sbin:/usr/bin:/usr/sbin"}; + char **env; char **lines = NULL; int lines_cnt = 0; int i; - /* Setup our custom environment variables */ - rc = asprintf(&env[1], "VDEV_PATH=%s", - data->path ? data->path : ""); - if (rc == -1) { - env[1] = NULL; + env = zpool_vdev_script_alloc_env(data->pool, data->path, data->upath, + data->vdev_enc_sysfs_path, NULL, NULL); + if (env == NULL) goto out; - } - - rc = asprintf(&env[2], "VDEV_UPATH=%s", - data->upath ? data->upath : ""); - if (rc == -1) { - env[2] = NULL; - goto out; - } - - rc = asprintf(&env[3], "VDEV_ENC_SYSFS_PATH=%s", - data->vdev_enc_sysfs_path ? - data->vdev_enc_sysfs_path : ""); - if (rc == -1) { - env[3] = NULL; - goto out; - } /* Run the command */ rc = libzfs_run_process_get_stdout_nopath(cmd, argv, env, &lines, &lines_cnt); + + zpool_vdev_script_free_env(env); + if (rc != 0) goto out; @@ -485,10 +470,6 @@ vdev_run_cmd(vdev_cmd_data_t *data, char *cmd) out: if (lines != NULL) libzfs_free_str_array(lines, lines_cnt); - - /* Start with i = 1 since env[0] was statically allocated */ - for (i = 1; i < ARRAY_SIZE(env); i++) - free(env[i]); } /* diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index b35dea0cd44..db8e631dc6b 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -126,6 +126,10 @@ vdev_cmd_data_list_t *all_pools_for_each_vdev_run(int argc, char **argv, void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl); +void free_vdev_cmd_data(vdev_cmd_data_t *data); + +int vdev_run_cmd_simple(char *path, char *cmd); + int check_device(const char *path, boolean_t force, boolean_t isspare, boolean_t iswholedisk); boolean_t check_sector_size_database(char *path, int *sector_size); diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 99a521aa2a2..3d0fc089c32 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -936,6 +936,15 @@ zero_label(const char *path) return (0); } +static void +lines_to_stderr(char *lines[], int lines_cnt) +{ + int i; + for (i = 0; i < lines_cnt; i++) { + fprintf(stderr, "%s\n", lines[i]); + } +} + /* * Go through and find any whole disks in the vdev specification, labelling them * as appropriate. When constructing the vdev spec, we were unable to open this @@ -947,7 +956,7 @@ zero_label(const char *path) * need to get the devid after we label the disk. */ static int -make_disks(zpool_handle_t *zhp, nvlist_t *nv) +make_disks(zpool_handle_t *zhp, nvlist_t *nv, boolean_t replacing) { nvlist_t **child; uint_t c, children; @@ -1032,6 +1041,8 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) */ if (!is_exclusive && !is_spare(NULL, udevpath)) { char *devnode = strrchr(devpath, '/') + 1; + char **lines = NULL; + int lines_cnt = 0; ret = strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT)); if (ret == 0) { @@ -1043,9 +1054,27 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) /* * When labeling a pool the raw device node name * is provided as it appears under /dev/. + * + * Note that 'zhp' will be NULL when we're creating a + * pool. */ - if (zpool_label_disk(g_zfs, zhp, devnode) == -1) + if (zpool_prepare_and_label_disk(g_zfs, zhp, devnode, + nv, zhp == NULL ? "create" : + replacing ? "replace" : "add", &lines, + &lines_cnt) != 0) { + (void) fprintf(stderr, + gettext( + "Error preparing/labeling disk.\n")); + if (lines_cnt > 0) { + (void) fprintf(stderr, + gettext("zfs_prepare_disk output:\n")); + lines_to_stderr(lines, lines_cnt); + } + + libzfs_free_str_array(lines, lines_cnt); return (-1); + } + libzfs_free_str_array(lines, lines_cnt); /* * Wait for udev to signal the device is available @@ -1082,19 +1111,19 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) } for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + if ((ret = make_disks(zhp, child[c], replacing)) != 0) return (ret); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + if ((ret = make_disks(zhp, child[c], replacing)) != 0) return (ret); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + if ((ret = make_disks(zhp, child[c], replacing)) != 0) return (ret); return (0); @@ -1752,7 +1781,7 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, return (NULL); } - if (!flags.dryrun && make_disks(zhp, newroot) != 0) { + if (!flags.dryrun && make_disks(zhp, newroot, B_FALSE) != 0) { nvlist_free(newroot); return (NULL); } @@ -1873,7 +1902,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep, /* * Run through the vdev specification and label any whole disks found. */ - if (!dryrun && make_disks(zhp, newroot) != 0) { + if (!dryrun && make_disks(zhp, newroot, replacing) != 0) { nvlist_free(newroot); return (NULL); } diff --git a/config/Rules.am b/config/Rules.am index abb4ced3323..7c266964f3f 100644 --- a/config/Rules.am +++ b/config/Rules.am @@ -33,6 +33,7 @@ AM_CPPFLAGS += -D_REENTRANT AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64 AM_CPPFLAGS += -D_LARGEFILE64_SOURCE AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\" +AM_CPPFLAGS += -DZFSEXECDIR=\"$(zfsexecdir)\" AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\" AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\" AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\" diff --git a/contrib/debian/openzfs-zfsutils.install b/contrib/debian/openzfs-zfsutils.install index 301d8f67b3a..e2ce5084c09 100644 --- a/contrib/debian/openzfs-zfsutils.install +++ b/contrib/debian/openzfs-zfsutils.install @@ -35,6 +35,7 @@ usr/bin/zvol_wait usr/lib/modules-load.d/ lib/ usr/lib/zfs-linux/zpool.d/ usr/lib/zfs-linux/zpool_influxdb +usr/lib/zfs-linux/zfs_prepare_disk usr/sbin/arc_summary usr/sbin/arcstat usr/sbin/dbufstat @@ -88,6 +89,7 @@ usr/share/man/man8/zfs-wait.8 usr/share/man/man8/zfs-zone.8 usr/share/man/man8/zfs.8 usr/share/man/man8/zfs_ids_to_path.8 +usr/share/man/man8/zfs_prepare_disk.8 usr/share/man/man7/zfsconcepts.7 usr/share/man/man7/zfsprops.7 usr/share/man/man8/zgenhostid.8 diff --git a/include/libzfs.h b/include/libzfs.h index fa05b7921bb..0b5501bbe39 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -326,6 +326,15 @@ _LIBZFS_H nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); _LIBZFS_H int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *); +_LIBZFS_H int zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv, + const char *prepare_str, char **lines[], int *lines_cnt); +_LIBZFS_H int zpool_prepare_and_label_disk(libzfs_handle_t *hdl, + zpool_handle_t *, const char *, nvlist_t *vdev_nv, const char *prepare_str, + char **lines[], int *lines_cnt); +_LIBZFS_H char ** zpool_vdev_script_alloc_env(const char *pool_name, + const char *vdev_path, const char *vdev_upath, + const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val); +_LIBZFS_H void zpool_vdev_script_free_env(char **env); _LIBZFS_H uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path); diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 0a8e9bcbd74..907b0191f75 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -514,6 +514,8 @@ + + @@ -561,6 +563,8 @@ + + diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index b94abea3d58..fdd1975fa67 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -2071,3 +2071,196 @@ printf_color(const char *color, const char *format, ...) return (rc); } + +/* PATH + 5 env vars + a NULL entry = 7 */ +#define ZPOOL_VDEV_SCRIPT_ENV_COUNT 7 + +/* + * There's a few places where ZFS will call external scripts (like the script + * in zpool.d/ and `zfs_prepare_disk`). These scripts are called with a + * reduced $PATH, and some vdev specific environment vars set. This function + * will allocate an populate the environment variable array that is passed to + * these scripts. The user must free the arrays with zpool_vdev_free_env() when + * they are done. + * + * The following env vars will be set (but value could be blank): + * + * POOL_NAME + * VDEV_PATH + * VDEV_UPATH + * VDEV_ENC_SYSFS_PATH + * + * In addition, you can set an optional environment variable named 'opt_key' + * to 'opt_val' if you want. + * + * Returns allocated env[] array on success, NULL otherwise. + */ +char ** +zpool_vdev_script_alloc_env(const char *pool_name, + const char *vdev_path, const char *vdev_upath, + const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val) +{ + char **env = NULL; + int rc; + + env = calloc(ZPOOL_VDEV_SCRIPT_ENV_COUNT, sizeof (*env)); + if (!env) + return (NULL); + + env[0] = strdup("PATH=/bin:/sbin:/usr/bin:/usr/sbin"); + if (!env[0]) + goto error; + + /* Setup our custom environment variables */ + rc = asprintf(&env[1], "POOL_NAME=%s", pool_name ? pool_name : ""); + if (rc == -1) { + env[1] = NULL; + goto error; + } + + rc = asprintf(&env[2], "VDEV_PATH=%s", vdev_path ? vdev_path : ""); + if (rc == -1) { + env[2] = NULL; + goto error; + } + + rc = asprintf(&env[3], "VDEV_UPATH=%s", vdev_upath ? vdev_upath : ""); + if (rc == -1) { + env[3] = NULL; + goto error; + } + + rc = asprintf(&env[4], "VDEV_ENC_SYSFS_PATH=%s", + vdev_enc_sysfs_path ? vdev_enc_sysfs_path : ""); + if (rc == -1) { + env[4] = NULL; + goto error; + } + + if (opt_key != NULL) { + rc = asprintf(&env[5], "%s=%s", opt_key, + opt_val ? opt_val : ""); + if (rc == -1) { + env[5] = NULL; + goto error; + } + } + + return (env); + +error: + for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++) + free(env[i]); + + free(env); + + return (NULL); +} + +/* + * Free the env[] array that was allocated by zpool_vdev_script_alloc_env(). + */ +void +zpool_vdev_script_free_env(char **env) +{ + for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++) + free(env[i]); + + free(env); +} + +/* + * Prepare a disk by (optionally) running a program before labeling the disk. + * This can be useful for installing disk firmware or doing some pre-flight + * checks on the disk before it becomes part of the pool. The program run is + * located at ZFSEXECDIR/zfs_prepare_disk + * (E.x: /usr/local/libexec/zfs/zfs_prepare_disk). + * + * Return 0 on success, non-zero on failure. + */ +int +zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv, + const char *prepare_str, char **lines[], int *lines_cnt) +{ + const char *script_path = ZFSEXECDIR "/zfs_prepare_disk"; + const char *pool_name; + int rc = 0; + + /* Path to script and a NULL entry */ + char *argv[2] = {(char *)script_path}; + char **env = NULL; + const char *path = NULL, *enc_sysfs_path = NULL; + char *upath; + *lines_cnt = 0; + + if (access(script_path, X_OK) != 0) { + /* No script, nothing to do */ + return (0); + } + + (void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH, &path); + (void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + &enc_sysfs_path); + + upath = zfs_get_underlying_path(path); + pool_name = zhp ? zpool_get_name(zhp) : NULL; + + env = zpool_vdev_script_alloc_env(pool_name, path, upath, + enc_sysfs_path, "VDEV_PREPARE", prepare_str); + + free(upath); + + if (env == NULL) { + return (ENOMEM); + } + + rc = libzfs_run_process_get_stdout(script_path, argv, env, lines, + lines_cnt); + + zpool_vdev_script_free_env(env); + + return (rc); +} + +/* + * Optionally run a script and then label a disk. The script can be used to + * prepare a disk for inclusion into the pool. For example, it might update + * the disk's firmware or check its health. + * + * The 'name' provided is the short name, stripped of any leading + * /dev path, and is passed to zpool_label_disk. vdev_nv is the nvlist for + * the vdev. prepare_str is a string that gets passed as the VDEV_PREPARE + * env variable to the script. + * + * The following env vars are passed to the script: + * + * POOL_NAME: The pool name (blank during zpool create) + * VDEV_PREPARE: Reason why the disk is being prepared for inclusion: + * "create", "add", "replace", or "autoreplace" + * VDEV_PATH: Path to the disk + * VDEV_UPATH: One of the 'underlying paths' to the disk. This is + * useful for DM devices. + * VDEV_ENC_SYSFS_PATH: Path to the disk's enclosure sysfs path, if available. + * + * Note, some of these values can be blank. + * + * Return 0 on success, non-zero otherwise. + */ +int +zpool_prepare_and_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, + const char *name, nvlist_t *vdev_nv, const char *prepare_str, + char **lines[], int *lines_cnt) +{ + int rc; + char vdev_path[MAXPATHLEN]; + (void) snprintf(vdev_path, sizeof (vdev_path), "%s/%s", DISK_ROOT, + name); + + /* zhp will be NULL when creating a pool */ + rc = zpool_prepare_disk(zhp, vdev_nv, prepare_str, lines, lines_cnt); + if (rc != 0) + return (rc); + + rc = zpool_label_disk(hdl, zhp, name); + return (rc); +} diff --git a/man/Makefile.am b/man/Makefile.am index 36c1aede106..45156571eec 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -62,6 +62,7 @@ dist_man_MANS = \ %D%/man8/zfs-userspace.8 \ %D%/man8/zfs-wait.8 \ %D%/man8/zfs_ids_to_path.8 \ + %D%/man8/zfs_prepare_disk.8 \ %D%/man8/zgenhostid.8 \ %D%/man8/zinject.8 \ %D%/man8/zpool.8 \ diff --git a/man/man8/.gitignore b/man/man8/.gitignore index f2fc702147e..a468f9cbf9d 100644 --- a/man/man8/.gitignore +++ b/man/man8/.gitignore @@ -1,2 +1,3 @@ /zed.8 /zfs-mount-generator.8 +/zfs_prepare_disk.8 diff --git a/man/man8/zfs_prepare_disk.8.in b/man/man8/zfs_prepare_disk.8.in new file mode 100644 index 00000000000..2a741531e41 --- /dev/null +++ b/man/man8/zfs_prepare_disk.8.in @@ -0,0 +1,70 @@ +.\" +.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049). +.\" Copyright (C) 2023 Lawrence Livermore National Security, LLC. +.\" Refer to the OpenZFS git commit log for authoritative copyright attribution. +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License Version 1.0 (CDDL-1.0). +.\" You can obtain a copy of the license from the top-level file +.\" "OPENSOLARIS.LICENSE" or at . +.\" You may not use this file except in compliance with the license. +.\" +.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049) +.\" +.Dd August 30, 2023 +.Dt ZFS_PREPARE_DISK 8 +.Os +. +.Sh NAME +.Nm zfs_prepare_disk +.Nd special script that gets run before bringing a disk into a pool +.Sh DESCRIPTION +.Nm +is an optional script that gets called by libzfs before bringing a disk into a +pool. +It can be modified by the user to run whatever commands are necessary to prepare +a disk for inclusion into the pool. +For example, users can add lines to +.Nm zfs_prepare_disk +to do things like update the drive's firmware or check the drive's health. +.Nm zfs_prepare_disk +is optional and can be removed if not needed. +libzfs will look for the script at @zfsexecdir@/zfs_prepare_disk. +. +.Ss Properties +.Nm zfs_prepare_disk +will be passed the following environment variables: +.sp +.Bl -tag -compact -width "VDEV_ENC_SYSFS_PATH" +. +.It Nm POOL_NAME +.No Name of the pool +.It Nm VDEV_PATH +.No Path to the disk (like /dev/sda) +.It Nm VDEV_PREPARE +.No Reason why the disk is being prepared for inclusion +('create', 'add', 'replace', or 'autoreplace'). +This can be useful if you only want the script to be run under certain actions. +.It Nm VDEV_UPATH +.No Path to one of the underlying devices for the +disk. +For multipath this would return one of the /dev/sd* paths to the disk. +If the device is not a device mapper device, then +.Nm VDEV_UPATH +just returns the same value as +.Nm VDEV_PATH +.It Nm VDEV_ENC_SYSFS_PATH +.No Path to the disk's enclosure sysfs path, if available +.El +.Pp +Note that some of these variables may have a blank value. +.Nm POOL_NAME +is blank at pool creation time, for example. +.Sh ENVIRONMENT +.Nm zfs_prepare_disk +runs with a limited $PATH. +.Sh EXIT STATUS +.Nm zfs_prepare_disk +should return 0 on success, non-zero otherwise. +If non-zero is returned, the disk will not be included in the pool. +. diff --git a/scripts/Makefile.am b/scripts/Makefile.am index 4175d27ea32..07543456643 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -20,6 +20,8 @@ scripts_scripts = \ if CONFIG_USER dist_scripts_SCRIPTS = $(scripts_scripts) +dist_zfsexec_SCRIPTS = \ + %D%/zfs_prepare_disk else dist_noinst_SCRIPTS += $(scripts_scripts) endif diff --git a/scripts/zfs_prepare_disk b/scripts/zfs_prepare_disk new file mode 100755 index 00000000000..02aa9f8a772 --- /dev/null +++ b/scripts/zfs_prepare_disk @@ -0,0 +1,17 @@ +#!/bin/sh +# +# This is an optional helper script that is automatically called by libzfs +# before a disk is about to be added into the pool. It can be modified by +# the user to run whatever commands are necessary to prepare a disk for +# inclusion into the pool. For example, users can add lines to this +# script to do things like update the drive's firmware or check the drive's +# health. The script is optional and can be removed if it is not needed. +# +# See the zfs_prepare_disk(8) man page for details. +# +# Example: +# +# echo "Prepare disk $VDEV_PATH ($VDEV_UPATH) for $VDEV_PREPARE in $POOL_NAME" +# + +exit 0 From 2dc89b922bd12f86db193b7cd8d87159dee93bc6 Mon Sep 17 00:00:00 2001 From: Rob N Date: Fri, 22 Sep 2023 10:54:15 +1000 Subject: [PATCH 2/7] tests/block_cloning: try harder to stay on same txg in fallback test We've observed this test failing intermittently. When it does, the "same block" check shows that both files have the same content, that is, the file was cloned. The only way this could have happened is if the open txg moved between the dd and clonefile calls. That's possible because although we set zfs_txg_timeout to be large, that only affects the wait time in the sync thread at the start of a new txg; it doesn't change anything if its currently waiting or working. So here we just force the txgs to move immediately before, which should get both operations onto the same txg as intented. Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Reviewed-by: Brian Behlendorf Signed-off-by: Rob Norris Rob Norris Closes #15303 --- .../block_cloning_copyfilerange_fallback_same_txg.ksh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh index a10545bc076..74c5a5bece6 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh @@ -51,6 +51,7 @@ log_onexit cleanup log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS log_must set_tunable64 TXG_TIMEOUT 5000 +log_must sync_pool $TESTPOOL true log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4 log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288 From b37f29341b9aec1b6e45d55b6f7a1bc5b2723f7a Mon Sep 17 00:00:00 2001 From: Coleman Kane Date: Mon, 11 Sep 2023 23:21:29 -0400 Subject: [PATCH 3/7] Linux 6.6 compat: use inode_get/set_ctime*(...) In Linux commit 13bc24457850583a2e7203ded05b7209ab4bc5ef, direct access to the i_ctime member of struct inode was removed. The new approach is to use accessor methods that exclusively handle passing the timestamp around by value. This change adds new tests for each of these functions and introduces zpl_* equivalents in include/os/linux/zfs/sys/zpl.h. In where the inode_get/set_ctime*() functions exist, these zpl_* calls will be mapped to the new functions. On older kernels, these macros just wrap direct-access calls. The code that operated on an address of ip->i_ctime to call ZFS_TIME_DECODE() now will take a local copy using zpl_inode_get_ctime(), and then pass the address of the local copy when performing the ZFS_TIME_DECODE() call, in all cases, rather than directly accessing the member. Reviewed-by: Brian Behlendorf Signed-off-by: Coleman Kane Closes #15263 Closes #15257 --- config/kernel-inode-times.m4 | 43 ++++++++++++++++++++++++++++++ include/os/linux/zfs/sys/zpl.h | 11 ++++++++ module/os/linux/zfs/zfs_ctldir.c | 2 +- module/os/linux/zfs/zfs_vnops_os.c | 12 ++++++--- module/os/linux/zfs/zfs_znode.c | 18 ++++++++----- module/os/linux/zfs/zpl_inode.c | 2 +- module/os/linux/zfs/zpl_xattr.c | 7 ++--- 7 files changed, 80 insertions(+), 15 deletions(-) diff --git a/config/kernel-inode-times.m4 b/config/kernel-inode-times.m4 index 9c016c79008..412e13b47df 100644 --- a/config/kernel-inode-times.m4 +++ b/config/kernel-inode-times.m4 @@ -27,6 +27,31 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [ memset(&ip, 0, sizeof(ip)); ts = ip.i_mtime; ]) + + dnl # + dnl # 6.6 API change + dnl # i_ctime no longer directly accessible, must use + dnl # inode_get_ctime(ip), inode_set_ctime*(ip) to + dnl # read/write. + dnl # + ZFS_LINUX_TEST_SRC([inode_get_ctime], [ + #include + ],[ + struct inode ip; + + memset(&ip, 0, sizeof(ip)); + inode_get_ctime(&ip); + ]) + + ZFS_LINUX_TEST_SRC([inode_set_ctime_to_ts], [ + #include + ],[ + struct inode ip; + struct timespec64 ts; + + memset(&ip, 0, sizeof(ip)); + inode_set_ctime_to_ts(&ip, ts); + ]) ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ @@ -47,4 +72,22 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1, [inode->i_*time's are timespec64]) ]) + + AC_MSG_CHECKING([whether inode_get_ctime() exists]) + ZFS_LINUX_TEST_RESULT([inode_get_ctime], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_GET_CTIME, 1, + [inode_get_ctime() exists in linux/fs.h]) + ],[ + AC_MSG_RESULT(no) + ]) + + AC_MSG_CHECKING([whether inode_set_ctime_to_ts() exists]) + ZFS_LINUX_TEST_RESULT([inode_set_ctime_to_ts], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_SET_CTIME_TO_TS, 1, + [inode_set_ctime_to_ts() exists in linux/fs.h]) + ],[ + AC_MSG_RESULT(no) + ]) ]) diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index 0bd20f64897..f4f1dcf95d4 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -263,4 +263,15 @@ extern long zpl_ioctl_fideduperange(struct file *filp, void *arg); #define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(dentry, ia) #endif +#ifdef HAVE_INODE_GET_CTIME +#define zpl_inode_get_ctime(ip) inode_get_ctime(ip) +#else +#define zpl_inode_get_ctime(ip) (ip->i_ctime) +#endif +#ifdef HAVE_INODE_SET_CTIME_TO_TS +#define zpl_inode_set_ctime_to_ts(ip, ts) inode_set_ctime_to_ts(ip, ts) +#else +#define zpl_inode_set_ctime_to_ts(ip, ts) (ip->i_ctime = ts) +#endif + #endif /* _SYS_ZPL_H */ diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c index 02cb379ea84..94e25fa0ae8 100644 --- a/module/os/linux/zfs/zfs_ctldir.c +++ b/module/os/linux/zfs/zfs_ctldir.c @@ -522,7 +522,7 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, ip->i_blkbits = SPA_MINBLOCKSHIFT; ip->i_atime = now; ip->i_mtime = now; - ip->i_ctime = now; + zpl_inode_set_ctime_to_ts(ip, now); ip->i_fop = fops; ip->i_op = ops; #if defined(IOP_XATTR) diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index b7d44f344da..e2c23a81fd6 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -2439,8 +2439,8 @@ top: if (mask & (ATTR_CTIME | ATTR_SIZE)) { ZFS_TIME_ENCODE(&vap->va_ctime, ctime); - ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime, - ZTOI(zp)); + zpl_inode_set_ctime_to_ts(ZTOI(zp), + zpl_inode_timestamp_truncate(vap->va_ctime, ZTOI(zp))); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, sizeof (ctime)); } @@ -3645,6 +3645,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, caddr_t va; int err = 0; uint64_t mtime[2], ctime[2]; + inode_timespec_t tmp_ctime; sa_bulk_attr_t bulk[3]; int cnt = 0; struct address_space *mapping; @@ -3809,7 +3810,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, /* Preserve the mtime and ctime provided by the inode */ ZFS_TIME_ENCODE(&ip->i_mtime, mtime); - ZFS_TIME_ENCODE(&ip->i_ctime, ctime); + tmp_ctime = zpl_inode_get_ctime(ip); + ZFS_TIME_ENCODE(&tmp_ctime, ctime); zp->z_atime_dirty = B_FALSE; zp->z_seq++; @@ -3859,6 +3861,7 @@ zfs_dirty_inode(struct inode *ip, int flags) zfsvfs_t *zfsvfs = ITOZSB(ip); dmu_tx_t *tx; uint64_t mode, atime[2], mtime[2], ctime[2]; + inode_timespec_t tmp_ctime; sa_bulk_attr_t bulk[4]; int error = 0; int cnt = 0; @@ -3905,7 +3908,8 @@ zfs_dirty_inode(struct inode *ip, int flags) /* Preserve the mode, mtime and ctime provided by the inode */ ZFS_TIME_ENCODE(&ip->i_atime, atime); ZFS_TIME_ENCODE(&ip->i_mtime, mtime); - ZFS_TIME_ENCODE(&ip->i_ctime, ctime); + tmp_ctime = zpl_inode_get_ctime(ip); + ZFS_TIME_ENCODE(&tmp_ctime, ctime); mode = ip->i_mode; zp->z_mode = mode; diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index 52c8e51df65..f71026da83c 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -542,6 +542,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, uint64_t links; uint64_t z_uid, z_gid; uint64_t atime[2], mtime[2], ctime[2], btime[2]; + inode_timespec_t tmp_ctime; uint64_t projid = ZFS_DEFAULT_PROJID; sa_bulk_attr_t bulk[12]; int count = 0; @@ -615,7 +616,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ZFS_TIME_DECODE(&ip->i_atime, atime); ZFS_TIME_DECODE(&ip->i_mtime, mtime); - ZFS_TIME_DECODE(&ip->i_ctime, ctime); + ZFS_TIME_DECODE(&tmp_ctime, ctime); + zpl_inode_set_ctime_to_ts(ip, tmp_ctime); ZFS_TIME_DECODE(&zp->z_btime, btime); ip->i_ino = zp->z_id; @@ -1195,6 +1197,7 @@ zfs_rezget(znode_t *zp) uint64_t gen; uint64_t z_uid, z_gid; uint64_t atime[2], mtime[2], ctime[2], btime[2]; + inode_timespec_t tmp_ctime; uint64_t projid = ZFS_DEFAULT_PROJID; znode_hold_t *zh; @@ -1289,7 +1292,8 @@ zfs_rezget(znode_t *zp) ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime); ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime); - ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime); + ZFS_TIME_DECODE(&tmp_ctime, ctime); + zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime); ZFS_TIME_DECODE(&zp->z_btime, btime); if ((uint32_t)gen != ZTOI(zp)->i_generation) { @@ -1397,7 +1401,7 @@ zfs_zinactive(znode_t *zp) boolean_t zfs_relatime_need_update(const struct inode *ip) { - inode_timespec_t now; + inode_timespec_t now, tmp_ctime; gethrestime(&now); /* @@ -1408,7 +1412,8 @@ zfs_relatime_need_update(const struct inode *ip) if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0) return (B_TRUE); - if (zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0) + tmp_ctime = zpl_inode_get_ctime(ip); + if (zfs_compare_timespec(&tmp_ctime, &ip->i_atime) >= 0) return (B_TRUE); if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60) @@ -1434,7 +1439,7 @@ void zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], uint64_t ctime[2]) { - inode_timespec_t now; + inode_timespec_t now, tmp_ctime; gethrestime(&now); @@ -1451,7 +1456,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], if (flag & ATTR_CTIME) { ZFS_TIME_ENCODE(&now, ctime); - ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime); + ZFS_TIME_DECODE(&tmp_ctime, ctime); + zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime); if (ZTOZSB(zp)->z_use_fuids) zp->z_pflags |= ZFS_ARCHIVE; } diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index 5f5ad186a61..ef50f868777 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -774,7 +774,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) return (-EMLINK); crhold(cr); - ip->i_ctime = current_time(ip); + zpl_inode_set_ctime_to_ts(ip, current_time(ip)); /* Must have an existing ref, so igrab() cannot return NULL */ VERIFY3P(igrab(ip), !=, NULL); diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c index 96d85991811..4e4f5210f85 100644 --- a/module/os/linux/zfs/zpl_xattr.c +++ b/module/os/linux/zfs/zpl_xattr.c @@ -513,7 +513,7 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value, error = -zfs_write_simple(xzp, value, size, pos, NULL); out: if (error == 0) { - ip->i_ctime = current_time(ip); + zpl_inode_set_ctime_to_ts(ip, current_time(ip)); zfs_mark_inode_dirty(ip); } @@ -1011,7 +1011,8 @@ zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type) */ if (ip->i_mode != mode) { ip->i_mode = ITOZ(ip)->z_mode = mode; - ip->i_ctime = current_time(ip); + zpl_inode_set_ctime_to_ts(ip, + current_time(ip)); zfs_mark_inode_dirty(ip); } @@ -1170,7 +1171,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir) return (PTR_ERR(acl)); if (!acl) { ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask()); - ip->i_ctime = current_time(ip); + zpl_inode_set_ctime_to_ts(ip, current_time(ip)); zfs_mark_inode_dirty(ip); return (0); } From 01d00dfa9e47dba025522790736fc34d65baf2f1 Mon Sep 17 00:00:00 2001 From: Coleman Kane Date: Fri, 15 Sep 2023 00:36:39 -0400 Subject: [PATCH 4/7] Linux 6.6 compat: generic_fillattr has a new u32 request_mask added at arg2 In commit 0d72b92883c651a11059d93335f33d65c6eb653b, a new u32 argument for the request_mask was added to generic_fillattr. This is the same request_mask for statx that's present in the most recent API implemented by zpl_getattr_impl. This commit conditionally adds it to the zpl_generic_fillattr(...) macro, as well as the zfs_getattr_fast(...) implementation, when configure determines it's present in the kernel's generic_fillattr(...). Reviewed-by: Brian Behlendorf Signed-off-by: Coleman Kane Closes #15263 --- config/kernel-generic_fillattr.m4 | 39 +++++++++++++++++----- include/os/linux/kernel/linux/vfs_compat.h | 6 ++++ include/os/linux/zfs/sys/zfs_vnops_os.h | 5 +++ module/os/linux/zfs/zfs_vnops_os.c | 9 +++++ module/os/linux/zfs/zpl_ctldir.c | 11 +++++- module/os/linux/zfs/zpl_inode.c | 4 ++- 6 files changed, 63 insertions(+), 11 deletions(-) diff --git a/config/kernel-generic_fillattr.m4 b/config/kernel-generic_fillattr.m4 index 02dee4d4c00..f5323f0dcb9 100644 --- a/config/kernel-generic_fillattr.m4 +++ b/config/kernel-generic_fillattr.m4 @@ -7,6 +7,10 @@ dnl # dnl # 6.3 API dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument dnl # +dnl # 6.6 API +dnl # generic_fillattr() now takes u32 as second argument, representing a +dnl # request_mask for statx +dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [ ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [ #include @@ -25,22 +29,39 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [ struct kstat *k = NULL; generic_fillattr(idmap, in, k); ]) + + ZFS_LINUX_TEST_SRC([generic_fillattr_mnt_idmap_reqmask], [ + #include + ],[ + struct mnt_idmap *idmap = NULL; + struct inode *in = NULL; + struct kstat *k = NULL; + generic_fillattr(idmap, 0, in, k); + ]) ]) AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [ - AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*]) - ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [ + AC_MSG_CHECKING( + [whether generic_fillattr requires struct mnt_idmap* and request_mask]) + ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap_reqmask], [ AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1, - [generic_fillattr requires struct mnt_idmap*]) + AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK, 1, + [generic_fillattr requires struct mnt_idmap* and u32 request_mask]) ],[ - AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*]) - ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [ + AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*]) + ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [ AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1, - [generic_fillattr requires struct user_namespace*]) + AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1, + [generic_fillattr requires struct mnt_idmap*]) ],[ - AC_MSG_RESULT([no]) + AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*]) + ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1, + [generic_fillattr requires struct user_namespace*]) + ],[ + AC_MSG_RESULT([no]) + ]) ]) ]) ]) diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h index e156ed41c28..aea8bd5ed22 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -461,10 +461,16 @@ zpl_is_32bit_api(void) * 6.3 API change * generic_fillattr() first arg is changed to struct mnt_idmap * * + * 6.6 API change + * generic_fillattr() gets new second arg request_mask, a u32 type + * */ #ifdef HAVE_GENERIC_FILLATTR_IDMAP #define zpl_generic_fillattr(idmap, ip, sp) \ generic_fillattr(idmap, ip, sp) +#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK) +#define zpl_generic_fillattr(idmap, rqm, ip, sp) \ + generic_fillattr(idmap, rqm, ip, sp) #elif defined(HAVE_GENERIC_FILLATTR_USERNS) #define zpl_generic_fillattr(user_ns, ip, sp) \ generic_fillattr(user_ns, ip, sp) diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h index 7a1db7deeec..830c76e5743 100644 --- a/include/os/linux/zfs/sys/zfs_vnops_os.h +++ b/include/os/linux/zfs/sys/zfs_vnops_os.h @@ -56,7 +56,12 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr, int flags); extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr); +#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK +extern int zfs_getattr_fast(zidmap_t *, u32 request_mask, struct inode *ip, + struct kstat *sp); +#else extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp); +#endif extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr, zidmap_t *mnt_ns); extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index e2c23a81fd6..1770e237257 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -1649,7 +1649,12 @@ out: * RETURN: 0 (always succeeds) */ int +#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK +zfs_getattr_fast(zidmap_t *user_ns, u32 request_mask, struct inode *ip, + struct kstat *sp) +#else zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp) +#endif { znode_t *zp = ITOZ(ip); zfsvfs_t *zfsvfs = ITOZSB(ip); @@ -1662,7 +1667,11 @@ zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp) mutex_enter(&zp->z_lock); +#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK + zpl_generic_fillattr(user_ns, request_mask, ip, sp); +#else zpl_generic_fillattr(user_ns, ip, sp); +#endif /* * +1 link count for root inode with visible '.zfs' directory. */ diff --git a/module/os/linux/zfs/zpl_ctldir.c b/module/os/linux/zfs/zpl_ctldir.c index 7786444fea3..8ee7fcecc7b 100644 --- a/module/os/linux/zfs/zpl_ctldir.c +++ b/module/os/linux/zfs/zpl_ctldir.c @@ -124,6 +124,8 @@ zpl_root_getattr_impl(const struct path *path, struct kstat *stat, generic_fillattr(user_ns, ip, stat); #elif defined(HAVE_GENERIC_FILLATTR_IDMAP) generic_fillattr(user_ns, ip, stat); +#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK) + generic_fillattr(user_ns, request_mask, ip, stat); #else (void) user_ns; #endif @@ -435,6 +437,8 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat, generic_fillattr(user_ns, ip, stat); #elif defined(HAVE_GENERIC_FILLATTR_IDMAP) generic_fillattr(user_ns, ip, stat); +#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK) + generic_fillattr(user_ns, request_mask, ip, stat); #else (void) user_ns; #endif @@ -609,6 +613,8 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat, generic_fillattr(user_ns, path->dentry->d_inode, stat); #elif defined(HAVE_GENERIC_FILLATTR_IDMAP) generic_fillattr(user_ns, path->dentry->d_inode, stat); +#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK) + generic_fillattr(user_ns, request_mask, ip, stat); #else (void) user_ns; #endif @@ -623,7 +629,10 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat, error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp); if (error == 0) { -#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) +#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK + error = -zfs_getattr_fast(user_ns, request_mask, ZTOI(dzp), + stat); +#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat); #else error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat); diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index ef50f868777..96f65b9e94e 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -435,7 +435,9 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask, * XXX query_flags currently ignored. */ -#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) +#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK + error = -zfs_getattr_fast(user_ns, request_mask, ip, stat); +#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR)) error = -zfs_getattr_fast(user_ns, ip, stat); #else error = -zfs_getattr_fast(kcred->user_ns, ip, stat); From 7ac56b86cd69487fa018b6c92cb2cfb2f82fefba Mon Sep 17 00:00:00 2001 From: Coleman Kane Date: Fri, 15 Sep 2023 01:07:03 -0400 Subject: [PATCH 5/7] Linux 6.6 compat: fsync_bdev() has been removed in favor of sync_blockdev() In Linux commit 560e20e4bf6484a0c12f9f3c7a1aa55056948e1e, the fsync_bdev() function was removed in favor of sync_blockdev() to do (roughly) the same thing, given the same input. This change conditionally attempts to call sync_blockdev() if fsync_bdev() isn't discovered during configure. Reviewed-by: Brian Behlendorf Signed-off-by: Coleman Kane Closes #15263 --- config/kernel-fsync-bdev.m4 | 36 +++++++++++++++++++++++++++++++++++ config/kernel.m4 | 2 ++ module/os/linux/zfs/zvol_os.c | 6 ++++++ 3 files changed, 44 insertions(+) create mode 100644 config/kernel-fsync-bdev.m4 diff --git a/config/kernel-fsync-bdev.m4 b/config/kernel-fsync-bdev.m4 new file mode 100644 index 00000000000..c47e236f705 --- /dev/null +++ b/config/kernel-fsync-bdev.m4 @@ -0,0 +1,36 @@ +dnl # +dnl # 6.6 API change, +dnl # fsync_bdev was removed in favor of sync_blockdev +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_SYNC_BDEV], [ + ZFS_LINUX_TEST_SRC([fsync_bdev], [ + #include + ],[ + fsync_bdev(NULL); + ]) + + ZFS_LINUX_TEST_SRC([sync_blockdev], [ + #include + ],[ + sync_blockdev(NULL); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SYNC_BDEV], [ + AC_MSG_CHECKING([whether fsync_bdev() exists]) + ZFS_LINUX_TEST_RESULT([fsync_bdev], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FSYNC_BDEV, 1, + [fsync_bdev() is declared in include/blkdev.h]) + ],[ + AC_MSG_CHECKING([whether sync_blockdev() exists]) + ZFS_LINUX_TEST_RESULT([sync_blockdev], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SYNC_BLOCKDEV, 1, + [sync_blockdev() is declared in include/blkdev.h]) + ],[ + ZFS_LINUX_TEST_ERROR( + [neither fsync_bdev() nor sync_blockdev() exist]) + ]) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index df194ec7220..056517a841f 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -162,6 +162,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_RECLAIMED ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ + ZFS_AC_KERNEL_SRC_SYNC_BDEV case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE @@ -303,6 +304,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_RECLAIMED ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE ZFS_AC_KERNEL_COPY_SPLICE_READ + ZFS_AC_KERNEL_SYNC_BDEV case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 7a95b54bdf0..f94ce69fb9e 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -873,7 +873,13 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd) { case BLKFLSBUF: +#ifdef HAVE_FSYNC_BDEV fsync_bdev(bdev); +#elif defined(HAVE_SYNC_BLOCKDEV) + sync_blockdev(bdev); +#else +#error "Neither fsync_bdev() nor sync_blockdev() found" +#endif invalidate_bdev(bdev); rw_enter(&zv->zv_suspend_lock, RW_READER); From e5d70f4677afb567151c9396b5ab1e8e3549e2ee Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 21 Sep 2023 21:40:13 -0400 Subject: [PATCH 6/7] ZIL: Avoid dbuf_read() in ztest_get_data() While working on similar patches for zfs and zvol in #15153 I've forgot about ztest. Update it also so that we test the same code paths as use in production. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15301 --- cmd/ztest.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cmd/ztest.c b/cmd/ztest.c index 59c4be225f9..9c5a6944035 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -2457,8 +2457,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd, object, offset, size, RL_READER); - error = dmu_buf_hold(os, object, offset, zgd, &db, - DMU_READ_NO_PREFETCH); + error = dmu_buf_hold_noread(os, object, offset, zgd, &db); if (error == 0) { blkptr_t *bp = &lr->lr_blkptr; From 2e2a46e0a597b3ee606ea7dc5bc527459077322f Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Fri, 22 Sep 2023 16:08:51 -0700 Subject: [PATCH 7/7] Invoke zdb by guid to avoid import errors The problem that was occurring is basically that a device was removed by ztest and replaced with another device. It was then reguided. The import then failed because there were two possible imports with the same name; one with the new guid, and one with the old. This can happen because the label writes from the device removal/replacement can be subject to ztest's error injection. The other ways to fix this would be to change the error injection to not trigger on removals (which may not be technically feasible), or to change the import code to not report configurations that are so short on devices (which would potentially have unpleasant end-user effects when trying to recover from data losses/device configuration issues). Reviewed-by: Brian Behlendorf Reviewed-by: Matthew Ahrens Reviewed-by: George Melikov Signed-off-by: Paul Dagnelie Closes #15298 --- cmd/ztest.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/cmd/ztest.c b/cmd/ztest.c index 9c5a6944035..8cfbdfe1c2e 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -6378,6 +6378,7 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id) spa_t *spa = ztest_spa; uint64_t orig, load; int error; + ztest_shared_t *zs = ztest_shared; if (ztest_opts.zo_mmp_test) return; @@ -6387,6 +6388,7 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id) (void) pthread_rwlock_wrlock(&ztest_name_lock); error = spa_change_guid(spa); + zs->zs_guid = spa_guid(spa); (void) pthread_rwlock_unlock(&ztest_name_lock); if (error != 0) @@ -6916,7 +6918,7 @@ ztest_trim(ztest_ds_t *zd, uint64_t id) * Verify pool integrity by running zdb. */ static void -ztest_run_zdb(const char *pool) +ztest_run_zdb(uint64_t guid) { int status; char *bin; @@ -6940,13 +6942,13 @@ ztest_run_zdb(const char *pool) free(set_gvars_args); size_t would = snprintf(zdb, len, - "%s -bcc%s%s -G -d -Y -e -y %s -p %s %s", + "%s -bcc%s%s -G -d -Y -e -y %s -p %s %"PRIu64, bin, ztest_opts.zo_verbose >= 3 ? "s" : "", ztest_opts.zo_verbose >= 4 ? "v" : "", set_gvars_args_joined, ztest_opts.zo_dir, - pool); + guid); ASSERT3U(would, <, len); umem_free(set_gvars_args_joined, strlen(set_gvars_args_joined) + 1); @@ -7524,14 +7526,15 @@ ztest_import(ztest_shared_t *zs) VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); zs->zs_metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; + zs->zs_guid = spa_guid(spa); spa_close(spa, FTAG); kernel_fini(); if (!ztest_opts.zo_mmp_test) { - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); ztest_freeze(); - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); } (void) pthread_rwlock_destroy(&ztest_name_lock); @@ -7602,7 +7605,6 @@ ztest_run(ztest_shared_t *zs) dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); - zs->zs_guid = dds.dds_guid; dmu_objset_disown(os, B_TRUE, FTAG); /* @@ -7873,14 +7875,15 @@ ztest_init(ztest_shared_t *zs) VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); zs->zs_metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; + zs->zs_guid = spa_guid(spa); spa_close(spa, FTAG); kernel_fini(); if (!ztest_opts.zo_mmp_test) { - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); ztest_freeze(); - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); } (void) pthread_rwlock_destroy(&ztest_name_lock); @@ -8303,7 +8306,7 @@ main(int argc, char **argv) } if (!ztest_opts.zo_mmp_test) - ztest_run_zdb(ztest_opts.zo_pool); + ztest_run_zdb(zs->zs_guid); } if (ztest_opts.zo_verbose >= 1) {