From 00ac77464ef15c56b2cffb049518b78545552a9d Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 18 May 2022 12:25:33 -0500 Subject: [PATCH 01/41] Expose zpool guids through kstats There are times when end-users may wish to have a fast and convenient method to get zpool guid without having to use libzfs. This commit exposes the zpool guid via kstats in similar manner to the zpool state. Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Andrew Walker Closes #13466 --- include/sys/spa.h | 1 + module/zfs/spa_stats.c | 48 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/sys/spa.h b/include/sys/spa.h index 232855449c1..442bc7792b2 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -898,6 +898,7 @@ typedef struct spa_stats { spa_history_kstat_t tx_assign_histogram; spa_history_list_t mmp_history; spa_history_kstat_t state; /* pool state */ + spa_history_kstat_t guid; /* pool guid */ spa_history_kstat_t iostats; } spa_stats_t; diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index 2a75b37f020..f1d644bc68c 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -819,6 +819,41 @@ spa_state_init(spa_t *spa) kmem_strfree(name); } +static int +spa_guid_data(char *buf, size_t size, void *data) +{ + spa_t *spa = (spa_t *)data; + (void) snprintf(buf, size, "%llu\n", (u_longlong_t)spa_guid(spa)); + return (0); +} + +static void +spa_guid_init(spa_t *spa) +{ + spa_history_kstat_t *shk = &spa->spa_stats.guid; + char *name; + kstat_t *ksp; + + mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); + + name = kmem_asprintf("zfs/%s", spa_name(spa)); + + ksp = kstat_create(name, 0, "guid", "misc", + KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); + + shk->kstat = ksp; + if (ksp) { + ksp->ks_lock = &shk->lock; + ksp->ks_data = NULL; + ksp->ks_private = spa; + ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS; + kstat_set_raw_ops(ksp, NULL, spa_guid_data, spa_state_addr); + kstat_install(ksp); + } + + 
kmem_strfree(name); +} + static void spa_health_destroy(spa_t *spa) { @@ -830,6 +865,17 @@ spa_health_destroy(spa_t *spa) mutex_destroy(&shk->lock); } +static void +spa_guid_destroy(spa_t *spa) +{ + spa_history_kstat_t *shk = &spa->spa_stats.guid; + kstat_t *ksp = shk->kstat; + if (ksp) + kstat_delete(ksp); + + mutex_destroy(&shk->lock); +} + static const spa_iostats_t spa_iostats_template = { { "trim_extents_written", KSTAT_DATA_UINT64 }, { "trim_bytes_written", KSTAT_DATA_UINT64 }, @@ -950,6 +996,7 @@ spa_stats_init(spa_t *spa) spa_tx_assign_init(spa); spa_mmp_history_init(spa); spa_state_init(spa); + spa_guid_init(spa); spa_iostats_init(spa); } @@ -962,6 +1009,7 @@ spa_stats_destroy(spa_t *spa) spa_txg_history_destroy(spa); spa_read_history_destroy(spa); spa_mmp_history_destroy(spa); + spa_guid_destroy(spa); } ZFS_MODULE_PARAM(zfs, zfs_, read_history, INT, ZMOD_RW, From 08b32c6fa9dd68d024447979e7c9b711b9e60c56 Mon Sep 17 00:00:00 2001 From: heeplr <32984777+heeplr@users.noreply.github.com> Date: Wed, 18 May 2022 19:27:53 +0200 Subject: [PATCH 02/41] zed: support subject as header in zed_notify_email() Some minimal MUAs don't support passing the subjects as cmdline option. This commit checks if "@SUBJECT@" is missing in ZED_EMAIL_OPTS and then prepends a subject header to the notification message. Also set a default for ${subject}. Reviewed-by: Ahelenia Ziemia<84>ska Reviewed-by: Tony Hutter Signed-off-by: Daniel Hiepler Closes #13440 --- cmd/zed/zed.d/zed-functions.sh | 19 ++++++++++++++++--- cmd/zed/zed.d/zed.rc | 1 + 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/cmd/zed/zed.d/zed-functions.sh b/cmd/zed/zed.d/zed-functions.sh index 70a7113c658..49b6b54029a 100644 --- a/cmd/zed/zed.d/zed-functions.sh +++ b/cmd/zed/zed.d/zed-functions.sh @@ -223,6 +223,8 @@ zed_notify() # ZED_EMAIL_OPTS. 
This undergoes the following keyword substitutions: # - @ADDRESS@ is replaced with the space-delimited recipient email address(es) # - @SUBJECT@ is replaced with the notification subject +# If @SUBJECT@ was omitted here, a "Subject: ..." header will be added to notification +# # # Arguments # subject: notification subject @@ -240,7 +242,7 @@ zed_notify() # zed_notify_email() { - local subject="$1" + local subject="${1:-"ZED notification"}" local pathname="${2:-"/dev/null"}" : "${ZED_EMAIL_PROG:="mail"}" @@ -261,12 +263,23 @@ zed_notify_email() return 1 fi - ZED_EMAIL_OPTS="$(echo "${ZED_EMAIL_OPTS}" \ + # construct cmdline options + ZED_EMAIL_OPTS_PARSED="$(echo "${ZED_EMAIL_OPTS}" \ | sed -e "s/@ADDRESS@/${ZED_EMAIL_ADDR}/g" \ -e "s/@SUBJECT@/${subject}/g")" + # pipe message to email prog # shellcheck disable=SC2086,SC2248 - eval ${ZED_EMAIL_PROG} ${ZED_EMAIL_OPTS} < "${pathname}" >/dev/null 2>&1 + { + # no subject passed as option? + if [ "${ZED_EMAIL_OPTS%@SUBJECT@*}" = "${ZED_EMAIL_OPTS}" ] ; then + # inject subject header + printf "Subject: %s\n" "${subject}" + fi + # output message + cat "${pathname}" + } | + eval ${ZED_EMAIL_PROG} ${ZED_EMAIL_OPTS_PARSED} >/dev/null 2>&1 rv=$? if [ "${rv}" -ne 0 ]; then zed_log_err "${ZED_EMAIL_PROG##*/} exit=${rv}" diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index 3c58a2c281f..c55a70c79f7 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -29,6 +29,7 @@ ZED_EMAIL_ADDR="root" # The string @SUBJECT@ will be replaced with the notification subject; # this should be protected with quotes to prevent word-splitting. # Email will only be sent if ZED_EMAIL_ADDR is defined. +# If @SUBJECT@ was omitted here, a "Subject: ..." 
header will be added to notification # #ZED_EMAIL_OPTS="-s '@SUBJECT@' @ADDRESS@" From 7506f5af922895afe52377e2ea1e9813e3be8111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 10 May 2022 22:05:20 +0200 Subject: [PATCH 03/41] Add make regen-tests to regenerate the test bundle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13447 --- Makefile.am | 4 ++++ tests/zfs-tests/tests/Makefile.am | 28 +++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/Makefile.am b/Makefile.am index ebb385fabc0..0a73a63870b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -170,6 +170,10 @@ flake8: echo "skipping flake8 because flake8 is not installed"; \ fi +PHONY += regen-tests +regen-tests: + @$(MAKE) -C tests/zfs-tests/tests regen + PHONY += ctags ctags: $(RM) tags diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 4bc05c92f59..49f9f3aaa7d 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -32,7 +32,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS = \ \ perf/scripts/prefetch_io.sh -# These lists can be regenerated by running, on a *clean* source: +# These lists can be regenerated by running make regen-tests at the root, or, on a *clean* source: # find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' ! -executable -name '*.in' | sort | sed 's/\.in$//;s/^/\t/;$!s/$/ \\/' # find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' -executable -name '*.in' | sort | sed 's/\.in$//;s/^/\t/;$!s/$/ \\/' # find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' ! -name '*.in' ! 
-name '*.c' | grep -Fe /simd -e /tmpfile | sort | sed 's/^/\t/;$!s/$/ \\/' @@ -43,6 +43,32 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS = \ # # C programs are specced in ../Makefile.am above as part of the main Makefile +find_common := find functional/ ! -type d ! -name .gitignore ! -name .dirstamp ! -name '*.Po' +regen: + @$(MAKE) -C $(top_builddir) clean + @$(MAKE) clean + $(SED) $(ac_inplace) '/^# -- >8 --/q' Makefile.am + echo >> Makefile.am + echo 'nobase_nodist_datadir_zfs_tests_tests_DATA = \' >> Makefile.am + $(find_common) ! -executable -name '*.in' | sort | sed 's/\.in$$//;s/^/\t/;$$!s/$$/ \\/' >> Makefile.am + echo 'nobase_nodist_datadir_zfs_tests_tests_SCRIPTS = \' >> Makefile.am + $(find_common) -executable -name '*.in' | sort | sed 's/\.in$$//;s/^/\t/;$$!s/$$/ \\/' >> Makefile.am + echo >> Makefile.am + echo 'SUBSTFILES += $$(nobase_nodist_datadir_zfs_tests_tests_DATA) $$(nobase_nodist_datadir_zfs_tests_tests_SCRIPTS)' >> Makefile.am + echo >> Makefile.am + echo 'if BUILD_LINUX' >> Makefile.am + echo 'nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \' >> Makefile.am + $(find_common) ! -name '*.in' ! -name '*.c' | grep -Fe /simd -e /tmpfile | sort | sed 's/^/\t/;$$!s/$$/ \\/' >> Makefile.am + echo 'endif' >> Makefile.am + echo >> Makefile.am + echo 'nobase_dist_datadir_zfs_tests_tests_DATA += \' >> Makefile.am + $(find_common) ! -executable ! -name '*.in' ! -name '*.c' | grep -vFe /simd -e /tmpfile | sort | sed 's/^/\t/;$$!s/$$/ \\/' >> Makefile.am + echo >> Makefile.am + echo 'nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \' >> Makefile.am + $(find_common) -executable ! -name '*.in' ! 
-name '*.c' | grep -vFe /simd -e /tmpfile | sort | sed 's/^/\t/;$$!s/$$/ \\/' >> Makefile.am + +# -- >8 -- + nobase_nodist_datadir_zfs_tests_tests_DATA = \ functional/pam/utilities.kshlib nobase_nodist_datadir_zfs_tests_tests_SCRIPTS = \ From 7062a956f79b264084d71e542ce74bc5144f504d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 10 May 2022 22:10:57 +0200 Subject: [PATCH 04/41] rpm: don't spec obsolete_name/version anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13447 --- rpm/generic/zfs-kmod.spec.in | 4 +-- rpm/redhat/zfs-kmod.spec.in | 5 +-- scripts/kmodtool | 61 +++++------------------------------- 3 files changed, 10 insertions(+), 60 deletions(-) diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in index 4ed719b8fe1..79fe2753c1e 100644 --- a/rpm/generic/zfs-kmod.spec.in +++ b/rpm/generic/zfs-kmod.spec.in @@ -98,7 +98,7 @@ BuildRequires: %{_bindir}/kmodtool # Kmodtool does its magic here. A patched version of kmodtool is shipped # with the source rpm until kmod development packages are supported upstream. # https://bugzilla.rpmfusion.org/show_bug.cgi?id=2714 -%{expand:%(bash %{SOURCE10} --target %{_target_cpu} %{?repo:--repo %{?repo}} --kmodname %{name} %{?buildforkernels:--%{buildforkernels}} --devel %{?prefix:--prefix "%{?prefix}"} %{?kernels:--for-kernels "%{?kernels}"} %{?kernelbuildroot:--buildroot "%{?kernelbuildroot}"} --obsolete-name spl --obsolete-version 0.8 2>/dev/null) } +%{expand:%(bash %{SOURCE10} --target %{_target_cpu} %{?repo:--repo %{?repo}} --kmodname %{name} %{?buildforkernels:--%{buildforkernels}} --devel %{?prefix:--prefix "%{?prefix}"} %{?kernels:--for-kernels "%{?kernels}"} %{?kernelbuildroot:--buildroot "%{?kernelbuildroot}"} 2>/dev/null) } %description @@ -109,7 +109,7 @@ This package contains the ZFS kernel modules. 
%{?kmodtool_check} # Print kmodtool output for debugging purposes: -bash %{SOURCE10} --target %{_target_cpu} %{?repo:--repo %{?repo}} --kmodname %{name} %{?buildforkernels:--%{buildforkernels}} --devel %{?prefix:--prefix "%{?prefix}"} %{?kernels:--for-kernels "%{?kernels}"} %{?kernelbuildroot:--buildroot "%{?kernelbuildroot}"} --obsolete-name spl --obsolete-version 0.8 2>/dev/null +bash %{SOURCE10} --target %{_target_cpu} %{?repo:--repo %{?repo}} --kmodname %{name} %{?buildforkernels:--%{buildforkernels}} --devel %{?prefix:--prefix "%{?prefix}"} %{?kernels:--for-kernels "%{?kernels}"} %{?kernelbuildroot:--buildroot "%{?kernelbuildroot}"} 2>/dev/null %if %{with debug} %define debug --enable-debug diff --git a/rpm/redhat/zfs-kmod.spec.in b/rpm/redhat/zfs-kmod.spec.in index 7ee04e2340c..f59551c0b43 100644 --- a/rpm/redhat/zfs-kmod.spec.in +++ b/rpm/redhat/zfs-kmod.spec.in @@ -17,9 +17,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) # by generating a preamble text file which kmodtool can append to the spec file. %(/bin/echo -e "\ Requires: @PACKAGE@ = %{version}\n\ -Conflicts: @PACKAGE@-dkms\n\ -Obsoletes: kmod-spl\n\ -Obsoletes: spl-kmod\n\n" > %{_sourcedir}/kmod-preamble) +Conflicts: @PACKAGE@-dkms) # LDFLAGS are not sanitized by arch/*/Makefile for these architectures. %ifarch ppc ppc64 ppc64le aarch64 @@ -39,7 +37,6 @@ This package contains the ZFS kernel modules. %package -n kmod-%{kmod_name}-devel Summary: ZFS kernel module(s) devel common Group: System Environment/Kernel -Provides: kmod-spl-devel = %{version} %description -n kmod-%{kmod_name}-devel This package provides the header files and objects to build kernel modules. 
diff --git a/scripts/kmodtool b/scripts/kmodtool index f66341196ac..bda5c41ee7a 100755 --- a/scripts/kmodtool +++ b/scripts/kmodtool @@ -72,7 +72,7 @@ LANG=C rpmbuild --define "_sourcedir %{_sourcedir}" \\\ ln -s \$(ls \$RPM_BUILD_ROOT/%{_usrsrc}/akmods/) \$RPM_BUILD_ROOT/%{_usrsrc}/akmods/${kmodname}-kmod.latest %package -n akmod-${kmodname} -Summary: Akmod package for ${kmodname} kernel module(s) +Summary: Akmod package for ${kmodname} kernel module(s) Group: System Environment/Kernel Requires: kmodtool Requires: akmods @@ -82,11 +82,6 @@ Requires: ${kmodname}-kmod-common >= %{?epoch:%{epoch}:}%{version} Provides: ${kmodname}-kmod = %{?epoch:%{epoch}:}%{version}-%{release} EOF - if [ -n "${obsolete_name}" ]; then - echo "Provides: akmod-${obsolete_name} = ${obsolete_version}" - echo "Obsoletes: akmod-${obsolete_name} < ${obsolete_version}" - fi - cat <= %{?epoch:%{epoch}:}%{version}-%{release}" fi - if [ -n "${obsolete_name}" ]; then - echo "Provides: kmod-${obsolete_name}-devel = ${obsolete_version}" - echo "Obsoletes: kmod-${obsolete_name}-devel < ${obsolete_version}" - fi - cat <= %{?epoch:%{epoch}:}%{vers %{?KmodsMetaRequires:Requires: %{?KmodsMetaRequires}} EOF - if [ -n "${obsolete_name}" ]; then - echo "Provides: kmod-${obsolete_name}${kernel_variant} = ${obsolete_version}" - echo "Obsoletes: kmod-${obsolete_name}${kernel_variant} < ${obsolete_version}" - fi - cat <&2 - elif [ ! -e "${1}" ]; then + elif [ ! 
-e "${1}" ]; then error_out 2 "Filterfile ${1} not found" >&2 fi filterfile="${1}" @@ -505,22 +476,6 @@ while [ -n "${1}" ] ; do shift noakmod="true" ;; - --obsolete-name) - shift - if [ -z "${1}" ] ; then - error_out 2 "Please provide the name of the kmod to obsolete together with --obsolete-name" >&2 - fi - obsolete_name="${1}" - shift - ;; - --obsolete-version) - shift - if [ -z "${1}" ] ; then - error_out 2 "Please provide the version of the kmod to obsolete together with --obsolete-version" >&2 - fi - obsolete_version="${1}" - shift - ;; --target) shift target="${1}" @@ -574,8 +529,6 @@ elif [ -z "${kmodname}" ]; then error_out 2 "please pass kmodname with --kmodname" elif [ -z "${kernels_known_variants}" ] ; then error_out 2 "could not determine known variants" -elif { [ -n "${obsolete_name}" ] && [ -z "${obsolete_version}" ]; } || { [ -z "${obsolete_name}" ] && [ -n "${obsolete_version}" ]; } ; then - error_out 2 "you need to provide both --obsolete-name and --obsolete-version" fi # go @@ -599,7 +552,7 @@ else # call buildsys-build-${repo}-kerneldevpkgs to get the list of kernels cmdoptions="--target ${target}" - # filterfile to filter list of kernels? + # filterfile to filter list of kernels? if [ -n "${filterfile}" ] ; then cmdoptions="${cmdoptions} --filterfile ${filterfile}" fi @@ -607,7 +560,7 @@ else kernel_versions_to_build_for=$(buildsys-build-${repo}-kerneldevpkgs "--${build_kernels}" ${cmdoptions}) returncode=$? 
if [ "$returncode" -ne 0 ]; then - + error_out 2 "buildsys-build-${repo}-kerneldevpkgs failed: ${kernel_versions_to_build_for}" fi @@ -615,5 +568,5 @@ else print_akmodtemplate fi - print_rpmtemplate + print_rpmtemplate fi From 2f713390d18ff3ae8ad4fdb8e97f9fdfbf867bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 10 May 2022 22:11:30 +0200 Subject: [PATCH 05/41] kmodtool: cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13447 --- scripts/kmodtool | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scripts/kmodtool b/scripts/kmodtool index bda5c41ee7a..a79ad0c7a0f 100755 --- a/scripts/kmodtool +++ b/scripts/kmodtool @@ -557,12 +557,8 @@ else cmdoptions="${cmdoptions} --filterfile ${filterfile}" fi - kernel_versions_to_build_for=$(buildsys-build-${repo}-kerneldevpkgs "--${build_kernels}" ${cmdoptions}) - returncode=$? - if [ "$returncode" -ne 0 ]; then - + kernel_versions_to_build_for=$(buildsys-build-${repo}-kerneldevpkgs "--${build_kernels}" ${cmdoptions}) || error_out 2 "buildsys-build-${repo}-kerneldevpkgs failed: ${kernel_versions_to_build_for}" - fi if [ "${build_kernels}" = "current" ] && [ -z "${noakmod}" ]; then print_akmodtemplate From 6b575417e2bd500906ede0c82c0d29780ee26e9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 10 May 2022 23:25:43 +0200 Subject: [PATCH 06/41] libspl/include: remove unused/empty headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13447 --- include/libzfs.h | 2 - include/thread_pool.h | 1 - lib/libspl/include/Makefile.am | 13 -- lib/libspl/include/libdevinfo.h | 30 ---- lib/libspl/include/limits.h | 45 ----- lib/libspl/include/locale.h | 35 ---- lib/libspl/include/os/freebsd/sys/byteorder.h | 2 +- 
lib/libspl/include/os/freebsd/sys/mnttab.h | 4 +- .../include/os/freebsd/sys/zfs_context_os.h | 1 - lib/libspl/include/os/linux/sys/byteorder.h | 2 +- lib/libspl/include/stropts.h | 25 --- lib/libspl/include/sys/int_limits.h | 30 ---- lib/libspl/include/sys/int_types.h | 32 ---- lib/libspl/include/sys/kstat.h | 35 +--- lib/libspl/include/sys/stropts.h | 29 ---- lib/libspl/include/sys/types.h | 2 +- lib/libspl/include/sys/tzfile.h | 164 ------------------ lib/libspl/include/sys/va_list.h | 32 ---- lib/libspl/include/sys/varargs.h | 30 ---- lib/libspl/include/thread.h | 30 ---- lib/libspl/include/tzfile.h | 32 ---- lib/libspl/include/ucred.h | 32 ---- lib/libzfs/libzfs_dataset.c | 1 - lib/libzfs/libzfs_diff.c | 1 - 24 files changed, 6 insertions(+), 604 deletions(-) delete mode 100644 lib/libspl/include/libdevinfo.h delete mode 100644 lib/libspl/include/limits.h delete mode 100644 lib/libspl/include/locale.h delete mode 100644 lib/libspl/include/stropts.h delete mode 100644 lib/libspl/include/sys/int_limits.h delete mode 100644 lib/libspl/include/sys/int_types.h delete mode 100644 lib/libspl/include/sys/stropts.h delete mode 100644 lib/libspl/include/sys/tzfile.h delete mode 100644 lib/libspl/include/sys/va_list.h delete mode 100644 lib/libspl/include/sys/varargs.h delete mode 100644 lib/libspl/include/thread.h delete mode 100644 lib/libspl/include/tzfile.h delete mode 100644 lib/libspl/include/ucred.h diff --git a/include/libzfs.h b/include/libzfs.h index 04f464d12fa..46cb6052016 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -40,10 +40,8 @@ #include #include #include -#include #include #include -#include #include #ifdef __cplusplus diff --git a/include/thread_pool.h b/include/thread_pool.h index 43090c3c664..19edebd7da3 100644 --- a/include/thread_pool.h +++ b/include/thread_pool.h @@ -28,7 +28,6 @@ #define _THREAD_POOL_H_ extern __attribute__((visibility("default"))) #include -#include #include #ifdef __cplusplus diff --git 
a/lib/libspl/include/Makefile.am b/lib/libspl/include/Makefile.am index 2c028431072..6f0e1818d22 100644 --- a/lib/libspl/include/Makefile.am +++ b/lib/libspl/include/Makefile.am @@ -2,18 +2,11 @@ libspldir = $(includedir)/libspl libspl_HEADERS = \ %D%/assert.h \ %D%/atomic.h \ - %D%/libdevinfo.h \ %D%/libgen.h \ %D%/libshare.h \ - %D%/limits.h \ - %D%/locale.h \ %D%/statcommon.h \ %D%/stdlib.h \ %D%/string.h \ - %D%/stropts.h \ - %D%/thread.h \ - %D%/tzfile.h \ - %D%/ucred.h \ %D%/umem.h \ %D%/unistd.h \ %D%/zone.h @@ -40,8 +33,6 @@ libspl_sys_HEADERS = \ %D%/sys/dkio.h \ %D%/sys/dklabel.h \ %D%/sys/feature_tests.h \ - %D%/sys/int_limits.h \ - %D%/sys/int_types.h \ %D%/sys/inttypes.h \ %D%/sys/isa_defs.h \ %D%/sys/kmem.h \ @@ -59,7 +50,6 @@ libspl_sys_HEADERS = \ %D%/sys/stack.h \ %D%/sys/stdtypes.h \ %D%/sys/string.h \ - %D%/sys/stropts.h \ %D%/sys/sunddi.h \ %D%/sys/systeminfo.h \ %D%/sys/time.h \ @@ -67,10 +57,7 @@ libspl_sys_HEADERS = \ %D%/sys/trace_zfs.h \ %D%/sys/types.h \ %D%/sys/types32.h \ - %D%/sys/tzfile.h \ %D%/sys/uio.h \ - %D%/sys/va_list.h \ - %D%/sys/varargs.h \ %D%/sys/vnode.h \ %D%/sys/wmsum.h \ %D%/sys/zone.h diff --git a/lib/libspl/include/libdevinfo.h b/lib/libspl/include/libdevinfo.h deleted file mode 100644 index be1d291f405..00000000000 --- a/lib/libspl/include/libdevinfo.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LIBSPL_LIBDEVINFO_H -#define _LIBSPL_LIBDEVINFO_H - -#endif /* _LIBSPL_LIBDEVINFO_H */ diff --git a/lib/libspl/include/limits.h b/lib/libspl/include/limits.h deleted file mode 100644 index 5d996eb846d..00000000000 --- a/lib/libspl/include/limits.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include_next -#include - -#ifndef _LIBSPL_LIMITS_H -#define _LIBSPL_LIMITS_H - -#ifndef DBL_DIG -#define DBL_DIG 15 -#define DBL_MAX 1.7976931348623157081452E+308 -#define DBL_MIN 2.2250738585072013830903E-308 -#endif - -#ifndef FLT_DIG -#define FLT_DIG 6 -#define FLT_MAX 3.4028234663852885981170E+38F -#define FLT_MIN 1.1754943508222875079688E-38F -#endif - -#endif /* _LIBSPL_LIMITS_H */ diff --git a/lib/libspl/include/locale.h b/lib/libspl/include/locale.h deleted file mode 100644 index 6c74df72072..00000000000 --- a/lib/libspl/include/locale.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include_next - -#ifndef _LIBSPL_LOCALE_H -#define _LIBSPL_LOCALE_H - -#include -#include - -#endif diff --git a/lib/libspl/include/os/freebsd/sys/byteorder.h b/lib/libspl/include/os/freebsd/sys/byteorder.h index cd692d3616e..d4227ddc546 100644 --- a/lib/libspl/include/os/freebsd/sys/byteorder.h +++ b/lib/libspl/include/os/freebsd/sys/byteorder.h @@ -43,7 +43,7 @@ #include #include #include -#include +#include #if defined(__GNUC__) && defined(_ASM_INLINES) && \ (defined(__i386) || defined(__amd64)) diff --git a/lib/libspl/include/os/freebsd/sys/mnttab.h b/lib/libspl/include/os/freebsd/sys/mnttab.h index c08349bdf9b..54c1bc59ab5 100644 --- a/lib/libspl/include/os/freebsd/sys/mnttab.h +++ b/lib/libspl/include/os/freebsd/sys/mnttab.h @@ -79,7 +79,7 @@ extern int _sol_getmntent(FILE *fp, struct mnttab *mp); extern int getextmntent(const char *path, struct extmnttab *entry, struct stat64 *statbuf); extern void statfs2mnttab(struct statfs *sfs, struct mnttab *mp); -char *hasmntopt(struct mnttab *mnt, char *opt); -int getmntent(FILE *fp, struct mnttab *mp); +extern char *hasmntopt(struct mnttab *mnt, char *opt); +extern int getmntent(FILE *fp, struct mnttab *mp); #endif diff --git a/lib/libspl/include/os/freebsd/sys/zfs_context_os.h b/lib/libspl/include/os/freebsd/sys/zfs_context_os.h index b9bf487c2ae..9b9d69bddcf 100644 --- a/lib/libspl/include/os/freebsd/sys/zfs_context_os.h +++ b/lib/libspl/include/os/freebsd/sys/zfs_context_os.h @@ -30,6 +30,5 @@ #define ZFS_CONTEXT_OS_H_ #define HAVE_LARGE_STACKS 1 -#define ZFS_EXPORTS_PATH "/etc/zfs/exports" #endif diff --git a/lib/libspl/include/os/linux/sys/byteorder.h b/lib/libspl/include/os/linux/sys/byteorder.h index d5ee3e26f5a..47af7700f03 100644 --- a/lib/libspl/include/os/linux/sys/byteorder.h +++ b/lib/libspl/include/os/linux/sys/byteorder.h @@ -46,7 +46,7 @@ #endif #include -#include +#include #ifdef __cplusplus extern "C" { diff --git a/lib/libspl/include/stropts.h b/lib/libspl/include/stropts.h deleted file 
mode 100644 index 37acd4052b0..00000000000 --- a/lib/libspl/include/stropts.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -#ifndef _LIBSPL_STROPTS_H -#define _LIBSPL_STROPTS_H - -#endif /* _LIBSPL_STROPTS_H */ diff --git a/lib/libspl/include/sys/int_limits.h b/lib/libspl/include/sys/int_limits.h deleted file mode 100644 index 7af68cdb299..00000000000 --- a/lib/libspl/include/sys/int_limits.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LIBSPL_SYS_INT_LIMITS_H -#define _LIBSPL_SYS_INT_LIMITS_H - -#endif diff --git a/lib/libspl/include/sys/int_types.h b/lib/libspl/include/sys/int_types.h deleted file mode 100644 index 51e9e028549..00000000000 --- a/lib/libspl/include/sys/int_types.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SOL_SYS_INT_TYPES_H -#define _SOL_SYS_INT_TYPES_H - -#include - -#endif diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h index 7f9aa6d8dbd..c2de0778485 100644 --- a/lib/libspl/include/sys/kstat.h +++ b/lib/libspl/include/sys/kstat.h @@ -92,39 +92,6 @@ typedef struct kstat { void *ks_lock; /* protects this kstat's data */ } kstat_t; -#ifdef _SYSCALL32 - -typedef int32_t kid32_t; - -typedef struct kstat32 { - /* - * Fields relevant to both kernel and user - */ - hrtime_t ks_crtime; - caddr32_t ks_next; /* struct kstat pointer */ - kid32_t ks_kid; - char ks_module[KSTAT_STRLEN]; - uint8_t ks_resv; - int32_t ks_instance; - char ks_name[KSTAT_STRLEN]; - uint8_t ks_type; - char ks_class[KSTAT_STRLEN]; - uint8_t ks_flags; - caddr32_t ks_data; /* type-specific data */ - uint32_t ks_ndata; - size32_t ks_data_size; - hrtime_t ks_snaptime; - /* - * Fields relevant to kernel only (only needed here for padding) - */ - int32_t _ks_update; - caddr32_t _ks_private; - int32_t _ks_snapshot; - caddr32_t _ks_lock; -} kstat32_t; - -#endif /* _SYSCALL32 */ - /* * kstat structure and locking strategy * @@ -467,7 +434,7 @@ typedef struct kstat_named { * 64-bit compilation environments or 32-bit non-maximally conformant * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the * C99 ANSI C compilation environment, the long long type is supported. - * The _INT64_TYPE is defined by the implementation (see sys/int_types.h). + * The _INT64_TYPE is defined by the implementation (see sys/inttypes.h). */ #if defined(_INT64_TYPE) int64_t i64; diff --git a/lib/libspl/include/sys/stropts.h b/lib/libspl/include/sys/stropts.h deleted file mode 100644 index 08c2e79bc53..00000000000 --- a/lib/libspl/include/sys/stropts.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LIBSPL_SYS_STROPTS_H -#define _LIBSPL_SYS_STROPTS_H - -#endif /* _LIBSPL_SYS_STROPTS_H */ diff --git a/lib/libspl/include/sys/types.h b/lib/libspl/include/sys/types.h index ea02ffac93a..f32c2188a11 100644 --- a/lib/libspl/include/sys/types.h +++ b/lib/libspl/include/sys/types.h @@ -37,7 +37,7 @@ #include #include_next #include -#include +#include #include #ifndef HAVE_INTTYPES diff --git a/lib/libspl/include/sys/tzfile.h b/lib/libspl/include/sys/tzfile.h deleted file mode 100644 index e30e7566366..00000000000 --- a/lib/libspl/include/sys/tzfile.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * from Arthur Olson's 6.1 - */ - -#ifndef _LIBSPL_SYS_TZFILE_H -#define _LIBSPL_SYS_TZFILE_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Information about time zone files. - */ - -#define TZDIR "/usr/share/lib/zoneinfo" /* Time zone object file directory */ - -#define TZDEFAULT (getenv("TZ")) - -#define TZDEFRULES "posixrules" - -/* - * Each file begins with. . . - */ - -struct tzhead { - char tzh_reserved[24]; /* reserved for future use */ - char tzh_ttisstdcnt[4]; /* coded number of trans. time flags */ - char tzh_leapcnt[4]; /* coded number of leap seconds */ - char tzh_timecnt[4]; /* coded number of transition times */ - char tzh_typecnt[4]; /* coded number of local time types */ - char tzh_charcnt[4]; /* coded number of abbr. chars */ -}; - -/* - * . . .followed by. . . - * - * tzh_timecnt (char [4])s coded transition times a la time(2) - * tzh_timecnt (unsigned char)s types of local time starting at above - * tzh_typecnt repetitions of - * one (char [4]) coded GMT offset in seconds - * one (unsigned char) used to set tm_isdst - * one (unsigned char) that's an abbreviation list index - * tzh_charcnt (char)s '\0'-terminated zone abbreviations - * tzh_leapcnt repetitions of - * one (char [4]) coded leap second transition times - * one (char [4]) total correction after above - * tzh_ttisstdcnt (char)s indexed by type; if TRUE, transition - * time is standard time, if FALSE, - * transition time is wall clock time - * if absent, transition times are - * assumed to be wall clock time - */ - -/* - * In the current implementation, "tzset()" refuses to deal with files that - * exceed any of the limits below. 
- */ - -/* - * The TZ_MAX_TIMES value below is enough to handle a bit more than a - * year's worth of solar time (corrected daily to the nearest second) or - * 138 years of Pacific Presidential Election time - * (where there are three time zone transitions every fourth year). - */ -#define TZ_MAX_TIMES 370 - -#define TZ_MAX_TYPES 256 /* Limited by what (unsigned char)'s can hold */ - -#define TZ_MAX_CHARS 50 /* Maximum number of abbreviation characters */ - -#define TZ_MAX_LEAPS 50 /* Maximum number of leap second corrections */ - -#define SECSPERMIN 60 -#define MINSPERHOUR 60 -#define HOURSPERDAY 24 -#define DAYSPERWEEK 7 -#define DAYSPERNYEAR 365 -#define DAYSPERLYEAR 366 -#define SECSPERHOUR (SECSPERMIN * MINSPERHOUR) -#define SECSPERDAY ((long)SECSPERHOUR * HOURSPERDAY) -#define MONSPERYEAR 12 - -#define TM_SUNDAY 0 -#define TM_MONDAY 1 -#define TM_TUESDAY 2 -#define TM_WEDNESDAY 3 -#define TM_THURSDAY 4 -#define TM_FRIDAY 5 -#define TM_SATURDAY 6 - -#define TM_JANUARY 0 -#define TM_FEBRUARY 1 -#define TM_MARCH 2 -#define TM_APRIL 3 -#define TM_MAY 4 -#define TM_JUNE 5 -#define TM_JULY 6 -#define TM_AUGUST 7 -#define TM_SEPTEMBER 8 -#define TM_OCTOBER 9 -#define TM_NOVEMBER 10 -#define TM_DECEMBER 11 - -#define TM_YEAR_BASE 1900 - -#define EPOCH_YEAR 1970 -#define EPOCH_WDAY TM_THURSDAY - -/* - * Accurate only for the past couple of centuries; - * that will probably do. - */ - -#define isleap(y) (((y) % 4) == 0 && ((y) % 100) != 0 || ((y) % 400) == 0) - -/* - * Use of the underscored variants may cause problems if you move your code to - * certain System-V-based systems; for maximum portability, use the - * underscore-free variants. The underscored variants are provided for - * backward compatibility only; they may disappear from future versions of - * this file. 
- */ - -#define SECS_PER_MIN SECSPERMIN -#define MINS_PER_HOUR MINSPERHOUR -#define HOURS_PER_DAY HOURSPERDAY -#define DAYS_PER_WEEK DAYSPERWEEK -#define DAYS_PER_NYEAR DAYSPERNYEAR -#define DAYS_PER_LYEAR DAYSPERLYEAR -#define SECS_PER_HOUR SECSPERHOUR -#define SECS_PER_DAY SECSPERDAY -#define MONS_PER_YEAR MONSPERYEAR - -#ifdef __cplusplus -} -#endif - -#endif /* _LIBSPL_SYS_TZFILE_H */ diff --git a/lib/libspl/include/sys/va_list.h b/lib/libspl/include/sys/va_list.h deleted file mode 100644 index a36f5c77daa..00000000000 --- a/lib/libspl/include/sys/va_list.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SYS_VA_LIST_H -#define _SYS_VA_LIST_H - -#include - -#endif diff --git a/lib/libspl/include/sys/varargs.h b/lib/libspl/include/sys/varargs.h deleted file mode 100644 index 3d00a3361d8..00000000000 --- a/lib/libspl/include/sys/varargs.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LIBSPL_SYS_VARARGS_H -#define _LIBSPL_SYS_VARARGS_H - -#endif diff --git a/lib/libspl/include/thread.h b/lib/libspl/include/thread.h deleted file mode 100644 index 74694e23eed..00000000000 --- a/lib/libspl/include/thread.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _LIBSPL_THREAD_H -#define _LIBSPL_THREAD_H - -#endif /* _LIBSPL_THREAD_H */ diff --git a/lib/libspl/include/tzfile.h b/lib/libspl/include/tzfile.h deleted file mode 100644 index 7bd4087cd5d..00000000000 --- a/lib/libspl/include/tzfile.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _LIBSPL_TZFILE_H -#define _LIBSPL_TZFILE_H - -#include - -#endif /* _LIBSPL_TZFILE_H */ diff --git a/lib/libspl/include/ucred.h b/lib/libspl/include/ucred.h deleted file mode 100644 index 8178fdec4c7..00000000000 --- a/lib/libspl/include/ucred.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _LIBSPL_UCRED_H -#define _LIBSPL_UCRED_H - -typedef int ucred_t; - -#endif diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 50053e35f0d..1f6addcae58 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -49,7 +49,6 @@ #include #include #include -#include #ifdef HAVE_IDMAP #include #include diff --git a/lib/libzfs/libzfs_diff.c b/lib/libzfs/libzfs_diff.c index e8b8a1dfc39..a414024bb42 100644 --- a/lib/libzfs/libzfs_diff.c +++ b/lib/libzfs/libzfs_diff.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include From 89e81bc6adf58ec442ddaa23e629bf888bc55488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 10 May 2022 23:28:02 +0200 Subject: [PATCH 07/41] Remove final K&R definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang trunk now warns -Wstrict-prototypes on this, and they're removed in C2x Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13447 --- cmd/zed/agents/zfs_mod.c | 4 ++-- cmd/zed/zed_disk_event.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 59d8182c0b2..17e47873ebb 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -1073,7 +1073,7 @@ zfs_enum_pools(void *arg) * For now, each agent has its own libzfs instance */ int -zfs_slm_init() +zfs_slm_init(void) { if ((g_zfshdl = libzfs_init()) == NULL) return (-1); @@ -1099,7 +1099,7 @@ zfs_slm_init() } void -zfs_slm_fini() +zfs_slm_fini(void) { unavailpool_t *pool; pendingdev_t *device; diff --git a/cmd/zed/zed_disk_event.c b/cmd/zed/zed_disk_event.c index 52b80d8c4c9..c4ca5452354 100644 --- a/cmd/zed/zed_disk_event.c +++ b/cmd/zed/zed_disk_event.c @@ -362,7 +362,7 @@ zed_udev_monitor(void *arg) } int -zed_disk_event_init() +zed_disk_event_init(void) { int fd, fflags; @@ -398,7 +398,7 @@ zed_disk_event_init() } void 
-zed_disk_event_fini() +zed_disk_event_fini(void) { /* cancel monitor thread at recvmsg() */ (void) pthread_cancel(g_mon_tid); @@ -416,13 +416,13 @@ zed_disk_event_fini() #include "zed_disk_event.h" int -zed_disk_event_init() +zed_disk_event_init(void) { return (0); } void -zed_disk_event_fini() +zed_disk_event_fini(void) { } From 38f4d99f769efbb472ab3c35558700163e4f023c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 14 Apr 2022 23:30:41 +0200 Subject: [PATCH 08/41] linux: libzfs: simplify module-loaded check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The short-path is now one access() call, we always modprobe zfs (ZFS_MODULE_LOADING which doesn't use the libzfs boolean parsing is gone), and we use a simple inotify IN_CREATE loop with a timerfd timeout rather than 10ms kernel-style polling There's one substantial difference: ZFS_MODULE_TIMEOUT=-1 now means "never give up", rather than "wait 10 minutes" Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13330 --- include/sys/fs/zfs.h | 1 + lib/libzfs/os/linux/libzfs_util_os.c | 163 +++++++++++++-------------- man/man8/zfs.8 | 20 +++- man/man8/zpool.8 | 14 +++ 4 files changed, 109 insertions(+), 89 deletions(-) diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index f9fd5cf676e..9cd1e32cd05 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1292,6 +1292,7 @@ typedef struct ddt_histogram { #define ZVOL_DRIVER "zvol" #define ZFS_DRIVER "zfs" #define ZFS_DEV "/dev/zfs" +#define ZFS_DEVDIR "/dev" #define ZFS_SUPER_MAGIC 0x2fc12fc1 diff --git a/lib/libzfs/os/linux/libzfs_util_os.c b/lib/libzfs/os/linux/libzfs_util_os.c index a32aa1513b3..c1443fda669 100644 --- a/lib/libzfs/os/linux/libzfs_util_os.c +++ b/lib/libzfs/os/linux/libzfs_util_os.c @@ -20,20 +20,24 @@ */ +#include #include #include #include +#include +#include #include #include #include #include -#include -#include -#include -#include +#include 
#include +#include +#include +#include #include #include +#include #include #include @@ -57,7 +61,7 @@ libzfs_error_init(int error) switch (error) { case ENXIO: return (dgettext(TEXT_DOMAIN, "The ZFS modules are not " - "loaded.\nTry running '/sbin/modprobe zfs' as root " + "loaded.\nTry running 'modprobe zfs' as root " "to load them.")); case ENOENT: return (dgettext(TEXT_DOMAIN, "/dev/zfs and /proc/self/mounts " @@ -65,7 +69,7 @@ libzfs_error_init(int error) "-t proc proc /proc' as root.")); case ENOEXEC: return (dgettext(TEXT_DOMAIN, "The ZFS modules cannot be " - "auto-loaded.\nTry running '/sbin/modprobe zfs' as " + "auto-loaded.\nTry running 'modprobe zfs' as " "root to manually load them.")); case EACCES: return (dgettext(TEXT_DOMAIN, "Permission denied the " @@ -76,93 +80,80 @@ libzfs_error_init(int error) } } -static int -libzfs_module_loaded(const char *module) -{ - const char path_prefix[] = "/sys/module/"; - char path[256]; - - memcpy(path, path_prefix, sizeof (path_prefix) - 1); - strcpy(path + sizeof (path_prefix) - 1, module); - - return (access(path, F_OK) == 0); -} - /* - * Verify the required ZFS_DEV device is available and optionally attempt - * to load the ZFS modules. Under normal circumstances the modules - * should already have been loaded by some external mechanism. + * zfs(4) is loaded by udev if there's a fstype=zfs device present, + * but if there isn't, load them automatically; + * always wait for ZFS_DEV to appear via udev. * * Environment variables: - * - ZFS_MODULE_LOADING="YES|yes|ON|on" - Attempt to load modules. - * - ZFS_MODULE_TIMEOUT="" - Seconds to wait for ZFS_DEV + * - ZFS_MODULE_TIMEOUT="" - Seconds to wait for ZFS_DEV, + * defaults to 10, max. 10 min. 
*/ -static int -libzfs_load_module_impl(const char *module) -{ - char *argv[4] = {"/sbin/modprobe", "-q", (char *)module, (char *)0}; - char *load_str, *timeout_str; - long timeout = 10; /* seconds */ - long busy_timeout = 10; /* milliseconds */ - int load = 0, fd; - hrtime_t start; - - /* Optionally request module loading */ - if (!libzfs_module_loaded(module)) { - load_str = getenv("ZFS_MODULE_LOADING"); - if (load_str) { - if (!strncasecmp(load_str, "YES", strlen("YES")) || - !strncasecmp(load_str, "ON", strlen("ON"))) - load = 1; - else - load = 0; - } - - if (load) { - if (libzfs_run_process("/sbin/modprobe", argv, 0)) - return (ENOEXEC); - } - - if (!libzfs_module_loaded(module)) - return (ENXIO); - } - - /* - * Device creation by udev is asynchronous and waiting may be - * required. Busy wait for 10ms and then fall back to polling every - * 10ms for the allowed timeout (default 10s, max 10m). This is - * done to optimize for the common case where the device is - * immediately available and to avoid penalizing the possible - * case where udev is slow or unable to create the device. 
- */ - timeout_str = getenv("ZFS_MODULE_TIMEOUT"); - if (timeout_str) { - timeout = strtol(timeout_str, NULL, 0); - timeout = MAX(MIN(timeout, (10 * 60)), 0); /* 0 <= N <= 600 */ - } - - start = gethrtime(); - do { - fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC); - if (fd >= 0) { - (void) close(fd); - return (0); - } else if (errno != ENOENT) { - return (errno); - } else if (NSEC2MSEC(gethrtime() - start) < busy_timeout) { - sched_yield(); - } else { - usleep(10 * MILLISEC); - } - } while (NSEC2MSEC(gethrtime() - start) < (timeout * MILLISEC)); - - return (ENOENT); -} - int libzfs_load_module(void) { - return (libzfs_load_module_impl(ZFS_DRIVER)); + if (access(ZFS_DEV, F_OK) == 0) + return (0); + + if (access(ZFS_SYSFS_DIR, F_OK) != 0) { + char *argv[] = {"modprobe", "zfs", NULL}; + if (libzfs_run_process("modprobe", argv, 0)) + return (ENOEXEC); + + if (access(ZFS_SYSFS_DIR, F_OK) != 0) + return (ENXIO); + } + + const char *timeout_str = getenv("ZFS_MODULE_TIMEOUT"); + int seconds = 10; + if (timeout_str) + seconds = MIN(strtol(timeout_str, NULL, 0), 600); + struct itimerspec timeout = {.it_value.tv_sec = MAX(seconds, 0)}; + + int ino = inotify_init1(IN_CLOEXEC); + if (ino == -1) + return (ENOENT); + inotify_add_watch(ino, ZFS_DEVDIR, IN_CREATE); + + if (access(ZFS_DEV, F_OK) == 0) { + close(ino); + return (0); + } else if (seconds == 0) { + close(ino); + return (ENOENT); + } + + size_t evsz = sizeof (struct inotify_event) + NAME_MAX + 1; + struct inotify_event *ev = alloca(evsz); + + int tout = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC); + if (tout == -1) { + close(ino); + return (ENOENT); + } + timerfd_settime(tout, 0, &timeout, NULL); + + int ret = ENOENT; + struct pollfd pfds[] = { + {.fd = ino, .events = POLLIN}, + {.fd = tout, .events = POLLIN}, + }; + while (poll(pfds, ARRAY_SIZE(pfds), -1) != -1) { + if (pfds[0].revents & POLLIN) { + verify(read(ino, ev, evsz) > + sizeof (struct inotify_event)); + if (strcmp(ev->name, &ZFS_DEV[sizeof (ZFS_DEVDIR)]) + == 0) { + 
ret = 0; + break; + } + } + if (pfds[1].revents & POLLIN) + break; + } + close(tout); + close(ino); + return (ret); } int diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index cac4327e072..eaa93e0ea75 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -736,7 +736,7 @@ Do note that any changes done with the command will be undone if the share is ever unshared (like via a reboot). . .Sh ENVIRONMENT VARIABLES -.Bl -tag -width "ZFS_MOUNT_HELPER" +.Bl -tag -width "ZFS_MODULE_TIMEOUT" .It Sy ZFS_MOUNT_HELPER Cause .Nm zfs Cm mount @@ -744,14 +744,28 @@ to use .Xr mount 8 to mount ZFS datasets. This option is provided for backwards compatibility with older ZFS versions. -.El -.Bl -tag -width "ZFS_SET_PIPE_MAX" +. .It Sy ZFS_SET_PIPE_MAX Tells .Nm zfs to set the maximum pipe size for sends/recieves. Disabled by default on Linux due to an unfixed deadlock in Linux's pipe size handling code. +. +.\" Shared with zpool.8 +.It Sy ZFS_MODULE_TIMEOUT +Time, in seconds, to wait for +.Pa /dev/zfs +to appear. +Defaults to +.Sy 10 , +max +.Sy 600 Pq 10 minutes . +If +.Pf < Sy 0 , +wait forever; if +.Sy 0 , +don't wait. .El . .Sh INTERFACE STABILITY diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index 34c816bf034..075e31bbeb7 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -524,6 +524,20 @@ If .Sy ZPOOL_SCRIPTS_ENABLED is not set, it is assumed that the user is allowed to run .Nm zpool Cm status Ns / Ns Cm iostat Fl c . +.\" Shared with zfs.8 +.It Sy ZFS_MODULE_TIMEOUT +Time, in seconds, to wait for +.Pa /dev/zfs +to appear. +Defaults to +.Sy 10 , +max +.Sy 600 Pq 10 minutes . +If +.Pf < Sy 0 , +wait forever; if +.Sy 0 , +don't wait. .El . 
.Sh INTERFACE STABILITY From 2b4f2fc93c4298415c5bc70554514c3e8892e5d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 15 Apr 2022 00:00:02 +0200 Subject: [PATCH 09/41] libzfs: return (allocated) strings instead of filling buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also expands the zfs version output from 127 characters to However Many Are Actually Set Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13330 --- cmd/zfs/zfs_main.c | 6 +- cmd/zpool/zpool_main.c | 6 +- include/libzfs.h | 4 +- lib/libzfs/libzfs.abi | 2764 ++++++++++++------------- lib/libzfs/libzfs_util.c | 25 +- lib/libzfs/os/freebsd/libzfs_compat.c | 24 +- lib/libzfs/os/linux/libzfs_util_os.c | 40 +- 7 files changed, 1427 insertions(+), 1442 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 69f5bdf4d49..6282d894638 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -8577,11 +8577,7 @@ static int zfs_do_version(int argc, char **argv) { (void) argc, (void) argv; - - if (zfs_version_print() == -1) - return (1); - - return (0); + return (zfs_version_print() != 0); } int diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index ac4a0b23ada..598d8b1bbb6 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -10818,11 +10818,7 @@ static int zpool_do_version(int argc, char **argv) { (void) argc, (void) argv; - - if (zfs_version_print() == -1) - return (1); - - return (0); + return (zfs_version_print() != 0); } /* diff --git a/include/libzfs.h b/include/libzfs.h index 46cb6052016..2c2aa3faf14 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -914,8 +914,8 @@ _LIBZFS_H int libzfs_envvar_is_set(char *); /* * Utility functions for zfs version */ -_LIBZFS_H void zfs_version_userland(char *, int); -_LIBZFS_H int zfs_version_kernel(char *, int); +_LIBZFS_H const char *zfs_version_userland(void); +_LIBZFS_H char *zfs_version_kernel(void); _LIBZFS_H int 
zfs_version_print(void); /* diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 1c7695275f5..8a71da95148 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -590,110 +590,7 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + @@ -835,7 +732,7 @@ - + @@ -877,7 +774,7 @@ - + @@ -936,10 +833,10 @@ - + - + @@ -953,7 +850,7 @@ - + @@ -1291,12 +1188,12 @@ - + - + @@ -1407,7 +1304,7 @@ - + @@ -1415,12 +1312,12 @@ - + - + @@ -1639,18 +1536,18 @@ - + - + - + @@ -1658,7 +1555,7 @@ - + @@ -1666,13 +1563,13 @@ - + - + @@ -1684,6 +1581,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1758,75 +1724,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1878,1074 +1775,10 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + @@ -3079,6 +1912,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -3205,110 +2142,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -3362,6 +2195,11 @@ + + + + + @@ -3373,7 +2211,7 @@ - + @@ -3422,7 +2260,7 @@ - + @@ -4001,7 +2839,7 @@ - + @@ -4011,7 +2849,7 @@ - + @@ -4065,7 +2903,7 @@ - + @@ -4122,7 +2960,7 @@ - + @@ -4232,7 +3070,7 @@ - + @@ -4861,7 +3699,7 @@ - + @@ -5023,7 +3861,7 @@ - + @@ -5084,7 +3922,7 @@ - + @@ -5263,9 +4101,7 @@ - - - + @@ -5289,7 +4125,7 @@ - + @@ 
-5320,7 +4156,7 @@ - + @@ -5328,7 +4164,7 @@ - + @@ -5576,12 +4412,10 @@ - - - + - + @@ -5616,7 +4450,7 @@ - + @@ -5652,7 +4486,7 @@ - + @@ -5676,7 +4510,7 @@ - + @@ -5727,7 +4561,7 @@ - + @@ -5772,7 +4606,7 @@ - + @@ -5828,4 +4662,1166 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index c47a5782c8f..1c067e21480 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -1910,37 +1910,30 @@ zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, return (zprop_iter_common(func, cb, show_all, ordered, type)); } -/* - * Fill given version buffer with zfs userland version - */ -void -zfs_version_userland(char *version, int len) +const char * +zfs_version_userland(void) { - (void) strlcpy(version, ZFS_META_ALIAS, len); + return (ZFS_META_ALIAS); } /* * Prints both zfs userland and kernel versions - * Returns 0 on success, and -1 on error (with errno set) + * Returns 0 on success, and -1 on error */ int zfs_version_print(void) { - char zver_userland[128]; - char zver_kernel[128]; + (void) puts(ZFS_META_ALIAS); - zfs_version_userland(zver_userland, sizeof (zver_userland)); - - (void) printf("%s\n", zver_userland); - - if (zfs_version_kernel(zver_kernel, sizeof (zver_kernel)) == -1) { + char *kver = zfs_version_kernel(); + if (kver == NULL) { fprintf(stderr, "zfs_version_kernel() failed: %s\n", strerror(errno)); return (-1); } - (void) printf("zfs-kmod-%s\n", zver_kernel); - + (void) printf("zfs-kmod-%s\n", kver); + free(kver); return (0); } diff --git a/lib/libzfs/os/freebsd/libzfs_compat.c b/lib/libzfs/os/freebsd/libzfs_compat.c index d2bc6ddfc23..289c6703c2d 100644 --- 
a/lib/libzfs/os/freebsd/libzfs_compat.c +++ b/lib/libzfs/os/freebsd/libzfs_compat.c @@ -351,14 +351,22 @@ zpool_nextboot(libzfs_handle_t *hdl, uint64_t pool_guid, uint64_t dev_guid, } /* - * Fill given version buffer with zfs kernel version. - * Returns 0 on success, and -1 on error (with errno set) + * Return allocated loaded module version, or NULL on error (with errno set) */ -int -zfs_version_kernel(char *version, int len) +char * +zfs_version_kernel(void) { - size_t l = len; - - return (sysctlbyname("vfs.zfs.version.module", - version, &l, NULL, 0)); + size_t l; + if (sysctlbyname("vfs.zfs.version.module", + NULL, &l, NULL, 0) == -1) + return (NULL); + char *version = malloc(l); + if (version == NULL) + return (NULL); + if (sysctlbyname("vfs.zfs.version.module", + version, &l, NULL, 0) == -1) { + free(version); + return (NULL); + } + return (version); } diff --git a/lib/libzfs/os/linux/libzfs_util_os.c b/lib/libzfs/os/linux/libzfs_util_os.c index c1443fda669..9d6f574a554 100644 --- a/lib/libzfs/os/linux/libzfs_util_os.c +++ b/lib/libzfs/os/linux/libzfs_util_os.c @@ -183,31 +183,27 @@ zfs_destroy_snaps_nvl_os(libzfs_handle_t *hdl, nvlist_t *snaps) } /* - * Fill given version buffer with zfs kernel version read from ZFS_SYSFS_DIR - * Returns 0 on success, and -1 on error (with errno set) + * Return allocated loaded module version, or NULL on error (with errno set) */ -int -zfs_version_kernel(char *version, int len) +char * +zfs_version_kernel(void) { - int _errno; - int fd; - int rlen; + FILE *f = fopen(ZFS_SYSFS_DIR "/version", "re"); + if (f == NULL) + return (NULL); - if ((fd = open(ZFS_SYSFS_DIR "/version", O_RDONLY | O_CLOEXEC)) == -1) - return (-1); - - if ((rlen = read(fd, version, len)) == -1) { - version[0] = '\0'; - _errno = errno; - (void) close(fd); - errno = _errno; - return (-1); + char *ret = NULL; + size_t l; + ssize_t read; + if ((read = getline(&ret, &l, f)) == -1) { + int err = errno; + fclose(f); + errno = err; + return (NULL); } - 
version[rlen-1] = '\0'; /* discard '\n' */ - - if (close(fd) == -1) - return (-1); - - return (0); + fclose(f); + if (ret[read - 1] == '\n') + ret[read - 1] = '\0'; + return (ret); } From e02d84d3a5158a942c3f09d11c973e5aabb6fa40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 19 May 2022 00:56:38 +0200 Subject: [PATCH 10/41] linux: libshare: smb: don't swallow net(1) errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13191 Closes #13470 --- lib/libshare/os/linux/smb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/libshare/os/linux/smb.c b/lib/libshare/os/linux/smb.c index 2a2b32bc10e..57965ebfaad 100644 --- a/lib/libshare/os/linux/smb.c +++ b/lib/libshare/os/linux/smb.c @@ -248,7 +248,7 @@ smb_enable_share_one(const char *sharename, const char *sharepath) NULL, }; - if (libzfs_run_process(argv[0], argv, 0) < 0) + if (libzfs_run_process(argv[0], argv, 0) != 0) return (SA_SYSTEM_ERR); /* Reload the share file */ @@ -297,7 +297,7 @@ smb_disable_share_one(const char *sharename) NULL, }; - if (libzfs_run_process(argv[0], argv, 0) < 0) + if (libzfs_run_process(argv[0], argv, 0) != 0) return (SA_SYSTEM_ERR); else return (SA_OK); From 03df6bad94504b80929b332a3e5fbfbc8bdae04b Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Fri, 20 May 2022 13:32:49 -0400 Subject: [PATCH 11/41] zdb: Fix handling of nul termination in symlink targets The SA attribute containing the symlink target does not include a nul terminator, so when printing the target zdb would sometimes include garbage at the end of the string. 
Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Mark Johnston Closes #13482 --- cmd/zdb/zdb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 3d4d956e569..ce95759dc70 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -3194,13 +3194,18 @@ dump_znode_symlink(sa_handle_t *hdl) { int sa_symlink_size = 0; char linktarget[MAXPATHLEN]; - linktarget[0] = '\0'; int error; error = sa_size(hdl, sa_attr_table[ZPL_SYMLINK], &sa_symlink_size); if (error || sa_symlink_size == 0) { return; } + if (sa_symlink_size >= sizeof (linktarget)) { + (void) printf("symlink size %d is too large\n", + sa_symlink_size); + return; + } + linktarget[sa_symlink_size] = '\0'; if (sa_lookup(hdl, sa_attr_table[ZPL_SYMLINK], &linktarget, sa_symlink_size) == 0) (void) printf("\ttarget %s\n", linktarget); From 2cd0f98f4aae7110a48cb0623e1e3d66b9f58785 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 20 May 2022 10:36:14 -0700 Subject: [PATCH 12/41] Verify BPs in spa_load_verify_cb() and dsl_scan_visitbp() We want `zpool import` to be highly robust and never panic, even when encountering corrupt metadata. This is already handled in the arc_read() code path, which covers most cases, but spa_load_verify_cb() relies on zio_read() and is responsible for verifying the block pointer. During import it is also possible to encounter block pointers which contain ZIO_COMPRESS_INHERIT and ZIO_CHECKSUM_INHERIT values. Relax the verification function slightly to allow this. Furthermore, extend dsl_scan_recurse() to verify the block pointer contents of level zero blocks which are not of type DMU_OT_DNODE or DMU_OT_OBJSET. This is handled by arc_read() in the other cases.
Reviewed-by: Paul Dagnelie Signed-off-by: Brian Behlendorf Closes #13124 Closes #13360 --- module/zfs/dsl_scan.c | 30 +++++++++++++----------------- module/zfs/spa.c | 19 ++++++++++++++++--- module/zfs/zio.c | 6 ++---- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index dedf9c8a669..89448f0ecea 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1824,6 +1824,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, const zbookmark_phys_t *zb, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; + spa_t *spa = dp->dp_spa; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; int err; @@ -1838,7 +1839,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, if (dnp != NULL && dnp->dn_bonuslen > DN_MAX_BONUS_LEN(dnp)) { scn->scn_phys.scn_errors++; - spa_log_error(dp->dp_spa, zb); + spa_log_error(spa, zb); return (SET_ERROR(EINVAL)); } @@ -1849,7 +1850,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; arc_buf_t *buf; - err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, + err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; @@ -1877,7 +1878,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, zio_flags |= ZIO_FLAG_RAW; } - err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, + err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { scn->scn_phys.scn_errors++; @@ -1896,7 +1897,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, objset_phys_t *osp; arc_buf_t *buf; - err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, + err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { 
scn->scn_phys.scn_errors++; @@ -1927,6 +1928,14 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, DMU_USERUSED_OBJECT, tx); } arc_buf_destroy(buf, &buf); + } else if (!zfs_blkptr_verify(spa, bp, B_FALSE, BLK_VERIFY_LOG)) { + /* + * Sanity check the block pointer contents, this is handled + * by arc_read() for the cases above. + */ + scn->scn_phys.scn_errors++; + spa_log_error(spa, zb); + return (SET_ERROR(EINVAL)); } return (0); @@ -1977,19 +1986,6 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, scn->scn_visited_this_txg++; - /* - * This debugging is commented out to conserve stack space. This - * function is called recursively and the debugging adds several - * bytes to the stack for each call. It can be commented back in - * if required to debug an issue in dsl_scan_visitbp(). - * - * dprintf_bp(bp, - * "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx bp=%p", - * ds, ds ? ds->ds_object : 0, - * zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid, - * bp); - */ - if (BP_IS_HOLE(bp)) { scn->scn_holes_this_txg++; return; diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 01114dedef4..baa5fc24761 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -2310,9 +2310,6 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, (void) zilog, (void) dnp; - if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) || - BP_IS_EMBEDDED(bp) || BP_IS_REDACTED(bp)) - return (0); /* * Note: normally this routine will not be called if * spa_load_verify_metadata is not set. However, it may be useful @@ -2320,6 +2317,22 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, */ if (!spa_load_verify_metadata) return (0); + + /* + * Sanity check the block pointer in order to detect obvious damage + * before using the contents in subsequent checks or in zio_read(). + * When damaged consider it to be a metadata error since we cannot + * trust the BP_GET_TYPE and BP_GET_LEVEL values. 
+ */ + if (!zfs_blkptr_verify(spa, bp, B_FALSE, BLK_VERIFY_LOG)) { + atomic_inc_64(&sle->sle_meta_count); + return (0); + } + + if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) || + BP_IS_EMBEDDED(bp) || BP_IS_REDACTED(bp)) + return (0); + if (!BP_IS_METADATA(bp) && (!spa_load_verify_data || !sle->sle_verify_data)) return (0); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 2a16d5cef2e..ae99f1e6450 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -962,14 +962,12 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held, "blkptr at %p has invalid TYPE %llu", bp, (longlong_t)BP_GET_TYPE(bp)); } - if (BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS || - BP_GET_CHECKSUM(bp) <= ZIO_CHECKSUM_ON) { + if (BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %p has invalid CHECKSUM %llu", bp, (longlong_t)BP_GET_CHECKSUM(bp)); } - if (BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_FUNCTIONS || - BP_GET_COMPRESS(bp) <= ZIO_COMPRESS_ON) { + if (BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_FUNCTIONS) { errors += zfs_blkptr_verify_log(spa, bp, blk_verify, "blkptr at %p has invalid COMPRESS %llu", bp, (longlong_t)BP_GET_COMPRESS(bp)); From cf70c0f8ae01c24699767b5ecfbe1882c009d53a Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 24 May 2022 09:36:07 -0700 Subject: [PATCH 13/41] zed: Take no action on scrub/resilver checksum errors When scrubbing/resilvering a pool it can be counterproductive to cancel the scan and kick off a replace operation to a hot spare when encountering checksum errors. In this case, the best course of action is to allow the scrub/resilver to complete as quickly as possible and to keep the vdevs fully online if possible. Realistically, this is less of an issue for a RAIDZ since a traditional resilver must be used and checksums will be verified.
However, this is not the case for a mirror or dRAID pool which is sequentially resilvered and checksum verification is deferred until after the replace operation completes. Regardless, we apply this policy to all pool types since it's a good idea for all vdevs. Degrading additional vdevs has the potential to make a bad situation worse. Note the checksum errors will still be reported as both an event and by `zpool status`. This change only prevents the ZED from proactively taking any action. Reviewed-by: Tony Hutter Reviewed-by: Tony Nguyen Signed-off-by: Brian Behlendorf Closes #13499 --- cmd/zed/agents/zfs_diagnosis.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/cmd/zed/agents/zfs_diagnosis.c b/cmd/zed/agents/zfs_diagnosis.c index 4fe9ee2bc5b..813916d2e82 100644 --- a/cmd/zed/agents/zfs_diagnosis.c +++ b/cmd/zed/agents/zfs_diagnosis.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "zfs_agents.h" #include "fmd_api.h" @@ -770,6 +771,8 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) { char *failmode = NULL; boolean_t checkremove = B_FALSE; + uint32_t pri = 0; + int32_t flags = 0; /* * If this is a checksum or I/O error, then toss it into the @@ -792,6 +795,23 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) checkremove = B_TRUE; } else if (fmd_nvl_class_match(hdl, nvl, ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) { + /* + * We ignore ereports for checksum errors generated by + * scrub/resilver I/O to avoid potentially further + * degrading the pool while it's being repaired. 
+ */ + if (((nvlist_lookup_uint32(nvl, + FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY, &pri) == 0) && + (pri == ZIO_PRIORITY_SCRUB || + pri == ZIO_PRIORITY_REBUILD)) || + ((nvlist_lookup_int32(nvl, + FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, &flags) == 0) && + (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)))) { + fmd_hdl_debug(hdl, "ignoring '%s' for " + "scrub/resilver I/O", class); + return; + } + if (zcp->zc_data.zc_serd_checksum[0] == '\0') { zfs_serd_name(zcp->zc_data.zc_serd_checksum, pool_guid, vdev_guid, "checksum"); From 3c356622994f1837f42a0d4bcd6558a3b3749521 Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Tue, 24 May 2022 12:39:32 -0400 Subject: [PATCH 14/41] Modified ncompress requirement in RPM to exclude RHEL9 The bug this was working around is no longer present. Reviewed-by: Brian Behlendorf Signed-off-by: Rich Ercolani Closes #13480 Closes #13490 --- rpm/generic/zfs-dkms.spec.in | 2 +- rpm/generic/zfs-kmod.spec.in | 2 +- rpm/generic/zfs.spec.in | 7 +++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/rpm/generic/zfs-dkms.spec.in b/rpm/generic/zfs-dkms.spec.in index cd0a3ded2a0..d0cf38caa19 100644 --- a/rpm/generic/zfs-dkms.spec.in +++ b/rpm/generic/zfs-dkms.spec.in @@ -36,7 +36,7 @@ Obsoletes: spl-dkms Provides: %{module}-kmod = %{version} AutoReqProv: no -%if 0%{?rhel}%{?fedora}%{?suse_version} +%if (0%{?fedora}%{?suse_version}) || (0 < 0%{?rhel} && 0%{?rhel} < 9) # We don't directly use it, but if this isn't installed, rpmbuild as root can # crash+corrupt rpmdb # See issue #12071 diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in index 79fe2753c1e..c25d968d1c9 100644 --- a/rpm/generic/zfs-kmod.spec.in +++ b/rpm/generic/zfs-kmod.spec.in @@ -57,7 +57,7 @@ BuildRequires: gcc, make BuildRequires: elfutils-libelf-devel %endif -%if 0%{?rhel}%{?fedora}%{?suse_version} +%if (0%{?fedora}%{?suse_version}) || (0 < 0%{?rhel} && 0%{?rhel} < 9) # We don't directly use it, but if this isn't 
installed, rpmbuild as root can # crash+corrupt rpmdb # See issue #12071 diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 0a29601639c..8cf702c70e6 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -110,12 +110,15 @@ BuildRequires: libblkid-devel BuildRequires: libudev-devel BuildRequires: libattr-devel BuildRequires: openssl-devel +%if 0%{?fedora} || 0%{?rhel} >= 8 || 0%{?centos} >= 8 +BuildRequires: libtirpc-devel +%endif + +%if (0%{?fedora}%{?suse_version}) || (0 < 0%{?rhel} && 0%{?rhel} < 9) # We don't directly use it, but if this isn't installed, rpmbuild as root can # crash+corrupt rpmdb # See issue #12071 BuildRequires: ncompress -%if 0%{?fedora} || 0%{?rhel} >= 8 || 0%{?centos} >= 8 -BuildRequires: libtirpc-devel %endif Requires: openssl From 2e05765006913b0c381fdbbbf0370b35c0e61be4 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Tue, 24 May 2022 12:40:20 -0400 Subject: [PATCH 15/41] FreeBSD: libspl: Add locking around statfs globals Makes getmntent and getmntany thread-safe for external consumers of libzfs zpool_disable_datasets, zfs_iter_mounted, libzfs_mnttab_update, libzfs_mnttab_find. 
Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #13484 --- lib/libspl/os/freebsd/mnttab.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/libspl/os/freebsd/mnttab.c b/lib/libspl/os/freebsd/mnttab.c index d830257fbd1..a240ca70ba8 100644 --- a/lib/libspl/os/freebsd/mnttab.c +++ b/lib/libspl/os/freebsd/mnttab.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -136,6 +137,7 @@ statfs2mnttab(struct statfs *sfs, struct mnttab *mp) mp->mnt_mntopts = gmntopts; } +static pthread_rwlock_t gsfs_lock = PTHREAD_RWLOCK_INITIALIZER; static struct statfs *gsfs = NULL; static int allfs = 0; @@ -145,6 +147,8 @@ statfs_init(void) struct statfs *sfs; int error; + (void) pthread_rwlock_wrlock(&gsfs_lock); + if (gsfs != NULL) { free(gsfs); gsfs = NULL; @@ -162,6 +166,7 @@ statfs_init(void) sfs = realloc(gsfs, allfs * sizeof (gsfs[0])); if (sfs != NULL) gsfs = sfs; + (void) pthread_rwlock_unlock(&gsfs_lock); return (0); fail: error = errno; @@ -169,6 +174,7 @@ fail: free(gsfs); gsfs = NULL; allfs = 0; + (void) pthread_rwlock_unlock(&gsfs_lock); return (error); } @@ -181,6 +187,8 @@ getmntany(FILE *fd __unused, struct mnttab *mgetp, struct mnttab *mrefp) if (error != 0) return (error); + (void) pthread_rwlock_rdlock(&gsfs_lock); + for (i = 0; i < allfs; i++) { if (mrefp->mnt_special != NULL && strcmp(mrefp->mnt_special, gsfs[i].f_mntfromname) != 0) { @@ -195,8 +203,10 @@ getmntany(FILE *fd __unused, struct mnttab *mgetp, struct mnttab *mrefp) continue; } statfs2mnttab(&gsfs[i], mgetp); + (void) pthread_rwlock_unlock(&gsfs_lock); return (0); } + (void) pthread_rwlock_unlock(&gsfs_lock); return (-1); } @@ -214,9 +224,13 @@ getmntent(FILE *fp, struct mnttab *mp) if (error != 0) return (error); } - if (nfs >= allfs) + (void) pthread_rwlock_rdlock(&gsfs_lock); + if (nfs >= allfs) { + (void) pthread_rwlock_unlock(&gsfs_lock); return (-1); + } 
statfs2mnttab(&gsfs[nfs], mp); + (void) pthread_rwlock_unlock(&gsfs_lock); if (lseek(fileno(fp), 1, SEEK_CUR) == -1) return (errno); return (0); From f375b23c026aec00cc9527470084191b5071d9b2 Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Tue, 24 May 2022 12:43:22 -0400 Subject: [PATCH 16/41] Tiered early abort, zstd edition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that "do LZ4 and zstd-1 both fail" is a great heuristic for "don't even bother trying higher zstd tiers". By way of illustration: $ cat /incompress | mbuffer | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_normal summary: 39.8 GiByte in 3min 40.2sec - average of 185 MiB/s $ echo 3 | sudo tee /sys/module/zzstd/parameters/zstd_lz4_pass 3 $ cat /incompress | mbuffer -m 4G | zfs recv -o compression=zstd-12 evenfaster/lowcomp_1M_zstd12_patched summary: 39.8 GiByte in 48.6sec - average of 839 MiB/s $ sudo zfs list -p -o name,used,lused,ratio evenfaster/lowcomp_1M_zstd12_normal evenfaster/lowcomp_1M_zstd12_patched NAME USED LUSED RATIO evenfaster/lowcomp_1M_zstd12_normal 39549931520 42721221632 1.08 evenfaster/lowcomp_1M_zstd12_patched 39626399744 42721217536 1.07 $ python3 -c "print(39626399744 - 39549931520)" 76468224 $ I'll take 76 MB out of 42 GB for > 4x speedup. 
Reviewed-by: Allan Jude Reviewed-by: Brian Behlendorf Reviewed-by: George Melikov Reviewed-by: Kjeld Schouten Reviewed-by: Ahelenia Ziemiańska Signed-off-by: Rich Ercolani Closes #13244 --- include/sys/zstd/zstd.h | 4 ++ man/man4/zfs.4 | 8 +++ module/zfs/zio_compress.c | 2 +- module/zstd/zfs_zstd.c | 126 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 134 insertions(+), 6 deletions(-) diff --git a/include/sys/zstd/zstd.h b/include/sys/zstd/zstd.h index ca32a746455..ec2341b7693 100644 --- a/include/sys/zstd/zstd.h +++ b/include/sys/zstd/zstd.h @@ -78,6 +78,8 @@ typedef struct zfs_zstd_meta { * kstat helper macros */ #define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64) +#define ZSTDSTAT_ZERO(stat) \ + (atomic_store_64(&zstd_stats.stat.value.ui64, 0)) #define ZSTDSTAT_ADD(stat, val) \ atomic_add_64(&zstd_stats.stat.value.ui64, (val)) #define ZSTDSTAT_SUB(stat, val) \ @@ -90,6 +92,8 @@ void zstd_fini(void); size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int level); +size_t zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, + size_t d_len, int level); int zfs_zstd_get_level(void *s_start, size_t s_len, uint8_t *level); int zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len, size_t d_len, uint8_t *level); diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 5ef517c46fa..c95fa98c5fd 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -2129,6 +2129,14 @@ However, if there are fewer than metaslabs in the vdev, this functionality is disabled. This ensures that we don't set aside an unreasonable amount of space for the ZIL. . +.It Sy zfs_zstd_earlyabort_pass Ns = Ns Sy 1 Pq int +Whether heuristic for detection of incompressible data with zstd levels >= 3 +using LZ4 and zstd-1 passes is enabled. +. +.It Sy zfs_zstd_abort_size Ns = Ns Sy 131072 Pq int +Minimal uncompressed size (inclusive) of a record before the early abort +heuristic will be attempted. +. 
.It Sy zio_deadman_log_all Ns = Ns Sy 0 Ns | Ns 1 Pq int If non-zero, the zio deadman will produce debugging messages .Pq see Sy zfs_dbgmsg_enable diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c index cded11f4cbd..38020ce220b 100644 --- a/module/zfs/zio_compress.c +++ b/module/zfs/zio_compress.c @@ -66,7 +66,7 @@ zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = { {"gzip-9", 9, gzip_compress, gzip_decompress, NULL}, {"zle", 64, zle_compress, zle_decompress, NULL}, {"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL}, - {"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress, + {"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress_wrap, zfs_zstd_decompress, zfs_zstd_decompress_level}, }; diff --git a/module/zstd/zfs_zstd.c b/module/zstd/zfs_zstd.c index 04e52ae3cec..41351898981 100644 --- a/module/zstd/zfs_zstd.c +++ b/module/zstd/zfs_zstd.c @@ -50,6 +50,10 @@ #include "lib/zstd.h" #include "lib/common/zstd_errors.h" +static int zstd_earlyabort_pass = 1; +static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3; +static unsigned int zstd_abort_size = (128 * 1024); + static kstat_t *zstd_ksp = NULL; typedef struct zstd_stats { @@ -62,6 +66,21 @@ typedef struct zstd_stats { kstat_named_t zstd_stat_dec_header_inval; kstat_named_t zstd_stat_com_fail; kstat_named_t zstd_stat_dec_fail; + /* + * LZ4 first-pass early abort verdict + */ + kstat_named_t zstd_stat_lz4pass_allowed; + kstat_named_t zstd_stat_lz4pass_rejected; + /* + * zstd-1 second-pass early abort verdict + */ + kstat_named_t zstd_stat_zstdpass_allowed; + kstat_named_t zstd_stat_zstdpass_rejected; + /* + * We excluded this from early abort for some reason + */ + kstat_named_t zstd_stat_passignored; + kstat_named_t zstd_stat_passignored_size; kstat_named_t zstd_stat_buffers; kstat_named_t zstd_stat_size; } zstd_stats_t; @@ -76,10 +95,44 @@ static zstd_stats_t zstd_stats = { { "decompress_header_invalid", KSTAT_DATA_UINT64 }, { "compress_failed", KSTAT_DATA_UINT64 }, { "decompress_failed", 
KSTAT_DATA_UINT64 }, + { "lz4pass_allowed", KSTAT_DATA_UINT64 }, + { "lz4pass_rejected", KSTAT_DATA_UINT64 }, + { "zstdpass_allowed", KSTAT_DATA_UINT64 }, + { "zstdpass_rejected", KSTAT_DATA_UINT64 }, + { "passignored", KSTAT_DATA_UINT64 }, + { "passignored_size", KSTAT_DATA_UINT64 }, { "buffers", KSTAT_DATA_UINT64 }, { "size", KSTAT_DATA_UINT64 }, }; +#ifdef _KERNEL +static int +kstat_zstd_update(kstat_t *ksp, int rw) +{ + ASSERT(ksp != NULL); + + if (rw == KSTAT_WRITE && ksp == zstd_ksp) { + ZSTDSTAT_ZERO(zstd_stat_alloc_fail); + ZSTDSTAT_ZERO(zstd_stat_alloc_fallback); + ZSTDSTAT_ZERO(zstd_stat_com_alloc_fail); + ZSTDSTAT_ZERO(zstd_stat_dec_alloc_fail); + ZSTDSTAT_ZERO(zstd_stat_com_inval); + ZSTDSTAT_ZERO(zstd_stat_dec_inval); + ZSTDSTAT_ZERO(zstd_stat_dec_header_inval); + ZSTDSTAT_ZERO(zstd_stat_com_fail); + ZSTDSTAT_ZERO(zstd_stat_dec_fail); + ZSTDSTAT_ZERO(zstd_stat_lz4pass_allowed); + ZSTDSTAT_ZERO(zstd_stat_lz4pass_rejected); + ZSTDSTAT_ZERO(zstd_stat_zstdpass_allowed); + ZSTDSTAT_ZERO(zstd_stat_zstdpass_rejected); + ZSTDSTAT_ZERO(zstd_stat_passignored); + ZSTDSTAT_ZERO(zstd_stat_passignored_size); + } + + return (0); +} +#endif + /* Enums describing the allocator type specified by kmem_type in zstd_kmem */ enum zstd_kmem_type { ZSTD_KMEM_UNKNOWN = 0, @@ -377,6 +430,64 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level) } +size_t +zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, size_t d_len, + int level) +{ + int16_t zstd_level; + if (zstd_enum_to_level(level, &zstd_level)) { + ZSTDSTAT_BUMP(zstd_stat_com_inval); + return (s_len); + } + /* + * A zstd early abort heuristic. + * + * - Zeroth, if this is <= zstd-3, or < zstd_abort_size (currently + * 128k), don't try any of this, just go. 
+ * (because experimentally that was a reasonable cutoff for a perf win + * with tiny ratio change) + * - First, we try LZ4 compression, and if it doesn't early abort, we + * jump directly to whatever compression level we intended to try. + * - Second, we try zstd-1 - if that errors out (usually, but not + * exclusively, if it would overflow), we give up early. + * + * If it works, instead we go on and compress anyway. + * + * Why two passes? LZ4 alone gets you a lot of the way, but on highly + * compressible data, it was losing up to 8.5% of the compressed + * savings versus no early abort, and all the zstd-fast levels are + * worse indications on their own than LZ4, and don't improve the LZ4 + * pass noticably if stacked like this. + */ + size_t actual_abort_size = zstd_abort_size; + if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level && + s_len >= actual_abort_size) { + int pass_len = 1; + pass_len = lz4_compress_zfs(s_start, d_start, s_len, d_len, 0); + if (pass_len < d_len) { + ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed); + goto keep_trying; + } + ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected); + + pass_len = zfs_zstd_compress(s_start, d_start, s_len, d_len, + ZIO_ZSTD_LEVEL_1); + if (pass_len == s_len || pass_len <= 0 || pass_len > d_len) { + ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected); + return (s_len); + } + ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed); + } else { + ZSTDSTAT_BUMP(zstd_stat_passignored); + if (s_len < actual_abort_size) { + ZSTDSTAT_BUMP(zstd_stat_passignored_size); + } + } +keep_trying: + return (zfs_zstd_compress(s_start, d_start, s_len, d_len, level)); + +} + /* Compress block using zstd */ size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, @@ -437,8 +548,10 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, * too small, that is not a failure. Everything else is a * failure, so increment the compression failure counter. 
*/ - if (ZSTD_getErrorCode(c_len) != ZSTD_error_dstSize_tooSmall) { + int err = ZSTD_getErrorCode(c_len); + if (err != ZSTD_error_dstSize_tooSmall) { ZSTDSTAT_BUMP(zstd_stat_com_fail); + dprintf("Error: %s", ZSTD_getErrorString(err)); } return (s_len); } @@ -753,6 +866,9 @@ zstd_init(void) if (zstd_ksp != NULL) { zstd_ksp->ks_data = &zstd_stats; kstat_install(zstd_ksp); +#ifdef _KERNEL + zstd_ksp->ks_update = kstat_zstd_update; +#endif } return (0); @@ -781,8 +897,8 @@ module_init(zstd_init); module_exit(zstd_fini); #endif -EXPORT_SYMBOL(zfs_zstd_compress); -EXPORT_SYMBOL(zfs_zstd_decompress_level); -EXPORT_SYMBOL(zfs_zstd_decompress); -EXPORT_SYMBOL(zfs_zstd_cache_reap_now); +ZFS_MODULE_PARAM(zfs, zstd_, earlyabort_pass, INT, ZMOD_RW, + "Enable early abort attempts when using zstd"); +ZFS_MODULE_PARAM(zfs, zstd_, abort_size, UINT, ZMOD_RW, + "Minimal size of block to attempt early abort"); #endif From 84d0a03f3e38b1c4c9361a4b4ec613a2f46248b3 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 24 May 2022 12:46:35 -0400 Subject: [PATCH 17/41] Refactor Log Size Limit Original Log Size Limit implementation blocked all writes in case of limit reached until the TXG is committed and the log is freed. It caused huge delays and following speed spikes in application writes. This implementation instead smoothly throttles writes, using exactly the same mechanism as used for dirty data. Reviewed-by: Brian Behlendorf Reviewed-by: jxdking Signed-off-by: Alexander Motin Sponsored-By: iXsystems, Inc. 
Issue #12284 Closes #13476 --- include/sys/dmu_tx.h | 2 +- include/sys/dsl_pool.h | 2 +- man/man4/zfs.4 | 7 +++-- module/zfs/dmu_tx.c | 60 ++++++++++++++++++++++++++---------------- module/zfs/dsl_pool.c | 17 +++++++----- 5 files changed, 54 insertions(+), 34 deletions(-) diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h index 71a9ac7ca7b..ad3f1b0e47c 100644 --- a/include/sys/dmu_tx.h +++ b/include/sys/dmu_tx.h @@ -124,8 +124,8 @@ typedef struct dmu_tx_stats { kstat_named_t dmu_tx_dirty_throttle; kstat_named_t dmu_tx_dirty_delay; kstat_named_t dmu_tx_dirty_over_max; - kstat_named_t dmu_tx_wrlog_over_max; kstat_named_t dmu_tx_dirty_frees_delay; + kstat_named_t dmu_tx_wrlog_delay; kstat_named_t dmu_tx_quota; } dmu_tx_stats_t; diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h index 32d973f0981..9270fb7d0b1 100644 --- a/include/sys/dsl_pool.h +++ b/include/sys/dsl_pool.h @@ -164,7 +164,7 @@ uint64_t dsl_pool_unreserved_space(dsl_pool_t *dp, zfs_space_check_t slop_policy); uint64_t dsl_pool_deferred_space(dsl_pool_t *dp); void dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg); -boolean_t dsl_pool_wrlog_over_max(dsl_pool_t *dp); +boolean_t dsl_pool_need_wrlog_delay(dsl_pool_t *dp); void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx); void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg); void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp); diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index c95fa98c5fd..d1ca69f8030 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1098,10 +1098,9 @@ This should be less than . .It Sy zfs_wrlog_data_max Ns = Pq int The upper limit of write-transaction zil log data size in bytes. -Once it is reached, write operation is blocked, until log data is cleared out -after transaction group sync. 
-Because of some overhead, it should be set -at least 2 times the size of +Write operations are throttled when approaching the limit until log data is +cleared out after transaction group sync. +Because of some overhead, it should be set at least 2 times the size of .Sy zfs_dirty_data_max .No to prevent harming normal write throughput. It also should be smaller than the size of the slog device if slog is present. diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index fe9860066d3..e7bd53136b8 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -53,8 +53,8 @@ dmu_tx_stats_t dmu_tx_stats = { { "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 }, { "dmu_tx_dirty_delay", KSTAT_DATA_UINT64 }, { "dmu_tx_dirty_over_max", KSTAT_DATA_UINT64 }, - { "dmu_tx_wrlog_over_max", KSTAT_DATA_UINT64 }, { "dmu_tx_dirty_frees_delay", KSTAT_DATA_UINT64 }, + { "dmu_tx_wrlog_delay", KSTAT_DATA_UINT64 }, { "dmu_tx_quota", KSTAT_DATA_UINT64 }, }; @@ -779,34 +779,49 @@ static void dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty) { dsl_pool_t *dp = tx->tx_pool; - uint64_t delay_min_bytes = - zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; - hrtime_t wakeup, min_tx_time, now; + uint64_t delay_min_bytes, wrlog; + hrtime_t wakeup, tx_time = 0, now; - if (dirty <= delay_min_bytes) + /* Calculate minimum transaction time for the dirty data amount. */ + delay_min_bytes = + zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; + if (dirty > delay_min_bytes) { + /* + * The caller has already waited until we are under the max. + * We make them pass us the amount of dirty data so we don't + * have to handle the case of it being >= the max, which + * could cause a divide-by-zero if it's == the max. + */ + ASSERT3U(dirty, <, zfs_dirty_data_max); + + tx_time = zfs_delay_scale * (dirty - delay_min_bytes) / + (zfs_dirty_data_max - dirty); + } + + /* Calculate minimum transaction time for the TX_WRITE log size. 
*/ + wrlog = aggsum_upper_bound(&dp->dp_wrlog_total); + delay_min_bytes = + zfs_wrlog_data_max * zfs_delay_min_dirty_percent / 100; + if (wrlog >= zfs_wrlog_data_max) { + tx_time = zfs_delay_max_ns; + } else if (wrlog > delay_min_bytes) { + tx_time = MAX(zfs_delay_scale * (wrlog - delay_min_bytes) / + (zfs_wrlog_data_max - wrlog), tx_time); + } + + if (tx_time == 0) return; - /* - * The caller has already waited until we are under the max. - * We make them pass us the amount of dirty data so we don't - * have to handle the case of it being >= the max, which could - * cause a divide-by-zero if it's == the max. - */ - ASSERT3U(dirty, <, zfs_dirty_data_max); - + tx_time = MIN(tx_time, zfs_delay_max_ns); now = gethrtime(); - min_tx_time = zfs_delay_scale * - (dirty - delay_min_bytes) / (zfs_dirty_data_max - dirty); - min_tx_time = MIN(min_tx_time, zfs_delay_max_ns); - if (now > tx->tx_start + min_tx_time) + if (now > tx->tx_start + tx_time) return; DTRACE_PROBE3(delay__mintime, dmu_tx_t *, tx, uint64_t, dirty, - uint64_t, min_tx_time); + uint64_t, tx_time); mutex_enter(&dp->dp_lock); - wakeup = MAX(tx->tx_start + min_tx_time, - dp->dp_last_wakeup + min_tx_time); + wakeup = MAX(tx->tx_start + tx_time, dp->dp_last_wakeup + tx_time); dp->dp_last_wakeup = wakeup; mutex_exit(&dp->dp_lock); @@ -884,8 +899,9 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) } if (!tx->tx_dirty_delayed && - dsl_pool_wrlog_over_max(tx->tx_pool)) { - DMU_TX_STAT_BUMP(dmu_tx_wrlog_over_max); + dsl_pool_need_wrlog_delay(tx->tx_pool)) { + tx->tx_wait_dirty = B_TRUE; + DMU_TX_STAT_BUMP(dmu_tx_wrlog_delay); return (SET_ERROR(ERESTART)); } diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index c8766090558..90d7579cbd3 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -105,9 +105,8 @@ int zfs_dirty_data_max_percent = 10; int zfs_dirty_data_max_max_percent = 25; /* - * zfs_wrlog_data_max, the upper limit of TX_WRITE log data. 
- * Once it is reached, write operation is blocked, - * until log data is cleared out after txg sync. + * The upper limit of TX_WRITE log data. Write operations are throttled + * when approaching the limit until log data is cleared out after txg sync. * It only counts TX_WRITE log with WR_COPIED or WR_NEED_COPY. */ unsigned long zfs_wrlog_data_max = 0; @@ -623,15 +622,18 @@ dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg) /* Choose a value slightly bigger than min dirty sync bytes */ uint64_t sync_min = - zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100; + zfs_wrlog_data_max * (zfs_dirty_data_sync_percent + 10) / 200; if (aggsum_compare(&dp->dp_wrlog_pertxg[txg & TXG_MASK], sync_min) > 0) txg_kick(dp, txg); } boolean_t -dsl_pool_wrlog_over_max(dsl_pool_t *dp) +dsl_pool_need_wrlog_delay(dsl_pool_t *dp) { - return (aggsum_compare(&dp->dp_wrlog_total, zfs_wrlog_data_max) > 0); + uint64_t delay_min_bytes = + zfs_wrlog_data_max * zfs_delay_min_dirty_percent / 100; + + return (aggsum_compare(&dp->dp_wrlog_total, delay_min_bytes) > 0); } static void @@ -641,6 +643,9 @@ dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg) delta = -(int64_t)aggsum_value(&dp->dp_wrlog_pertxg[txg & TXG_MASK]); aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], delta); aggsum_add(&dp->dp_wrlog_total, delta); + /* Compact per-CPU sums after the big change. */ + (void) aggsum_value(&dp->dp_wrlog_pertxg[txg & TXG_MASK]); + (void) aggsum_value(&dp->dp_wrlog_total); } #ifdef ZFS_DEBUG From 4dc1c8a0b84678a98e1c541493988b348ea0e644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Neal=20Gompa=20=28=E3=83=8B=E3=83=BC=E3=83=AB=E3=83=BB?= =?UTF-8?q?=E3=82=B4=E3=83=B3=E3=83=91=29?= Date: Tue, 24 May 2022 17:07:01 -0400 Subject: [PATCH 18/41] rpm: Use the correct version-release information in dependencies This tightly links the subpackages together and ensures that everything is upgraded together. 
Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Neal Gompa Closes #13489 --- rpm/generic/zfs.spec.in | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index 8cf702c70e6..c9eae765a61 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -90,12 +90,12 @@ License: @ZFS_META_LICENSE@ URL: https://github.com/openzfs/zfs Source0: %{name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -Requires: libzpool5 = %{version} -Requires: libnvpair3 = %{version} -Requires: libuutil3 = %{version} -Requires: libzfs5 = %{version} +Requires: libzpool5%{?_isa} = %{version}-%{release} +Requires: libnvpair3%{?_isa} = %{version}-%{release} +Requires: libuutil3%{?_isa} = %{version}-%{release} +Requires: libzfs5%{?_isa} = %{version}-%{release} Requires: %{name}-kmod = %{version} -Provides: %{name}-kmod-common = %{version} +Provides: %{name}-kmod-common = %{version}-%{release} Obsoletes: spl # zfs-fuse provides the same commands and man pages that OpenZFS does. 
@@ -221,13 +221,13 @@ This package provides support for managing ZFS filesystems %package -n libzfs5-devel Summary: Development headers Group: System Environment/Kernel -Requires: libzfs5 = %{version} -Requires: libzpool5 = %{version} -Requires: libnvpair3 = %{version} -Requires: libuutil3 = %{version} -Provides: libzpool5-devel -Provides: libnvpair3-devel -Provides: libuutil3-devel +Requires: libzfs5%{?_isa} = %{version}-%{release} +Requires: libzpool5%{?_isa} = %{version}-%{release} +Requires: libnvpair3%{?_isa} = %{version}-%{release} +Requires: libuutil3%{?_isa} = %{version}-%{release} +Provides: libzpool5-devel = %{version}-%{release} +Provides: libnvpair3-devel = %{version}-%{release} +Provides: libuutil3-devel = %{version}-%{release} Obsoletes: zfs-devel Obsoletes: libzfs2-devel Obsoletes: libzfs4-devel @@ -281,8 +281,8 @@ Summary: Python %{python_version} wrapper for libzfs_core Group: Development/Languages/Python License: Apache-2.0 BuildArch: noarch -Requires: libzfs5 = %{version} -Requires: libnvpair3 = %{version} +Requires: libzfs5 = %{version}-%{release} +Requires: libnvpair3 = %{version}-%{release} Requires: libffi Requires: python%{__python_pkg_version} @@ -317,7 +317,6 @@ This package provides a python wrapper for the libzfs_core C library. Summary: Initramfs module Group: System Environment/Kernel Requires: %{name}%{?_isa} = %{version}-%{release} -Requires: %{name} = %{version}-%{release} Requires: initramfs-tools %description initramfs From 82aa4f6f858549ba51d8afa207b179e4a3403d95 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 25 May 2022 09:13:51 -0700 Subject: [PATCH 19/41] Switch sed -E to -r for better portability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GNU sed 4.1.2 does not support the -E flag and this version is used by some cross-compiling tool chains. Switch -E to -r which is understood. 
Reviewed-by: Ahelenia Ziemiańska Signed-off-by: Brian Behlendorf Closes #13502 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 7e356cbd20b..64974aa3292 100644 --- a/configure.ac +++ b/configure.ac @@ -43,7 +43,7 @@ AM_INIT_AUTOMAKE([subdir-objects foreign]) # Remove default macros from config.h: # PACKAGE, PACKAGE_{BUGREPORT,NAME,STRING,TARNAME,VERSION}, STDC_HEADERS, VERSION AC_CONFIG_HEADERS([zfs_config.h], [ - sed -nEi~ -e '/^$/be' -e 'N;N;/#define (PACKAGE|VERSION|STDC_HEADERS)/d' -e ':e' -e 'p' zfs_config.h && rm zfs_config.h~ || exit]) + sed -nri~ -e '/^$/be' -e 'N;N;/#define (PACKAGE|VERSION|STDC_HEADERS)/d' -e ':e' -e 'p' zfs_config.h && rm zfs_config.h~ || exit]) LT_INIT AC_PROG_INSTALL From 3bbc26097e53c472084d978dd37343b07ed17e3c Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Wed, 25 May 2022 12:18:49 -0400 Subject: [PATCH 20/41] Unbreak zstd build on sparc64 It turns out that wrapping the atomic macro in () breaks build on Linux/SPARC64. Oops. 
Reviewed-by: Brian Behlendorf Signed-off-by: Rich Ercolani Closes #13506 --- include/sys/zstd/zstd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sys/zstd/zstd.h b/include/sys/zstd/zstd.h index ec2341b7693..d8c3fa86dce 100644 --- a/include/sys/zstd/zstd.h +++ b/include/sys/zstd/zstd.h @@ -79,7 +79,7 @@ typedef struct zfs_zstd_meta { */ #define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64) #define ZSTDSTAT_ZERO(stat) \ - (atomic_store_64(&zstd_stats.stat.value.ui64, 0)) + atomic_store_64(&zstd_stats.stat.value.ui64, 0) #define ZSTDSTAT_ADD(stat, val) \ atomic_add_64(&zstd_stats.stat.value.ui64, (val)) #define ZSTDSTAT_SUB(stat, val) \ From 61ef68727b0b3c53e27d6e503947f6c5efd1318c Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 25 May 2022 09:20:17 -0700 Subject: [PATCH 21/41] Standardize RHEL version check in packages This is a follow up to 3c356622994 which standardizes how the RHEL version check is done. This simpler "0%{?rhel}" check is used elsewhere in the packages so we do the same here. 
Reviewed-by: Neal Gompa Reviewed-by: Rich Ercolani Signed-off-by: Brian Behlendorf Closes #13501 --- rpm/generic/zfs-dkms.spec.in | 2 +- rpm/generic/zfs-kmod.spec.in | 2 +- rpm/generic/zfs.spec.in | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rpm/generic/zfs-dkms.spec.in b/rpm/generic/zfs-dkms.spec.in index d0cf38caa19..f23347380d0 100644 --- a/rpm/generic/zfs-dkms.spec.in +++ b/rpm/generic/zfs-dkms.spec.in @@ -36,7 +36,7 @@ Obsoletes: spl-dkms Provides: %{module}-kmod = %{version} AutoReqProv: no -%if (0%{?fedora}%{?suse_version}) || (0 < 0%{?rhel} && 0%{?rhel} < 9) +%if (0%{?fedora}%{?suse_version}) || (0%{?rhel} && 0%{?rhel} < 9) # We don't directly use it, but if this isn't installed, rpmbuild as root can # crash+corrupt rpmdb # See issue #12071 diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in index c25d968d1c9..ae079542786 100644 --- a/rpm/generic/zfs-kmod.spec.in +++ b/rpm/generic/zfs-kmod.spec.in @@ -57,7 +57,7 @@ BuildRequires: gcc, make BuildRequires: elfutils-libelf-devel %endif -%if (0%{?fedora}%{?suse_version}) || (0 < 0%{?rhel} && 0%{?rhel} < 9) +%if (0%{?fedora}%{?suse_version}) || (0%{?rhel} && 0%{?rhel} < 9) # We don't directly use it, but if this isn't installed, rpmbuild as root can # crash+corrupt rpmdb # See issue #12071 diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in index c9eae765a61..493e93c1f3e 100644 --- a/rpm/generic/zfs.spec.in +++ b/rpm/generic/zfs.spec.in @@ -114,7 +114,7 @@ BuildRequires: openssl-devel BuildRequires: libtirpc-devel %endif -%if (0%{?fedora}%{?suse_version}) || (0 < 0%{?rhel} && 0%{?rhel} < 9) +%if (0%{?fedora}%{?suse_version}) || (0%{?rhel} && 0%{?rhel} < 9) # We don't directly use it, but if this isn't installed, rpmbuild as root can # crash+corrupt rpmdb # See issue #12071 From b37093a188d94279e5e2faaf09e6ff754873b0a2 Mon Sep 17 00:00:00 2001 From: Umer Saleem Date: Wed, 25 May 2022 21:22:11 +0500 Subject: [PATCH 22/41] rpm: Keep debug symbols if configured 
with '--enable-debuginfo' Do not strip debug information from packages if '--enable-debuginfo' is configured. Reviewed-by: Brian Behlendorf Signed-off-by: Umer Saleem Closes #13500 --- config/zfs-build.m4 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index 59bca557925..582c479f5b5 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -333,6 +333,10 @@ AC_DEFUN([ZFS_AC_RPM], [ RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(ASAN_ZFS) 1"' RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(UBSAN_ZFS) 1"' + AS_IF([test "x$enable_debuginfo" = xyes], [ + RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "__strip /bin/true"' + ]) + RPM_DEFINE_UTIL=' --define "_initconfdir $(initconfdir)"' dnl # Make the next three RPM_DEFINE_UTIL additions conditional, since From 7829b465a7f736e9458257fcb9dcefd8eb882ee0 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Wed, 25 May 2022 09:25:13 -0700 Subject: [PATCH 23/41] Cancel in-progress rebuilds when we finish removal This issue was discovered by zloop runs. When a mirror or other redundant top-level vdev has a disk failure, and the disk is replaced, the rebuild process occurs. A removal can happen while this is in progress. If the removal completes before the rebuild does, the removal process will try to free the vdev that is still in use. Reviewed-by: Brian Behlendorf Signed-off-by: Paul Dagnelie Closes #13498 --- module/zfs/vdev_removal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index 5508d273758..7dfc4345f23 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -1364,6 +1364,8 @@ vdev_remove_complete(spa_t *spa) ASSERT3P(vd->vdev_initialize_thread, ==, NULL); ASSERT3P(vd->vdev_trim_thread, ==, NULL); ASSERT3P(vd->vdev_autotrim_thread, ==, NULL); + vdev_rebuild_stop_wait(vd); + ASSERT3P(vd->vdev_rebuild_thread, ==, NULL); uint64_t vdev_space = spa_deflate(spa) ? 
vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space; From 1d89b989c15acdc9d70878253d68162c3c5c5836 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Wed, 25 May 2022 18:29:47 +0200 Subject: [PATCH 24/41] automake: don't install /e/d/zfs or /e/z/zfs-functions +x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _SCRIPTS means it's made +x when installing; _DATA is made -x. Reviewed-by: Brian Behlendorf Signed-off-by: Ahelenia Ziemiańska Closes #13496 Closes #13503 --- etc/Makefile.am | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/etc/Makefile.am b/etc/Makefile.am index ffe5fad2b4a..a41f04c9042 100644 --- a/etc/Makefile.am +++ b/etc/Makefile.am @@ -14,21 +14,21 @@ dist_sysconf_zfs_DATA = \ %D%/zfs/vdev_id.conf.sas_switch.example \ %D%/zfs/vdev_id.conf.scsi.example -sysconf_zfs_SCRIPTS = \ +sysconf_zfs_DATA = \ %D%/zfs/zfs-functions -SUBSTFILES += $(sysconf_zfs_SCRIPTS) -SHELLCHECKSCRIPTS += $(sysconf_zfs_SCRIPTS) -$(call SHELLCHECK_OPTS,$(sysconf_zfs_SCRIPTS)): SHELLCHECK_SHELL = sh +SUBSTFILES += $(sysconf_zfs_DATA) +SHELLCHECKSCRIPTS += $(sysconf_zfs_DATA) +$(call SHELLCHECK_OPTS,$(sysconf_zfs_DATA)): SHELLCHECK_SHELL = sh if BUILD_LINUX -initconf_SCRIPTS = \ +initconf_DATA = \ %D%/default/zfs -SUBSTFILES += $(initconf_SCRIPTS) -SHELLCHECKSCRIPTS += $(initconf_SCRIPTS) -$(call SHELLCHECK_OPTS,$(initconf_SCRIPTS)): SHELLCHECK_SHELL = sh +SUBSTFILES += $(initconf_DATA) +SHELLCHECKSCRIPTS += $(initconf_DATA) +$(call SHELLCHECK_OPTS,$(initconf_DATA)): SHELLCHECK_SHELL = sh if INIT_SYSV From 6aa8c21a2ad29ddd4564cdfd4c99048c891b717a Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 25 May 2022 13:12:52 -0400 Subject: [PATCH 25/41] More speculative prefetcher improvements - Make prefetch distance adaptive: up to 4MB prefetch doubles for every hit, same as before, but after that it grows by 1/8 every time the prefetch read does not complete in time to satisfy the demand.
My tests show that 4MB is sufficient for a wide NVMe pool to saturate a single reader thread at 2.5GB/s, while the new 64MB maximum allows the same thread to reach 1.5GB/s on a wide HDD pool. Increasing the distance further may increase speed even more, but less dramatically and with higher latency. - Allow early reuse of inactive prefetch streams: streams that never saw hits can be reused immediately if there is demand, while others can be reused after 1s of inactivity, starting with the oldest. After 2s of inactivity streams are deleted to free resources, same as before. This improves strided read performance on an HDD pool several times over in the presence of simultaneous random reads, which previously filled the zfetch_max_streams limit for seconds and so blocked most prefetching. - Always issue intermediate indirect block reads with SYNC priority. Each of those reads, if delayed for long, may delay up to 1024 other block prefetches, which may be bad for wide pools. Reviewed-by: Allan Jude Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored-By: iXsystems, Inc.
Closes #13452 --- include/sys/dbuf.h | 2 +- include/sys/dmu_zfetch.h | 16 ++-- man/man4/zfs.4 | 17 +++- module/zfs/dbuf.c | 14 +-- module/zfs/dmu_zfetch.c | 185 +++++++++++++++++++++------------------ 5 files changed, 133 insertions(+), 101 deletions(-) diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index deaab82b797..60f8d5d74d6 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -329,7 +329,7 @@ typedef struct dbuf_hash_table { krwlock_t hash_rwlocks[DBUF_RWLOCKS] ____cacheline_aligned; } dbuf_hash_table_t; -typedef void (*dbuf_prefetch_fn)(void *, boolean_t); +typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t); uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level, const uint64_t offset); diff --git a/include/sys/dmu_zfetch.h b/include/sys/dmu_zfetch.h index 4c220b0c79e..cd1b79eb8e4 100644 --- a/include/sys/dmu_zfetch.h +++ b/include/sys/dmu_zfetch.h @@ -49,20 +49,18 @@ typedef struct zfetch { typedef struct zstream { uint64_t zs_blkid; /* expect next access at this blkid */ - uint64_t zs_pf_blkid1; /* first block to prefetch */ - uint64_t zs_pf_blkid; /* block to prefetch up to */ - - /* - * We will next prefetch the L1 indirect block of this level-0 - * block id. 
- */ - uint64_t zs_ipf_blkid1; /* first block to prefetch */ - uint64_t zs_ipf_blkid; /* block to prefetch up to */ + unsigned int zs_pf_dist; /* data prefetch distance in bytes */ + unsigned int zs_ipf_dist; /* L1 prefetch distance in bytes */ + uint64_t zs_pf_start; /* first data block to prefetch */ + uint64_t zs_pf_end; /* data block to prefetch up to */ + uint64_t zs_ipf_start; /* first data block to prefetch L1 */ + uint64_t zs_ipf_end; /* data block to prefetch L1 up to */ list_node_t zs_node; /* link for zf_stream */ hrtime_t zs_atime; /* time last prefetch issued */ zfetch_t *zs_fetch; /* parent fetch */ boolean_t zs_missed; /* stream saw cache misses */ + boolean_t zs_more; /* need more distant prefetch */ zfs_refcount_t zs_callers; /* number of pending callers */ /* * Number of stream references: dnode, callers and pending blocks. diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index d1ca69f8030..fa3159ab82c 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -487,7 +487,15 @@ However, this is limited by .It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq ulong If prefetching is enabled, disable prefetching for reads larger than this size. . -.It Sy zfetch_max_distance Ns = Ns Sy 8388608 Ns B Po 8 MiB Pc Pq uint +.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint +Min bytes to prefetch per stream. +Prefetch distance starts from the demand access size and quickly grows to +this value, doubling on each hit. +After that it may grow further by 1/8 per hit, but only if some prefetch +since last time haven't completed in time to satisfy demand request, i.e. +prefetch depth didn't cover the read latency or the pool got saturated. +. +.It Sy zfetch_max_distance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint Max bytes to prefetch per stream. . .It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint @@ -496,8 +504,11 @@ Max bytes to prefetch indirects for per stream. 
.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint Max number of streams per zfetch (prefetch streams per file). . -.It Sy zfetch_min_sec_reap Ns = Ns Sy 2 Pq uint -Min time before an active prefetch stream can be reclaimed +.It Sy zfetch_min_sec_reap Ns = Ns Sy 1 Pq uint +Min time before inactive prefetch stream can be reclaimed +. +.It Sy zfetch_max_sec_reap Ns = Ns Sy 2 Pq uint +Max time before inactive prefetch stream can be deleted . .It Sy zfs_abd_scatter_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int Enables ARC from using scatter/gather lists and forces all allocations to be diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 9a273b010fb..55a3686fac4 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -3185,8 +3185,10 @@ typedef struct dbuf_prefetch_arg { static void dbuf_prefetch_fini(dbuf_prefetch_arg_t *dpa, boolean_t io_done) { - if (dpa->dpa_cb != NULL) - dpa->dpa_cb(dpa->dpa_arg, io_done); + if (dpa->dpa_cb != NULL) { + dpa->dpa_cb(dpa->dpa_arg, dpa->dpa_zb.zb_level, + dpa->dpa_zb.zb_blkid, io_done); + } kmem_free(dpa, sizeof (*dpa)); } @@ -3320,7 +3322,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb, dpa->dpa_zb.zb_object, dpa->dpa_curlevel, nextblkid); (void) arc_read(dpa->dpa_zio, dpa->dpa_spa, - bp, dbuf_prefetch_indirect_done, dpa, dpa->dpa_prio, + bp, dbuf_prefetch_indirect_done, dpa, + ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &iter_aflags, &zb); } @@ -3455,7 +3458,8 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid, SET_BOOKMARK(&zb, ds != NULL ? 
ds->ds_object : DMU_META_OBJSET, dn->dn_object, curlevel, curblkid); (void) arc_read(dpa->dpa_zio, dpa->dpa_spa, - &bp, dbuf_prefetch_indirect_done, dpa, prio, + &bp, dbuf_prefetch_indirect_done, dpa, + ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &iter_aflags, &zb); } @@ -3467,7 +3471,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid, return (1); no_issue: if (cb != NULL) - cb(arg, B_FALSE); + cb(arg, level, blkid, B_FALSE); return (0); } diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c index a6facdc65bb..d00a48159ae 100644 --- a/module/zfs/dmu_zfetch.c +++ b/module/zfs/dmu_zfetch.c @@ -48,9 +48,13 @@ static int zfs_prefetch_disable = B_FALSE; /* max # of streams per zfetch */ static unsigned int zfetch_max_streams = 8; /* min time before stream reclaim */ -static unsigned int zfetch_min_sec_reap = 2; -/* max bytes to prefetch per stream (default 8MB) */ -unsigned int zfetch_max_distance = 8 * 1024 * 1024; +static unsigned int zfetch_min_sec_reap = 1; +/* max time before stream delete */ +static unsigned int zfetch_max_sec_reap = 2; +/* min bytes to prefetch per stream (default 4MB) */ +static unsigned int zfetch_min_distance = 4 * 1024 * 1024; +/* max bytes to prefetch per stream (default 64MB) */ +unsigned int zfetch_max_distance = 64 * 1024 * 1024; /* max bytes to prefetch indirects for per stream (default 64MB) */ unsigned int zfetch_max_idistance = 64 * 1024 * 1024; /* max number of bytes in an array_read in which we allow prefetching (1MB) */ @@ -195,74 +199,99 @@ dmu_zfetch_fini(zfetch_t *zf) } /* - * If there aren't too many streams already, create a new stream. + * If there aren't too many active streams already, create one more. + * In process delete/reuse all streams without hits for zfetch_max_sec_reap. + * If needed, reuse oldest stream without hits for zfetch_min_sec_reap or ever. * The "blkid" argument is the next block that we expect this stream to access. 
- * While we're here, clean up old streams (which haven't been - * accessed for at least zfetch_min_sec_reap seconds). */ static void dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid) { - zstream_t *zs_next; - hrtime_t now = gethrtime(); + zstream_t *zs, *zs_next, *zs_old = NULL; + hrtime_t now = gethrtime(), t; ASSERT(MUTEX_HELD(&zf->zf_lock)); /* - * Clean up old streams. + * Delete too old streams, reusing the first found one. */ - for (zstream_t *zs = list_head(&zf->zf_stream); - zs != NULL; zs = zs_next) { + t = now - SEC2NSEC(zfetch_max_sec_reap); + for (zs = list_head(&zf->zf_stream); zs != NULL; zs = zs_next) { zs_next = list_next(&zf->zf_stream, zs); /* * Skip if still active. 1 -- zf_stream reference. */ if (zfs_refcount_count(&zs->zs_refs) != 1) continue; - if (((now - zs->zs_atime) / NANOSEC) > - zfetch_min_sec_reap) + if (zs->zs_atime > t) + continue; + if (zs_old) dmu_zfetch_stream_remove(zf, zs); + else + zs_old = zs; + } + if (zs_old) { + zs = zs_old; + goto reuse; } /* * The maximum number of streams is normally zfetch_max_streams, * but for small files we lower it such that it's at least possible * for all the streams to be non-overlapping. - * - * If we are already at the maximum number of streams for this file, - * even after removing old streams, then don't create this stream. 
*/ uint32_t max_streams = MAX(1, MIN(zfetch_max_streams, zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz / zfetch_max_distance)); if (zf->zf_numstreams >= max_streams) { + t = now - SEC2NSEC(zfetch_min_sec_reap); + for (zs = list_head(&zf->zf_stream); zs != NULL; + zs = list_next(&zf->zf_stream, zs)) { + if (zfs_refcount_count(&zs->zs_refs) != 1) + continue; + if (zs->zs_atime > t) + continue; + if (zs_old == NULL || zs->zs_atime < zs_old->zs_atime) + zs_old = zs; + } + if (zs_old) { + zs = zs_old; + goto reuse; + } ZFETCHSTAT_BUMP(zfetchstat_max_streams); return; } - zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP); - zs->zs_blkid = blkid; - zs->zs_pf_blkid1 = blkid; - zs->zs_pf_blkid = blkid; - zs->zs_ipf_blkid1 = blkid; - zs->zs_ipf_blkid = blkid; - zs->zs_atime = now; + zs = kmem_zalloc(sizeof (*zs), KM_SLEEP); zs->zs_fetch = zf; - zs->zs_missed = B_FALSE; zfs_refcount_create(&zs->zs_callers); zfs_refcount_create(&zs->zs_refs); /* One reference for zf_stream. */ zfs_refcount_add(&zs->zs_refs, NULL); zf->zf_numstreams++; list_insert_head(&zf->zf_stream, zs); + +reuse: + zs->zs_blkid = blkid; + zs->zs_pf_dist = 0; + zs->zs_pf_start = blkid; + zs->zs_pf_end = blkid; + zs->zs_ipf_dist = 0; + zs->zs_ipf_start = blkid; + zs->zs_ipf_end = blkid; + /* Allow immediate stream reuse until first hit. 
*/ + zs->zs_atime = now - SEC2NSEC(zfetch_min_sec_reap); + zs->zs_missed = B_FALSE; + zs->zs_more = B_FALSE; } static void -dmu_zfetch_stream_done(void *arg, boolean_t io_issued) +dmu_zfetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t io_issued) { - (void) io_issued; zstream_t *zs = arg; + if (io_issued && level == 0 && blkid < zs->zs_blkid) + zs->zs_more = B_TRUE; if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0) dmu_zfetch_stream_fini(zs); } @@ -284,11 +313,6 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data, boolean_t have_lock) { zstream_t *zs; - int64_t pf_start, ipf_start; - int64_t pf_ahead_blks, max_blks; - int max_dist_blks, pf_nblks, ipf_nblks; - uint64_t end_of_access_blkid, maxblkid; - end_of_access_blkid = blkid + nblks; spa_t *spa = zf->zf_dnode->dn_objset->os_spa; if (zfs_prefetch_disable) @@ -317,7 +341,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, * A fast path for small files for which no prefetch will * happen. */ - maxblkid = zf->zf_dnode->dn_maxblkid; + uint64_t maxblkid = zf->zf_dnode->dn_maxblkid; if (maxblkid < 2) { if (!have_lock) rw_exit(&zf->zf_dnode->dn_struct_rwlock); @@ -345,6 +369,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, * If the file is ending, remove the matching stream if found. * If not found then it is too late to create a new one now. */ + uint64_t end_of_access_blkid = blkid + nblks; if (end_of_access_blkid >= maxblkid) { if (zs != NULL) dmu_zfetch_stream_remove(zf, zs); @@ -377,60 +402,48 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, /* * This access was to a block that we issued a prefetch for on - * behalf of this stream. Issue further prefetches for this stream. + * behalf of this stream. Calculate further prefetch distances. * - * Normally, we start prefetching where we stopped - * prefetching last (zs_pf_blkid). 
But when we get our first - * hit on this stream, zs_pf_blkid == zs_blkid, we don't - * want to prefetch the block we just accessed. In this case, - * start just after the block we just accessed. - */ - pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid); - if (zs->zs_pf_blkid1 < end_of_access_blkid) - zs->zs_pf_blkid1 = end_of_access_blkid; - if (zs->zs_ipf_blkid1 < end_of_access_blkid) - zs->zs_ipf_blkid1 = end_of_access_blkid; - - /* - * Double our amount of prefetched data, but don't let the - * prefetch get further ahead than zfetch_max_distance. + * Start prefetch from the demand access size (nblks). Double the + * distance every access up to zfetch_min_distance. After that only + * if needed increase the distance by 1/8 up to zfetch_max_distance. */ + unsigned int nbytes = nblks << zf->zf_dnode->dn_datablkshift; + unsigned int pf_nblks; if (fetch_data) { - max_dist_blks = - zfetch_max_distance >> zf->zf_dnode->dn_datablkshift; - /* - * Previously, we were (zs_pf_blkid - blkid) ahead. We - * want to now be double that, so read that amount again, - * plus the amount we are catching up by (i.e. the amount - * read just now). 
- */ - pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks; - max_blks = max_dist_blks - (pf_start - end_of_access_blkid); - pf_nblks = MIN(pf_ahead_blks, max_blks); + if (unlikely(zs->zs_pf_dist < nbytes)) + zs->zs_pf_dist = nbytes; + else if (zs->zs_pf_dist < zfetch_min_distance) + zs->zs_pf_dist *= 2; + else if (zs->zs_more) + zs->zs_pf_dist += zs->zs_pf_dist / 8; + zs->zs_more = B_FALSE; + if (zs->zs_pf_dist > zfetch_max_distance) + zs->zs_pf_dist = zfetch_max_distance; + pf_nblks = zs->zs_pf_dist >> zf->zf_dnode->dn_datablkshift; } else { pf_nblks = 0; } - - zs->zs_pf_blkid = pf_start + pf_nblks; + if (zs->zs_pf_start < end_of_access_blkid) + zs->zs_pf_start = end_of_access_blkid; + if (zs->zs_pf_end < end_of_access_blkid + pf_nblks) + zs->zs_pf_end = end_of_access_blkid + pf_nblks; /* - * Do the same for indirects, starting from where we stopped last, - * or where we will stop reading data blocks (and the indirects - * that point to them). + * Do the same for indirects, starting where we will stop reading + * data blocks (and the indirects that point to them). */ - ipf_start = MAX(zs->zs_ipf_blkid, zs->zs_pf_blkid); - max_dist_blks = zfetch_max_idistance >> zf->zf_dnode->dn_datablkshift; - /* - * We want to double our distance ahead of the data prefetch - * (or reader, if we are not prefetching data). Previously, we - * were (zs_ipf_blkid - blkid) ahead. To double that, we read - * that amount again, plus the amount we are catching up by - * (i.e. the amount read now + the amount of data prefetched now). 
- */ - pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks; - max_blks = max_dist_blks - (ipf_start - zs->zs_pf_blkid); - ipf_nblks = MIN(pf_ahead_blks, max_blks); - zs->zs_ipf_blkid = ipf_start + ipf_nblks; + if (unlikely(zs->zs_ipf_dist < nbytes)) + zs->zs_ipf_dist = nbytes; + else + zs->zs_ipf_dist *= 2; + if (zs->zs_ipf_dist > zfetch_max_idistance) + zs->zs_ipf_dist = zfetch_max_idistance; + pf_nblks = zs->zs_ipf_dist >> zf->zf_dnode->dn_datablkshift; + if (zs->zs_ipf_start < zs->zs_pf_end) + zs->zs_ipf_start = zs->zs_pf_end; + if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks) + zs->zs_ipf_end = zs->zs_pf_end + pf_nblks; zs->zs_blkid = end_of_access_blkid; /* Protect the stream from reclamation. */ @@ -471,13 +484,13 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock) mutex_enter(&zf->zf_lock); if (zs->zs_missed) { - pf_start = zs->zs_pf_blkid1; - pf_end = zs->zs_pf_blkid1 = zs->zs_pf_blkid; + pf_start = zs->zs_pf_start; + pf_end = zs->zs_pf_start = zs->zs_pf_end; } else { pf_start = pf_end = 0; } - ipf_start = MAX(zs->zs_pf_blkid1, zs->zs_ipf_blkid1); - ipf_end = zs->zs_ipf_blkid1 = zs->zs_ipf_blkid; + ipf_start = zs->zs_ipf_start; + ipf_end = zs->zs_ipf_start = zs->zs_ipf_end; mutex_exit(&zf->zf_lock); ASSERT3S(pf_start, <=, pf_end); ASSERT3S(ipf_start, <=, ipf_end); @@ -505,12 +518,12 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock) for (int64_t blk = pf_start; blk < pf_end; blk++) { issued += dbuf_prefetch_impl(zf->zf_dnode, 0, blk, ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH, - dmu_zfetch_stream_done, zs); + dmu_zfetch_done, zs); } for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) { issued += dbuf_prefetch_impl(zf->zf_dnode, 1, iblk, ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH, - dmu_zfetch_stream_done, zs); + dmu_zfetch_done, zs); } if (!have_lock) @@ -540,6 +553,12 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_streams, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, 
min_sec_reap, UINT, ZMOD_RW, "Min time before stream reclaim"); +ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_sec_reap, UINT, ZMOD_RW, + "Max time before stream delete"); + +ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, min_distance, UINT, ZMOD_RW, + "Min bytes to prefetch per stream"); + ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW, "Max bytes to prefetch per stream"); From b62829295e9529d1c321816a1027fac5afc7d6f5 Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Wed, 25 May 2022 20:26:59 -0400 Subject: [PATCH 26/41] Silence unused-but-set-variable warning This was breaking the kmod port build on FreeBSD with Clang 13. Use the same trick as we do for ASSERT() to make DNODE_VERIFY() use its parameter at compile time without actually using it at run time in non-debug builds. Reviewed-by: Brian Behlendorf Signed-off-by: Ryan Moeller Closes #13507 --- include/sys/dnode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sys/dnode.h b/include/sys/dnode.h index 3f5fcc958c3..33d9389d5a3 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -616,7 +616,7 @@ extern dnode_stats_t dnode_stats; #else #define dprintf_dnode(db, fmt, ...) -#define DNODE_VERIFY(dn) +#define DNODE_VERIFY(dn) ((void) sizeof ((uintptr_t)(dn))) #define FREE_VERIFY(db, start, end, tx) #endif From d98a67a53a180bd88ec8d9aeea75d92e1c9968b5 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 26 May 2022 09:24:50 -0700 Subject: [PATCH 27/41] Replace EXTRA_DIST with dist_noinst_DATA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EXTRA_DIST variable is ignored when used in the FALSE conditional of a Makefile.am. This results in the `make dist` target omitting these files from the generated tarball unless CONFIG_USER is defined. This issue can be avoided by switching to use the dist_noinst_DATA variable which is handled as expected by autoconf. 
This change also adds support for --with-config=dist as an alias for --with-config=srpm and updates the GitHub workflows to use it. Reviewed-by: Ahelenia Ziemiańska Signed-off-by: Brian Behlendorf Closes #13459 Closes #13505 --- .github/workflows/zfs-tests-functional.yml | 2 +- .github/workflows/zfs-tests-sanity.yml | 2 +- .github/workflows/zloop.yml | 2 +- Makefile.am | 38 +++++++++++----------- cmd/Makefile.am | 8 ++--- cmd/zed/Makefile.am | 2 +- cmd/zed/zed.d/Makefile.am | 2 +- cmd/zpool/Makefile.am | 2 +- config/Substfiles.am | 2 +- config/zfs-build.m4 | 1 + contrib/bpftrace/Makefile.am | 4 +-- contrib/dracut/Makefile.am | 2 +- contrib/initramfs/Makefile.am | 2 +- contrib/pyzfs/Makefile.am | 2 +- contrib/zcp/Makefile.am | 2 +- etc/Makefile.am | 4 +-- lib/libnvpair/Makefile.am | 2 +- lib/libuutil/Makefile.am | 2 +- lib/libzfs/Makefile.am | 4 +-- lib/libzfs_core/Makefile.am | 2 +- lib/libzfsbootenv/Makefile.am | 2 +- man/Makefile.am | 5 +-- rpm/Makefile.am | 2 +- scripts/Makefile.am | 9 +++-- tests/Makefile.am | 2 +- tests/zfs-tests/cmd/Makefile.am | 4 +-- tests/zfs-tests/tests/Makefile.am | 2 +- 27 files changed, 56 insertions(+), 57 deletions(-) diff --git a/.github/workflows/zfs-tests-functional.yml b/.github/workflows/zfs-tests-functional.yml index 0273610af04..328cb97f10e 100644 --- a/.github/workflows/zfs-tests-functional.yml +++ b/.github/workflows/zfs-tests-functional.yml @@ -28,7 +28,7 @@ jobs: ./autogen.sh - name: Configure run: | - ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan + ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan --with-config=dist - name: Make run: | make -j$(nproc) --no-print-directory --silent pkg-utils pkg-kmod diff --git a/.github/workflows/zfs-tests-sanity.yml b/.github/workflows/zfs-tests-sanity.yml index 73606f909e1..4c15cecf58d 100644 --- a/.github/workflows/zfs-tests-sanity.yml +++ b/.github/workflows/zfs-tests-sanity.yml @@ -24,7 +24,7 @@ jobs: ./autogen.sh - name: 
Configure run: | - ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan + ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan --with-config=dist - name: Make run: | make -j$(nproc) --no-print-directory --silent pkg-utils pkg-kmod diff --git a/.github/workflows/zloop.yml b/.github/workflows/zloop.yml index d49eeae1653..64fe96a3ab6 100644 --- a/.github/workflows/zloop.yml +++ b/.github/workflows/zloop.yml @@ -23,7 +23,7 @@ jobs: ./autogen.sh - name: Configure run: | - ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan + ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan --with-config=dist - name: Make run: | make -j$(nproc) --no-print-directory --silent pkg-utils pkg-kmod diff --git a/Makefile.am b/Makefile.am index 0a73a63870b..54d300e7d40 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,5 @@ CLEANFILES = -EXTRA_DIST = +dist_noinst_DATA = INSTALL_DATA_HOOKS = ALL_LOCAL = CLEAN_LOCAL = @@ -37,26 +37,26 @@ extradir = $(prefix)/src/zfs-$(VERSION) extra_HEADERS = zfs.release.in zfs_config.h.in endif -EXTRA_DIST += autogen.sh copy-builtin -EXTRA_DIST += AUTHORS CODE_OF_CONDUCT.md COPYRIGHT LICENSE META NEWS NOTICE -EXTRA_DIST += README.md RELEASES.md -EXTRA_DIST += module/lua/README.zfs module/os/linux/spl/README.md +dist_noinst_DATA += autogen.sh copy-builtin +dist_noinst_DATA += AUTHORS CODE_OF_CONDUCT.md COPYRIGHT LICENSE META NEWS NOTICE +dist_noinst_DATA += README.md RELEASES.md +dist_noinst_DATA += module/lua/README.zfs module/os/linux/spl/README.md # Include all the extra licensing information for modules -EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE -EXTRA_DIST += module/icp/algs/skein/THIRDPARTYLICENSE.descrip -EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman -EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip -EXTRA_DIST += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl -EXTRA_DIST += 
module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip -EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams -EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip -EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl -EXTRA_DIST += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip -EXTRA_DIST += module/os/linux/spl/THIRDPARTYLICENSE.gplv2 -EXTRA_DIST += module/os/linux/spl/THIRDPARTYLICENSE.gplv2.descrip -EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash -EXTRA_DIST += module/zfs/THIRDPARTYLICENSE.cityhash.descrip +dist_noinst_DATA += module/icp/algs/skein/THIRDPARTYLICENSE +dist_noinst_DATA += module/icp/algs/skein/THIRDPARTYLICENSE.descrip +dist_noinst_DATA += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman +dist_noinst_DATA += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.gladman.descrip +dist_noinst_DATA += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl +dist_noinst_DATA += module/icp/asm-x86_64/aes/THIRDPARTYLICENSE.openssl.descrip +dist_noinst_DATA += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams +dist_noinst_DATA += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.cryptogams.descrip +dist_noinst_DATA += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl +dist_noinst_DATA += module/icp/asm-x86_64/modes/THIRDPARTYLICENSE.openssl.descrip +dist_noinst_DATA += module/os/linux/spl/THIRDPARTYLICENSE.gplv2 +dist_noinst_DATA += module/os/linux/spl/THIRDPARTYLICENSE.gplv2.descrip +dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.cityhash +dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.cityhash.descrip @CODE_COVERAGE_RULES@ diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 9959760b523..65de980da30 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -10,7 +10,7 @@ mounthelper_PROGRAMS = sbin_SCRIPTS += fsck.zfs SHELLCHECKSCRIPTS += fsck.zfs CLEANFILES += fsck.zfs -EXTRA_DIST += %D%/fsck.zfs.in +dist_noinst_DATA += %D%/fsck.zfs.in $(call SUBST,fsck.zfs,%D%/) @@ -100,9 
+100,9 @@ endif if USING_PYTHON -bin_SCRIPTS += arc_summary arcstat dbufstat -CLEANFILES += arc_summary arcstat dbufstat -EXTRA_DIST += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in +bin_SCRIPTS += arc_summary arcstat dbufstat +CLEANFILES += arc_summary arcstat dbufstat +dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in $(call SUBST,arcstat,%D%/) $(call SUBST,dbufstat,%D%/) diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index 6eea1a135f5..c437ff51dd2 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -43,4 +43,4 @@ zed_LDADD = \ zed_LDADD += -lrt $(LIBATOMIC_LIBS) $(LIBUDEV_LIBS) $(LIBUUID_LIBS) zed_LDFLAGS = -pthread -EXTRA_DIST += $(addprefix %D%/,agents/README.md) +dist_noinst_DATA += %D%/agents/README.md diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am index 122d2c0c51b..c65b43fb027 100644 --- a/cmd/zed/zed.d/Makefile.am +++ b/cmd/zed/zed.d/Makefile.am @@ -38,7 +38,7 @@ zedconfdefaults = \ vdev_attach-led.sh \ vdev_clear-led.sh -EXTRA_DIST += $(addprefix %D%/,README) +dist_noinst_DATA += %D%/README INSTALL_DATA_HOOKS += zed-install-data-hook zed-install-data-hook: diff --git a/cmd/zpool/Makefile.am b/cmd/zpool/Makefile.am index 9ba72eadf3a..3c7c8a9aebe 100644 --- a/cmd/zpool/Makefile.am +++ b/cmd/zpool/Makefile.am @@ -38,7 +38,7 @@ zpool_LDADD += -lgeom endif zpool_LDADD += -lm $(LIBBLKID_LIBS) $(LIBUUID_LIBS) -EXTRA_DIST += $(addprefix %D%/,zpool.d/README compatibility.d) +dist_noinst_DATA += %D%/zpool.d/README SHELLCHECKSCRIPTS += $(dist_zpoolexec_SCRIPTS) zpoolexecdir = $(zfsexecdir)/zpool.d diff --git a/config/Substfiles.am b/config/Substfiles.am index 733073dd20c..38e870b2f50 100644 --- a/config/Substfiles.am +++ b/config/Substfiles.am @@ -41,6 +41,6 @@ endef SUBSTFILES = CLEANFILES += $(SUBSTFILES) -EXTRA_DIST += $(SUBSTFILES:=.in) +dist_noinst_DATA += $(SUBSTFILES:=.in) $(call SUBST,%,) diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index 582c479f5b5..b40776da7a7 100644 --- 
a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -268,6 +268,7 @@ AC_DEFUN([ZFS_AC_CONFIG], [ user) ZFS_AC_CONFIG_USER ;; all) ZFS_AC_CONFIG_USER ZFS_AC_CONFIG_KERNEL ;; + dist) ;; srpm) ;; *) AC_MSG_RESULT([Error!]) diff --git a/contrib/bpftrace/Makefile.am b/contrib/bpftrace/Makefile.am index 5382f1e1de4..4f649cf5433 100644 --- a/contrib/bpftrace/Makefile.am +++ b/contrib/bpftrace/Makefile.am @@ -1,5 +1,3 @@ -EXTRA_DIST += $(addprefix %D%/, \ - taskqlatency.bt \ - zfs-trace.sh) +dist_noinst_DATA += %D%/taskqlatency.bt %D%/zfs-trace.sh SHELLCHECKSCRIPTS += %D%/zfs-trace.sh diff --git a/contrib/dracut/Makefile.am b/contrib/dracut/Makefile.am index f0c103b0a4d..73ca52b6631 100644 --- a/contrib/dracut/Makefile.am +++ b/contrib/dracut/Makefile.am @@ -24,4 +24,4 @@ SHELLCHECKSCRIPTS += $(pkgdracut_02_SCRIPTS) $(pkgdracut_90_SCRIPTS) # Provided by /bin/sleep, and, again, every implementation of that supports this $(call SHELLCHECK_OPTS,$(pkgdracut_90_SCRIPTS)): CHECKBASHISMS_IGNORE = -e 'sleep only takes one integer' -e 'sleep 0.' 
-EXTRA_DIST += $(addprefix %D%/,README.md) +dist_noinst_DATA += %D%/README.md diff --git a/contrib/initramfs/Makefile.am b/contrib/initramfs/Makefile.am index 6917b517f33..a583341ea2b 100644 --- a/contrib/initramfs/Makefile.am +++ b/contrib/initramfs/Makefile.am @@ -36,4 +36,4 @@ SHELLCHECKSCRIPTS += $(i_t_check_scripts) $(call SHELLCHECK_OPTS,$(i_t_check_scripts)): SHELLCHECK_SHELL = sh -EXTRA_DIST += $(addprefix %D%/,README.md) +dist_noinst_DATA += %D%/README.md diff --git a/contrib/pyzfs/Makefile.am b/contrib/pyzfs/Makefile.am index 505cd3c8b35..06d9a09d7f1 100644 --- a/contrib/pyzfs/Makefile.am +++ b/contrib/pyzfs/Makefile.am @@ -1,4 +1,4 @@ -EXTRA_DIST += $(addprefix %D%/,libzfs_core README LICENSE docs) +dist_noinst_DATA += %D%/libzfs_core %D%/README %D%/LICENSE %D%/docs SUBSTFILES += %D%/setup.py if PYZFS_ENABLED diff --git a/contrib/zcp/Makefile.am b/contrib/zcp/Makefile.am index 759a9d529dc..fc3f01a233c 100644 --- a/contrib/zcp/Makefile.am +++ b/contrib/zcp/Makefile.am @@ -1 +1 @@ -EXTRA_DIST += $(addprefix %D%/,autosnap.lua) +dist_noinst_DATA += %D%/autosnap.lua diff --git a/etc/Makefile.am b/etc/Makefile.am index a41f04c9042..53064eb6f6e 100644 --- a/etc/Makefile.am +++ b/etc/Makefile.am @@ -2,7 +2,7 @@ sudoersddir = $(sysconfdir)/sudoers.d sudoersd_DATA = \ %D%/sudoers.d/zfs -EXTRA_DIST += $(sudoersd_DATA) +dist_noinst_DATA += $(sudoersd_DATA) sysconf_zfsdir = $(sysconfdir)/zfs @@ -32,7 +32,7 @@ $(call SHELLCHECK_OPTS,$(initconf_DATA)): SHELLCHECK_SHELL = sh if INIT_SYSV -EXTRA_DIST += $(addprefix %D%/,init.d/README.md) +dist_noinst_DATA += %D%/init.d/README.md init_SCRIPTS = \ %D%/init.d/zfs-import \ diff --git a/lib/libnvpair/Makefile.am b/lib/libnvpair/Makefile.am index 6d9345f5289..87b8d32aa17 100644 --- a/lib/libnvpair/Makefile.am +++ b/lib/libnvpair/Makefile.am @@ -32,4 +32,4 @@ endif libnvpair_la_LDFLAGS += -version-info 3:0:0 -EXTRA_DIST += $(addprefix %D%/,libnvpair.abi libnvpair.suppr) +dist_noinst_DATA += %D%/libnvpair.abi %D%/libnvpair.suppr 
diff --git a/lib/libuutil/Makefile.am b/lib/libuutil/Makefile.am index cb89d423dc4..339f9a06474 100644 --- a/lib/libuutil/Makefile.am +++ b/lib/libuutil/Makefile.am @@ -26,4 +26,4 @@ endif libuutil_la_LDFLAGS += -version-info 3:0:0 -EXTRA_DIST += $(addprefix %D%/,libuutil.abi libuutil.suppr) +dist_noinst_DATA += %D%/libuutil.abi %D%/libuutil.suppr diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am index 07414c4a308..f5eb8467920 100644 --- a/lib/libzfs/Makefile.am +++ b/lib/libzfs/Makefile.am @@ -76,5 +76,5 @@ libzfs_la_LDFLAGS += -version-info 5:0:1 pkgconfig_DATA += %D%/libzfs.pc -EXTRA_DIST += $(addprefix %D%/,libzfs.abi libzfs.suppr) -EXTRA_DIST += $(addprefix %D%/,THIRDPARTYLICENSE.openssl THIRDPARTYLICENSE.openssl.descrip) +dist_noinst_DATA += %D%/libzfs.abi %D%/libzfs.suppr +dist_noinst_DATA += %D%/THIRDPARTYLICENSE.openssl %D%/THIRDPARTYLICENSE.openssl.descrip diff --git a/lib/libzfs_core/Makefile.am b/lib/libzfs_core/Makefile.am index 9370da4e3e7..d1c6fb86d18 100644 --- a/lib/libzfs_core/Makefile.am +++ b/lib/libzfs_core/Makefile.am @@ -43,4 +43,4 @@ libzfs_core_la_LDFLAGS += -version-info 3:0:0 pkgconfig_DATA += %D%/libzfs_core.pc -EXTRA_DIST += $(addprefix %D%/,libzfs_core.abi libzfs_core.suppr) +dist_noinst_DATA += %D%/libzfs_core.abi %D%/libzfs_core.suppr diff --git a/lib/libzfsbootenv/Makefile.am b/lib/libzfsbootenv/Makefile.am index 7b87bfe6786..118f154821f 100644 --- a/lib/libzfsbootenv/Makefile.am +++ b/lib/libzfsbootenv/Makefile.am @@ -26,4 +26,4 @@ libzfsbootenv_la_LDFLAGS += -version-info 1:0:0 pkgconfig_DATA += %D%/libzfsbootenv.pc -EXTRA_DIST += $(addprefix %D%/,libzfsbootenv.abi libzfsbootenv.suppr) +dist_noinst_DATA += %D%/libzfsbootenv.abi %D%/libzfsbootenv.suppr diff --git a/man/Makefile.am b/man/Makefile.am index 362ee9b7e14..8fa21d2fd23 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -1,4 +1,4 @@ -EXTRA_DIST += \ +dist_noinst_man_MANS = \ %D%/man1/cstyle.1 dist_man_MANS = \ @@ -105,8 +105,9 @@ nodist_man_MANS = \ 
%D%/man8/zed.8 \ %D%/man8/zfs-mount-generator.8 -SUBSTFILES += $(nodist_man_MANS) +dist_noinst_DATA += $(dist_noinst_man_MANS) $(dist_man_MANS) +SUBSTFILES += $(nodist_man_MANS) CHECKS += mancheck mancheck: diff --git a/rpm/Makefile.am b/rpm/Makefile.am index 216cb731fea..af7b25021ea 100644 --- a/rpm/Makefile.am +++ b/rpm/Makefile.am @@ -1,4 +1,4 @@ -EXTRA_DIST += \ +dist_noinst_DATA += \ %D%/generic/zfs-dkms.spec.in \ %D%/generic/zfs-kmod.spec.in \ %D%/generic/zfs.spec.in \ diff --git a/scripts/Makefile.am b/scripts/Makefile.am index 6f1dc9ccdba..79719e621b6 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -6,7 +6,7 @@ dist_scripts_SCRIPTS = \ %D%/zimport.sh \ %D%/zloop.sh -EXTRA_SCRIPTS = \ +dist_noinst_SCRIPTS = \ %D%/commitcheck.sh \ %D%/common.sh.in \ %D%/dkms.mkconf \ @@ -18,14 +18,13 @@ EXTRA_SCRIPTS = \ %D%/paxcheck.sh \ %D%/zfs-tests-color.sh -EXTRA_DIST += \ +dist_noinst_DATA += \ %D%/cstyle.pl \ %D%/enum-extract.pl \ %D%/zfs2zol-patch.sed \ - %D%/zol2zfs-patch.sed \ - $(EXTRA_SCRIPTS) + %D%/zol2zfs-patch.sed -SHELLCHECKSCRIPTS += $(dist_scripts_SCRIPTS) $(EXTRA_SCRIPTS) +SHELLCHECKSCRIPTS += $(dist_scripts_SCRIPTS) $(dist_noinst_SCRIPTS) define SCRIPTS_EXTRA_ENVIRONMENT diff --git a/tests/Makefile.am b/tests/Makefile.am index d6ca957eccc..2e633041ab5 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -25,6 +25,6 @@ dist_scripts_runfiles_DATA = \ %D%/runfiles/sunos.run -EXTRA_DIST += $(addprefix %D%/,README.md) +dist_noinst_DATA += %D%/README.md SHELLCHECKSCRIPTS += $(shell find $(srcdir)/%D% -name '*.sh') diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 71edd493942..e3c9874dcd5 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -41,7 +41,7 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/draid libnvpair.la %C%_draid_LDADD += $(ZLIB_LIBS) -EXTRA_DIST += $(addprefix %D%/,file/file_common.h) +dist_noinst_DATA += %D%/file/file_common.h scripts_zfs_tests_bin_PROGRAMS += 
%D%/file_append %D%/file_check %D%/file_trunc %D%/file_write %D%/largest_file %D%/randwritecomp %C%_file_append_SOURCES = %D%/file/file_append.c %C%_file_check_SOURCES = %D%/file/file_check.c @@ -116,7 +116,7 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/xattrtest scripts_zfs_tests_bin_PROGRAMS += %D%/zed_fd_spill-zedlet -EXTRA_DIST += $(addprefix %D%/,linux_dos_attributes/dos_attributes.h) +dist_noinst_DATA += %D%/linux_dos_attributes/dos_attributes.h scripts_zfs_tests_bin_PROGRAMS += %D%/read_dos_attributes %D%/write_dos_attributes %C%_read_dos_attributes_SOURCES = %D%/linux_dos_attributes/read_dos_attributes.c %C%_write_dos_attributes_SOURCES = %D%/linux_dos_attributes/write_dos_attributes.c diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 49f9f3aaa7d..a91a24d1668 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1,5 +1,5 @@ CLEANFILES = -EXTRA_DIST = +dist_noinst_DATA = include $(top_srcdir)/config/Substfiles.am From 152d6fda54e61042a70059c95c44b364aea0bbd8 Mon Sep 17 00:00:00 2001 From: Kevin Jin <33590050+jxdking@users.noreply.github.com> Date: Thu, 26 May 2022 12:36:14 -0400 Subject: [PATCH 28/41] Fix inflated quiesce time caused by lwb_tx during zil_commit() In current zil_commit() process, transaction lwb_tx is assigned in zil_lwb_write_issue(), and is committed in zil_lwb_flush_vdevs_done(). Thus, during lwb write out process, the txg is held in open or quiesing state, until zil_lwb_flush_vdevs_done() is called. If the zil's zio latency is high, it will cause txg_sync_thread() to starve. The goal here is to defer waiting for zil_lwb_flush_vdevs_done to the 'syncing' txg state. That is, in zil_sync(). In this patch, it achieves the goal without holding transaction. A new function zil_lwb_flush_wait_all() is introduced. It waits for the completion of all the zil_lwb_flush_vdevs_done() by given txg. 
Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Reviewed-by: Prakash Surya Signed-off-by: jxdking Closes #12321 --- include/sys/zil_impl.h | 8 +++- module/zfs/zil.c | 89 ++++++++++++++++++++++++++++++++---------- 2 files changed, 76 insertions(+), 21 deletions(-) diff --git a/include/sys/zil_impl.h b/include/sys/zil_impl.h index d2f4018653a..8409ce864e9 100644 --- a/include/sys/zil_impl.h +++ b/include/sys/zil_impl.h @@ -99,7 +99,7 @@ typedef struct lwb { char *lwb_buf; /* log write buffer */ zio_t *lwb_write_zio; /* zio for the lwb buffer */ zio_t *lwb_root_zio; /* root zio for lwb write and flushes */ - dmu_tx_t *lwb_tx; /* tx for log block allocation */ + uint64_t lwb_issued_txg; /* the txg when the write is issued */ uint64_t lwb_max_txg; /* highest txg in this lwb */ list_node_t lwb_node; /* zilog->zl_lwb_list linkage */ list_t lwb_itxs; /* list of itx's */ @@ -209,6 +209,12 @@ struct zilog { uint_t zl_prev_rotor; /* rotor for zl_prev[] */ txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */ uint64_t zl_dirty_max_txg; /* highest txg used to dirty zilog */ + + kmutex_t zl_lwb_io_lock; /* protect following members */ + uint64_t zl_lwb_inflight[TXG_SIZE]; /* io issued, but not done */ + kcondvar_t zl_lwb_io_cv; /* signal when the flush is done */ + uint64_t zl_lwb_max_issued_txg; /* max txg when lwb io issued */ + /* * Max block size for this ZIL. 
Note that this can not be changed * while the ZIL is in use because consumers (ZPL/zvol) need to take diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 161ce3c97e7..ec09691b241 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -559,8 +559,8 @@ zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg, lwb->lwb_max_txg = txg; lwb->lwb_write_zio = NULL; lwb->lwb_root_zio = NULL; - lwb->lwb_tx = NULL; lwb->lwb_issued_timestamp = 0; + lwb->lwb_issued_txg = 0; if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { lwb->lwb_nused = sizeof (zil_chain_t); lwb->lwb_sz = BP_GET_LSIZE(bp); @@ -1183,9 +1183,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio) { lwb_t *lwb = zio->io_private; zilog_t *zilog = lwb->lwb_zilog; - dmu_tx_t *tx = lwb->lwb_tx; zil_commit_waiter_t *zcw; itx_t *itx; + uint64_t txg; spa_config_exit(zilog->zl_spa, SCL_STATE, lwb); @@ -1194,15 +1194,13 @@ zil_lwb_flush_vdevs_done(zio_t *zio) mutex_enter(&zilog->zl_lock); /* - * Ensure the lwb buffer pointer is cleared before releasing the - * txg. If we have had an allocation failure and the txg is + * If we have had an allocation failure and the txg is * waiting to sync then we want zil_sync() to remove the lwb so * that it's not picked up as the next new one in * zil_process_commit_list(). zil_sync() will only remove the * lwb if lwb_buf is null. */ lwb->lwb_buf = NULL; - lwb->lwb_tx = NULL; ASSERT3U(lwb->lwb_issued_timestamp, >, 0); zilog->zl_last_lwb_latency = gethrtime() - lwb->lwb_issued_timestamp; @@ -1261,12 +1259,47 @@ zil_lwb_flush_vdevs_done(zio_t *zio) mutex_exit(&zilog->zl_lock); - /* - * Now that we've written this log block, we have a stable pointer - * to the next block in the chain, so it's OK to let the txg in - * which we allocated the next block sync. 
- */ - dmu_tx_commit(tx); + mutex_enter(&zilog->zl_lwb_io_lock); + txg = lwb->lwb_issued_txg; + ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0); + zilog->zl_lwb_inflight[txg & TXG_MASK]--; + if (zilog->zl_lwb_inflight[txg & TXG_MASK] == 0) + cv_broadcast(&zilog->zl_lwb_io_cv); + mutex_exit(&zilog->zl_lwb_io_lock); +} + +/* + * Wait for the completion of all issued write/flush of that txg provided. + * It guarantees zil_lwb_flush_vdevs_done() is called and returned. + */ +static void +zil_lwb_flush_wait_all(zilog_t *zilog, uint64_t txg) +{ + ASSERT3U(txg, ==, spa_syncing_txg(zilog->zl_spa)); + + mutex_enter(&zilog->zl_lwb_io_lock); + while (zilog->zl_lwb_inflight[txg & TXG_MASK] > 0) + cv_wait(&zilog->zl_lwb_io_cv, &zilog->zl_lwb_io_lock); + mutex_exit(&zilog->zl_lwb_io_lock); + +#ifdef ZFS_DEBUG + mutex_enter(&zilog->zl_lock); + mutex_enter(&zilog->zl_lwb_io_lock); + lwb_t *lwb = list_head(&zilog->zl_lwb_list); + while (lwb != NULL && lwb->lwb_max_txg <= txg) { + if (lwb->lwb_issued_txg <= txg) { + ASSERT(lwb->lwb_state != LWB_STATE_ISSUED); + ASSERT(lwb->lwb_state != LWB_STATE_WRITE_DONE); + IMPLY(lwb->lwb_issued_txg > 0, + lwb->lwb_state == LWB_STATE_FLUSH_DONE); + } + IMPLY(lwb->lwb_state == LWB_STATE_FLUSH_DONE, + lwb->lwb_buf == NULL); + lwb = list_next(&zilog->zl_lwb_list, lwb); + } + mutex_exit(&zilog->zl_lwb_io_lock); + mutex_exit(&zilog->zl_lock); +#endif } /* @@ -1562,11 +1595,6 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) /* * Allocate the next block and save its address in this block * before writing it in order to establish the log chain. - * Note that if the allocation of nlwb synced before we wrote - * the block that points at it (lwb), we'd leak it if we crashed. - * Therefore, we don't do dmu_tx_commit() until zil_lwb_write_done(). - * We dirty the dataset to ensure that zil_sync() will be called - * to clean up in the event of allocation failure or I/O failure. 
*/ tx = dmu_tx_create(zilog->zl_os); @@ -1582,7 +1610,11 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); txg = dmu_tx_get_txg(tx); - lwb->lwb_tx = tx; + mutex_enter(&zilog->zl_lwb_io_lock); + lwb->lwb_issued_txg = txg; + zilog->zl_lwb_inflight[txg & TXG_MASK]++; + zilog->zl_lwb_max_issued_txg = MAX(txg, zilog->zl_lwb_max_issued_txg); + mutex_exit(&zilog->zl_lwb_io_lock); /* * Log blocks are pre-allocated. Here we select the size of the next @@ -1657,6 +1689,8 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) zio_nowait(lwb->lwb_root_zio); zio_nowait(lwb->lwb_write_zio); + dmu_tx_commit(tx); + /* * If there was an allocation failure then nlwb will be null which * forces a txg_wait_synced(). @@ -3124,6 +3158,8 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) if (spa_sync_pass(spa) != 1) return; + zil_lwb_flush_wait_all(zilog, txg); + mutex_enter(&zilog->zl_lock); ASSERT(zilog->zl_stop_sync == 0); @@ -3290,6 +3326,7 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zilog->zl_issuer_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&zilog->zl_lwb_io_lock, NULL, MUTEX_DEFAULT, NULL); for (int i = 0; i < TXG_SIZE; i++) { mutex_init(&zilog->zl_itxg[i].itxg_lock, NULL, @@ -3303,6 +3340,7 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) offsetof(itx_t, itx_node)); cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL); + cv_init(&zilog->zl_lwb_io_cv, NULL, CV_DEFAULT, NULL); return (zilog); } @@ -3338,8 +3376,10 @@ zil_free(zilog_t *zilog) mutex_destroy(&zilog->zl_issuer_lock); mutex_destroy(&zilog->zl_lock); + mutex_destroy(&zilog->zl_lwb_io_lock); cv_destroy(&zilog->zl_cv_suspend); + cv_destroy(&zilog->zl_lwb_io_cv); kmem_free(zilog, sizeof (zilog_t)); } @@ -3387,9 +3427,18 @@ zil_close(zilog_t *zilog) mutex_exit(&zilog->zl_lock); /* - * We need to use txg_wait_synced() to wait long enough for the - * ZIL to be clean, and to wait for all pending lwbs to be - 
* written out. + * zl_lwb_max_issued_txg may be larger than lwb_max_txg. It depends + * on the time when the dmu_tx transaction is assigned in + * zil_lwb_write_issue(). + */ + mutex_enter(&zilog->zl_lwb_io_lock); + txg = MAX(zilog->zl_lwb_max_issued_txg, txg); + mutex_exit(&zilog->zl_lwb_io_lock); + + /* + * We need to use txg_wait_synced() to wait until that txg is synced. + * zil_sync() will guarantee all lwbs up to that txg have been + * written out, flushed, and cleaned. */ if (txg != 0) txg_wait_synced(zilog->zl_dmu_pool, txg); From 5f264996f4dc2d5279afe96698688a20c281c473 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 May 2022 20:28:51 +0000 Subject: [PATCH 29/41] Linux 5.18 compat: bio_alloc() As for the Linux 5.18 kernel bio_alloc() expects a block_device struct as an argument. This removes the need for the bio_set_dev() compatibility code for 5.18 and newer kernels. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #13515 --- module/os/linux/zfs/vdev_disk.c | 53 ++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 235cd1691c1..cf2d7386b95 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -460,6 +460,13 @@ vdev_submit_bio_impl(struct bio *bio) #define preempt_schedule_notrace(x) preempt_schedule(x) #endif +/* + * As for the Linux 5.18 kernel bio_alloc() expects a block_device struct + * as an argument removing the need to set it with bio_set_dev(). This + * removes the need for all of the following compatibility code. 
+ */ +#if !defined(HAVE_BIO_ALLOC_4ARG) + #ifdef HAVE_BIO_SET_DEV #if defined(CONFIG_BLK_CGROUP) && defined(HAVE_BIO_SET_DEV_GPL_ONLY) /* @@ -556,6 +563,7 @@ bio_set_dev(struct bio *bio, struct block_device *bdev) bio->bi_bdev = bdev; } #endif /* HAVE_BIO_SET_DEV */ +#endif /* !HAVE_BIO_ALLOC_4ARG */ static inline void vdev_submit_bio(struct bio *bio) @@ -566,10 +574,36 @@ vdev_submit_bio(struct bio *bio) current->bio_list = bio_list; } +static inline struct bio * +vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask, + unsigned short nr_vecs) +{ + struct bio *bio; + #ifdef HAVE_BIO_ALLOC_4ARG -#define bio_alloc(gfp_mask, nr_iovecs) bio_alloc(NULL, nr_iovecs, 0, gfp_mask) + bio = bio_alloc(bdev, nr_vecs, 0, gfp_mask); +#else + bio = bio_alloc(gfp_mask, nr_vecs); + if (likely(bio != NULL)) + bio_set_dev(bio, bdev); #endif + return (bio); +} + +static inline unsigned int +vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset) +{ + unsigned long nr_segs = abd_nr_pages_off(zio->io_abd, + bio_size, abd_offset); + +#ifdef HAVE_BIO_MAX_SEGS + return (bio_max_segs(nr_segs)); +#else + return (MIN(nr_segs, BIO_MAX_PAGES)); +#endif +} + static int __vdev_disk_physio(struct block_device *bdev, zio_t *zio, size_t io_size, uint64_t io_offset, int rw, int flags) @@ -581,6 +615,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, int bio_count = 16; int error = 0; struct blk_plug plug; + unsigned short nr_vecs; /* * Accessing outside the block device is never allowed. 
@@ -632,15 +667,8 @@ retry: goto retry; } - /* bio_alloc() with __GFP_WAIT never returns NULL */ -#ifdef HAVE_BIO_MAX_SEGS - dr->dr_bio[i] = bio_alloc(GFP_NOIO, bio_max_segs( - abd_nr_pages_off(zio->io_abd, bio_size, abd_offset))); -#else - dr->dr_bio[i] = bio_alloc(GFP_NOIO, - MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset), - BIO_MAX_PAGES)); -#endif + nr_vecs = vdev_bio_max_segs(zio, bio_size, abd_offset); + dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs); if (unlikely(dr->dr_bio[i] == NULL)) { vdev_disk_dio_free(dr); return (SET_ERROR(ENOMEM)); @@ -649,7 +677,6 @@ retry: /* Matching put called by vdev_disk_physio_completion */ vdev_disk_dio_get(dr); - bio_set_dev(dr->dr_bio[i], bdev); BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9; dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion; dr->dr_bio[i]->bi_private = dr; @@ -713,14 +740,12 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) if (!q) return (SET_ERROR(ENXIO)); - bio = bio_alloc(GFP_NOIO, 0); - /* bio_alloc() with __GFP_WAIT never returns NULL */ + bio = vdev_bio_alloc(bdev, GFP_NOIO, 0); if (unlikely(bio == NULL)) return (SET_ERROR(ENOMEM)); bio->bi_end_io = vdev_disk_io_flush_completion; bio->bi_private = zio; - bio_set_dev(bio, bdev); bio_set_flush(bio); vdev_submit_bio(bio); invalidate_bdev(bdev); From 5e4aedaca7cee981ed21ac856fd27b4682bb7888 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 May 2022 17:51:55 +0000 Subject: [PATCH 30/41] Linux 5.19 compat: bdev_max_discard_sectors() Linux 5.19 commit torvalds/linux@70200574cc removed the blk_queue_discard() helper function. The preferred interface is to now use the bdev_max_discard_sectors() function to check for discard support. 
Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #13515 --- config/kernel-blk-queue.m4 | 32 ++++++++++++++++--- config/kernel-vfs-iov_iter.m4 | 2 ++ include/os/linux/kernel/linux/blkdev_compat.h | 19 +++++++++++ module/os/linux/zfs/vdev_disk.c | 2 +- module/os/linux/zfs/zvol_os.c | 2 ++ 5 files changed, 51 insertions(+), 6 deletions(-) diff --git a/config/kernel-blk-queue.m4 b/config/kernel-blk-queue.m4 index 2472c49dda4..c6021de2e6c 100644 --- a/config/kernel-blk-queue.m4 +++ b/config/kernel-blk-queue.m4 @@ -74,6 +74,8 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD], [ AC_DEFINE(HAVE_BLK_QUEUE_UPDATE_READAHEAD, 1, [blk_queue_update_readahead() exists]) ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether disk_update_readahead() exists]) ZFS_LINUX_TEST_RESULT([disk_update_readahead], [ AC_MSG_RESULT(yes) @@ -86,10 +88,19 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD], [ ]) dnl # -dnl # 2.6.32 API, -dnl # blk_queue_discard() +dnl # 5.19: bdev_max_discard_sectors() available +dnl # 2.6.32: blk_queue_discard() available dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD], [ + ZFS_LINUX_TEST_SRC([bdev_max_discard_sectors], [ + #include + ],[ + struct block_device *bdev __attribute__ ((unused)) = NULL; + unsigned int error __attribute__ ((unused)); + + error = bdev_max_discard_sectors(bdev); + ]) + ZFS_LINUX_TEST_SRC([blk_queue_discard], [ #include ],[ @@ -102,11 +113,22 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD], [ ]) AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [ - AC_MSG_CHECKING([whether blk_queue_discard() is available]) - ZFS_LINUX_TEST_RESULT([blk_queue_discard], [ + AC_MSG_CHECKING([whether bdev_max_discard_sectors() is available]) + ZFS_LINUX_TEST_RESULT([bdev_max_discard_sectors], [ AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BDEV_MAX_DISCARD_SECTORS, 1, + [bdev_max_discard_sectors() is available]) ],[ - ZFS_LINUX_TEST_ERROR([blk_queue_discard]) + AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether blk_queue_discard() is available]) + 
ZFS_LINUX_TEST_RESULT([blk_queue_discard], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_QUEUE_DISCARD, 1, + [blk_queue_discard() is available]) + ],[ + ZFS_LINUX_TEST_ERROR([blk_queue_discard]) + ]) ]) ]) diff --git a/config/kernel-vfs-iov_iter.m4 b/config/kernel-vfs-iov_iter.m4 index 57f78745a24..e0617faab02 100644 --- a/config/kernel-vfs-iov_iter.m4 +++ b/config/kernel-vfs-iov_iter.m4 @@ -134,6 +134,8 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [ AC_DEFINE(HAVE_IOV_ITER_FAULT_IN_READABLE, 1, [iov_iter_fault_in_readable() is available]) ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether fault_in_iov_iter_readable() is available]) ZFS_LINUX_TEST_RESULT([fault_in_iov_iter_readable], [ AC_MSG_RESULT(yes) diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h index 9fa8884bb7a..084d48c876d 100644 --- a/include/os/linux/kernel/linux/blkdev_compat.h +++ b/include/os/linux/kernel/linux/blkdev_compat.h @@ -494,6 +494,25 @@ blk_queue_discard_granularity(struct request_queue *q, unsigned int dg) q->limits.discard_granularity = dg; } +/* + * 5.19 API, + * bdev_max_discard_sectors() + * + * 2.6.32 API, + * blk_queue_discard() + */ +static inline boolean_t +bdev_discard_supported(struct block_device *bdev) +{ +#if defined(HAVE_BDEV_MAX_DISCARD_SECTORS) + return (!!bdev_max_discard_sectors(bdev)); +#elif defined(HAVE_BLK_QUEUE_DISCARD) + return (!!blk_queue_discard(bdev_get_queue(bdev))); +#else +#error "Unsupported kernel" +#endif +} + /* * 4.8 API, * blk_queue_secure_erase() diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index cf2d7386b95..90930b83f88 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -316,7 +316,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, v->vdev_nowritecache = B_FALSE; /* Set when device reports it supports TRIM. 
*/ - v->vdev_has_trim = !!blk_queue_discard(q); + v->vdev_has_trim = bdev_discard_supported(vd->vd_bdev); /* Set when device reports it supports secure TRIM. */ v->vdev_has_securetrim = !!blk_queue_discard_secure(q); diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 4ebdf833169..39441700ae8 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -1053,7 +1053,9 @@ zvol_os_create_minor(const char *name) (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9); blk_queue_discard_granularity(zv->zv_zso->zvo_queue, zv->zv_volblocksize); +#ifdef QUEUE_FLAG_DISCARD blk_queue_flag_set(QUEUE_FLAG_DISCARD, zv->zv_zso->zvo_queue); +#endif #ifdef QUEUE_FLAG_NONROT blk_queue_flag_set(QUEUE_FLAG_NONROT, zv->zv_zso->zvo_queue); #endif From e2c31f2bc7d190fbd8fc5c13bac23daffc5d7b56 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 May 2022 18:20:04 +0000 Subject: [PATCH 31/41] Linux 5.19 compat: bdev_max_secure_erase_sectors() Linux 5.19 commit torvalds/linux@44abff2c0 removed the blk_queue_secure_erase() helper function. The preferred interface is to now use the bdev_max_secure_erase_sectors() function to check for secure erase support.
Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #13515 --- config/kernel-blk-queue.m4 | 44 +++++++++++++------ include/os/linux/kernel/linux/blkdev_compat.h | 17 ++++--- module/os/linux/zfs/vdev_disk.c | 6 +-- 3 files changed, 43 insertions(+), 24 deletions(-) diff --git a/config/kernel-blk-queue.m4 b/config/kernel-blk-queue.m4 index c6021de2e6c..6f42b98125c 100644 --- a/config/kernel-blk-queue.m4 +++ b/config/kernel-blk-queue.m4 @@ -133,13 +133,20 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [ ]) dnl # -dnl # 4.8 API, -dnl # blk_queue_secure_erase() -dnl # -dnl # 2.6.36 - 4.7 API, -dnl # blk_queue_secdiscard() +dnl # 5.19: bdev_max_secure_erase_sectors() available +dnl # 4.8: blk_queue_secure_erase() available +dnl # 2.6.36: blk_queue_secdiscard() available dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [ + ZFS_LINUX_TEST_SRC([bdev_max_secure_erase_sectors], [ + #include + ],[ + struct block_device *bdev __attribute__ ((unused)) = NULL; + unsigned int error __attribute__ ((unused)); + + error = bdev_max_secure_erase_sectors(bdev); + ]) + ZFS_LINUX_TEST_SRC([blk_queue_secure_erase], [ #include ],[ @@ -162,21 +169,30 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [ ]) AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ - AC_MSG_CHECKING([whether blk_queue_secure_erase() is available]) - ZFS_LINUX_TEST_RESULT([blk_queue_secure_erase], [ + AC_MSG_CHECKING([whether bdev_max_secure_erase_sectors() is available]) + ZFS_LINUX_TEST_RESULT([bdev_max_secure_erase_sectors], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1, - [blk_queue_secure_erase() is available]) + AC_DEFINE(HAVE_BDEV_MAX_SECURE_ERASE_SECTORS, 1, + [bdev_max_secure_erase_sectors() is available]) ],[ AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether blk_queue_secdiscard() is available]) - ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [ + AC_MSG_CHECKING([whether blk_queue_secure_erase() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_secure_erase], [ 
AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1, - [blk_queue_secdiscard() is available]) + AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1, + [blk_queue_secure_erase() is available]) ],[ - ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase]) + AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether blk_queue_secdiscard() is available]) + ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1, + [blk_queue_secdiscard() is available]) + ],[ + ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase]) + ]) ]) ]) ]) diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h index 084d48c876d..da188b4eaf8 100644 --- a/include/os/linux/kernel/linux/blkdev_compat.h +++ b/include/os/linux/kernel/linux/blkdev_compat.h @@ -514,21 +514,26 @@ bdev_discard_supported(struct block_device *bdev) } /* + * 5.19 API, + * bdev_max_secure_erase_sectors() + * * 4.8 API, * blk_queue_secure_erase() * * 2.6.36 - 4.7 API, * blk_queue_secdiscard() */ -static inline int -blk_queue_discard_secure(struct request_queue *q) +static inline boolean_t +bdev_secure_discard_supported(struct block_device *bdev) { -#if defined(HAVE_BLK_QUEUE_SECURE_ERASE) - return (blk_queue_secure_erase(q)); +#if defined(HAVE_BDEV_MAX_SECURE_ERASE_SECTORS) + return (!!bdev_max_secure_erase_sectors(bdev)); +#elif defined(HAVE_BLK_QUEUE_SECURE_ERASE) + return (!!blk_queue_secure_erase(bdev_get_queue(bdev))); #elif defined(HAVE_BLK_QUEUE_SECDISCARD) - return (blk_queue_secdiscard(q)); + return (!!blk_queue_secdiscard(bdev_get_queue(bdev))); #else - return (0); +#error "Unsupported kernel" #endif } diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 90930b83f88..5d87e92118e 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -304,8 +304,6 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, rw_exit(&vd->vd_lock); } - struct request_queue *q = 
bdev_get_queue(vd->vd_bdev); - /* Determine the physical block size */ int physical_block_size = bdev_physical_block_size(vd->vd_bdev); @@ -319,10 +317,10 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, v->vdev_has_trim = bdev_discard_supported(vd->vd_bdev); /* Set when device reports it supports secure TRIM. */ - v->vdev_has_securetrim = !!blk_queue_discard_secure(q); + v->vdev_has_securetrim = bdev_secure_discard_supported(vd->vd_bdev); /* Inform the ZIO pipeline that we are non-rotational */ - v->vdev_nonrot = blk_queue_nonrot(q); + v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev)); /* Physical volume size in bytes for the partition */ *psize = bdev_capacity(vd->vd_bdev); From a12a5cb5b821f24f26d388094cdac79deb0e879f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 May 2022 19:40:22 +0000 Subject: [PATCH 32/41] Linux 5.19 compat: blkdev_issue_secure_erase() Linux 5.19 commit torvalds/linux@44abff2c0 splits the secure erase functionality from the blkdev_issue_discard() function. The blkdev_issue_secure_erase() function must now be called to issue a secure erase.
Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #13515 --- config/kernel-blkdev.m4 | 53 +++++++++++++++++++++++++++++++++ module/os/linux/zfs/vdev_disk.c | 37 +++++++++++++++++------ 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/config/kernel-blkdev.m4 b/config/kernel-blkdev.m4 index 9c60e5dd421..fb7b1a45863 100644 --- a/config/kernel-blkdev.m4 +++ b/config/kernel-blkdev.m4 @@ -294,6 +294,57 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE], [ ]) ]) +dnl # +dnl # 5.19 API: blkdev_issue_secure_erase() +dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE) +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [ + ZFS_LINUX_TEST_SRC([blkdev_issue_secure_erase], [ + #include + ],[ + struct block_device *bdev = NULL; + sector_t sector = 0; + sector_t nr_sects = 0; + int error __attribute__ ((unused)); + + error = blkdev_issue_secure_erase(bdev, + sector, nr_sects, GFP_KERNEL); + ]) + + ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [ + #include + ],[ + struct block_device *bdev = NULL; + sector_t sector = 0; + sector_t nr_sects = 0; + unsigned long flags = 0; + int error __attribute__ ((unused)); + + error = blkdev_issue_discard(bdev, + sector, nr_sects, GFP_KERNEL, flags); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [ + AC_MSG_CHECKING([whether blkdev_issue_secure_erase() is available]) + ZFS_LINUX_TEST_RESULT([blkdev_issue_secure_erase], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLKDEV_ISSUE_SECURE_ERASE, 1, + [blkdev_issue_secure_erase() is available]) + ],[ + AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether blkdev_issue_discard() is available]) + ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1, + [blkdev_issue_discard() is available]) + ],[ + ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()]) + ]) + ]) +]) + dnl # dnl # 5.13 API change dnl # blkdev_get_by_path() no longer handles ERESTARTSYS @@ -326,6 +377,7 @@ 
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [ ZFS_AC_KERNEL_SRC_BLKDEV_CHECK_DISK_CHANGE ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE + ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE ]) AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [ @@ -340,4 +392,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [ ZFS_AC_KERNEL_BLKDEV_BDEV_CHECK_MEDIA_CHANGE ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS + ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE ]) diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 5d87e92118e..9a382261df7 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -751,12 +751,38 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) return (0); } +static int +vdev_disk_io_trim(zio_t *zio) +{ + vdev_t *v = zio->io_vd; + vdev_disk_t *vd = v->vdev_tsd; + +#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) + if (zio->io_trim_flags & ZIO_TRIM_SECURE) { + return (-blkdev_issue_secure_erase(vd->vd_bdev, + zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); + } else { + return (-blkdev_issue_discard(vd->vd_bdev, + zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); + } +#elif defined(HAVE_BLKDEV_ISSUE_DISCARD) + unsigned long trim_flags = 0; +#if defined(BLKDEV_DISCARD_SECURE) + if (zio->io_trim_flags & ZIO_TRIM_SECURE) + trim_flags |= BLKDEV_DISCARD_SECURE; +#endif + return (-blkdev_issue_discard(vd->vd_bdev, + zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags)); +#else +#error "Unsupported kernel" +#endif +} + static void vdev_disk_io_start(zio_t *zio) { vdev_t *v = zio->io_vd; vdev_disk_t *vd = v->vdev_tsd; - unsigned long trim_flags = 0; int rw, error; /* @@ -829,14 +855,7 @@ vdev_disk_io_start(zio_t *zio) break; case ZIO_TYPE_TRIM: -#if defined(BLKDEV_DISCARD_SECURE) - if (zio->io_trim_flags & ZIO_TRIM_SECURE) - trim_flags |= BLKDEV_DISCARD_SECURE; -#endif - zio->io_error = -blkdev_issue_discard(vd->vd_bdev, - zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, - 
trim_flags); - + zio->io_error = vdev_disk_io_trim(zio); rw_exit(&vd->vd_lock); zio_interrupt(zio); return; From c2c2e7bb8b7c269904777b61f4b0a678f1ffb9a3 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 May 2022 20:44:43 +0000 Subject: [PATCH 33/41] Linux 5.19 compat: aops->read_folio() As of the Linux 5.19 kernel the readpage() address space operation has been replaced by read_folio(). Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #13515 --- config/kernel-vfs-read_folio.m4 | 32 ++++++++++++++++++++++++++++++++ config/kernel.m4 | 2 ++ module/os/linux/zfs/zpl_file.c | 12 ++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 config/kernel-vfs-read_folio.m4 diff --git a/config/kernel-vfs-read_folio.m4 b/config/kernel-vfs-read_folio.m4 new file mode 100644 index 00000000000..234d1212ab9 --- /dev/null +++ b/config/kernel-vfs-read_folio.m4 @@ -0,0 +1,32 @@ +dnl # +dnl # Linux 5.19 uses read_folio in lieu of readpage +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO], [ + ZFS_LINUX_TEST_SRC([vfs_has_read_folio], [ + #include + + static int + test_read_folio(struct file *file, struct folio *folio) { + (void) file; (void) folio; + return (0); + } + + static const struct address_space_operations + aops __attribute__ ((unused)) = { + .read_folio = test_read_folio, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_VFS_READ_FOLIO], [ + dnl # + dnl # Linux 5.19 uses read_folio in lieu of readpage + dnl # + AC_MSG_CHECKING([read_folio exists]) + ZFS_LINUX_TEST_RESULT([vfs_has_read_folio], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VFS_READ_FOLIO, 1, [read_folio exists]) + ],[ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index a70db91a836..eba3c066993 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -102,6 +102,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SGET ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO + ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO 
ZFS_AC_KERNEL_SRC_VFS_GETATTR ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_SRC_VFS_ITERATE @@ -219,6 +220,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_LSEEK_EXECUTE ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO + ZFS_AC_KERNEL_VFS_READ_FOLIO ZFS_AC_KERNEL_VFS_GETATTR ZFS_AC_KERNEL_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_VFS_ITERATE diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 8b84eb795fc..9a640fb40b6 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -674,11 +674,19 @@ zpl_readpage_common(struct page *pp) return (error); } +#ifdef HAVE_VFS_READ_FOLIO +static int +zpl_read_folio(struct file *filp, struct folio *folio) +{ + return (zpl_readpage_common(&folio->page)); +} +#else static int zpl_readpage(struct file *filp, struct page *pp) { return (zpl_readpage_common(pp)); } +#endif static int zpl_readpage_filler(void *data, struct page *pp) @@ -1208,7 +1216,11 @@ const struct address_space_operations zpl_address_space_operations = { #else .readahead = zpl_readahead, #endif +#ifdef HAVE_VFS_READ_FOLIO + .read_folio = zpl_read_folio, +#else .readpage = zpl_readpage, +#endif .writepage = zpl_writepage, .writepages = zpl_writepages, .direct_IO = zpl_direct_IO, From d41e864181e4544eca08332b31f85318a3b0e3b3 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 May 2022 21:31:03 +0000 Subject: [PATCH 34/41] Linux 5.19 compat: bdev_start_io_acct() / bdev_end_io_acct() As of the Linux 5.19 kernel the disk_*_io_acct() helper functions have been replaced by the bdev_*_io_acct() functions. 
Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #13515 --- config/kernel-generic_io_acct.m4 | 83 ++++++++++++------- include/os/linux/kernel/linux/blkdev_compat.h | 9 +- 2 files changed, 62 insertions(+), 30 deletions(-) diff --git a/config/kernel-generic_io_acct.m4 b/config/kernel-generic_io_acct.m4 index 0f4381db4c5..a8a448c6fe9 100644 --- a/config/kernel-generic_io_acct.m4 +++ b/config/kernel-generic_io_acct.m4 @@ -2,6 +2,19 @@ dnl # dnl # Check for generic io accounting interface. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [ + ZFS_LINUX_TEST_SRC([bdev_io_acct], [ + #include + ], [ + struct block_device *bdev = NULL; + struct bio *bio = NULL; + unsigned long passed_time = 0; + unsigned long start_time; + + start_time = bdev_start_io_acct(bdev, bio_sectors(bio), + bio_op(bio), passed_time); + bdev_end_io_acct(bdev, bio_op(bio), start_time); + ]) + ZFS_LINUX_TEST_SRC([disk_io_acct], [ #include ], [ @@ -50,61 +63,75 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [ dnl # - dnl # 5.12 API, + dnl # 5.19 API, dnl # - dnl # bio_start_io_acct() and bio_end_io_acct() became GPL-exported - dnl # so use disk_start_io_acct() and disk_end_io_acct() instead + dnl # disk_start_io_acct() and disk_end_io_acct() have been replaced by + dnl # bdev_start_io_acct() and bdev_end_io_acct(). dnl # - AC_MSG_CHECKING([whether generic disk_*_io_acct() are available]) - ZFS_LINUX_TEST_RESULT([disk_io_acct], [ + AC_MSG_CHECKING([whether generic bdev_*_io_acct() are available]) + ZFS_LINUX_TEST_RESULT([bdev_io_acct], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DISK_IO_ACCT, 1, [disk_*_io_acct() available]) + AC_DEFINE(HAVE_BDEV_IO_ACCT, 1, [bdev_*_io_acct() available]) ], [ AC_MSG_RESULT(no) dnl # - dnl # 5.7 API, + dnl # 5.12 API, dnl # - dnl # Added bio_start_io_acct() and bio_end_io_acct() helpers. 
+ dnl # bio_start_io_acct() and bio_end_io_acct() became GPL-exported + dnl # so use disk_start_io_acct() and disk_end_io_acct() instead dnl # - AC_MSG_CHECKING([whether generic bio_*_io_acct() are available]) - ZFS_LINUX_TEST_RESULT([bio_io_acct], [ + AC_MSG_CHECKING([whether generic disk_*_io_acct() are available]) + ZFS_LINUX_TEST_RESULT([disk_io_acct], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_IO_ACCT, 1, [bio_*_io_acct() available]) + AC_DEFINE(HAVE_DISK_IO_ACCT, 1, [disk_*_io_acct() available]) ], [ AC_MSG_RESULT(no) dnl # - dnl # 4.14 API, + dnl # 5.7 API, dnl # - dnl # generic_start_io_acct/generic_end_io_acct now require - dnl # request_queue to be provided. No functional changes, - dnl # but preparation for inflight accounting. + dnl # Added bio_start_io_acct() and bio_end_io_acct() helpers. dnl # - AC_MSG_CHECKING([whether generic_*_io_acct wants 4 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args], - [generic_start_io_acct], [block/bio.c], [ + AC_MSG_CHECKING([whether generic bio_*_io_acct() are available]) + ZFS_LINUX_TEST_RESULT([bio_io_acct], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1, - [generic_*_io_acct() 4 arg available]) + AC_DEFINE(HAVE_BIO_IO_ACCT, 1, [bio_*_io_acct() available]) ], [ AC_MSG_RESULT(no) dnl # - dnl # 3.19 API addition + dnl # 4.14 API, dnl # - dnl # torvalds/linux@394ffa50 allows us to increment - dnl # iostat counters without generic_make_request(). + dnl # generic_start_io_acct/generic_end_io_acct now require + dnl # request_queue to be provided. No functional changes, + dnl # but preparation for inflight accounting. 
dnl # - AC_MSG_CHECKING( - [whether generic_*_io_acct wants 3 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args], + AC_MSG_CHECKING([whether generic_*_io_acct wants 4 args]) + ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args], [generic_start_io_acct], [block/bio.c], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1, - [generic_*_io_acct() 3 arg available]) + AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1, + [generic_*_io_acct() 4 arg available]) ], [ AC_MSG_RESULT(no) + + dnl # + dnl # 3.19 API addition + dnl # + dnl # torvalds/linux@394ffa50 allows us to increment + dnl # iostat counters without generic_make_request(). + dnl # + AC_MSG_CHECKING( + [whether generic_*_io_acct wants 3 args]) + ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args], + [generic_start_io_acct], [block/bio.c], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1, + [generic_*_io_acct() 3 arg available]) + ], [ + AC_MSG_RESULT(no) + ]) ]) ]) ]) diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h index da188b4eaf8..fd91560a3cc 100644 --- a/include/os/linux/kernel/linux/blkdev_compat.h +++ b/include/os/linux/kernel/linux/blkdev_compat.h @@ -551,7 +551,10 @@ blk_generic_start_io_acct(struct request_queue *q __attribute__((unused)), struct gendisk *disk __attribute__((unused)), int rw __attribute__((unused)), struct bio *bio) { -#if defined(HAVE_DISK_IO_ACCT) +#if defined(HAVE_BDEV_IO_ACCT) + return (bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), jiffies)); +#elif defined(HAVE_DISK_IO_ACCT) return (disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio))); #elif defined(HAVE_BIO_IO_ACCT) return (bio_start_io_acct(bio)); @@ -574,7 +577,9 @@ blk_generic_end_io_acct(struct request_queue *q __attribute__((unused)), struct gendisk *disk __attribute__((unused)), int rw __attribute__((unused)), struct bio *bio, unsigned long start_time) { -#if defined(HAVE_DISK_IO_ACCT) +#if defined(HAVE_BDEV_IO_ACCT) + 
bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time); +#elif defined(HAVE_DISK_IO_ACCT) disk_end_io_acct(disk, bio_op(bio), start_time); #elif defined(HAVE_BIO_IO_ACCT) bio_end_io_acct(bio, start_time); From 91350681b8c8b3f0a9b04e6ab3b8931406e87355 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 May 2022 15:56:05 -0700 Subject: [PATCH 35/41] Linux 5.19 compat: zap_flags_t conflict As of the Linux 5.19 kernel an identically named zap_flags_t typedef is declared in the include/linux/mm_types.h linux header. Sadly, the inclusion of this header cannot be easily avoided. To resolve the conflict a #define is used to remap the name in the OpenZFS sources when building against the Linux kernel. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #13515 --- include/sys/zap.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/sys/zap.h b/include/sys/zap.h index b19b4643879..fd7a3a1599b 100644 --- a/include/sys/zap.h +++ b/include/sys/zap.h @@ -110,7 +110,12 @@ typedef enum zap_flags { * already randomly distributed. */ ZAP_FLAG_PRE_HASHED_KEY = 1 << 2, +#if defined(__linux__) && defined(_KERNEL) +} zfs_zap_flags_t; +#define zap_flags_t zfs_zap_flags_t +#else } zap_flags_t; +#endif /* * Create a new zapobj with no attributes and return its object number. From a70e613070a8ca96f8214ba1ff61549cbbad0a2f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 31 May 2022 14:38:00 -0700 Subject: [PATCH 36/41] Linux 5.18 compat: META Update the META file to reflect compatibility with the 5.18 kernel. 
Reviewed-by: George Melikov Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #13527 --- META | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/META b/META index e9580350587..f16f7972575 100644 --- a/META +++ b/META @@ -6,5 +6,5 @@ Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS -Linux-Maximum: 5.17 +Linux-Maximum: 5.18 Linux-Minimum: 3.10 From 2310dba9ebf6259515b63fda3202199831669271 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Tue, 31 May 2022 18:37:46 -0400 Subject: [PATCH 37/41] Fix typo in zil_commit() comment block Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Allan Jude Closes #13518 --- module/zfs/zil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/zil.c b/module/zfs/zil.c index ec09691b241..9adf815517a 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -2965,7 +2965,7 @@ zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw) * queue prior to zil_commit() having been called, and which itxs were * added after zil_commit() was called. * - * The commit it is special; it doesn't have any on-disk representation. + * The commit itx is special; it doesn't have any on-disk representation. * When a commit itx is "committed" to an lwb, the waiter associated * with it is linked onto the lwb's list of waiters. Then, when that lwb * completes, each waiter on the lwb's list is marked done and signaled From bc8192cd5b14cf4182bd2b19a6a8ed4f0bbed12b Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Tue, 31 May 2022 18:41:33 -0400 Subject: [PATCH 38/41] Corrected parameters for zstd early abort That'll teach me to try and recall them from the definition. 
Reviewed-by: Brian Behlendorf Signed-off-by: Rich Ercolani Closes #13519 --- man/man4/zfs.4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index fa3159ab82c..a086e1a5d56 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -2139,11 +2139,11 @@ However, if there are fewer than metaslabs in the vdev, this functionality is disabled. This ensures that we don't set aside an unreasonable amount of space for the ZIL. . -.It Sy zfs_zstd_earlyabort_pass Ns = Ns Sy 1 Pq int +.It Sy zstd_earlyabort_pass Ns = Ns Sy 1 Pq int Whether heuristic for detection of incompressible data with zstd levels >= 3 using LZ4 and zstd-1 passes is enabled. . -.It Sy zfs_zstd_abort_size Ns = Ns Sy 131072 Pq int +.It Sy zstd_abort_size Ns = Ns Sy 131072 Pq int Minimal uncompressed size (inclusive) of a record before the early abort heuristic will be attempted. . From 42cf2ad0e4e2adfa232f42e4254693467a4cc08c Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 1 Jun 2022 12:54:35 -0400 Subject: [PATCH 39/41] Remove wrong assertion in log spacemap It is typical, but not generally true that if log summary has more blocks it must also have unflushed metaslabs. Normally with metaslabs flushed in order it works, but there are known exceptions, such as device removal or metaslab being loaded during its flush attempt. Before 600a02b8844 if spa_flush_metaslabs() hit loading metaslab it usually stopped (unless memlimit is also exceeded), but now it may flush more metaslabs, just skipping that particular one. This increased chances of assertion to fire when the skipped metaslab is flushed on next iteration if all other metaslabs in that summary entry are already flushed out of order. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored-By: iXsystems, Inc. 
Closes #13486 Closes #13513 --- module/zfs/spa_log_spacemap.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/module/zfs/spa_log_spacemap.c b/module/zfs/spa_log_spacemap.c index f831509a424..c5a1039bd83 100644 --- a/module/zfs/spa_log_spacemap.c +++ b/module/zfs/spa_log_spacemap.c @@ -511,12 +511,6 @@ spa_log_summary_decrement_blkcount(spa_t *spa, uint64_t blocks_gone) e->lse_txgcount--; for (; e != NULL; e = list_head(&spa->spa_log_summary)) { if (e->lse_blkcount > blocks_gone) { - /* - * Assert that we stopped at an entry that is not - * obsolete. - */ - ASSERT(e->lse_mscount != 0); - e->lse_blkcount -= blocks_gone; blocks_gone = 0; break; From 4c6526208db0d3d5abf44664e74d1e28156a3db7 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 31 May 2022 16:30:59 -0700 Subject: [PATCH 40/41] Linux 5.19 compat: asm/fpu/internal.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As of the Linux 5.19 kernel the asm/fpu/internal.h header was entirely removed. It has been effectively empty since the 5.16 kernel and provides no required functionality. Reviewed-by: Tony Hutter Reviewed-by: Attila Fülöp Signed-off-by: Brian Behlendorf Closes #13529 --- config/kernel-fpu.m4 | 23 +++++++++++++++++++++-- include/os/linux/kernel/linux/simd_x86.h | 2 ++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 index eb9520c60a7..c6efebd8cf6 100644 --- a/config/kernel-fpu.m4 +++ b/config/kernel-fpu.m4 @@ -2,6 +2,9 @@ dnl # dnl # Handle differences in kernel FPU code. dnl # dnl # Kernel +dnl # 5.19: The asm/fpu/internal.h header was removed, it has been +dnl # effectively empty since the 5.16 kernel. +dnl # dnl # 5.11: kernel_fpu_begin() is an inlined function now, so don't check dnl # for it inside the kernel symbols. 
dnl # @@ -27,10 +30,22 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU_HEADER], [ ],[ AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1, [kernel has asm/fpu/api.h]) - AC_MSG_RESULT(asm/fpu/api.h) + + ZFS_LINUX_TRY_COMPILE([ + #include + #include + ],[ + ],[ + AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL_HEADER, 1, + [kernel has asm/fpu/internal.h]) + AC_MSG_RESULT([asm/fpu/api.h asm/fpu/internal.h]) + ],[ + AC_MSG_RESULT([asm/fpu/api.h]) + ]) ],[ - AC_MSG_RESULT(i387.h) + AC_MSG_RESULT([i387.h]) ]) + ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ @@ -38,7 +53,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ #include #ifdef HAVE_KERNEL_FPU_API_HEADER #include + #ifdef HAVE_KERNEL_FPU_INTERNAL_HEADER #include + #endif #else #include #endif @@ -51,7 +68,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ #include #ifdef HAVE_KERNEL_FPU_API_HEADER #include + #ifdef HAVE_KERNEL_FPU_INTERNAL_HEADER #include + #endif #else #include #endif diff --git a/include/os/linux/kernel/linux/simd_x86.h b/include/os/linux/kernel/linux/simd_x86.h index 0fc4168b74e..3564ea7f139 100644 --- a/include/os/linux/kernel/linux/simd_x86.h +++ b/include/os/linux/kernel/linux/simd_x86.h @@ -93,7 +93,9 @@ #if defined(HAVE_KERNEL_FPU_API_HEADER) #include +#if defined(HAVE_KERNEL_FPU_INTERNAL_HEADER) #include +#endif #else #include #endif From b9d98453f9387c413f91d1d9cdb0cba8e04dbd95 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 31 May 2022 16:42:49 -0700 Subject: [PATCH 41/41] autoconf: AC_MSG_CHECKING consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the wording more consistent for the kernel AC_MSG_CHECKING output (e.g. "checking whether ...".). Additionally, group some of the VFS interface checks with the others. No functional change. 
Reviewed-by: Tony Hutter Reviewed-by: Attila Fülöp Signed-off-by: Brian Behlendorf Closes #13529 --- config/kernel-bio.m4 | 4 ++-- config/kernel-pagemap-folio_wait_bit.m4 | 2 +- config/kernel-readpages.m4 | 2 +- config/kernel-shrink.m4 | 10 +++++----- config/kernel-sysfs.m4 | 2 +- config/kernel-vfs-filemap_dirty_folio.m4 | 2 +- config/kernel-vfs-read_folio.m4 | 2 +- config/kernel-vfs-set_page_dirty.m4 | 2 +- config/kernel.m4 | 8 ++++---- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/config/kernel-bio.m4 b/config/kernel-bio.m4 index b5d25448155..18620ca5b7e 100644 --- a/config/kernel-bio.m4 +++ b/config/kernel-bio.m4 @@ -464,7 +464,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER], [ ]) AC_DEFUN([ZFS_AC_KERNEL_BLK_CGROUP_HEADER], [ - AC_MSG_CHECKING([for existence of linux/blk-cgroup.h]) + AC_MSG_CHECKING([whether linux/blk-cgroup.h exists]) ZFS_LINUX_TEST_RESULT([blk_cgroup_header],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_LINUX_BLK_CGROUP_HEADER, 1, @@ -500,7 +500,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_ALLOC_4ARG], [ ]) AC_DEFUN([ZFS_AC_KERNEL_BIO_ALLOC_4ARG], [ - AC_MSG_CHECKING([for 4-argument bio_alloc()]) + AC_MSG_CHECKING([whether bio_alloc() wants 4 args]) ZFS_LINUX_TEST_RESULT([bio_alloc_4arg],[ AC_MSG_RESULT(yes) AC_DEFINE([HAVE_BIO_ALLOC_4ARG], 1, [bio_alloc() takes 4 arguments]) diff --git a/config/kernel-pagemap-folio_wait_bit.m4 b/config/kernel-pagemap-folio_wait_bit.m4 index e0aaa4a5741..12d8841f51e 100644 --- a/config/kernel-pagemap-folio_wait_bit.m4 +++ b/config/kernel-pagemap-folio_wait_bit.m4 @@ -15,7 +15,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT], [ ]) AC_DEFUN([ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT], [ - AC_MSG_CHECKING([folio_wait_bit() exists]) + AC_MSG_CHECKING([whether folio_wait_bit() exists]) ZFS_LINUX_TEST_RESULT([pagemap_has_folio_wait_bit], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_PAGEMAP_FOLIO_WAIT_BIT, 1, diff --git a/config/kernel-readpages.m4 b/config/kernel-readpages.m4 index 0bf67ffd933..be65a0d5e4b 100644 
--- a/config/kernel-readpages.m4 +++ b/config/kernel-readpages.m4 @@ -14,7 +14,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_READPAGES], [ ]) AC_DEFUN([ZFS_AC_KERNEL_VFS_READPAGES], [ - AC_MSG_CHECKING([address_space_operations->readpages exists]) + AC_MSG_CHECKING([whether aops->readpages exists]) ZFS_LINUX_TEST_RESULT([vfs_has_readpages], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_VFS_READPAGES, 1, diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4 index a40c86d5c57..8cf0f2761bd 100644 --- a/config/kernel-shrink.m4 +++ b/config/kernel-shrink.m4 @@ -84,7 +84,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ dnl # dnl # 3.0 - 3.11 API change - dnl # ->shrink(struct shrinker *, struct shrink_control *sc) + dnl # cs->shrink(struct shrinker *, struct shrink_control *sc) dnl # AC_MSG_CHECKING([whether new 2-argument shrinker exists]) ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [ @@ -96,14 +96,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ dnl # dnl # 3.12 API change, - dnl # ->shrink() is logically split in to - dnl # ->count_objects() and ->scan_objects() + dnl # cs->shrink() is logically split in to + dnl # cs->count_objects() and cs->scan_objects() dnl # - AC_MSG_CHECKING([whether ->count_objects callback exists]) + AC_MSG_CHECKING([whether cs->count_objects callback exists]) ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control_split], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, - [->count_objects exists]) + [cs->count_objects exists]) ],[ ZFS_LINUX_TEST_ERROR([shrinker]) ]) diff --git a/config/kernel-sysfs.m4 b/config/kernel-sysfs.m4 index 668def5fe6b..bbc77c8fc5c 100644 --- a/config/kernel-sysfs.m4 +++ b/config/kernel-sysfs.m4 @@ -19,7 +19,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYSFS_DEFAULT_GROUPS], [ ]) AC_DEFUN([ZFS_AC_KERNEL_SYSFS_DEFAULT_GROUPS], [ - AC_MSG_CHECKING([for struct kobj_type.default_groups]) + AC_MSG_CHECKING([whether struct kobj_type.default_groups exists]) 
ZFS_LINUX_TEST_RESULT([sysfs_default_groups],[ AC_MSG_RESULT(yes) AC_DEFINE([HAVE_SYSFS_DEFAULT_GROUPS], 1, [struct kobj_type has default_groups]) diff --git a/config/kernel-vfs-filemap_dirty_folio.m4 b/config/kernel-vfs-filemap_dirty_folio.m4 index 87287900292..729ca670da0 100644 --- a/config/kernel-vfs-filemap_dirty_folio.m4 +++ b/config/kernel-vfs-filemap_dirty_folio.m4 @@ -19,7 +19,7 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO], [ dnl # Linux 5.18 uses filemap_dirty_folio in lieu of dnl # ___set_page_dirty_nobuffers dnl # - AC_MSG_CHECKING([filemap_dirty_folio exists]) + AC_MSG_CHECKING([whether filemap_dirty_folio exists]) ZFS_LINUX_TEST_RESULT([vfs_has_filemap_dirty_folio], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_VFS_FILEMAP_DIRTY_FOLIO, 1, diff --git a/config/kernel-vfs-read_folio.m4 b/config/kernel-vfs-read_folio.m4 index 234d1212ab9..9ca0faff218 100644 --- a/config/kernel-vfs-read_folio.m4 +++ b/config/kernel-vfs-read_folio.m4 @@ -22,7 +22,7 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_READ_FOLIO], [ dnl # dnl # Linux 5.19 uses read_folio in lieu of readpage dnl # - AC_MSG_CHECKING([read_folio exists]) + AC_MSG_CHECKING([whether read_folio exists]) ZFS_LINUX_TEST_RESULT([vfs_has_read_folio], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_VFS_READ_FOLIO, 1, [read_folio exists]) diff --git a/config/kernel-vfs-set_page_dirty.m4 b/config/kernel-vfs-set_page_dirty.m4 index a9d252e4e01..90cb28f3682 100644 --- a/config/kernel-vfs-set_page_dirty.m4 +++ b/config/kernel-vfs-set_page_dirty.m4 @@ -23,7 +23,7 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS], [ dnl # Linux 5.14 change requires set_page_dirty() to be assigned dnl # in address_space_operations() dnl # - AC_MSG_CHECKING([__set_page_dirty_nobuffers exists]) + AC_MSG_CHECKING([whether __set_page_dirty_nobuffers exists]) ZFS_LINUX_TEST_RESULT([vfs_has_set_page_dirty_nobuffers], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS, 1, diff --git a/config/kernel.m4 b/config/kernel.m4 index 
eba3c066993..9530367507d 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -107,6 +107,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_SRC_VFS_ITERATE ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO + ZFS_AC_KERNEL_SRC_VFS_READPAGES + ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS ZFS_AC_KERNEL_SRC_VFS_IOV_ITER @@ -137,8 +139,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SIGINFO ZFS_AC_KERNEL_SRC_SYSFS ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE - ZFS_AC_KERNEL_SRC_VFS_READPAGES - ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_SRC_ADD_DISK @@ -225,6 +225,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_VFS_FSYNC_2ARGS ZFS_AC_KERNEL_VFS_ITERATE ZFS_AC_KERNEL_VFS_DIRECT_IO + ZFS_AC_KERNEL_VFS_READPAGES + ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_VFS_RW_ITERATE ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS ZFS_AC_KERNEL_VFS_IOV_ITER @@ -255,8 +257,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SIGINFO ZFS_AC_KERNEL_SYSFS ZFS_AC_KERNEL_SET_SPECIAL_STATE - ZFS_AC_KERNEL_VFS_READPAGES - ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_ADD_DISK