diff --git a/sys/contrib/openzfs/.github/codeql-cpp.yml b/sys/contrib/openzfs/.github/codeql-cpp.yml new file mode 100644 index 00000000000..88b8c608602 --- /dev/null +++ b/sys/contrib/openzfs/.github/codeql-cpp.yml @@ -0,0 +1,4 @@ +name: "Custom CodeQL Analysis" + +queries: + - uses: ./.github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql diff --git a/sys/contrib/openzfs/.github/codeql-python.yml b/sys/contrib/openzfs/.github/codeql-python.yml new file mode 100644 index 00000000000..93cb4a435ed --- /dev/null +++ b/sys/contrib/openzfs/.github/codeql-python.yml @@ -0,0 +1,4 @@ +name: "Custom CodeQL Analysis" + +paths-ignore: + - tests diff --git a/sys/contrib/openzfs/.github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql b/sys/contrib/openzfs/.github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql new file mode 100644 index 00000000000..eb4b7bd6299 --- /dev/null +++ b/sys/contrib/openzfs/.github/codeql/custom-queries/cpp/deprecatedFunctionUsage.ql @@ -0,0 +1,59 @@ +/** + * @name Deprecated function usage detection + * @description Detects functions whose usage is banned from the OpenZFS + * codebase due to QA concerns. + * @kind problem + * @severity error + * @id cpp/deprecated-function-usage +*/ + +import cpp + +predicate isDeprecatedFunction(Function f) { + f.getName() = "strtok" or + f.getName() = "__xpg_basename" or + f.getName() = "basename" or + f.getName() = "dirname" or + f.getName() = "bcopy" or + f.getName() = "bcmp" or + f.getName() = "bzero" or + f.getName() = "asctime" or + f.getName() = "asctime_r" or + f.getName() = "gmtime" or + f.getName() = "localtime" or + f.getName() = "strncpy" + +} + +string getReplacementMessage(Function f) { + if f.getName() = "strtok" then + result = "Use strtok_r(3) instead!" + else if f.getName() = "__xpg_basename" then + result = "basename(3) is underspecified. Use zfs_basename() instead!" + else if f.getName() = "basename" then + result = "basename(3) is underspecified. Use zfs_basename() instead!" + else if f.getName() = "dirname" then + result = "dirname(3) is underspecified. Use zfs_dirnamelen() instead!" + else if f.getName() = "bcopy" then + result = "bcopy(3) is deprecated. Use memcpy(3)/memmove(3) instead!" + else if f.getName() = "bcmp" then + result = "bcmp(3) is deprecated. Use memcmp(3) instead!" + else if f.getName() = "bzero" then + result = "bzero(3) is deprecated. Use memset(3) instead!" + else if f.getName() = "asctime" then + result = "Use strftime(3) instead!" + else if f.getName() = "asctime_r" then + result = "Use strftime(3) instead!" + else if f.getName() = "gmtime" then + result = "gmtime(3) isn't thread-safe. Use gmtime_r(3) instead!" + else if f.getName() = "localtime" then + result = "localtime(3) isn't thread-safe. Use localtime_r(3) instead!" + else + result = "strncpy(3) is deprecated. Use strlcpy(3) instead!" +} + +from FunctionCall fc, Function f +where + fc.getTarget() = f and + isDeprecatedFunction(f) +select fc, getReplacementMessage(f) diff --git a/sys/contrib/openzfs/.github/codeql/custom-queries/cpp/qlpack.yml b/sys/contrib/openzfs/.github/codeql/custom-queries/cpp/qlpack.yml new file mode 100644 index 00000000000..cbe0f1cbe3c --- /dev/null +++ b/sys/contrib/openzfs/.github/codeql/custom-queries/cpp/qlpack.yml @@ -0,0 +1,4 @@ +name: openzfs-cpp-queries +version: 0.0.0 +libraryPathDependencies: codeql-cpp +suites: openzfs-cpp-suite diff --git a/sys/contrib/openzfs/.github/workflows/README.md b/sys/contrib/openzfs/.github/workflows/README.md index 8255dd21082..eef47dae3dc 100644 --- a/sys/contrib/openzfs/.github/workflows/README.md +++ b/sys/contrib/openzfs/.github/workflows/README.md @@ -4,44 +4,54 @@ ```mermaid flowchart TB subgraph CleanUp and Summary - Part1-20.04-->CleanUp+nice+Summary - Part2-20.04-->CleanUp+nice+Summary - PartN-20.04-->CleanUp+nice+Summary - Part1-22.04-->CleanUp+nice+Summary - Part2-22.04-->CleanUp+nice+Summary - PartN-22.04-->CleanUp+nice+Summary + CleanUp+Summary end subgraph Functional Testings + sanity-checks-20.04 + zloop-checks-20.04 functional-testing-20.04-->Part1-20.04 functional-testing-20.04-->Part2-20.04 - functional-testing-20.04-->PartN-20.04 + functional-testing-20.04-->Part3-20.04 + functional-testing-20.04-->Part4-20.04 functional-testing-22.04-->Part1-22.04 functional-testing-22.04-->Part2-22.04 - functional-testing-22.04-->PartN-22.04 -end - -subgraph Sanity and zloop Testings - sanity-checks-20.04-->functional-testing-20.04 - sanity-checks-22.04-->functional-testing-22.04 - zloop-checks-20.04-->functional - zloop-checks-22.04-->functional + functional-testing-22.04-->Part3-22.04 + functional-testing-22.04-->Part4-22.04 + sanity-checks-22.04 + zloop-checks-22.04 end subgraph Code Checking + Building + Build-Ubuntu-20.04 codeql.yml checkstyle.yml - Build-Ubuntu-20.04-->sanity-checks-20.04 - Build-Ubuntu-22.04-->sanity-checks-22.04 - Build-Ubuntu-20.04-->zloop-checks-20.04 - Build-Ubuntu-22.04-->zloop-checks-22.04 + Build-Ubuntu-22.04 end + + Build-Ubuntu-20.04-->sanity-checks-20.04 + Build-Ubuntu-20.04-->zloop-checks-20.04 + Build-Ubuntu-20.04-->functional-testing-20.04 + Build-Ubuntu-22.04-->sanity-checks-22.04 + Build-Ubuntu-22.04-->zloop-checks-22.04 + Build-Ubuntu-22.04-->functional-testing-22.04 + + sanity-checks-20.04-->CleanUp+Summary + Part1-20.04-->CleanUp+Summary + Part2-20.04-->CleanUp+Summary + Part3-20.04-->CleanUp+Summary + Part4-20.04-->CleanUp+Summary + Part1-22.04-->CleanUp+Summary + Part2-22.04-->CleanUp+Summary + Part3-22.04-->CleanUp+Summary + Part4-22.04-->CleanUp+Summary + sanity-checks-22.04-->CleanUp+Summary ``` 1) build zfs modules for Ubuntu 20.04 and 22.04 (~15m) 2) 2x zloop test (~10m) + 2x sanity test (~25m) -3) functional testings in parts 1..5 (each ~1h) +3) 4x functional testings in parts 1..4 (each ~1h) 4) cleanup and create summary - content of summary depends on the results of the steps diff --git a/sys/contrib/openzfs/.github/workflows/checkstyle.yaml b/sys/contrib/openzfs/.github/workflows/checkstyle.yaml index b0fdc570d47..abcb358fc04 100644 --- a/sys/contrib/openzfs/.github/workflows/checkstyle.yaml +++ b/sys/contrib/openzfs/.github/workflows/checkstyle.yaml @@ -8,7 +8,7 @@ jobs: checkstyle: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - name: Install dependencies @@ -52,7 +52,7 @@ jobs: if: failure() && steps.CheckABI.outcome == 'failure' run: | find -name *.abi | tar -cf abi_files.tar -T - - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: failure() && steps.CheckABI.outcome == 'failure' with: name: New ABI files (use only if you're sure about interface changes) diff --git a/sys/contrib/openzfs/.github/workflows/codeql.yml b/sys/contrib/openzfs/.github/workflows/codeql.yml index 037f8aca0ea..e015b2cb71d 100644 --- a/sys/contrib/openzfs/.github/workflows/codeql.yml +++ b/sys/contrib/openzfs/.github/workflows/codeql.yml @@ -24,11 +24,12 @@ jobs: echo "MAKEFLAGS=-j$(nproc)" >> $GITHUB_ENV - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: + config-file: .github/codeql-${{ matrix.language }}.yml languages: ${{ matrix.language }} - name: Autobuild diff --git a/sys/contrib/openzfs/.github/workflows/scripts/generate-summary.sh b/sys/contrib/openzfs/.github/workflows/scripts/generate-summary.sh index cd5ea3421c9..b5d89208a5d 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/generate-summary.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/generate-summary.sh @@ -87,7 +87,7 @@ function summarize_f() { output "\n## $headline\n" rm -rf testfiles for i in $(seq 1 $FUNCTIONAL_PARTS); do - tarfile="$2/part$i.tar" + tarfile="$2-part$i/part$i.tar" check_tarfile "$tarfile" check_logfile "testfiles/log" done diff --git a/sys/contrib/openzfs/.github/workflows/scripts/setup-dependencies.sh b/sys/contrib/openzfs/.github/workflows/scripts/setup-dependencies.sh index 440d5e8e5ac..b40f9290f91 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/setup-dependencies.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/setup-dependencies.sh @@ -55,29 +55,24 @@ function mod_install() { cat /proc/spl/kstat/zfs/chksum_bench echo "::endgroup::" - echo "::group::Reclaim and report disk space" - # remove 4GiB of images - sudo systemd-run docker system prune --force --all --volumes + echo "::group::Optimize storage for ZFS testings" + # remove swap and umount fast storage + # 89GiB -> rootfs + bootfs with ~80MB/s -> don't care + # 64GiB -> /mnt with 420MB/s -> new testing ssd + sudo swapoff -a - # remove unused software - sudo systemd-run --wait rm -rf \ - "$AGENT_TOOLSDIRECTORY" \ - /opt/* \ - /usr/local/* \ - /usr/share/az* \ - /usr/share/dotnet \ - /usr/share/gradle* \ - /usr/share/miniconda \ - /usr/share/swift \ - /var/lib/gems \ - /var/lib/mysql \ - /var/lib/snapd - - # trim the cleaned space - sudo fstrim / + # this one is fast and mounted @ /mnt + # -> we reformat with ext4 + move it to /var/tmp + DEV="/dev/disk/azure/resource-part1" + sudo umount /mnt + sudo mkfs.ext4 -O ^has_journal -F $DEV + sudo mount -o noatime,barrier=0 $DEV /var/tmp + sudo chmod 1777 /var/tmp # disk usage afterwards - df -h / + sudo df -h / + sudo df -h /var/tmp + sudo fstrim -a echo "::endgroup::" } diff --git a/sys/contrib/openzfs/.github/workflows/zfs-linux-tests.yml b/sys/contrib/openzfs/.github/workflows/zfs-linux-tests.yml index c4fe930d092..753f3cd0214 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-linux-tests.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-linux-tests.yml @@ -13,10 +13,10 @@ jobs: zloop: runs-on: ubuntu-${{ inputs.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: modules-${{ inputs.os }} - name: Install modules @@ -34,7 +34,7 @@ jobs: if: failure() run: | sudo chmod +r -R /var/tmp/zloop/ - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: failure() with: name: Zpool-logs-${{ inputs.os }} @@ -43,7 +43,7 @@ jobs: !/var/tmp/zloop/*/vdev/ retention-days: 14 if-no-files-found: ignore - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: failure() with: name: Zpool-files-${{ inputs.os }} @@ -55,10 +55,10 @@ jobs: sanity: runs-on: ubuntu-${{ inputs.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: modules-${{ inputs.os }} - name: Install modules @@ -77,7 +77,7 @@ jobs: RESPATH="/var/tmp/test_results" mv -f $RESPATH/current $RESPATH/testfiles tar cf $RESPATH/sanity.tar -h -C $RESPATH testfiles - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: success() || failure() with: name: Logs-${{ inputs.os }}-sanity @@ -91,10 +91,10 @@ jobs: matrix: tests: [ part1, part2, part3, part4 ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: modules-${{ inputs.os }} - name: Install modules @@ -116,9 +116,9 @@ jobs: RESPATH="/var/tmp/test_results" mv -f $RESPATH/current $RESPATH/testfiles tar cf $RESPATH/${{ matrix.tests }}.tar -h -C $RESPATH testfiles - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: success() || failure() with: - name: Logs-${{ inputs.os }}-functional + name: Logs-${{ inputs.os }}-functional-${{ matrix.tests }} path: /var/tmp/test_results/${{ matrix.tests }}.tar if-no-files-found: ignore diff --git a/sys/contrib/openzfs/.github/workflows/zfs-linux.yml b/sys/contrib/openzfs/.github/workflows/zfs-linux.yml index be3908deb94..e6b705c8605 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-linux.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-linux.yml @@ -14,14 +14,14 @@ jobs: os: [20.04, 22.04] runs-on: ubuntu-${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - name: Build modules run: .github/workflows/scripts/setup-dependencies.sh build - name: Prepare modules upload run: tar czf modules-${{ matrix.os }}.tgz *.deb .github tests/test-runner tests/ImageOS.txt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: modules-${{ matrix.os }} path: modules-${{ matrix.os }}.tgz @@ -44,7 +44,7 @@ jobs: runs-on: ubuntu-22.04 needs: testings steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 - name: Generating summary run: | tar xzf modules-22.04/modules-22.04.tgz .github tests @@ -58,7 +58,7 @@ jobs: run: .github/workflows/scripts/generate-summary.sh 3 - name: Summary for errors #4 run: .github/workflows/scripts/generate-summary.sh 4 - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Summary Files path: Summary/ diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index 93045ec3abe..d64414e3222 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -1,10 +1,10 @@ Meta: 1 Name: zfs Branch: 1.0 -Version: 2.2.2 +Version: 2.2.3 Release: 1 Release-Tags: relext License: CDDL Author: OpenZFS -Linux-Maximum: 6.6 +Linux-Maximum: 6.7 Linux-Minimum: 3.10 diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 3fc9fd2a9d8..d81199765c6 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -2360,7 +2360,7 @@ static void snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, const blkptr_t *bp) { - abd_t *pabd; + static abd_t *pabd = NULL; void *buf; zio_t *zio; zfs_zstdhdr_t zstd_hdr; @@ -2391,7 +2391,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, return; } - pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); + if (!pabd) + pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE); zio = zio_root(spa, NULL, NULL, 0); /* Decrypt but don't decompress so we can read the compression header */ @@ -8040,6 +8041,17 @@ dump_mos_leaks(spa_t *spa) } } + if (spa->spa_brt != NULL) { + brt_t *brt = spa->spa_brt; + for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) { + brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid]; + if (brtvd != NULL && brtvd->bv_initiated) { + mos_obj_refd(brtvd->bv_mos_brtvdev); + mos_obj_refd(brtvd->bv_mos_entries); + } + } + } + /* * Visit all allocated objects and make sure they are referenced. */ @@ -8490,6 +8502,14 @@ zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize, *cfuncp++ = ZIO_COMPRESS_LZ4; *cfuncp++ = ZIO_COMPRESS_LZJB; mask |= ZIO_COMPRESS_MASK(LZ4) | ZIO_COMPRESS_MASK(LZJB); + /* + * Every gzip level has the same decompressor, no need to + * run it 9 times per bruteforce attempt. + */ + mask |= ZIO_COMPRESS_MASK(GZIP_2) | ZIO_COMPRESS_MASK(GZIP_3); + mask |= ZIO_COMPRESS_MASK(GZIP_4) | ZIO_COMPRESS_MASK(GZIP_5); + mask |= ZIO_COMPRESS_MASK(GZIP_6) | ZIO_COMPRESS_MASK(GZIP_7); + mask |= ZIO_COMPRESS_MASK(GZIP_8) | ZIO_COMPRESS_MASK(GZIP_9); for (int c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) if (((1ULL << c) & mask) == 0) *cfuncp++ = c; diff --git a/sys/contrib/openzfs/cmd/zdb/zdb_il.c b/sys/contrib/openzfs/cmd/zdb/zdb_il.c index 970c45c9b3b..63d95ddedc3 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb_il.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb_il.c @@ -168,7 +168,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset, (u_longlong_t)lr->lr_length); - if (txtype == TX_WRITE2 || verbose < 5) + if (txtype == TX_WRITE2 || verbose < 4) return; if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { @@ -178,6 +178,8 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) "will claim" : "won't claim"); print_log_bp(bp, tab_prefix); + if (verbose < 5) + return; if (BP_IS_HOLE(bp)) { (void) printf("\t\t\tLSIZE 0x%llx\n", (u_longlong_t)BP_GET_LSIZE(bp)); @@ -202,6 +204,9 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg) if (error) goto out; } else { + if (verbose < 5) + return; + /* data is stored after the end of the lr_write record */ data = abd_alloc(lr->lr_length, B_FALSE); abd_copy_from_buf(data, lr + 1, lr->lr_length); @@ -217,6 +222,28 @@ out: abd_free(data); } +static void +zil_prt_rec_write_enc(zilog_t *zilog, int txtype, const void *arg) +{ + (void) txtype; + const lr_write_t *lr = arg; + const blkptr_t *bp = &lr->lr_blkptr; + int verbose = MAX(dump_opt['d'], dump_opt['i']); + + (void) printf("%s(encrypted)\n", tab_prefix); + + if (verbose < 4) + return; + + if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { + (void) printf("%shas blkptr, %s\n", tab_prefix, + !BP_IS_HOLE(bp) && + bp->blk_birth >= spa_min_claim_txg(zilog->zl_spa) ? + "will claim" : "won't claim"); + print_log_bp(bp, tab_prefix); + } +} + static void zil_prt_rec_truncate(zilog_t *zilog, int txtype, const void *arg) { @@ -312,11 +339,34 @@ zil_prt_rec_clone_range(zilog_t *zilog, int txtype, const void *arg) { (void) zilog, (void) txtype; const lr_clone_range_t *lr = arg; + int verbose = MAX(dump_opt['d'], dump_opt['i']); (void) printf("%sfoid %llu, offset %llx, length %llx, blksize %llx\n", tab_prefix, (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset, (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blksz); + if (verbose < 4) + return; + + for (unsigned int i = 0; i < lr->lr_nbps; i++) { + (void) printf("%s[%u/%llu] ", tab_prefix, i + 1, + (u_longlong_t)lr->lr_nbps); + print_log_bp(&lr->lr_bps[i], ""); + } +} + +static void +zil_prt_rec_clone_range_enc(zilog_t *zilog, int txtype, const void *arg) +{ + (void) zilog, (void) txtype; + const lr_clone_range_t *lr = arg; + int verbose = MAX(dump_opt['d'], dump_opt['i']); + + (void) printf("%s(encrypted)\n", tab_prefix); + + if (verbose < 4) + return; + for (unsigned int i = 0; i < lr->lr_nbps; i++) { (void) printf("%s[%u/%llu] ", tab_prefix, i + 1, (u_longlong_t)lr->lr_nbps); @@ -327,6 +377,7 @@ zil_prt_rec_clone_range(zilog_t *zilog, int txtype, const void *arg) typedef void (*zil_prt_rec_func_t)(zilog_t *, int, const void *); typedef struct zil_rec_info { zil_prt_rec_func_t zri_print; + zil_prt_rec_func_t zri_print_enc; const char *zri_name; uint64_t zri_count; } zil_rec_info_t; @@ -341,7 +392,9 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { {.zri_print = zil_prt_rec_remove, .zri_name = "TX_RMDIR "}, {.zri_print = zil_prt_rec_link, .zri_name = "TX_LINK "}, {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME "}, - {.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE "}, + {.zri_print = zil_prt_rec_write, + .zri_print_enc = zil_prt_rec_write_enc, + .zri_name = "TX_WRITE "}, {.zri_print = zil_prt_rec_truncate, .zri_name = "TX_TRUNCATE "}, {.zri_print = zil_prt_rec_setattr, .zri_name = "TX_SETATTR "}, {.zri_print = zil_prt_rec_acl, .zri_name = "TX_ACL_V0 "}, @@ -358,6 +411,7 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_EXCHANGE "}, {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_WHITEOUT "}, {.zri_print = zil_prt_rec_clone_range, + .zri_print_enc = zil_prt_rec_clone_range_enc, .zri_name = "TX_CLONE_RANGE "}, }; @@ -384,6 +438,8 @@ print_log_record(zilog_t *zilog, const lr_t *lr, void *arg, uint64_t claim_txg) if (txtype && verbose >= 3) { if (!zilog->zl_os->os_encrypted) { zil_rec_info[txtype].zri_print(zilog, txtype, lr); + } else if (zil_rec_info[txtype].zri_print_enc) { + zil_rec_info[txtype].zri_print_enc(zilog, txtype, lr); } else { (void) printf("%s(encrypted)\n", tab_prefix); } diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c index 9636c99fc85..69163b80bd5 100644 --- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c +++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c @@ -233,8 +233,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) } (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath); + + update_vdev_config_dev_sysfs_path(vdev, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &enc_sysfs_path); + (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted); diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh index 150012abe71..06acce93b8a 100755 --- a/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh @@ -5,7 +5,7 @@ # # Bad SCSI disks can often "disappear and reappear" causing all sorts of chaos # as they flip between FAULTED and ONLINE. If -# ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT is set in zed.rc, and the disk gets +# ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT is set in zed.rc, and the disk gets # FAULTED, then power down the slot via sysfs: # # /sys/class/enclosure///power_status @@ -19,7 +19,7 @@ # Exit codes: # 0: slot successfully powered off # 1: enclosure not available -# 2: ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT disabled +# 2: ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT disabled # 3: vdev was not FAULTED # 4: The enclosure sysfs path passed from ZFS does not exist # 5: Enclosure slot didn't actually turn off after we told it to @@ -32,7 +32,7 @@ if [ ! -d /sys/class/enclosure ] ; then exit 1 fi -if [ "${ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT}" != "1" ] ; then +if [ "${ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT}" != "1" ] ; then exit 2 fi diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh b/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh index 49b6b54029a..3a2519633d0 100644 --- a/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh +++ b/sys/contrib/openzfs/cmd/zed/zed.d/zed-functions.sh @@ -205,6 +205,10 @@ zed_notify() [ "${rv}" -eq 0 ] && num_success=$((num_success + 1)) [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1)) + zed_notify_ntfy "${subject}" "${pathname}"; rv=$? + [ "${rv}" -eq 0 ] && num_success=$((num_success + 1)) + [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1)) + [ "${num_success}" -gt 0 ] && return 0 [ "${num_failure}" -gt 0 ] && return 1 return 2 @@ -527,6 +531,100 @@ zed_notify_pushover() } +# zed_notify_ntfy (subject, pathname) +# +# Send a notification via Ntfy.sh . +# The ntfy topic (ZED_NTFY_TOPIC) identifies the topic that the notification +# will be sent to Ntfy.sh server. The ntfy url (ZED_NTFY_URL) defines the +# self-hosted or provided hosted ntfy service location. The ntfy access token +# (ZED_NTFY_ACCESS_TOKEN) reprsents an +# access token that could be used if a topic is read/write protected. If a +# topic can be written to publicaly, a ZED_NTFY_ACCESS_TOKEN is not required. +# +# Requires curl and sed executables to be installed in the standard PATH. +# +# References +# https://docs.ntfy.sh +# +# Arguments +# subject: notification subject +# pathname: pathname containing the notification message (OPTIONAL) +# +# Globals +# ZED_NTFY_TOPIC +# ZED_NTFY_ACCESS_TOKEN (OPTIONAL) +# ZED_NTFY_URL +# +# Return +# 0: notification sent +# 1: notification failed +# 2: not configured +# +zed_notify_ntfy() +{ + local subject="$1" + local pathname="${2:-"/dev/null"}" + local msg_body + local msg_out + local msg_err + + [ -n "${ZED_NTFY_TOPIC}" ] || return 2 + local url="${ZED_NTFY_URL:-"https://ntfy.sh"}/${ZED_NTFY_TOPIC}" + + if [ ! -r "${pathname}" ]; then + zed_log_err "ntfy cannot read \"${pathname}\"" + return 1 + fi + + zed_check_cmd "curl" "sed" || return 1 + + # Read the message body in. + # + msg_body="$(cat "${pathname}")" + + if [ -z "${msg_body}" ] + then + msg_body=$subject + subject="" + fi + + # Send the POST request and check for errors. + # + if [ -n "${ZED_NTFY_ACCESS_TOKEN}" ]; then + msg_out="$( \ + curl \ + -u ":${ZED_NTFY_ACCESS_TOKEN}" \ + -H "Title: ${subject}" \ + -d "${msg_body}" \ + -H "Priority: high" \ + "${url}" \ + 2>/dev/null \ + )"; rv=$? + else + msg_out="$( \ + curl \ + -H "Title: ${subject}" \ + -d "${msg_body}" \ + -H "Priority: high" \ + "${url}" \ + 2>/dev/null \ + )"; rv=$? + fi + if [ "${rv}" -ne 0 ]; then + zed_log_err "curl exit=${rv}" + return 1 + fi + msg_err="$(echo "${msg_out}" \ + | sed -n -e 's/.*"errors" *:.*\[\(.*\)\].*/\1/p')" + if [ -n "${msg_err}" ]; then + zed_log_err "ntfy \"${msg_err}"\" + return 1 + fi + return 0 +} + + + # zed_rate_limit (tag, [interval]) # # Check whether an event of a given type [tag] has already occurred within the diff --git a/sys/contrib/openzfs/cmd/zed/zed.d/zed.rc b/sys/contrib/openzfs/cmd/zed/zed.d/zed.rc index 78dc1afc7b1..bc269b155d7 100644 --- a/sys/contrib/openzfs/cmd/zed/zed.d/zed.rc +++ b/sys/contrib/openzfs/cmd/zed/zed.d/zed.rc @@ -146,4 +146,26 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event" # Power off the drive's slot in the enclosure if it becomes FAULTED. This can # help silence misbehaving drives. This assumes your drive enclosure fully # supports slot power control via sysfs. -#ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1 +#ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT=1 + +## +# Ntfy topic +# This defines which topic will receive the ntfy notification. +# +# Disabled by default; uncomment to enable. +#ZED_NTFY_TOPIC="" + +## +# Ntfy access token (optional for public topics) +# This defines an access token which can be used +# to allow you to authenticate when sending to topics +# +# Disabled by default; uncomment to enable. +#ZED_NTFY_ACCESS_TOKEN="" + +## +# Ntfy Service URL +# This defines which service the ntfy call will be directed toward +# +# https://ntfy.sh by default; uncomment to enable an alternative service url. +#ZED_NTFY_URL="https://ntfy.sh" diff --git a/sys/contrib/openzfs/cmd/zed/zed_event.c b/sys/contrib/openzfs/cmd/zed/zed_event.c index c60d5a4bc22..7e586769223 100644 --- a/sys/contrib/openzfs/cmd/zed/zed_event.c +++ b/sys/contrib/openzfs/cmd/zed/zed_event.c @@ -35,6 +35,7 @@ #include "zed_strings.h" #include "agents/zfs_agents.h" +#include #define MAXBUF 4096 @@ -922,6 +923,25 @@ _zed_event_add_time_strings(uint64_t eid, zed_strings_t *zsp, int64_t etime[]) } } + +static void +_zed_event_update_enc_sysfs_path(nvlist_t *nvl) +{ + const char *vdev_path; + + if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, + &vdev_path) != 0) { + return; /* some other kind of event, ignore it */ + } + + if (vdev_path == NULL) { + return; + } + + update_vdev_config_dev_sysfs_path(nvl, vdev_path, + FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH); +} + /* * Service the next zevent, blocking until one is available. */ @@ -969,6 +989,17 @@ zed_event_service(struct zed_conf *zcp) zed_log_msg(LOG_WARNING, "Failed to lookup zevent class (eid=%llu)", eid); } else { + /* + * Special case: If we can dynamically detect an enclosure sysfs + * path, then use that value rather than the one stored in the + * vd->vdev_enc_sysfs_path. There have been rare cases where + * vd->vdev_enc_sysfs_path becomes outdated. However, there + * will be other times when we can not dynamically detect the + * sysfs path (like if a disk disappears) and have to rely on + * the old value for things like turning on the fault LED. + */ + _zed_event_update_enc_sysfs_path(nvl); + /* let internal modules see this event first */ zfs_agent_post_event(class, NULL, nvl); diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 5a2285ce434..f7e07b8199d 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -3672,15 +3672,25 @@ zfs_do_list(int argc, char **argv) for (char *tok; (tok = strsep(&optarg, ",")); ) { static const char *const type_subopts[] = { - "filesystem", "volume", - "snapshot", "snap", + "filesystem", + "fs", + "volume", + "vol", + "snapshot", + "snap", "bookmark", - "all" }; + "all" + }; static const int type_types[] = { - ZFS_TYPE_FILESYSTEM, ZFS_TYPE_VOLUME, - ZFS_TYPE_SNAPSHOT, ZFS_TYPE_SNAPSHOT, + ZFS_TYPE_FILESYSTEM, + ZFS_TYPE_FILESYSTEM, + ZFS_TYPE_VOLUME, + ZFS_TYPE_VOLUME, + ZFS_TYPE_SNAPSHOT, + ZFS_TYPE_SNAPSHOT, ZFS_TYPE_BOOKMARK, - ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK }; + ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK + }; for (c = 0; c < ARRAY_SIZE(type_subopts); ++c) if (strcmp(tok, type_subopts[c]) == 0) { @@ -7230,7 +7240,8 @@ share_mount(int op, int argc, char **argv) pthread_mutex_init(&share_mount_state.sm_lock, NULL); /* For a 'zfs share -a' operation start with a clean slate. */ - zfs_truncate_shares(NULL); + if (op == OP_SHARE) + zfs_truncate_shares(NULL); /* * libshare isn't mt-safe, so only do the operation in parallel diff --git a/sys/contrib/openzfs/cmd/zpool/os/freebsd/zpool_vdev_os.c b/sys/contrib/openzfs/cmd/zpool/os/freebsd/zpool_vdev_os.c index 231ca97f1f6..c57c689afa9 100644 --- a/sys/contrib/openzfs/cmd/zpool/os/freebsd/zpool_vdev_os.c +++ b/sys/contrib/openzfs/cmd/zpool/os/freebsd/zpool_vdev_os.c @@ -124,3 +124,24 @@ check_file(const char *file, boolean_t force, boolean_t isspare) { return (check_file_generic(file, force, isspare)); } + +int +zpool_power_current_state(zpool_handle_t *zhp, char *vdev) +{ + + (void) zhp; + (void) vdev; + /* Enclosure slot power not supported on FreeBSD yet */ + return (-1); +} + +int +zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on) +{ + + (void) zhp; + (void) vdev; + (void) turn_on; + /* Enclosure slot power not supported on FreeBSD yet */ + return (ENOTSUP); +} diff --git a/sys/contrib/openzfs/cmd/zpool/os/linux/zpool_vdev_os.c b/sys/contrib/openzfs/cmd/zpool/os/linux/zpool_vdev_os.c index 7f4486e062f..006a3a7d8e0 100644 --- a/sys/contrib/openzfs/cmd/zpool/os/linux/zpool_vdev_os.c +++ b/sys/contrib/openzfs/cmd/zpool/os/linux/zpool_vdev_os.c @@ -416,3 +416,258 @@ check_file(const char *file, boolean_t force, boolean_t isspare) { return (check_file_generic(file, force, isspare)); } + +/* + * Read from a sysfs file and return an allocated string. Removes + * the newline from the end of the string if there is one. + * + * Returns a string on success (which must be freed), or NULL on error. + */ +static char *zpool_sysfs_gets(char *path) +{ + int fd; + struct stat statbuf; + char *buf = NULL; + ssize_t count = 0; + fd = open(path, O_RDONLY); + if (fd < 0) + return (NULL); + + if (fstat(fd, &statbuf) != 0) { + close(fd); + return (NULL); + } + + buf = calloc(sizeof (*buf), statbuf.st_size + 1); + if (buf == NULL) { + close(fd); + return (NULL); + } + + /* + * Note, we can read less bytes than st_size, and that's ok. Sysfs + * files will report their size is 4k even if they only return a small + * string. + */ + count = read(fd, buf, statbuf.st_size); + if (count < 0) { + /* Error doing read() or we overran the buffer */ + close(fd); + free(buf); + return (NULL); + } + + /* Remove trailing newline */ + if (buf[count - 1] == '\n') + buf[count - 1] = 0; + + close(fd); + + return (buf); +} + +/* + * Write a string to a sysfs file. + * + * Returns 0 on success, non-zero otherwise. + */ +static int zpool_sysfs_puts(char *path, char *str) +{ + FILE *file; + + file = fopen(path, "w"); + if (!file) { + return (-1); + } + + if (fputs(str, file) < 0) { + fclose(file); + return (-2); + } + fclose(file); + return (0); +} + +/* Given a vdev nvlist_t, rescan its enclosure sysfs path */ +static void +rescan_vdev_config_dev_sysfs_path(nvlist_t *vdev_nv) +{ + update_vdev_config_dev_sysfs_path(vdev_nv, + fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH), + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); +} + +/* + * Given a power string: "on", "off", "1", or "0", return 0 if it's an + * off value, 1 if it's an on value, and -1 if the value is unrecognized. + */ +static int zpool_power_parse_value(char *str) +{ + if ((strcmp(str, "off") == 0) || (strcmp(str, "0") == 0)) + return (0); + + if ((strcmp(str, "on") == 0) || (strcmp(str, "1") == 0)) + return (1); + + return (-1); +} + +/* + * Given a vdev string return an allocated string containing the sysfs path to + * its power control file. Also do a check if the power control file really + * exists and has correct permissions. + * + * Example returned strings: + * + * /sys/class/enclosure/0:0:122:0/10/power_status + * /sys/bus/pci/slots/10/power + * + * Returns allocated string on success (which must be freed), NULL on failure. + */ +static char * +zpool_power_sysfs_path(zpool_handle_t *zhp, char *vdev) +{ + const char *enc_sysfs_dir = NULL; + char *path = NULL; + nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL); + + if (vdev_nv == NULL) { + return (NULL); + } + + /* Make sure we're getting the updated enclosure sysfs path */ + rescan_vdev_config_dev_sysfs_path(vdev_nv); + + if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + &enc_sysfs_dir) != 0) { + return (NULL); + } + + if (asprintf(&path, "%s/power_status", enc_sysfs_dir) == -1) + return (NULL); + + if (access(path, W_OK) != 0) { + free(path); + path = NULL; + /* No HDD 'power_control' file, maybe it's NVMe? */ + if (asprintf(&path, "%s/power", enc_sysfs_dir) == -1) { + return (NULL); + } + + if (access(path, R_OK | W_OK) != 0) { + /* Not NVMe either */ + free(path); + return (NULL); + } + } + + return (path); +} + +/* + * Given a path to a sysfs power control file, return B_TRUE if you should use + * "on/off" words to control it, or B_FALSE otherwise ("0/1" to control). + */ +static boolean_t +zpool_power_use_word(char *sysfs_path) +{ + if (strcmp(&sysfs_path[strlen(sysfs_path) - strlen("power_status")], + "power_status") == 0) { + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Check the sysfs power control value for a vdev. + * + * Returns: + * 0 - Power is off + * 1 - Power is on + * -1 - Error or unsupported + */ +int +zpool_power_current_state(zpool_handle_t *zhp, char *vdev) +{ + char *val; + int rc; + + char *path = zpool_power_sysfs_path(zhp, vdev); + if (path == NULL) + return (-1); + + val = zpool_sysfs_gets(path); + if (val == NULL) { + free(path); + return (-1); + } + + rc = zpool_power_parse_value(val); + free(val); + free(path); + return (rc); +} + +/* + * Turn on or off the slot to a device + * + * Device path is the full path to the device (like /dev/sda or /dev/sda1). + * + * Return code: + * 0: Success + * ENOTSUP: Power control not supported for OS + * EBADSLT: Couldn't read current power state + * ENOENT: No sysfs path to power control + * EIO: Couldn't write sysfs power value + * EBADE: Sysfs power value didn't change + */ +int +zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on) +{ + char *sysfs_path; + const char *val; + int rc; + int timeout_ms; + + rc = zpool_power_current_state(zhp, vdev); + if (rc == -1) { + return (EBADSLT); + } + + /* Already correct value? */ + if (rc == (int)turn_on) + return (0); + + sysfs_path = zpool_power_sysfs_path(zhp, vdev); + if (sysfs_path == NULL) + return (ENOENT); + + if (zpool_power_use_word(sysfs_path)) { + val = turn_on ? "on" : "off"; + } else { + val = turn_on ? "1" : "0"; + } + + rc = zpool_sysfs_puts(sysfs_path, (char *)val); + + free(sysfs_path); + if (rc != 0) { + return (EIO); + } + + /* + * Wait up to 30 seconds for sysfs power value to change after + * writing it. + */ + timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS", 30000); + for (int i = 0; i < MAX(1, timeout_ms / 200); i++) { + rc = zpool_power_current_state(zhp, vdev); + if (rc == (int)turn_on) + return (0); /* success */ + + fsleep(0.200); /* 200ms */ + } + + /* sysfs value never changed */ + return (EBADE); +} diff --git a/sys/contrib/openzfs/cmd/zpool/zpool.d/ses b/sys/contrib/openzfs/cmd/zpool/zpool.d/ses index 638145c95d4..19ef92ad67b 100755 --- a/sys/contrib/openzfs/cmd/zpool/zpool.d/ses +++ b/sys/contrib/openzfs/cmd/zpool/zpool.d/ses @@ -33,10 +33,18 @@ for i in $scripts ; do val="" case $i in enc) - val=$(ls "$VDEV_ENC_SYSFS_PATH/../../" 2>/dev/null) + if echo "$VDEV_ENC_SYSFS_PATH" | grep -q '/sys/bus/pci/slots' ; then + val="$VDEV_ENC_SYSFS_PATH" + else + val="$(ls """$VDEV_ENC_SYSFS_PATH/../../""" 2>/dev/null)" + fi ;; slot) - val=$(cat "$VDEV_ENC_SYSFS_PATH/slot" 2>/dev/null) + if echo "$VDEV_ENC_SYSFS_PATH" | grep -q '/sys/bus/pci/slots' ; then + val="$(basename """$VDEV_ENC_SYSFS_PATH""")" + else + val="$(cat """$VDEV_ENC_SYSFS_PATH/slot""" 2>/dev/null)" + fi ;; encdev) val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null) diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c index 506b529dce4..ae2e9da9108 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_iter.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_iter.c @@ -554,6 +554,10 @@ for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl) if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) return (1); + /* Make sure we're getting the updated enclosure sysfs path */ + update_vdev_config_dev_sysfs_path(nv, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &vdev_enc_sysfs_path); diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index 5507f9d3fd6..69bf9649acf 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -353,7 +353,7 @@ get_usage(zpool_help_t idx) return (gettext("\tattach [-fsw] [-o property=value] " " \n")); case HELP_CLEAR: - return (gettext("\tclear [-nF] [device]\n")); + return (gettext("\tclear [[--power]|[-nF]] [device]\n")); case HELP_CREATE: return (gettext("\tcreate [-fnd] [-o property=value] ... \n" "\t [-O file-system-property=value] ... \n" @@ -389,9 +389,11 @@ get_usage(zpool_help_t idx) "[-T d|u] [pool] ... \n" "\t [interval [count]]\n")); case HELP_OFFLINE: - return (gettext("\toffline [-f] [-t] ...\n")); + return (gettext("\toffline [--power]|[[-f][-t]] " + " ...\n")); case HELP_ONLINE: - return (gettext("\tonline [-e] ...\n")); + return (gettext("\tonline [--power][-e] " + "...\n")); case HELP_REPLACE: return (gettext("\treplace [-fsw] [-o property=value] " " [new-device]\n")); @@ -410,7 +412,7 @@ get_usage(zpool_help_t idx) return (gettext("\ttrim [-dw] [-r ] [-c | -s] " "[ ...]\n")); case HELP_STATUS: - return (gettext("\tstatus [-c [script1,script2,...]] " + return (gettext("\tstatus [--power] [-c [script1,script2,...]] " "[-igLpPstvxD] [-T d|u] [pool] ... \n" "\t [interval [count]]\n")); case HELP_UPGRADE: @@ -516,6 +518,77 @@ print_vdev_prop_cb(int prop, void *cb) return (ZPROP_CONT); } +/* + * Given a leaf vdev name like 'L5' return its VDEV_CONFIG_PATH like + * '/dev/disk/by-vdev/L5'. + */ +static const char * +vdev_name_to_path(zpool_handle_t *zhp, char *vdev) +{ + nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL); + if (vdev_nv == NULL) { + return (NULL); + } + return (fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH)); +} + +static int +zpool_power_on(zpool_handle_t *zhp, char *vdev) +{ + return (zpool_power(zhp, vdev, B_TRUE)); +} + +static int +zpool_power_on_and_disk_wait(zpool_handle_t *zhp, char *vdev) +{ + int rc; + + rc = zpool_power_on(zhp, vdev); + if (rc != 0) + return (rc); + + zpool_disk_wait(vdev_name_to_path(zhp, vdev)); + + return (0); +} + +static int +zpool_power_on_pool_and_wait_for_devices(zpool_handle_t *zhp) +{ + nvlist_t *nv; + const char *path = NULL; + int rc; + + /* Power up all the devices first */ + FOR_EACH_REAL_LEAF_VDEV(zhp, nv) { + path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + if (path != NULL) { + rc = zpool_power_on(zhp, (char *)path); + if (rc != 0) { + return (rc); + } + } + } + + /* + * Wait for their devices to show up. Since we powered them on + * at roughly the same time, they should all come online around + * the same time. + */ + FOR_EACH_REAL_LEAF_VDEV(zhp, nv) { + path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + zpool_disk_wait(path); + } + + return (0); +} + +static int +zpool_power_off(zpool_handle_t *zhp, char *vdev) +{ + return (zpool_power(zhp, vdev, B_FALSE)); +} + /* * Display usage message. If we're inside a command, display only the usage for * that command. Otherwise, iterate over the entire command table and display @@ -2088,11 +2161,13 @@ typedef struct status_cbdata { boolean_t cb_explain; boolean_t cb_first; boolean_t cb_dedup_stats; + boolean_t cb_print_unhealthy; boolean_t cb_print_status; boolean_t cb_print_slow_ios; boolean_t cb_print_vdev_init; boolean_t cb_print_vdev_trim; vdev_cmd_data_list_t *vcdl; + boolean_t cb_print_power; } status_cbdata_t; /* Return 1 if string is NULL, empty, or whitespace; return 0 otherwise. */ @@ -2283,6 +2358,35 @@ health_str_to_color(const char *health) return (NULL); } +/* + * Called for each leaf vdev. Returns 0 if the vdev is healthy. + * A vdev is unhealthy if any of the following are true: + * 1) there are read, write, or checksum errors, + * 2) its state is not ONLINE, or + * 3) slow IO reporting was requested (-s) and there are slow IOs. + */ +static int +vdev_health_check_cb(void *hdl_data, nvlist_t *nv, void *data) +{ + status_cbdata_t *cb = data; + vdev_stat_t *vs; + uint_t vsc; + (void) hdl_data; + + if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) != 0) + return (1); + + if (vs->vs_checksum_errors || vs->vs_read_errors || + vs->vs_write_errors || vs->vs_state != VDEV_STATE_HEALTHY) + return (1); + + if (cb->cb_print_slow_ios && vs->vs_slow_ios) + return (1); + + return (0); +} + /* * Print out configuration state as requested by status_callback. */ @@ -2301,7 +2405,8 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, const char *state; const char *type; const char *path = NULL; - const char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL; + const char *rcolor = NULL, *wcolor = NULL, *ccolor = NULL, + *scolor = NULL; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) @@ -2328,6 +2433,15 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, state = gettext("AVAIL"); } + /* + * If '-e' is specified then top-level vdevs and their children + * can be pruned if all of their leaves are healthy. + */ + if (cb->cb_print_unhealthy && depth > 0 && + for_each_vdev_in_nvlist(nv, vdev_health_check_cb, cb) == 0) { + return; + } + printf_color(health_str_to_color(state), "\t%*s%-*s %-8s", depth, "", cb->cb_namewidth - depth, name, state); @@ -2342,6 +2456,9 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, if (vs->vs_checksum_errors) ccolor = ANSI_RED; + if (vs->vs_slow_ios) + scolor = ANSI_BLUE; + if (cb->cb_literal) { fputc(' ', stdout); printf_color(rcolor, "%5llu", @@ -2374,9 +2491,30 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, } if (cb->cb_literal) - printf(" %5llu", (u_longlong_t)vs->vs_slow_ios); + printf_color(scolor, " %5llu", + (u_longlong_t)vs->vs_slow_ios); else - printf(" %5s", rbuf); + printf_color(scolor, " %5s", rbuf); + } + if (cb->cb_print_power) { + if (children == 0) { + /* Only leaf vdevs have physical slots */ + switch (zpool_power_current_state(zhp, (char *) + fnvlist_lookup_string(nv, + ZPOOL_CONFIG_PATH))) { + case 0: + printf_color(ANSI_RED, " %5s", + gettext("off")); + break; + case 1: + printf(" %5s", gettext("on")); + break; + default: + printf(" %5s", "-"); + } + } else { + printf(" %5s", "-"); + } } } @@ -5428,19 +5566,6 @@ get_interval_count_filter_guids(int *argc, char **argv, float *interval, interval, count); } -/* - * Floating point sleep(). Allows you to pass in a floating point value for - * seconds. - */ -static void -fsleep(float sec) -{ - struct timespec req; - req.tv_sec = floor(sec); - req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC; - nanosleep(&req, NULL); -} - /* * Terminal height, in rows. Returns -1 if stdout is not connected to a TTY or * if we were unable to determine its size. @@ -6939,10 +7064,12 @@ zpool_do_split(int argc, char **argv) return (ret); } - +#define POWER_OPT 1024 /* - * zpool online ... + * zpool online [--power] ... + * + * --power: Power on the enclosure slot to the drive (if possible) */ int zpool_do_online(int argc, char **argv) @@ -6953,13 +7080,21 @@ zpool_do_online(int argc, char **argv) int ret = 0; vdev_state_t newstate; int flags = 0; + boolean_t is_power_on = B_FALSE; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; /* check options */ - while ((c = getopt(argc, argv, "e")) != -1) { + while ((c = getopt_long(argc, argv, "e", long_options, NULL)) != -1) { switch (c) { case 'e': flags |= ZFS_ONLINE_EXPAND; break; + case POWER_OPT: + is_power_on = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -6967,6 +7102,9 @@ zpool_do_online(int argc, char **argv) } } + if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT")) + is_power_on = B_TRUE; + argc -= optind; argv += optind; @@ -6988,6 +7126,18 @@ zpool_do_online(int argc, char **argv) for (i = 1; i < argc; i++) { vdev_state_t oldstate; boolean_t avail_spare, l2cache; + int rc; + + if (is_power_on) { + rc = zpool_power_on_and_disk_wait(zhp, argv[i]); + if (rc == ENOTSUP) { + (void) fprintf(stderr, + gettext("Power control not supported\n")); + } + if (rc != 0) + return (rc); + } + nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare, &l2cache, NULL); if (tgt == NULL) { @@ -7033,12 +7183,15 @@ zpool_do_online(int argc, char **argv) } /* - * zpool offline [-ft] ... + * zpool offline [-ft]|[--power] ... + * * * -f Force the device into a faulted state. * * -t Only take the device off-line temporarily. The offline/faulted * state will not be persistent across reboots. + * + * --power Power off the enclosure slot to the drive (if possible) */ int zpool_do_offline(int argc, char **argv) @@ -7049,9 +7202,15 @@ zpool_do_offline(int argc, char **argv) int ret = 0; boolean_t istmp = B_FALSE; boolean_t fault = B_FALSE; + boolean_t is_power_off = B_FALSE; + + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; /* check options */ - while ((c = getopt(argc, argv, "ft")) != -1) { + while ((c = getopt_long(argc, argv, "ft", long_options, NULL)) != -1) { switch (c) { case 'f': fault = B_TRUE; @@ -7059,6 +7218,9 @@ zpool_do_offline(int argc, char **argv) case 't': istmp = B_TRUE; break; + case POWER_OPT: + is_power_off = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -7066,6 +7228,20 @@ zpool_do_offline(int argc, char **argv) } } + if (is_power_off && fault) { + (void) fprintf(stderr, + gettext("-0 and -f cannot be used together\n")); + usage(B_FALSE); + return (1); + } + + if (is_power_off && istmp) { + (void) fprintf(stderr, + gettext("-0 and -t cannot be used together\n")); + usage(B_FALSE); + return (1); + } + argc -= optind; argv += optind; @@ -7085,8 +7261,22 @@ zpool_do_offline(int argc, char **argv) return (1); for (i = 1; i < argc; i++) { - if (fault) { - uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); + uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); + if (is_power_off) { + /* + * Note: we have to power off first, then set REMOVED, + * or else zpool_vdev_set_removed_state() returns + * EAGAIN. + */ + ret = zpool_power_off(zhp, argv[i]); + if (ret != 0) { + (void) fprintf(stderr, "%s %s %d\n", + gettext("unable to power off slot for"), + argv[i], ret); + } + zpool_vdev_set_removed_state(zhp, guid, VDEV_AUX_NONE); + + } else if (fault) { vdev_aux_t aux; if (istmp == B_FALSE) { /* Force the fault to persist across imports */ @@ -7109,7 +7299,7 @@ zpool_do_offline(int argc, char **argv) } /* - * zpool clear [device] + * zpool clear [-nF]|[--power] [device] * * Clear all errors associated with a pool or a particular device. */ @@ -7121,13 +7311,20 @@ zpool_do_clear(int argc, char **argv) boolean_t dryrun = B_FALSE; boolean_t do_rewind = B_FALSE; boolean_t xtreme_rewind = B_FALSE; + boolean_t is_power_on = B_FALSE; uint32_t rewind_policy = ZPOOL_NO_REWIND; nvlist_t *policy = NULL; zpool_handle_t *zhp; char *pool, *device; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, "FnX")) != -1) { + while ((c = getopt_long(argc, argv, "FnX", long_options, + NULL)) != -1) { switch (c) { case 'F': do_rewind = B_TRUE; @@ -7138,6 +7335,9 @@ zpool_do_clear(int argc, char **argv) case 'X': xtreme_rewind = B_TRUE; break; + case POWER_OPT: + is_power_on = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -7145,6 +7345,9 @@ zpool_do_clear(int argc, char **argv) } } + if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT")) + is_power_on = B_TRUE; + argc -= optind; argv += optind; @@ -7185,6 +7388,14 @@ zpool_do_clear(int argc, char **argv) return (1); } + if (is_power_on) { + if (device == NULL) { + zpool_power_on_pool_and_wait_for_devices(zhp); + } else { + zpool_power_on_and_disk_wait(zhp, device); + } + } + if (zpool_clear(zhp, device, policy) != 0) ret = 1; @@ -8801,6 +9012,10 @@ status_callback(zpool_handle_t *zhp, void *data) printf_color(ANSI_BOLD, " %5s", gettext("SLOW")); } + if (cbp->cb_print_power) { + printf_color(ANSI_BOLD, " %5s", gettext("POWER")); + } + if (cbp->vcdl != NULL) print_cmd_columns(cbp->vcdl, 0); @@ -8828,9 +9043,11 @@ status_callback(zpool_handle_t *zhp, void *data) (void) printf(gettext( "errors: No known data errors\n")); } else if (!cbp->cb_verbose) { + color_start(ANSI_RED); (void) printf(gettext("errors: %llu data " "errors, use '-v' for a list\n"), (u_longlong_t)nerr); + color_end(); } else { print_error_log(zhp); } @@ -8847,10 +9064,11 @@ status_callback(zpool_handle_t *zhp, void *data) } /* - * zpool status [-c [script1,script2,...]] [-igLpPstvx] [-T d|u] [pool] ... - * [interval [count]] + * zpool status [-c [script1,script2,...]] [-igLpPstvx] [--power] [-T d|u] ... + * [pool] [interval [count]] * * -c CMD For each vdev, run command CMD + * -e Display only unhealthy vdevs * -i Display vdev initialization status. * -g Display guid for individual vdev name. * -L Follow links when resolving vdev path name. @@ -8862,6 +9080,7 @@ status_callback(zpool_handle_t *zhp, void *data) * -D Display dedup status (undocumented) * -t Display vdev TRIM status. * -T Display a timestamp in date(1) or Unix format + * --power Display vdev enclosure slot power status * * Describes the health status of all pools or some subset. */ @@ -8875,8 +9094,14 @@ zpool_do_status(int argc, char **argv) status_cbdata_t cb = { 0 }; char *cmd = NULL; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, "c:igLpPsvxDtT:")) != -1) { + while ((c = getopt_long(argc, argv, "c:eigLpPsvxDtT:", long_options, + NULL)) != -1) { switch (c) { case 'c': if (cmd != NULL) { @@ -8902,6 +9127,9 @@ zpool_do_status(int argc, char **argv) } cmd = optarg; break; + case 'e': + cb.cb_print_unhealthy = B_TRUE; + break; case 'i': cb.cb_print_vdev_init = B_TRUE; break; @@ -8935,6 +9163,9 @@ zpool_do_status(int argc, char **argv) case 'T': get_timestamp_arg(*optarg); break; + case POWER_OPT: + cb.cb_print_power = B_TRUE; + break; case '?': if (optopt == 'c') { print_zpool_script_list("status"); @@ -10752,6 +10983,9 @@ print_wait_status_row(wait_data_t *wd, zpool_handle_t *zhp, int row) col_widths[i] = MAX(strlen(headers[i]), 6) + 2; } + if (timestamp_fmt != NODATE) + print_timestamp(timestamp_fmt); + /* Print header if appropriate */ int term_height = terminal_height(); boolean_t reprint_header = (!wd->wd_headers_once && term_height > 0 && @@ -10819,9 +11053,6 @@ print_wait_status_row(wait_data_t *wd, zpool_handle_t *zhp, int row) if (vdev_any_spare_replacing(nvroot)) bytes_rem[ZPOOL_WAIT_REPLACE] = bytes_rem[ZPOOL_WAIT_RESILVER]; - if (timestamp_fmt != NODATE) - print_timestamp(timestamp_fmt); - for (i = 0; i < ZPOOL_WAIT_NUM_ACTIVITIES; i++) { char buf[64]; if (!wd->wd_enabled[i]) diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_util.h b/sys/contrib/openzfs/cmd/zpool/zpool_util.h index db8e631dc6b..7f5406f063e 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_util.h +++ b/sys/contrib/openzfs/cmd/zpool/zpool_util.h @@ -138,6 +138,9 @@ int check_file(const char *file, boolean_t force, boolean_t isspare); void after_zpool_upgrade(zpool_handle_t *zhp); int check_file_generic(const char *file, boolean_t force, boolean_t isspare); +int zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on); +int zpool_power_current_state(zpool_handle_t *zhp, char *vdev); + #ifdef __cplusplus } #endif diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c index 3d0fc089c32..fbd4b81dfac 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c @@ -372,6 +372,10 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0); verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0); + /* Lookup and add the enclosure sysfs path (if exists) */ + update_vdev_config_dev_sysfs_path(vdev, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + if (strcmp(type, VDEV_TYPE_DISK) == 0) verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, (uint64_t)wholedisk) == 0); diff --git a/sys/contrib/openzfs/config/Rules.am b/sys/contrib/openzfs/config/Rules.am index 7c266964f3f..2e463ae6083 100644 --- a/sys/contrib/openzfs/config/Rules.am +++ b/sys/contrib/openzfs/config/Rules.am @@ -42,21 +42,6 @@ AM_CPPFLAGS += $(DEBUG_CPPFLAGS) AM_CPPFLAGS += $(CODE_COVERAGE_CPPFLAGS) AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-@ac_system_l@-user\" -AM_CPPFLAGS_NOCHECK = -D"strtok(...)=strtok(__VA_ARGS__) __attribute__((deprecated(\"Use strtok_r(3) instead!\")))" -AM_CPPFLAGS_NOCHECK += -D"__xpg_basename(...)=__xpg_basename(__VA_ARGS__) __attribute__((deprecated(\"basename(3) is underspecified. Use zfs_basename() instead!\")))" -AM_CPPFLAGS_NOCHECK += -D"basename(...)=basename(__VA_ARGS__) __attribute__((deprecated(\"basename(3) is underspecified. Use zfs_basename() instead!\")))" -AM_CPPFLAGS_NOCHECK += -D"dirname(...)=dirname(__VA_ARGS__) __attribute__((deprecated(\"dirname(3) is underspecified. Use zfs_dirnamelen() instead!\")))" -AM_CPPFLAGS_NOCHECK += -D"bcopy(...)=__attribute__((deprecated(\"bcopy(3) is deprecated. Use memcpy(3)/memmove(3) instead!\"))) bcopy(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"bcmp(...)=__attribute__((deprecated(\"bcmp(3) is deprecated. Use memcmp(3) instead!\"))) bcmp(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"bzero(...)=__attribute__((deprecated(\"bzero(3) is deprecated. Use memset(3) instead!\"))) bzero(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"asctime(...)=__attribute__((deprecated(\"Use strftime(3) instead!\"))) asctime(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"asctime_r(...)=__attribute__((deprecated(\"Use strftime(3) instead!\"))) asctime_r(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"gmtime(...)=__attribute__((deprecated(\"gmtime(3) isn't thread-safe. Use gmtime_r(3) instead!\"))) gmtime(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"localtime(...)=__attribute__((deprecated(\"localtime(3) isn't thread-safe. Use localtime_r(3) instead!\"))) localtime(__VA_ARGS__)" -AM_CPPFLAGS_NOCHECK += -D"strncpy(...)=__attribute__((deprecated(\"strncpy(3) is deprecated. Use strlcpy(3) instead!\"))) strncpy(__VA_ARGS__)" - -AM_CPPFLAGS += $(AM_CPPFLAGS_NOCHECK) - if ASAN_ENABLED AM_CPPFLAGS += -DZFS_ASAN_ENABLED endif diff --git a/sys/contrib/openzfs/config/kernel-acl.m4 b/sys/contrib/openzfs/config/kernel-acl.m4 index be08c3c6072..3ae5dc6b6db 100644 --- a/sys/contrib/openzfs/config/kernel-acl.m4 +++ b/sys/contrib/openzfs/config/kernel-acl.m4 @@ -172,7 +172,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_get_acl], [ #include - struct posix_acl *get_acl_fn(struct inode *inode, int type) + static struct posix_acl *get_acl_fn(struct inode *inode, int type) { return NULL; } static const struct inode_operations @@ -184,7 +184,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_get_acl_rcu], [ #include - struct posix_acl *get_acl_fn(struct inode *inode, int type, + static struct posix_acl *get_acl_fn(struct inode *inode, int type, bool rcu) { return NULL; } static const struct inode_operations @@ -196,7 +196,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_get_inode_acl], [ #include - struct posix_acl *get_inode_acl_fn(struct inode *inode, int type, + static struct posix_acl *get_inode_acl_fn(struct inode *inode, int type, bool rcu) { return NULL; } static const struct inode_operations @@ -243,7 +243,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_set_acl_mnt_idmap_dentry], [ #include - int set_acl_fn(struct mnt_idmap *idmap, + static int set_acl_fn(struct mnt_idmap *idmap, struct dentry *dent, struct posix_acl *acl, int type) { return 0; } @@ -255,7 +255,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns_dentry], [ #include - int set_acl_fn(struct user_namespace *userns, + static int set_acl_fn(struct user_namespace *userns, struct dentry *dent, struct posix_acl *acl, int type) { return 0; } @@ -267,7 +267,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns], [ #include - int set_acl_fn(struct user_namespace *userns, + static int set_acl_fn(struct user_namespace *userns, struct inode *inode, struct posix_acl *acl, int type) { return 0; } @@ -279,7 +279,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ZFS_LINUX_TEST_SRC([inode_operations_set_acl], [ #include - int set_acl_fn(struct inode *inode, struct posix_acl *acl, + static int set_acl_fn(struct inode *inode, struct posix_acl *acl, int type) { return 0; } static const struct inode_operations diff --git a/sys/contrib/openzfs/config/kernel-automount.m4 b/sys/contrib/openzfs/config/kernel-automount.m4 index f7bb63c6815..52f1931b748 100644 --- a/sys/contrib/openzfs/config/kernel-automount.m4 +++ b/sys/contrib/openzfs/config/kernel-automount.m4 @@ -8,7 +8,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_AUTOMOUNT], [ ZFS_LINUX_TEST_SRC([dentry_operations_d_automount], [ #include - struct vfsmount *d_automount(struct path *p) { return NULL; } + static struct vfsmount *d_automount(struct path *p) { return NULL; } struct dentry_operations dops __attribute__ ((unused)) = { .d_automount = d_automount, }; diff --git a/sys/contrib/openzfs/config/kernel-bio.m4 b/sys/contrib/openzfs/config/kernel-bio.m4 index 18620ca5b7e..b22c1a3de7e 100644 --- a/sys/contrib/openzfs/config/kernel-bio.m4 +++ b/sys/contrib/openzfs/config/kernel-bio.m4 @@ -247,7 +247,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS], [ ZFS_LINUX_TEST_SRC([bio_end_io_t_args], [ #include - void wanted_end_io(struct bio *bio) { return; } + static void wanted_end_io(struct bio *bio) { return; } bio_end_io_t *end_io __attribute__ ((unused)) = wanted_end_io; ], []) ]) diff --git a/sys/contrib/openzfs/config/kernel-blkdev.m4 b/sys/contrib/openzfs/config/kernel-blkdev.m4 index e04a2bd2c3b..c5a353ca920 100644 --- a/sys/contrib/openzfs/config/kernel-blkdev.m4 +++ b/sys/contrib/openzfs/config/kernel-blkdev.m4 @@ -35,6 +35,25 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [ ]) ]) +dnl # +dnl # 6.8.x API change +dnl # bdev_open_by_path() replaces blkdev_get_by_path() +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH], [ + ZFS_LINUX_TEST_SRC([bdev_open_by_path], [ + #include + #include + ], [ + struct bdev_handle *bdh __attribute__ ((unused)) = NULL; + const char *path = "path"; + fmode_t mode = 0; + void *holder = NULL; + struct blk_holder_ops h; + + bdh = bdev_open_by_path(path, mode, holder, &h); + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [ AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 3 args]) ZFS_LINUX_TEST_RESULT([blkdev_get_by_path], [ @@ -47,7 +66,15 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [ [blkdev_get_by_path() exists and takes 4 args]) AC_MSG_RESULT(yes) ], [ - ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()]) + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether bdev_open_by_path() exists]) + ZFS_LINUX_TEST_RESULT([bdev_open_by_path], [ + AC_DEFINE(HAVE_BDEV_OPEN_BY_PATH, 1, + [bdev_open_by_path() exists]) + AC_MSG_RESULT(yes) + ], [ + ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()]) + ]) ]) ]) ]) @@ -108,18 +135,41 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER], [ ]) ]) +dnl # +dnl # 6.8.x API change +dnl # bdev_release() replaces blkdev_put() +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_RELEASE], [ + ZFS_LINUX_TEST_SRC([bdev_release], [ + #include + #include + ], [ + struct bdev_handle *bdh = NULL; + bdev_release(bdh); + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [ AC_MSG_CHECKING([whether blkdev_put() exists]) ZFS_LINUX_TEST_RESULT([blkdev_put], [ AC_MSG_RESULT(yes) ], [ + AC_MSG_RESULT(no) AC_MSG_CHECKING([whether blkdev_put() accepts void* as arg 2]) ZFS_LINUX_TEST_RESULT([blkdev_put_holder], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_BLKDEV_PUT_HOLDER, 1, [blkdev_put() accepts void* as arg 2]) ], [ - ZFS_LINUX_TEST_ERROR([blkdev_put()]) + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether bdev_release() exists]) + ZFS_LINUX_TEST_RESULT([bdev_release], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BDEV_RELEASE, 1, + [bdev_release() exists]) + ], [ + ZFS_LINUX_TEST_ERROR([blkdev_put()]) + ]) ]) ]) ]) @@ -474,6 +524,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEVNAME], [ dnl # dnl # 5.19 API: blkdev_issue_secure_erase() +dnl # 4.7 API: __blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE) dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE) dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [ @@ -489,6 +540,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [ sector, nr_sects, GFP_KERNEL); ]) + ZFS_LINUX_TEST_SRC([blkdev_issue_discard_async_flags], [ + #include + ],[ + struct block_device *bdev = NULL; + sector_t sector = 0; + sector_t nr_sects = 0; + unsigned long flags = 0; + struct bio *biop = NULL; + int error __attribute__ ((unused)); + + error = __blkdev_issue_discard(bdev, + sector, nr_sects, GFP_KERNEL, flags, &biop); + ]) + ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [ #include ],[ @@ -512,13 +577,22 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [ ],[ AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether blkdev_issue_discard() is available]) - ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [ + AC_MSG_CHECKING([whether __blkdev_issue_discard() is available]) + ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_async_flags], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1, - [blkdev_issue_discard() is available]) + AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC, 1, + [__blkdev_issue_discard() is available]) ],[ - ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()]) + AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether blkdev_issue_discard() is available]) + ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1, + [blkdev_issue_discard() is available]) + ],[ + ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()]) + ]) ]) ]) ]) @@ -570,8 +644,10 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [ ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG + ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_OPEN_BY_PATH ZFS_AC_KERNEL_SRC_BLKDEV_PUT ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER + ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_RELEASE ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART ZFS_AC_KERNEL_SRC_BLKDEV_INVALIDATE_BDEV ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV diff --git a/sys/contrib/openzfs/config/kernel-block-device-operations.m4 b/sys/contrib/openzfs/config/kernel-block-device-operations.m4 index d13c1337b1f..4ff20b9c413 100644 --- a/sys/contrib/openzfs/config/kernel-block-device-operations.m4 +++ b/sys/contrib/openzfs/config/kernel-block-device-operations.m4 @@ -5,7 +5,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [ ZFS_LINUX_TEST_SRC([block_device_operations_check_events], [ #include - unsigned int blk_check_events(struct gendisk *disk, + static unsigned int blk_check_events(struct gendisk *disk, unsigned int clearing) { (void) disk, (void) clearing; return (0); @@ -34,7 +34,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ ZFS_LINUX_TEST_SRC([block_device_operations_release_void], [ #include - void blk_release(struct gendisk *g, fmode_t mode) { + static void blk_release(struct gendisk *g, fmode_t mode) { (void) g, (void) mode; return; } @@ -56,7 +56,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [ ZFS_LINUX_TEST_SRC([block_device_operations_release_void_1arg], [ #include - void blk_release(struct gendisk *g) { + static void blk_release(struct gendisk *g) { (void) g; return; } @@ -96,7 +96,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [ ZFS_LINUX_TEST_SRC([block_device_operations_revalidate_disk], [ #include - int blk_revalidate_disk(struct gendisk *disk) { + static int blk_revalidate_disk(struct gendisk *disk) { (void) disk; return(0); } diff --git a/sys/contrib/openzfs/config/kernel-commit-metadata.m4 b/sys/contrib/openzfs/config/kernel-commit-metadata.m4 index 7df9b980290..49bffbf609d 100644 --- a/sys/contrib/openzfs/config/kernel-commit-metadata.m4 +++ b/sys/contrib/openzfs/config/kernel-commit-metadata.m4 @@ -7,7 +7,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_COMMIT_METADATA], [ ZFS_LINUX_TEST_SRC([export_operations_commit_metadata], [ #include - int commit_metadata(struct inode *inode) { return 0; } + static int commit_metadata(struct inode *inode) { return 0; } static struct export_operations eops __attribute__ ((unused))={ .commit_metadata = commit_metadata, }; diff --git a/sys/contrib/openzfs/config/kernel-current-time.m4 b/sys/contrib/openzfs/config/kernel-current-time.m4 index 3ceb5f63efa..ab7d9c5cedb 100644 --- a/sys/contrib/openzfs/config/kernel-current-time.m4 +++ b/sys/contrib/openzfs/config/kernel-current-time.m4 @@ -2,12 +2,15 @@ dnl # dnl # 4.9, current_time() added dnl # 4.18, return type changed from timespec to timespec64 dnl # +dnl # Note that we don't care about the return type in this check. If we have +dnl # to implement a fallback, we'll know we're <4.9, which was timespec. +dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_TIME], [ ZFS_LINUX_TEST_SRC([current_time], [ #include ], [ struct inode ip __attribute__ ((unused)); - ip.i_atime = current_time(&ip); + (void) current_time(&ip); ]) ]) diff --git a/sys/contrib/openzfs/config/kernel-dentry-operations.m4 b/sys/contrib/openzfs/config/kernel-dentry-operations.m4 index dd470d7607b..500f61e26ae 100644 --- a/sys/contrib/openzfs/config/kernel-dentry-operations.m4 +++ b/sys/contrib/openzfs/config/kernel-dentry-operations.m4 @@ -98,7 +98,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA], [ #include #include - int revalidate (struct dentry *dentry, + static int revalidate (struct dentry *dentry, struct nameidata *nidata) { return 0; } static const struct dentry_operations diff --git a/sys/contrib/openzfs/config/kernel-dirty-inode.m4 b/sys/contrib/openzfs/config/kernel-dirty-inode.m4 index dc7667fa488..2ef8658748c 100644 --- a/sys/contrib/openzfs/config/kernel-dirty-inode.m4 +++ b/sys/contrib/openzfs/config/kernel-dirty-inode.m4 @@ -8,7 +8,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_DIRTY_INODE], [ ZFS_LINUX_TEST_SRC([dirty_inode_with_flags], [ #include - void dirty_inode(struct inode *a, int b) { return; } + static void dirty_inode(struct inode *a, int b) { return; } static const struct super_operations sops __attribute__ ((unused)) = { diff --git a/sys/contrib/openzfs/config/kernel-encode-fh-inode.m4 b/sys/contrib/openzfs/config/kernel-encode-fh-inode.m4 index 9d4ba5f0f61..b3ec040b5e9 100644 --- a/sys/contrib/openzfs/config/kernel-encode-fh-inode.m4 +++ b/sys/contrib/openzfs/config/kernel-encode-fh-inode.m4 @@ -7,7 +7,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE], [ ZFS_LINUX_TEST_SRC([export_operations_encode_fh], [ #include - int encode_fh(struct inode *inode, __u32 *fh, int *max_len, + static int encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { return 0; } static struct export_operations eops __attribute__ ((unused))={ .encode_fh = encode_fh, diff --git a/sys/contrib/openzfs/config/kernel-evict-inode.m4 b/sys/contrib/openzfs/config/kernel-evict-inode.m4 index 66f10492de5..87082c9a283 100644 --- a/sys/contrib/openzfs/config/kernel-evict-inode.m4 +++ b/sys/contrib/openzfs/config/kernel-evict-inode.m4 @@ -6,7 +6,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_EVICT_INODE], [ ZFS_LINUX_TEST_SRC([evict_inode], [ #include - void evict_inode (struct inode * t) { return; } + static void evict_inode (struct inode * t) { return; } static struct super_operations sops __attribute__ ((unused)) = { .evict_inode = evict_inode, }; diff --git a/sys/contrib/openzfs/config/kernel-fallocate.m4 b/sys/contrib/openzfs/config/kernel-fallocate.m4 index 815602d3e2c..95186dada45 100644 --- a/sys/contrib/openzfs/config/kernel-fallocate.m4 +++ b/sys/contrib/openzfs/config/kernel-fallocate.m4 @@ -11,7 +11,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FALLOCATE], [ ZFS_LINUX_TEST_SRC([file_fallocate], [ #include - long test_fallocate(struct file *file, int mode, + static long test_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { return 0; } static const struct file_operations diff --git a/sys/contrib/openzfs/config/kernel-flush_dcache_page.m4 b/sys/contrib/openzfs/config/kernel-flush_dcache_page.m4 index 2340c386ef5..aa916c87d53 100644 --- a/sys/contrib/openzfs/config/kernel-flush_dcache_page.m4 +++ b/sys/contrib/openzfs/config/kernel-flush_dcache_page.m4 @@ -1,7 +1,8 @@ dnl # dnl # Starting from Linux 5.13, flush_dcache_page() becomes an inline -dnl # function and may indirectly referencing GPL-only cpu_feature_keys on -dnl # powerpc +dnl # function and may indirectly referencing GPL-only symbols: +dnl # on powerpc: cpu_feature_keys +dnl # on riscv: PageHuge (added from 6.2) dnl # dnl # diff --git a/sys/contrib/openzfs/config/kernel-fpu.m4 b/sys/contrib/openzfs/config/kernel-fpu.m4 index c6efebd8cf6..edfde1a02d3 100644 --- a/sys/contrib/openzfs/config/kernel-fpu.m4 +++ b/sys/contrib/openzfs/config/kernel-fpu.m4 @@ -79,6 +79,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ __kernel_fpu_end(); ], [], [ZFS_META_LICENSE]) + ZFS_LINUX_TEST_SRC([kernel_neon], [ + #include + ], [ + kernel_neon_begin(); + kernel_neon_end(); + ], [], [ZFS_META_LICENSE]) ]) AC_DEFUN([ZFS_AC_KERNEL_FPU], [ @@ -105,9 +111,20 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ AC_DEFINE(KERNEL_EXPORTS_X86_FPU, 1, [kernel exports FPU functions]) ],[ - AC_MSG_RESULT(internal) - AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1, - [kernel fpu internal]) + dnl # + dnl # ARM neon symbols (only on arm and arm64) + dnl # could be GPL-only on arm64 after Linux 6.2 + dnl # + ZFS_LINUX_TEST_RESULT([kernel_neon_license],[ + AC_MSG_RESULT(kernel_neon_*) + AC_DEFINE(HAVE_KERNEL_NEON, 1, + [kernel has kernel_neon_* functions]) + ],[ + # catch-all + AC_MSG_RESULT(internal) + AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1, + [kernel fpu internal]) + ]) ]) ]) ]) diff --git a/sys/contrib/openzfs/config/kernel-fsync.m4 b/sys/contrib/openzfs/config/kernel-fsync.m4 index d198191d3ab..c155f8af81a 100644 --- a/sys/contrib/openzfs/config/kernel-fsync.m4 +++ b/sys/contrib/openzfs/config/kernel-fsync.m4 @@ -5,7 +5,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FSYNC], [ ZFS_LINUX_TEST_SRC([fsync_without_dentry], [ #include - int test_fsync(struct file *f, int x) { return 0; } + static int test_fsync(struct file *f, int x) { return 0; } static const struct file_operations fops __attribute__ ((unused)) = { @@ -16,7 +16,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FSYNC], [ ZFS_LINUX_TEST_SRC([fsync_range], [ #include - int test_fsync(struct file *f, loff_t a, loff_t b, int c) + static int test_fsync(struct file *f, loff_t a, loff_t b, int c) { return 0; } static const struct file_operations diff --git a/sys/contrib/openzfs/config/kernel-get-link.m4 b/sys/contrib/openzfs/config/kernel-get-link.m4 index e4f478e37c1..1f8f5b0c8b7 100644 --- a/sys/contrib/openzfs/config/kernel-get-link.m4 +++ b/sys/contrib/openzfs/config/kernel-get-link.m4 @@ -5,7 +5,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ ZFS_LINUX_TEST_SRC([inode_operations_get_link], [ #include - const char *get_link(struct dentry *de, struct inode *ip, + static const char *get_link(struct dentry *de, struct inode *ip, struct delayed_call *done) { return "symlink"; } static struct inode_operations iops __attribute__ ((unused)) = { @@ -15,7 +15,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ ZFS_LINUX_TEST_SRC([inode_operations_get_link_cookie], [ #include - const char *get_link(struct dentry *de, struct + static const char *get_link(struct dentry *de, struct inode *ip, void **cookie) { return "symlink"; } static struct inode_operations iops __attribute__ ((unused)) = { @@ -25,7 +25,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ ZFS_LINUX_TEST_SRC([inode_operations_follow_link], [ #include - const char *follow_link(struct dentry *de, + static const char *follow_link(struct dentry *de, void **cookie) { return "symlink"; } static struct inode_operations iops __attribute__ ((unused)) = { @@ -35,7 +35,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ ZFS_LINUX_TEST_SRC([inode_operations_follow_link_nameidata], [ #include - void *follow_link(struct dentry *de, struct + static void *follow_link(struct dentry *de, struct nameidata *nd) { return (void *)NULL; } static struct inode_operations iops __attribute__ ((unused)) = { diff --git a/sys/contrib/openzfs/config/kernel-idmap_mnt_api.m4 b/sys/contrib/openzfs/config/kernel-idmap_mnt_api.m4 index 47ddc5702fb..d1bdd053203 100644 --- a/sys/contrib/openzfs/config/kernel-idmap_mnt_api.m4 +++ b/sys/contrib/openzfs/config/kernel-idmap_mnt_api.m4 @@ -23,3 +23,28 @@ AC_DEFUN([ZFS_AC_KERNEL_IDMAP_MNT_API], [ ]) ]) +dnl # +dnl # 6.8 decouples mnt_idmap from user_namespace. This is all internal +dnl # to mnt_idmap so we can't detect it directly, but we detect a related +dnl # change as use that as a signal. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS], [ + ZFS_LINUX_TEST_SRC([idmap_no_userns], [ + #include + ], [ + struct uid_gid_map *map = NULL; + map_id_down(map, 0); + ]) +]) + + +AC_DEFUN([ZFS_AC_KERNEL_IDMAP_NO_USERNS], [ + AC_MSG_CHECKING([whether idmapped mounts have a user namespace]) + ZFS_LINUX_TEST_RESULT([idmap_no_userns], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_IDMAP_NO_USERNS, 1, + [mnt_idmap does not have user_namespace]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/sys/contrib/openzfs/config/kernel-inode-create.m4 b/sys/contrib/openzfs/config/kernel-inode-create.m4 index 9e9e4318097..95f8aa2d522 100644 --- a/sys/contrib/openzfs/config/kernel-inode-create.m4 +++ b/sys/contrib/openzfs/config/kernel-inode-create.m4 @@ -7,7 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [ #include #include - int inode_create(struct mnt_idmap *idmap, + static int inode_create(struct mnt_idmap *idmap, struct inode *inode ,struct dentry *dentry, umode_t umode, bool flag) { return 0; } @@ -25,7 +25,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [ #include #include - int inode_create(struct user_namespace *userns, + static int inode_create(struct user_namespace *userns, struct inode *inode ,struct dentry *dentry, umode_t umode, bool flag) { return 0; } @@ -42,7 +42,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [ #include #include - int inode_create(struct inode *inode ,struct dentry *dentry, + static int inode_create(struct inode *inode ,struct dentry *dentry, umode_t umode, bool flag) { return 0; } static const struct inode_operations diff --git a/sys/contrib/openzfs/config/kernel-inode-getattr.m4 b/sys/contrib/openzfs/config/kernel-inode-getattr.m4 index c8bfb07862a..5f7ce1ad9a5 100644 --- a/sys/contrib/openzfs/config/kernel-inode-getattr.m4 +++ b/sys/contrib/openzfs/config/kernel-inode-getattr.m4 @@ -7,7 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_getattr_mnt_idmap], [ #include - int test_getattr( + static int test_getattr( struct mnt_idmap *idmap, const struct path *p, struct kstat *k, u32 request_mask, unsigned int query_flags) @@ -28,7 +28,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_getattr_userns], [ #include - int test_getattr( + static int test_getattr( struct user_namespace *userns, const struct path *p, struct kstat *k, u32 request_mask, unsigned int query_flags) @@ -47,7 +47,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_getattr_path], [ #include - int test_getattr( + static int test_getattr( const struct path *p, struct kstat *k, u32 request_mask, unsigned int query_flags) { return 0; } @@ -61,7 +61,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_getattr_vfsmount], [ #include - int test_getattr( + static int test_getattr( struct vfsmount *mnt, struct dentry *d, struct kstat *k) { return 0; } diff --git a/sys/contrib/openzfs/config/kernel-inode-lookup.m4 b/sys/contrib/openzfs/config/kernel-inode-lookup.m4 index 1a56e69b04a..c7373056422 100644 --- a/sys/contrib/openzfs/config/kernel-inode-lookup.m4 +++ b/sys/contrib/openzfs/config/kernel-inode-lookup.m4 @@ -6,7 +6,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS], [ #include #include - struct dentry *inode_lookup(struct inode *inode, + static struct dentry *inode_lookup(struct inode *inode, struct dentry *dentry, unsigned int flags) { return NULL; } static const struct inode_operations iops diff --git a/sys/contrib/openzfs/config/kernel-inode-permission.m4 b/sys/contrib/openzfs/config/kernel-inode-permission.m4 index 01d23635b0c..f7fc1643909 100644 --- a/sys/contrib/openzfs/config/kernel-inode-permission.m4 +++ b/sys/contrib/openzfs/config/kernel-inode-permission.m4 @@ -8,12 +8,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PERMISSION], [ #include #include - int inode_permission(struct mnt_idmap *idmap, + static int test_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { return 0; } static const struct inode_operations iops __attribute__ ((unused)) = { - .permission = inode_permission, + .permission = test_permission, }; ],[]) @@ -25,12 +25,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PERMISSION], [ #include #include - int inode_permission(struct user_namespace *userns, + static int test_permission(struct user_namespace *userns, struct inode *inode, int mask) { return 0; } static const struct inode_operations iops __attribute__ ((unused)) = { - .permission = inode_permission, + .permission = test_permission, }; ],[]) ]) diff --git a/sys/contrib/openzfs/config/kernel-inode-setattr.m4 b/sys/contrib/openzfs/config/kernel-inode-setattr.m4 index 45755b4eb27..69289e897be 100644 --- a/sys/contrib/openzfs/config/kernel-inode-setattr.m4 +++ b/sys/contrib/openzfs/config/kernel-inode-setattr.m4 @@ -7,7 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_setattr_mnt_idmap], [ #include - int test_setattr( + static int test_setattr( struct mnt_idmap *idmap, struct dentry *de, struct iattr *ia) { return 0; } @@ -27,7 +27,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_setattr_userns], [ #include - int test_setattr( + static int test_setattr( struct user_namespace *userns, struct dentry *de, struct iattr *ia) { return 0; } @@ -41,7 +41,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [ ZFS_LINUX_TEST_SRC([inode_operations_setattr], [ #include - int test_setattr( + static int test_setattr( struct dentry *de, struct iattr *ia) { return 0; } diff --git a/sys/contrib/openzfs/config/kernel-inode-times.m4 b/sys/contrib/openzfs/config/kernel-inode-times.m4 index aae95abf172..4d861596ed0 100644 --- a/sys/contrib/openzfs/config/kernel-inode-times.m4 +++ b/sys/contrib/openzfs/config/kernel-inode-times.m4 @@ -52,6 +52,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [ memset(&ip, 0, sizeof(ip)); inode_set_ctime_to_ts(&ip, ts); ]) + + dnl # + dnl # 6.7 API change + dnl # i_atime/i_mtime no longer directly accessible, must use + dnl # inode_get_mtime(ip), inode_set_mtime*(ip) to + dnl # read/write. + dnl # + ZFS_LINUX_TEST_SRC([inode_get_atime], [ + #include + ],[ + struct inode ip; + + memset(&ip, 0, sizeof(ip)); + inode_get_atime(&ip); + ]) + ZFS_LINUX_TEST_SRC([inode_get_mtime], [ + #include + ],[ + struct inode ip; + + memset(&ip, 0, sizeof(ip)); + inode_get_mtime(&ip); + ]) + + ZFS_LINUX_TEST_SRC([inode_set_atime_to_ts], [ + #include + ],[ + struct inode ip; + struct timespec64 ts = {0}; + + memset(&ip, 0, sizeof(ip)); + inode_set_atime_to_ts(&ip, ts); + ]) + ZFS_LINUX_TEST_SRC([inode_set_mtime_to_ts], [ + #include + ],[ + struct inode ip; + struct timespec64 ts = {0}; + + memset(&ip, 0, sizeof(ip)); + inode_set_mtime_to_ts(&ip, ts); + ]) ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ @@ -90,4 +132,40 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ ],[ AC_MSG_RESULT(no) ]) + + AC_MSG_CHECKING([whether inode_get_atime() exists]) + ZFS_LINUX_TEST_RESULT([inode_get_atime], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_GET_ATIME, 1, + [inode_get_atime() exists in linux/fs.h]) + ],[ + AC_MSG_RESULT(no) + ]) + + AC_MSG_CHECKING([whether inode_set_atime_to_ts() exists]) + ZFS_LINUX_TEST_RESULT([inode_set_atime_to_ts], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_SET_ATIME_TO_TS, 1, + [inode_set_atime_to_ts() exists in linux/fs.h]) + ],[ + AC_MSG_RESULT(no) + ]) + + AC_MSG_CHECKING([whether inode_get_mtime() exists]) + ZFS_LINUX_TEST_RESULT([inode_get_mtime], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_GET_MTIME, 1, + [inode_get_mtime() exists in linux/fs.h]) + ],[ + AC_MSG_RESULT(no) + ]) + + AC_MSG_CHECKING([whether inode_set_mtime_to_ts() exists]) + ZFS_LINUX_TEST_RESULT([inode_set_mtime_to_ts], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_INODE_SET_MTIME_TO_TS, 1, + [inode_set_mtime_to_ts() exists in linux/fs.h]) + ],[ + AC_MSG_RESULT(no) + ]) ]) diff --git a/sys/contrib/openzfs/config/kernel-make-request-fn.m4 b/sys/contrib/openzfs/config/kernel-make-request-fn.m4 index f17416acca6..4d20dd45c4a 100644 --- a/sys/contrib/openzfs/config/kernel-make-request-fn.m4 +++ b/sys/contrib/openzfs/config/kernel-make-request-fn.m4 @@ -4,7 +4,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ ZFS_LINUX_TEST_SRC([make_request_fn_void], [ #include - void make_request(struct request_queue *q, + static void make_request(struct request_queue *q, struct bio *bio) { return; } ],[ blk_queue_make_request(NULL, &make_request); @@ -12,7 +12,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ ZFS_LINUX_TEST_SRC([make_request_fn_blk_qc_t], [ #include - blk_qc_t make_request(struct request_queue *q, + static blk_qc_t make_request(struct request_queue *q, struct bio *bio) { return (BLK_QC_T_NONE); } ],[ blk_queue_make_request(NULL, &make_request); @@ -20,7 +20,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ ZFS_LINUX_TEST_SRC([blk_alloc_queue_request_fn], [ #include - blk_qc_t make_request(struct request_queue *q, + static blk_qc_t make_request(struct request_queue *q, struct bio *bio) { return (BLK_QC_T_NONE); } ],[ struct request_queue *q __attribute__ ((unused)); @@ -29,7 +29,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ ZFS_LINUX_TEST_SRC([blk_alloc_queue_request_fn_rh], [ #include - blk_qc_t make_request(struct request_queue *q, + static blk_qc_t make_request(struct request_queue *q, struct bio *bio) { return (BLK_QC_T_NONE); } ],[ struct request_queue *q __attribute__ ((unused)); diff --git a/sys/contrib/openzfs/config/kernel-mkdir.m4 b/sys/contrib/openzfs/config/kernel-mkdir.m4 index 7407a791b84..367f100094d 100644 --- a/sys/contrib/openzfs/config/kernel-mkdir.m4 +++ b/sys/contrib/openzfs/config/kernel-mkdir.m4 @@ -9,7 +9,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [ ZFS_LINUX_TEST_SRC([mkdir_mnt_idmap], [ #include - int mkdir(struct mnt_idmap *idmap, + static int mkdir(struct mnt_idmap *idmap, struct inode *inode, struct dentry *dentry, umode_t umode) { return 0; } static const struct inode_operations @@ -26,7 +26,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [ ZFS_LINUX_TEST_SRC([mkdir_user_namespace], [ #include - int mkdir(struct user_namespace *userns, + static int mkdir(struct user_namespace *userns, struct inode *inode, struct dentry *dentry, umode_t umode) { return 0; } @@ -47,7 +47,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [ ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [ #include - int mkdir(struct inode *inode, struct dentry *dentry, + static int mkdir(struct inode *inode, struct dentry *dentry, umode_t umode) { return 0; } static const struct inode_operations diff --git a/sys/contrib/openzfs/config/kernel-mknod.m4 b/sys/contrib/openzfs/config/kernel-mknod.m4 index 1494ec1ae4d..6ad3453aaf0 100644 --- a/sys/contrib/openzfs/config/kernel-mknod.m4 +++ b/sys/contrib/openzfs/config/kernel-mknod.m4 @@ -7,7 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [ #include #include - int tmp_mknod(struct mnt_idmap *idmap, + static int tmp_mknod(struct mnt_idmap *idmap, struct inode *inode ,struct dentry *dentry, umode_t u, dev_t d) { return 0; } @@ -25,7 +25,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [ #include #include - int tmp_mknod(struct user_namespace *userns, + static int tmp_mknod(struct user_namespace *userns, struct inode *inode ,struct dentry *dentry, umode_t u, dev_t d) { return 0; } diff --git a/sys/contrib/openzfs/config/kernel-proc-operations.m4 b/sys/contrib/openzfs/config/kernel-proc-operations.m4 index df216222ecc..3ae8ce2b6d0 100644 --- a/sys/contrib/openzfs/config/kernel-proc-operations.m4 +++ b/sys/contrib/openzfs/config/kernel-proc-operations.m4 @@ -7,14 +7,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PROC_OPERATIONS], [ ZFS_LINUX_TEST_SRC([proc_ops_struct], [ #include - int test_open(struct inode *ip, struct file *fp) { return 0; } - ssize_t test_read(struct file *fp, char __user *ptr, + static int test_open(struct inode *ip, struct file *fp) { return 0; } + static ssize_t test_read(struct file *fp, char __user *ptr, size_t size, loff_t *offp) { return 0; } - ssize_t test_write(struct file *fp, const char __user *ptr, + static ssize_t test_write(struct file *fp, const char __user *ptr, size_t size, loff_t *offp) { return 0; } - loff_t test_lseek(struct file *fp, loff_t off, int flag) + static loff_t test_lseek(struct file *fp, loff_t off, int flag) { return 0; } - int test_release(struct inode *ip, struct file *fp) + static int test_release(struct inode *ip, struct file *fp) { return 0; } const struct proc_ops test_ops __attribute__ ((unused)) = { diff --git a/sys/contrib/openzfs/config/kernel-put-link.m4 b/sys/contrib/openzfs/config/kernel-put-link.m4 index 4234861f334..8ab318cbff8 100644 --- a/sys/contrib/openzfs/config/kernel-put-link.m4 +++ b/sys/contrib/openzfs/config/kernel-put-link.m4 @@ -4,7 +4,7 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_PUT_LINK], [ ZFS_LINUX_TEST_SRC([put_link_cookie], [ #include - void put_link(struct inode *ip, void *cookie) + static void put_link(struct inode *ip, void *cookie) { return; } static struct inode_operations iops __attribute__ ((unused)) = { @@ -14,7 +14,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_PUT_LINK], [ ZFS_LINUX_TEST_SRC([put_link_nameidata], [ #include - void put_link(struct dentry *de, struct + static void put_link(struct dentry *de, struct nameidata *nd, void *ptr) { return; } static struct inode_operations iops __attribute__ ((unused)) = { diff --git a/sys/contrib/openzfs/config/kernel-rename.m4 b/sys/contrib/openzfs/config/kernel-rename.m4 index 57c3eed7897..ce881502d1b 100644 --- a/sys/contrib/openzfs/config/kernel-rename.m4 +++ b/sys/contrib/openzfs/config/kernel-rename.m4 @@ -8,7 +8,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename2], [ #include - int rename2_fn(struct inode *sip, struct dentry *sdp, + static int rename2_fn(struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } @@ -26,7 +26,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [ #include - int rename_fn(struct inode *sip, struct dentry *sdp, + static int rename_fn(struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } @@ -44,7 +44,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # ZFS_LINUX_TEST_SRC([dir_inode_operations_wrapper_rename2], [ #include - int rename2_fn(struct inode *sip, struct dentry *sdp, + static int rename2_fn(struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } @@ -62,7 +62,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_userns], [ #include - int rename_fn(struct user_namespace *user_ns, struct inode *sip, + static int rename_fn(struct user_namespace *user_ns, struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } @@ -77,7 +77,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_mnt_idmap], [ #include - int rename_fn(struct mnt_idmap *idmap, struct inode *sip, + static int rename_fn(struct mnt_idmap *idmap, struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, unsigned int flags) { return 0; } diff --git a/sys/contrib/openzfs/config/kernel-show-options.m4 b/sys/contrib/openzfs/config/kernel-show-options.m4 index 93bd5fbfbb2..fd62f30086d 100644 --- a/sys/contrib/openzfs/config/kernel-show-options.m4 +++ b/sys/contrib/openzfs/config/kernel-show-options.m4 @@ -5,7 +5,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHOW_OPTIONS], [ ZFS_LINUX_TEST_SRC([super_operations_show_options], [ #include - int show_options(struct seq_file * x, struct dentry * y) { + static int show_options(struct seq_file * x, struct dentry * y) { return 0; }; diff --git a/sys/contrib/openzfs/config/kernel-shrink.m4 b/sys/contrib/openzfs/config/kernel-shrink.m4 index 0c702153e8c..6580b08d5ff 100644 --- a/sys/contrib/openzfs/config/kernel-shrink.m4 +++ b/sys/contrib/openzfs/config/kernel-shrink.m4 @@ -8,9 +8,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK], [ ZFS_LINUX_TEST_SRC([super_block_s_shrink], [ #include - int shrink(struct shrinker *s, struct shrink_control *sc) - { return 0; } - static const struct super_block sb __attribute__ ((unused)) = { .s_shrink.seeks = DEFAULT_SEEKS, @@ -19,12 +16,44 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK], [ ],[]) ]) +dnl # +dnl # 6.7 API change +dnl # s_shrink is now a pointer. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR], [ + ZFS_LINUX_TEST_SRC([super_block_s_shrink_ptr], [ + #include + static unsigned long shrinker_cb(struct shrinker *shrink, + struct shrink_control *sc) { return 0; } + static struct shrinker shrinker = { + .count_objects = shrinker_cb, + .scan_objects = shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + static const struct super_block + sb __attribute__ ((unused)) = { + .s_shrink = &shrinker, + }; + ],[]) +]) + AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK], [ AC_MSG_CHECKING([whether super_block has s_shrink]) ZFS_LINUX_TEST_RESULT([super_block_s_shrink], [ AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK, 1, + [have super_block s_shrink]) ],[ - ZFS_LINUX_TEST_ERROR([sb->s_shrink()]) + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether super_block has s_shrink pointer]) + ZFS_LINUX_TEST_RESULT([super_block_s_shrink_ptr], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SUPER_BLOCK_S_SHRINK_PTR, 1, + [have super_block s_shrink pointer]) + ],[ + AC_MSG_RESULT(no) + ZFS_LINUX_TEST_ERROR([sb->s_shrink()]) + ]) ]) ]) @@ -57,7 +86,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ ZFS_LINUX_TEST_SRC([register_shrinker_vararg], [ #include - unsigned long shrinker_cb(struct shrinker *shrink, + static unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { @@ -72,7 +101,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [ #include - int shrinker_cb(struct shrinker *shrink, + static int shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { @@ -84,7 +113,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control_split], [ #include - unsigned long shrinker_cb(struct shrinker *shrink, + static unsigned long shrinker_cb(struct shrinker *shrink, struct shrink_control *sc) { return 0; } ],[ struct shrinker cache_shrinker = { @@ -96,6 +125,25 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ ]) ]) +dnl # +dnl # 6.7 API change +dnl # register_shrinker has been replaced by shrinker_register. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER], [ + ZFS_LINUX_TEST_SRC([shrinker_register], [ + #include + static unsigned long shrinker_cb(struct shrinker *shrink, + struct shrink_control *sc) { return 0; } + ],[ + struct shrinker cache_shrinker = { + .count_objects = shrinker_cb, + .scan_objects = shrinker_cb, + .seeks = DEFAULT_SEEKS, + }; + shrinker_register(&cache_shrinker); + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ dnl # dnl # 6.0 API change @@ -133,14 +181,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ dnl # cs->shrink() is logically split in to dnl # cs->count_objects() and cs->scan_objects() dnl # - AC_MSG_CHECKING([if cs->count_objects callback exists]) + AC_MSG_CHECKING( + [whether cs->count_objects callback exists]) ZFS_LINUX_TEST_RESULT( - [shrinker_cb_shrink_control_split],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, - [cs->count_objects exists]) + [shrinker_cb_shrink_control_split],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, + [cs->count_objects exists]) ],[ + AC_MSG_RESULT(no) + + AC_MSG_CHECKING( + [whether shrinker_register exists]) + ZFS_LINUX_TEST_RESULT([shrinker_register], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SHRINKER_REGISTER, 1, + [shrinker_register exists]) + + dnl # We assume that the split shrinker + dnl # callback exists if + dnl # shrinker_register() exists, + dnl # because the latter is a much more + dnl # recent addition, and the macro + dnl # test for shrinker_register() only + dnl # works if the callback is split + AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, + 1, [cs->count_objects exists]) + ],[ + AC_MSG_RESULT(no) ZFS_LINUX_TEST_ERROR([shrinker]) + ]) ]) ]) ]) @@ -174,10 +244,12 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [ ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK + ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG + ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER ]) AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [ diff --git a/sys/contrib/openzfs/config/kernel-strlcpy.m4 b/sys/contrib/openzfs/config/kernel-strlcpy.m4 new file mode 100644 index 00000000000..c31cf52d78b --- /dev/null +++ b/sys/contrib/openzfs/config/kernel-strlcpy.m4 @@ -0,0 +1,47 @@ +dnl # +dnl # 6.8.x replaced strlcpy with strscpy. Check for both so we can provide +dnl # appropriate fallbacks. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_STRLCPY], [ + ZFS_LINUX_TEST_SRC([kernel_has_strlcpy], [ + #include + ], [ + const char *src = "goodbye"; + char dst[32]; + size_t len; + len = strlcpy(dst, src, sizeof (dst)); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SRC_STRSCPY], [ + ZFS_LINUX_TEST_SRC([kernel_has_strscpy], [ + #include + ], [ + const char *src = "goodbye"; + char dst[32]; + ssize_t len; + len = strscpy(dst, src, sizeof (dst)); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_STRLCPY], [ + AC_MSG_CHECKING([whether strlcpy() exists]) + ZFS_LINUX_TEST_RESULT([kernel_has_strlcpy], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_KERNEL_STRLCPY, 1, + [strlcpy() exists]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_STRSCPY], [ + AC_MSG_CHECKING([whether strscpy() exists]) + ZFS_LINUX_TEST_RESULT([kernel_has_strscpy], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_KERNEL_STRSCPY, 1, + [strscpy() exists]) + ], [ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/sys/contrib/openzfs/config/kernel-symlink.m4 b/sys/contrib/openzfs/config/kernel-symlink.m4 index a0333ed66a7..804fceab28f 100644 --- a/sys/contrib/openzfs/config/kernel-symlink.m4 +++ b/sys/contrib/openzfs/config/kernel-symlink.m4 @@ -6,7 +6,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [ ZFS_LINUX_TEST_SRC([symlink_mnt_idmap], [ #include #include - int tmp_symlink(struct mnt_idmap *idmap, + static int tmp_symlink(struct mnt_idmap *idmap, struct inode *inode ,struct dentry *dentry, const char *path) { return 0; } @@ -23,7 +23,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [ #include #include - int tmp_symlink(struct user_namespace *userns, + static int tmp_symlink(struct user_namespace *userns, struct inode *inode ,struct dentry *dentry, const char *path) { return 0; } diff --git a/sys/contrib/openzfs/config/kernel-timer.m4 b/sys/contrib/openzfs/config/kernel-timer.m4 index 403cff3f418..c710e804be0 100644 --- a/sys/contrib/openzfs/config/kernel-timer.m4 +++ b/sys/contrib/openzfs/config/kernel-timer.m4 @@ -18,7 +18,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_SETUP], [ int data; }; - void task_expire(struct timer_list *tl) + static void task_expire(struct timer_list *tl) { struct my_task_timer *task_timer = from_timer(task_timer, tl, timer); @@ -31,7 +31,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_SETUP], [ ZFS_LINUX_TEST_SRC([timer_list_function], [ #include - void task_expire(struct timer_list *tl) {} + static void task_expire(struct timer_list *tl) {} ],[ struct timer_list tl; tl.function = task_expire; diff --git a/sys/contrib/openzfs/config/kernel-tmpfile.m4 b/sys/contrib/openzfs/config/kernel-tmpfile.m4 index cc18b8f65a8..7439514186e 100644 --- a/sys/contrib/openzfs/config/kernel-tmpfile.m4 +++ b/sys/contrib/openzfs/config/kernel-tmpfile.m4 @@ -9,7 +9,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_mnt_idmap], [ #include - int tmpfile(struct mnt_idmap *idmap, + static int tmpfile(struct mnt_idmap *idmap, struct inode *inode, struct file *file, umode_t mode) { return 0; } static struct inode_operations @@ -22,7 +22,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_tmpfile], [ #include - int tmpfile(struct user_namespace *userns, + static int tmpfile(struct user_namespace *userns, struct inode *inode, struct file *file, umode_t mode) { return 0; } static struct inode_operations @@ -36,7 +36,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ dnl # ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_dentry_userns], [ #include - int tmpfile(struct user_namespace *userns, + static int tmpfile(struct user_namespace *userns, struct inode *inode, struct dentry *dentry, umode_t mode) { return 0; } static struct inode_operations @@ -46,7 +46,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [ ],[]) ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_dentry], [ #include - int tmpfile(struct inode *inode, struct dentry *dentry, + static int tmpfile(struct inode *inode, struct dentry *dentry, umode_t mode) { return 0; } static struct inode_operations iops __attribute__ ((unused)) = { diff --git a/sys/contrib/openzfs/config/kernel-vfs-direct_IO.m4 b/sys/contrib/openzfs/config/kernel-vfs-direct_IO.m4 index 82583d52fcb..7b7b91f979f 100644 --- a/sys/contrib/openzfs/config/kernel-vfs-direct_IO.m4 +++ b/sys/contrib/openzfs/config/kernel-vfs-direct_IO.m4 @@ -5,7 +5,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iter], [ #include - ssize_t test_direct_IO(struct kiocb *kiocb, + static ssize_t test_direct_IO(struct kiocb *kiocb, struct iov_iter *iter) { return 0; } static const struct address_space_operations @@ -17,7 +17,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iter_offset], [ #include - ssize_t test_direct_IO(struct kiocb *kiocb, + static ssize_t test_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t offset) { return 0; } static const struct address_space_operations @@ -29,7 +29,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iter_rw_offset], [ #include - ssize_t test_direct_IO(int rw, struct kiocb *kiocb, + static ssize_t test_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t offset) { return 0; } static const struct address_space_operations @@ -41,7 +41,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iovec], [ #include - ssize_t test_direct_IO(int rw, struct kiocb *kiocb, + static ssize_t test_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { return 0; } diff --git a/sys/contrib/openzfs/config/kernel-vfs-iterate.m4 b/sys/contrib/openzfs/config/kernel-vfs-iterate.m4 index 172118eac87..2e396daa1c0 100644 --- a/sys/contrib/openzfs/config/kernel-vfs-iterate.m4 +++ b/sys/contrib/openzfs/config/kernel-vfs-iterate.m4 @@ -1,7 +1,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ ZFS_LINUX_TEST_SRC([file_operations_iterate_shared], [ #include - int iterate(struct file *filp, struct dir_context * context) + static int iterate(struct file *filp, struct dir_context * context) { return 0; } static const struct file_operations fops @@ -12,7 +12,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ ZFS_LINUX_TEST_SRC([file_operations_iterate], [ #include - int iterate(struct file *filp, + static int iterate(struct file *filp, struct dir_context *context) { return 0; } static const struct file_operations fops @@ -27,7 +27,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ ZFS_LINUX_TEST_SRC([file_operations_readdir], [ #include - int readdir(struct file *filp, void *entry, + static int readdir(struct file *filp, void *entry, filldir_t func) { return 0; } static const struct file_operations fops diff --git a/sys/contrib/openzfs/config/kernel-vfs-rw-iterate.m4 b/sys/contrib/openzfs/config/kernel-vfs-rw-iterate.m4 index 000353ec15b..cb20ed03099 100644 --- a/sys/contrib/openzfs/config/kernel-vfs-rw-iterate.m4 +++ b/sys/contrib/openzfs/config/kernel-vfs-rw-iterate.m4 @@ -5,9 +5,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE], [ ZFS_LINUX_TEST_SRC([file_operations_rw], [ #include - ssize_t test_read(struct kiocb *kiocb, struct iov_iter *to) + static ssize_t test_read(struct kiocb *kiocb, struct iov_iter *to) { return 0; } - ssize_t test_write(struct kiocb *kiocb, struct iov_iter *from) + static ssize_t test_write(struct kiocb *kiocb, struct iov_iter *from) { return 0; } static const struct file_operations diff --git a/sys/contrib/openzfs/config/kernel-writepage_t.m4 b/sys/contrib/openzfs/config/kernel-writepage_t.m4 index 3a0cffd9857..a82cf370c9d 100644 --- a/sys/contrib/openzfs/config/kernel-writepage_t.m4 +++ b/sys/contrib/openzfs/config/kernel-writepage_t.m4 @@ -6,7 +6,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEPAGE_T], [ dnl # ZFS_LINUX_TEST_SRC([writepage_t_folio], [ #include - int putpage(struct folio *folio, + static int putpage(struct folio *folio, struct writeback_control *wbc, void *data) { return 0; } writepage_t func = putpage; diff --git a/sys/contrib/openzfs/config/kernel-xattr-handler.m4 b/sys/contrib/openzfs/config/kernel-xattr-handler.m4 index 6b8a08dbcc8..32f58c70a50 100644 --- a/sys/contrib/openzfs/config/kernel-xattr-handler.m4 +++ b/sys/contrib/openzfs/config/kernel-xattr-handler.m4 @@ -68,7 +68,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode], [ #include - int get(const struct xattr_handler *handler, + static int get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { return 0; } static const struct xattr_handler @@ -80,7 +80,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_xattr_handler], [ #include - int get(const struct xattr_handler *handler, + static int get(const struct xattr_handler *handler, struct dentry *dentry, const char *name, void *buffer, size_t size) { return 0; } static const struct xattr_handler @@ -92,7 +92,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry], [ #include - int get(struct dentry *dentry, const char *name, + static int get(struct dentry *dentry, const char *name, void *buffer, size_t size, int handler_flags) { return 0; } static const struct xattr_handler @@ -104,7 +104,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode_flags], [ #include - int get(const struct xattr_handler *handler, + static int get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size, int flags) { return 0; } @@ -182,7 +182,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_mnt_idmap], [ #include - int set(const struct xattr_handler *handler, + static int set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, @@ -197,7 +197,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_userns], [ #include - int set(const struct xattr_handler *handler, + static int set(const struct xattr_handler *handler, struct user_namespace *mnt_userns, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, @@ -212,7 +212,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry_inode], [ #include - int set(const struct xattr_handler *handler, + static int set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) @@ -226,7 +226,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_xattr_handler], [ #include - int set(const struct xattr_handler *handler, + static int set(const struct xattr_handler *handler, struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags) { return 0; } @@ -239,7 +239,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry], [ #include - int set(struct dentry *dentry, const char *name, + static int set(struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags, int handler_flags) { return 0; } static const struct xattr_handler @@ -325,7 +325,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ ZFS_LINUX_TEST_SRC([xattr_handler_list_simple], [ #include - bool list(struct dentry *dentry) { return 0; } + static bool list(struct dentry *dentry) { return 0; } static const struct xattr_handler xops __attribute__ ((unused)) = { .list = list, @@ -335,7 +335,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ ZFS_LINUX_TEST_SRC([xattr_handler_list_xattr_handler], [ #include - size_t list(const struct xattr_handler *handler, + static size_t list(const struct xattr_handler *handler, struct dentry *dentry, char *list, size_t list_size, const char *name, size_t name_len) { return 0; } static const struct xattr_handler @@ -347,7 +347,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ ZFS_LINUX_TEST_SRC([xattr_handler_list_dentry], [ #include - size_t list(struct dentry *dentry, + static size_t list(struct dentry *dentry, char *list, size_t list_size, const char *name, size_t name_len, int handler_flags) { return 0; } diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4 index 056517a841f..e3f8645774c 100644 --- a/sys/contrib/openzfs/config/kernel.m4 +++ b/sys/contrib/openzfs/config/kernel.m4 @@ -149,6 +149,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SYSFS ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG + ZFS_AC_KERNEL_SRC_STRLCPY + ZFS_AC_KERNEL_SRC_STRSCPY ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_SRC_ADD_DISK ZFS_AC_KERNEL_SRC_KTHREAD @@ -156,6 +158,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM ZFS_AC_KERNEL_SRC_IDMAP_MNT_API + ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS ZFS_AC_KERNEL_SRC_IATTR_VFSID ZFS_AC_KERNEL_SRC_FILEMAP ZFS_AC_KERNEL_SRC_WRITEPAGE_T @@ -168,6 +171,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE ;; + riscv*) + ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE + ;; esac AC_MSG_CHECKING([for available kernel interfaces]) @@ -291,6 +297,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SYSFS ZFS_AC_KERNEL_SET_SPECIAL_STATE ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG + ZFS_AC_KERNEL_STRLCPY + ZFS_AC_KERNEL_STRSCPY ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_ADD_DISK ZFS_AC_KERNEL_KTHREAD @@ -298,6 +306,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC ZFS_AC_KERNEL_USER_NS_COMMON_INUM ZFS_AC_KERNEL_IDMAP_MNT_API + ZFS_AC_KERNEL_IDMAP_NO_USERNS ZFS_AC_KERNEL_IATTR_VFSID ZFS_AC_KERNEL_FILEMAP ZFS_AC_KERNEL_WRITEPAGE_T @@ -310,6 +319,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_CPU_HAS_FEATURE ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE ;; + riscv*) + ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE + ;; esac ]) diff --git a/sys/contrib/openzfs/config/rpm.am b/sys/contrib/openzfs/config/rpm.am index 13bd54a625b..85c56c0b2e3 100644 --- a/sys/contrib/openzfs/config/rpm.am +++ b/sys/contrib/openzfs/config/rpm.am @@ -83,6 +83,11 @@ srpm-common: rpm-local || exit 1; \ LANG=C $(RPMBUILD) \ --define "_tmppath $$rpmbuild/TMP" \ + --define "_builddir $$rpmbuild/BUILD" \ + --define "_rpmdir $$rpmbuild/RPMS" \ + --define "_srcrpmdir $$rpmbuild/SRPMS" \ + --define "_specdir $$rpmbuild/SPECS" \ + --define "_sourcedir $$rpmbuild/SOURCES" \ --define "_topdir $$rpmbuild" \ $(def) -bs $$rpmbuild/SPECS/$$rpmspec || exit 1; \ cp $$rpmbuild/SRPMS/$$rpmpkg . || exit 1; \ @@ -99,6 +104,11 @@ rpm-common: rpm-local || exit 1; \ LANG=C ${RPMBUILD} \ --define "_tmppath $$rpmbuild/TMP" \ + --define "_builddir $$rpmbuild/BUILD" \ + --define "_rpmdir $$rpmbuild/RPMS" \ + --define "_srcrpmdir $$rpmbuild/SRPMS" \ + --define "_specdir $$rpmbuild/SPECS" \ + --define "_sourcedir $$rpmbuild/SOURCES" \ --define "_topdir $$rpmbuild" \ $(def) --rebuild $$rpmpkg || exit 1; \ cp $$rpmbuild/RPMS/*/* . || exit 1; \ diff --git a/sys/contrib/openzfs/include/libzfs.h b/sys/contrib/openzfs/include/libzfs.h index 4adfa38e87b..770c5e1f201 100644 --- a/sys/contrib/openzfs/include/libzfs.h +++ b/sys/contrib/openzfs/include/libzfs.h @@ -318,6 +318,9 @@ _LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *); _LIBZFS_H int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); _LIBZFS_H int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); +_LIBZFS_H int zpool_vdev_set_removed_state(zpool_handle_t *, uint64_t, + vdev_aux_t); + _LIBZFS_H int zpool_vdev_clear(zpool_handle_t *, uint64_t); _LIBZFS_H nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, diff --git a/sys/contrib/openzfs/include/libzutil.h b/sys/contrib/openzfs/include/libzutil.h index 053b1ed4b52..839486fb62b 100644 --- a/sys/contrib/openzfs/include/libzutil.h +++ b/sys/contrib/openzfs/include/libzutil.h @@ -97,6 +97,7 @@ _LIBZUTIL_H int zpool_find_config(libpc_handle_t *, const char *, nvlist_t **, _LIBZUTIL_H const char * const * zpool_default_search_paths(size_t *count); _LIBZUTIL_H int zpool_read_label(int, nvlist_t **, int *); _LIBZUTIL_H int zpool_label_disk_wait(const char *, int); +_LIBZUTIL_H int zpool_disk_wait(const char *); struct udev_device; @@ -163,6 +164,8 @@ _LIBZUTIL_H void zfs_niceraw(uint64_t, char *, size_t); _LIBZUTIL_H void zpool_dump_ddt(const ddt_stat_t *, const ddt_histogram_t *); _LIBZUTIL_H int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); +_LIBZUTIL_H void fsleep(float sec); +_LIBZUTIL_H int zpool_getenv_int(const char *env, int default_val); struct zfs_cmd; @@ -205,9 +208,65 @@ _LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...); typedef int (*pool_vdev_iter_f)(void *, nvlist_t *, void *); int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func, void *data); +int for_each_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, void *data); +int for_each_real_leaf_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, + void *data); +/* + * Often you'll want to iterate over all the vdevs in the pool, but don't want + * to use for_each_vdev() since it requires a callback function. + * + * Instead you can use FOR_EACH_VDEV(): + * + * zpool_handle_t *zhp // Assume this is initialized + * nvlist_t *nv + * ... + * FOR_EACH_VDEV(zhp, nv) { + * const char *path = NULL; + * nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path); + * printf("Looking at vdev %s\n", path); + * } + * + * Note: FOR_EACH_VDEV runs in O(n^2) time where n = number of vdevs. However, + * there's an upper limit of 256 vdevs per dRAID top-level vdevs (TLDs), 255 for + * raidz2 TLDs, a real world limit of ~500 vdevs for mirrors, so this shouldn't + * really be an issue. + * + * Here are some micro-benchmarks of a complete FOR_EACH_VDEV loop on a RAID0 + * pool: + * + * 100 vdevs = 0.7ms + * 500 vdevs = 17ms + * 750 vdevs = 40ms + * 1000 vdevs = 82ms + * + * The '__nv += 0' at the end of the for() loop gets around a "comma or + * semicolon followed by non-blank" checkstyle error. Note on most compliers + * the '__nv += 0' can just be replaced with 'NULL', but gcc on Centos 7 + * will give a 'warning: statement with no effect' error if you do that. + */ +#define __FOR_EACH_VDEV(__zhp, __nv, __func) { \ + __nv = zpool_get_config(__zhp, NULL); \ + VERIFY0(nvlist_lookup_nvlist(__nv, ZPOOL_CONFIG_VDEV_TREE, &__nv)); \ + } \ + for (nvlist_t *__root_nv = __nv, *__state = (nvlist_t *)0; \ + for_each_vdev_cb(&__state, __root_nv, __func, &__nv) == 1; \ + __nv += 0) + +#define FOR_EACH_VDEV(__zhp, __nv) \ + __FOR_EACH_VDEV(__zhp, __nv, for_each_vdev_macro_helper_func) + +/* + * "real leaf" vdevs are leaf vdevs that are real devices (disks or files). + * This excludes leaf vdevs like like draid spares. + */ +#define FOR_EACH_REAL_LEAF_VDEV(__zhp, __nv) \ + __FOR_EACH_VDEV(__zhp, __nv, for_each_real_leaf_vdev_macro_helper_func) + int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func, void *data); void update_vdevs_config_dev_sysfs_path(nvlist_t *config); +_LIBZUTIL_H void update_vdev_config_dev_sysfs_path(nvlist_t *nv, + const char *path, const char *key); #ifdef __cplusplus } #endif diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h index 77ce75ca3f1..150e50380d8 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/mod_os.h @@ -91,6 +91,12 @@ #define param_set_max_auto_ashift_args(var) \ CTLTYPE_UINT, NULL, 0, param_set_max_auto_ashift, "IU" +#define spa_taskq_read_param_set_args(var) \ + CTLTYPE_STRING, NULL, 0, spa_taskq_read_param, "A" + +#define spa_taskq_write_param_set_args(var) \ + CTLTYPE_STRING, NULL, 0, spa_taskq_write_param, "A" + #define fletcher_4_param_set_args(var) \ CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A" diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h index b71f2f2e562..b9d41903ea6 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h @@ -62,7 +62,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) } static inline void -zfs_uio_advance(zfs_uio_t *uio, size_t size) +zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { zfs_uio_resid(uio) -= size; zfs_uio_offset(uio) += size; diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h index 56a0ac96ac1..24bb03575f3 100644 --- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h +++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_vfsops_os.h @@ -286,7 +286,6 @@ typedef struct zfid_long { extern uint_t zfs_fsyncer_key; extern int zfs_super_owner; -extern int zfs_bclone_enabled; extern void zfs_init(void); extern void zfs_fini(void); diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h index 1e35204932d..ab1711b99f3 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h @@ -42,8 +42,8 @@ /* * Starting from Linux 5.13, flush_dcache_page() becomes an inline function * and under some configurations, may indirectly referencing GPL-only - * cpu_feature_keys on powerpc. Override this function when it is detected - * being GPL-only. + * symbols, e.g., cpu_feature_keys on powerpc and PageHuge on riscv. + * Override this function when it is detected being GPL-only. */ #if defined __powerpc__ && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY #include @@ -53,6 +53,17 @@ clear_bit(PG_dcache_clean, &(page)->flags); \ } while (0) #endif +/* + * For riscv implementation, the use of PageHuge can be safely removed. + * Because it handles pages allocated by HugeTLB, while flush_dcache_page + * in zfs module is only called on kernel pages. + */ +#if defined __riscv && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY +#define flush_dcache_page(page) do { \ + if (test_bit(PG_dcache_clean, &(page)->flags)) \ + clear_bit(PG_dcache_clean, &(page)->flags); \ + } while (0) +#endif /* * 2.6.30 API change, diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_aarch64.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_aarch64.h index 16276b08c75..123a0c72bc6 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_aarch64.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_aarch64.h @@ -71,9 +71,15 @@ #define ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 1, 0) #define ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) +#if (defined(HAVE_KERNEL_NEON) && defined(CONFIG_KERNEL_MODE_NEON)) #define kfpu_allowed() 1 #define kfpu_begin() kernel_neon_begin() #define kfpu_end() kernel_neon_end() +#else +#define kfpu_allowed() 0 +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) +#endif #define kfpu_init() (0) #define kfpu_fini() do {} while (0) diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_arm.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_arm.h index c432a6d4abd..bc70eaef307 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_arm.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/simd_arm.h @@ -53,9 +53,15 @@ #include #include +#if (defined(HAVE_KERNEL_NEON) && defined(CONFIG_KERNEL_MODE_NEON)) #define kfpu_allowed() 1 #define kfpu_begin() kernel_neon_begin() #define kfpu_end() kernel_neon_end() +#else +#define kfpu_allowed() 0 +#define kfpu_begin() do {} while (0) +#define kfpu_end() do {} while (0) +#endif #define kfpu_init() (0) #define kfpu_fini() do {} while (0) diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/cred.h b/sys/contrib/openzfs/include/os/linux/spl/sys/cred.h index 7fd5f644863..c19c3c0719f 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/cred.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/cred.h @@ -73,13 +73,25 @@ static inline struct user_namespace *zfs_i_user_ns(struct inode *inode) static inline boolean_t zfs_no_idmapping(struct user_namespace *mnt_userns, struct user_namespace *fs_userns) { - return (zfs_is_init_userns(mnt_userns) || mnt_userns == fs_userns); + return (zfs_is_init_userns(mnt_userns) || + mnt_userns == fs_userns); } static inline uid_t zfs_uid_to_vfsuid(zidmap_t *mnt_userns, struct user_namespace *fs_userns, uid_t uid) { - struct user_namespace *owner = idmap_owner(mnt_userns); + struct user_namespace *owner; +#ifdef HAVE_IOPS_CREATE_IDMAP + if (mnt_userns == zfs_init_idmap) + return (uid); +#endif +#ifdef HAVE_IDMAP_NO_USERNS + struct user_namespace ns; + ns.uid_map = mnt_userns->uid_map; + owner = &ns; +#else + owner = idmap_owner(mnt_userns); +#endif if (zfs_no_idmapping(owner, fs_userns)) return (uid); if (!zfs_is_init_userns(fs_userns)) @@ -92,7 +104,18 @@ static inline uid_t zfs_uid_to_vfsuid(zidmap_t *mnt_userns, static inline gid_t zfs_gid_to_vfsgid(zidmap_t *mnt_userns, struct user_namespace *fs_userns, gid_t gid) { - struct user_namespace *owner = idmap_owner(mnt_userns); + struct user_namespace *owner; +#ifdef HAVE_IOPS_CREATE_IDMAP + if (mnt_userns == zfs_init_idmap) + return (gid); +#endif +#ifdef HAVE_IDMAP_NO_USERNS + struct user_namespace ns; + ns.gid_map = mnt_userns->gid_map; + owner = &ns; +#else + owner = idmap_owner(mnt_userns); +#endif if (zfs_no_idmapping(owner, fs_userns)) return (gid); if (!zfs_is_init_userns(fs_userns)) @@ -105,7 +128,18 @@ static inline gid_t zfs_gid_to_vfsgid(zidmap_t *mnt_userns, static inline uid_t zfs_vfsuid_to_uid(zidmap_t *mnt_userns, struct user_namespace *fs_userns, uid_t uid) { - struct user_namespace *owner = idmap_owner(mnt_userns); + struct user_namespace *owner; +#ifdef HAVE_IOPS_CREATE_IDMAP + if (mnt_userns == zfs_init_idmap) + return (uid); +#endif +#ifdef HAVE_IDMAP_NO_USERNS + struct user_namespace ns; + ns.uid_map = mnt_userns->uid_map; + owner = &ns; +#else + owner = idmap_owner(mnt_userns); +#endif if (zfs_no_idmapping(owner, fs_userns)) return (uid); uid = from_kuid(owner, KUIDT_INIT(uid)); @@ -119,7 +153,18 @@ static inline uid_t zfs_vfsuid_to_uid(zidmap_t *mnt_userns, static inline gid_t zfs_vfsgid_to_gid(zidmap_t *mnt_userns, struct user_namespace *fs_userns, gid_t gid) { - struct user_namespace *owner = idmap_owner(mnt_userns); + struct user_namespace *owner; +#ifdef HAVE_IOPS_CREATE_IDMAP + if (mnt_userns == zfs_init_idmap) + return (gid); +#endif +#ifdef HAVE_IDMAP_NO_USERNS + struct user_namespace ns; + ns.gid_map = mnt_userns->gid_map; + owner = &ns; +#else + owner = idmap_owner(mnt_userns); +#endif if (zfs_no_idmapping(owner, fs_userns)) return (gid); gid = from_kgid(owner, KGIDT_INIT(gid)); diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h index 82d50b6034c..b159bb52d11 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h @@ -70,8 +70,6 @@ typedef enum kmem_cbrc { #define KMC_REAP_CHUNK INT_MAX #define KMC_DEFAULT_SEEKS 1 -#define KMC_RECLAIM_ONCE 0x1 /* Force a single shrinker pass */ - extern struct list_head spl_kmem_cache_list; extern struct rw_semaphore spl_kmem_cache_sem; diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/shrinker.h b/sys/contrib/openzfs/include/os/linux/spl/sys/shrinker.h index d472754be4f..bca4c850694 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/shrinker.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/shrinker.h @@ -29,12 +29,13 @@ /* * Due to frequent changes in the shrinker API the following - * compatibility wrappers should be used. They are as follows: + * compatibility wrapper should be used. * - * SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost); + * shrinker = spl_register_shrinker(name, countfunc, scanfunc, seek_cost); + * spl_unregister_shrinker(shrinker); * - * SPL_SHRINKER_DECLARE is used to declare a shrinker with the name varname, - * which is passed to spl_register_shrinker()/spl_unregister_shrinker(). + * spl_register_shrinker is used to create and register a shrinker with the + * given name. * The countfunc returns the number of free-able objects. * The scanfunc returns the number of objects that were freed. * The callbacks can return SHRINK_STOP if further calls can't make any more @@ -57,57 +58,28 @@ * ...scan objects in the cache and reclaim them... * } * - * SPL_SHRINKER_DECLARE(my_shrinker, my_count, my_scan, DEFAULT_SEEKS); + * static struct shrinker *my_shrinker; * * void my_init_func(void) { - * spl_register_shrinker(&my_shrinker); + * my_shrinker = spl_register_shrinker("my-shrinker", + * my_count, my_scan, DEFAULT_SEEKS); + * } + * + * void my_fini_func(void) { + * spl_unregister_shrinker(my_shrinker); * } */ -#ifdef HAVE_REGISTER_SHRINKER_VARARG -#define spl_register_shrinker(x) register_shrinker(x, "zfs-arc-shrinker") -#else -#define spl_register_shrinker(x) register_shrinker(x) -#endif -#define spl_unregister_shrinker(x) unregister_shrinker(x) +typedef unsigned long (*spl_shrinker_cb) + (struct shrinker *, struct shrink_control *); -/* - * Linux 3.0 to 3.11 Shrinker API Compatibility. - */ -#if defined(HAVE_SINGLE_SHRINKER_CALLBACK) -#define SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost) \ -static int \ -__ ## varname ## _wrapper(struct shrinker *shrink, struct shrink_control *sc)\ -{ \ - if (sc->nr_to_scan != 0) { \ - (void) scanfunc(shrink, sc); \ - } \ - return (countfunc(shrink, sc)); \ -} \ - \ -static struct shrinker varname = { \ - .shrink = __ ## varname ## _wrapper, \ - .seeks = seek_cost, \ -} +struct shrinker *spl_register_shrinker(const char *name, + spl_shrinker_cb countfunc, spl_shrinker_cb scanfunc, int seek_cost); +void spl_unregister_shrinker(struct shrinker *); +#ifndef SHRINK_STOP +/* 3.0-3.11 compatibility */ #define SHRINK_STOP (-1) - -/* - * Linux 3.12 and later Shrinker API Compatibility. - */ -#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) -#define SPL_SHRINKER_DECLARE(varname, countfunc, scanfunc, seek_cost) \ -static struct shrinker varname = { \ - .count_objects = countfunc, \ - .scan_objects = scanfunc, \ - .seeks = seek_cost, \ -} - -#else -/* - * Linux 2.x to 2.6.22, or a newer shrinker API has been introduced. - */ -#error "Unknown shrinker callback" #endif #endif /* SPL_SHRINKER_H */ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/string.h b/sys/contrib/openzfs/include/os/linux/spl/sys/string.h index 38134dcf4c7..f44bf23eb32 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/string.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/string.h @@ -1 +1,50 @@ +/* + * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * UCRL-CODE-235197 + * + * This file is part of the SPL, Solaris Porting Layer. + * + * The SPL is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * The SPL is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with the SPL. If not, see . + */ + +#ifndef _SPL_STRING_H +#define _SPL_STRING_H + #include + +/* Fallbacks for kernel missing strlcpy */ +#ifndef HAVE_KERNEL_STRLCPY + +#if defined(HAVE_KERNEL_STRSCPY) +/* + * strscpy is strlcpy, but returns an error on truncation. strlcpy is defined + * to return strlen(src), so detect error and override it. + */ +static inline size_t +strlcpy(char *dest, const char *src, size_t size) +{ + ssize_t ret = strscpy(dest, src, size); + if (likely(ret > 0)) + return ((size_t)ret); + return (strlen(src)); +} +#else +#error "no strlcpy fallback available" +#endif + +#endif /* HAVE_KERNEL_STRLCPY */ + +#endif /* _SPL_STRING_H */ diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/types.h b/sys/contrib/openzfs/include/os/linux/spl/sys/types.h index d89a91c36f9..20ba457f7ef 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/types.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/types.h @@ -57,12 +57,23 @@ typedef int minor_t; struct user_namespace; #ifdef HAVE_IOPS_CREATE_IDMAP #include +#ifdef HAVE_IDMAP_NO_USERNS +#include +struct mnt_idmap { + struct uid_gid_map uid_map; + struct uid_gid_map gid_map; + refcount_t count; +}; +typedef struct mnt_idmap zidmap_t; +#define idmap_owner(p) (NULL) +#else struct mnt_idmap { struct user_namespace *owner; refcount_t count; }; typedef struct mnt_idmap zidmap_t; #define idmap_owner(p) (((struct mnt_idmap *)p)->owner) +#endif #else typedef struct user_namespace zidmap_t; #define idmap_owner(p) ((struct user_namespace *)p) diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h index a4b600004c9..5e6ea8d3c22 100644 --- a/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h +++ b/sys/contrib/openzfs/include/os/linux/spl/sys/uio.h @@ -95,7 +95,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) } static inline void -zfs_uio_advance(zfs_uio_t *uio, size_t size) +zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { uio->uio_resid -= size; uio->uio_loffset += size; diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h index 22046655025..b4d5db21f5e 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h @@ -45,8 +45,6 @@ extern "C" { typedef struct zfsvfs zfsvfs_t; struct znode; -extern int zfs_bclone_enabled; - /* * This structure emulates the vfs_t from other platforms. It's purpose * is to facilitate the handling of mount options and minimize structural diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h index 9b729be6d74..91a4751fffb 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h @@ -273,5 +273,25 @@ extern long zpl_ioctl_fideduperange(struct file *filp, void *arg); #else #define zpl_inode_set_ctime_to_ts(ip, ts) (ip->i_ctime = ts) #endif +#ifdef HAVE_INODE_GET_ATIME +#define zpl_inode_get_atime(ip) inode_get_atime(ip) +#else +#define zpl_inode_get_atime(ip) (ip->i_atime) +#endif +#ifdef HAVE_INODE_SET_ATIME_TO_TS +#define zpl_inode_set_atime_to_ts(ip, ts) inode_set_atime_to_ts(ip, ts) +#else +#define zpl_inode_set_atime_to_ts(ip, ts) (ip->i_atime = ts) +#endif +#ifdef HAVE_INODE_GET_MTIME +#define zpl_inode_get_mtime(ip) inode_get_mtime(ip) +#else +#define zpl_inode_get_mtime(ip) (ip->i_mtime) +#endif +#ifdef HAVE_INODE_SET_MTIME_TO_TS +#define zpl_inode_set_mtime_to_ts(ip, ts) inode_set_mtime_to_ts(ip, ts) +#else +#define zpl_inode_set_mtime_to_ts(ip, ts) (ip->i_mtime = ts) +#endif #endif /* _SYS_ZPL_H */ diff --git a/sys/contrib/openzfs/include/sys/dataset_kstats.h b/sys/contrib/openzfs/include/sys/dataset_kstats.h index 40cf5258a2e..c81a07f0c11 100644 --- a/sys/contrib/openzfs/include/sys/dataset_kstats.h +++ b/sys/contrib/openzfs/include/sys/dataset_kstats.h @@ -71,6 +71,7 @@ typedef struct dataset_kstats { int dataset_kstats_create(dataset_kstats_t *, objset_t *); void dataset_kstats_destroy(dataset_kstats_t *); +void dataset_kstats_rename(dataset_kstats_t *dk, const char *); void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t); void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t); diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h index 1800a7e31da..f2a1535c916 100644 --- a/sys/contrib/openzfs/include/sys/dbuf.h +++ b/sys/contrib/openzfs/include/sys/dbuf.h @@ -379,8 +379,8 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level, int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); -void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx); -void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail); +boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed); void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx); dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, diff --git a/sys/contrib/openzfs/include/sys/dsl_crypt.h b/sys/contrib/openzfs/include/sys/dsl_crypt.h index 72716e296c9..fbcae371535 100644 --- a/sys/contrib/openzfs/include/sys/dsl_crypt.h +++ b/sys/contrib/openzfs/include/sys/dsl_crypt.h @@ -206,6 +206,7 @@ void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, dmu_tx_t *tx); int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, boolean_t *will_encrypt); +boolean_t dmu_objset_crypto_key_equal(objset_t *osa, objset_t *osb); void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h index cdf65c37133..c7ecd3d0ccd 100644 --- a/sys/contrib/openzfs/include/sys/spa_impl.h +++ b/sys/contrib/openzfs/include/sys/spa_impl.h @@ -266,6 +266,7 @@ struct spa { spa_aux_vdev_t spa_spares; /* hot spares */ spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */ + boolean_t spa_aux_sync_uber; /* need to sync aux uber */ nvlist_t *spa_label_features; /* Features for reading MOS */ uint64_t spa_config_object; /* MOS object for pool config */ uint64_t spa_config_generation; /* config generation number */ diff --git a/sys/contrib/openzfs/include/sys/zfs_vnops.h b/sys/contrib/openzfs/include/sys/zfs_vnops.h index 5da103f1778..e60b99bed19 100644 --- a/sys/contrib/openzfs/include/sys/zfs_vnops.h +++ b/sys/contrib/openzfs/include/sys/zfs_vnops.h @@ -24,8 +24,11 @@ #ifndef _SYS_FS_ZFS_VNOPS_H #define _SYS_FS_ZFS_VNOPS_H + #include +extern int zfs_bclone_enabled; + extern int zfs_fsync(znode_t *, int, cred_t *); extern int zfs_read(znode_t *, zfs_uio_t *, int, cred_t *); extern int zfs_write(znode_t *, zfs_uio_t *, int, cred_t *); diff --git a/sys/contrib/openzfs/lib/libspl/include/assert.h b/sys/contrib/openzfs/lib/libspl/include/assert.h index c5bf0f0cc8f..af4957dfbaa 100644 --- a/sys/contrib/openzfs/lib/libspl/include/assert.h +++ b/sys/contrib/openzfs/lib/libspl/include/assert.h @@ -64,6 +64,9 @@ libspl_assert(const char *buf, const char *file, const char *func, int line) #undef verify #endif +#define PANIC(fmt, a...) \ + libspl_assertf(__FILE__, __FUNCTION__, __LINE__, fmt, ## a) + #define VERIFY(cond) \ (void) ((!(cond)) && \ libspl_assert(#cond, __FILE__, __FUNCTION__, __LINE__)) diff --git a/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/stat.h b/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/stat.h index 88773cceb95..af488244bd0 100644 --- a/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/stat.h +++ b/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/stat.h @@ -76,8 +76,12 @@ fstat64_blk(int fd, struct stat64 *st) /* * Only Intel-based Macs have a separate stat64; Arm-based Macs are like * FreeBSD and have a full 64-bit stat from the start. + * + * On Linux, musl libc is full 64-bit too and has deprecated its own version + * of these defines since version 1.2.4. */ -#if defined(__APPLE__) && !(defined(__i386__) || defined(__x86_64__)) +#if (defined(__APPLE__) && !(defined(__i386__) || defined(__x86_64__))) || \ + (defined(__linux__) && !defined(__GLIBC__)) #define stat64 stat #define fstat64 fstat #endif diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h index e9e21819d4f..665bfc42301 100644 --- a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h +++ b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h @@ -90,7 +90,7 @@ zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len) } static inline void -zfs_uio_advance(zfs_uio_t *uio, size_t size) +zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { uio->uio_resid -= size; uio->uio_loffset += size; diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi index 2d612a16b22..9bb8f6a47de 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi +++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi @@ -179,6 +179,7 @@ + @@ -260,6 +261,7 @@ + @@ -465,6 +467,7 @@ + @@ -496,6 +499,7 @@ + @@ -566,6 +570,7 @@ + @@ -1401,8 +1406,6 @@ - - @@ -6354,6 +6357,12 @@ + + + + + + @@ -7587,6 +7596,12 @@ + + + + + + @@ -7596,6 +7611,10 @@ + + + + @@ -7713,6 +7732,11 @@ + + + + + @@ -7880,6 +7904,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -8069,12 +8124,6 @@ - - - - - - @@ -8101,11 +8150,6 @@ - - - - - @@ -8190,10 +8234,6 @@ - - - - @@ -8329,6 +8369,16 @@ + + + + + + + + + + @@ -8348,6 +8398,9 @@ + + + @@ -8621,6 +8674,7 @@ + @@ -8658,11 +8712,27 @@ + + + + + + + + + + + + + + + + diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_import.c b/sys/contrib/openzfs/lib/libzfs/libzfs_import.c index 2a7c5a76a0a..e2d40a7b3bf 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_import.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_import.c @@ -291,10 +291,8 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr, *inuse = B_FALSE; - if (zpool_read_label(fd, &config, NULL) != 0) { - (void) no_memory(hdl); + if (zpool_read_label(fd, &config, NULL) != 0) return (-1); - } if (config == NULL) return (0); diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c index 4ebd112f452..2f9ccbc2ab5 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c @@ -3036,6 +3036,9 @@ zpool_vdev_is_interior(const char *name) return (B_FALSE); } +/* + * Lookup the nvlist for a given vdev. + */ nvlist_t * zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) @@ -3043,6 +3046,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, char *end; nvlist_t *nvroot, *search, *ret; uint64_t guid; + boolean_t __avail_spare, __l2cache, __log; search = fnvlist_alloc(); @@ -3058,6 +3062,18 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, nvroot = fnvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE); + /* + * User can pass NULL for avail_spare, l2cache, and log, but + * we still need to provide variables to vdev_to_nvlist_iter(), so + * just point them to junk variables here. + */ + if (!avail_spare) + avail_spare = &__avail_spare; + if (!l2cache) + l2cache = &__l2cache; + if (!log) + log = &__log; + *avail_spare = B_FALSE; *l2cache = B_FALSE; if (log != NULL) @@ -3313,21 +3329,23 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) } /* - * Mark the given vdev degraded. + * Generic set vdev state function */ -int -zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +static int +zpool_vdev_set_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux, + vdev_state_t state) { zfs_cmd_t zc = {"\0"}; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid); + dgettext(TEXT_DOMAIN, "cannot set %s %llu"), + zpool_state_to_name(state, aux), (u_longlong_t)guid); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; - zc.zc_cookie = VDEV_STATE_DEGRADED; + zc.zc_cookie = state; zc.zc_obj = aux; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) @@ -3336,6 +3354,27 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) return (zpool_standard_error(hdl, errno, errbuf)); } +/* + * Mark the given vdev degraded. + */ +int +zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_DEGRADED)); +} + +/* + * Mark the given vdev as in a removed state (as if the device does not exist). + * + * This is different than zpool_vdev_remove() which does a removal of a device + * from the pool (but the device does exist). + */ +int +zpool_vdev_set_removed_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_REMOVED)); +} + /* * Returns TRUE if the given nvlist is a vdev that was originally swapped in as * a hot spare. diff --git a/sys/contrib/openzfs/lib/libzutil/os/freebsd/zutil_import_os.c b/sys/contrib/openzfs/lib/libzutil/os/freebsd/zutil_import_os.c index 19ba58e79a0..049710d3985 100644 --- a/sys/contrib/openzfs/lib/libzutil/os/freebsd/zutil_import_os.c +++ b/sys/contrib/openzfs/lib/libzutil/os/freebsd/zutil_import_os.c @@ -249,8 +249,25 @@ zfs_dev_flush(int fd) return (0); } +void +update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, + const char *key) +{ + (void) nv; + (void) path; + (void) key; +} + void update_vdevs_config_dev_sysfs_path(nvlist_t *config) { (void) config; } + +int +zpool_disk_wait(const char *path) +{ + + (void) path; + return (ENOTSUP); +} diff --git a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c index 44ed697dd49..bb91dec5acf 100644 --- a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c +++ b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_import_os.c @@ -170,25 +170,17 @@ zpool_open_func(void *arg) if (rn->rn_labelpaths) { const char *path = NULL; const char *devid = NULL; - const char *env = NULL; rdsk_node_t *slice; avl_index_t where; - int timeout; int error; if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) return; - env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS"); - if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 || - timeout < 0) { - timeout = DISK_LABEL_WAIT; - } - /* * Allow devlinks to stabilize so all paths are available. */ - zpool_label_disk_wait(rn->rn_name, timeout); + zpool_disk_wait(rn->rn_name); if (path != NULL) { slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); @@ -682,6 +674,20 @@ zpool_label_disk_wait(const char *path, int timeout_ms) #endif /* HAVE_LIBUDEV */ } +/* + * Simplified version of zpool_label_disk_wait() where we wait for a device + * to appear using the default timeouts. + */ +int +zpool_disk_wait(const char *path) +{ + int timeout; + timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS", + DISK_LABEL_WAIT); + + return (zpool_label_disk_wait(path, timeout)); +} + /* * Encode the persistent devices strings * used for the vdev disk label @@ -766,20 +772,37 @@ no_dev: * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it * in the nvlist * (if applicable). Like: * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' + * + * If an old path was in the nvlist, and the rescan can not find a new path, + * then keep the old path, since the disk may have been removed. + * + * path: The vdev path (value from ZPOOL_CONFIG_PATH) + * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH) */ -static void -update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path) +void +update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, + const char *key) { char *upath, *spath; + const char *oldpath = NULL; + + (void) nvlist_lookup_string(nv, key, &oldpath); /* Add enclosure sysfs path (if disk is in an enclosure). */ upath = zfs_get_underlying_path(path); spath = zfs_get_enclosure_sysfs_path(upath); if (spath) { - nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath); + (void) nvlist_add_string(nv, key, spath); } else { - nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + /* + * We couldn't dynamically scan the disk's enclosure sysfs path. + * This could be because the disk went away. If there's an old + * enclosure sysfs path in the nvlist, then keep using it. + */ + if (!oldpath) { + (void) nvlist_remove_all(nv, key); + } } free(upath); @@ -799,7 +822,8 @@ sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data) return (1); /* Rescan our enclosure sysfs path for this vdev */ - update_vdev_config_dev_sysfs_path(nv, path); + update_vdev_config_dev_sysfs_path(nv, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); return (0); } @@ -888,7 +912,8 @@ update_vdev_config_dev_strs(nvlist_t *nv) (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, vds.vds_devphys); } - update_vdev_config_dev_sysfs_path(nv, path); + update_vdev_config_dev_sysfs_path(nv, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); } else { /* Clear out any stale entries. */ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); diff --git a/sys/contrib/openzfs/lib/libzutil/zutil_import.c b/sys/contrib/openzfs/lib/libzutil/zutil_import.c index 19d8a474281..eb913119045 100644 --- a/sys/contrib/openzfs/lib/libzutil/zutil_import.c +++ b/sys/contrib/openzfs/lib/libzutil/zutil_import.c @@ -1056,10 +1056,21 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels) case EINVAL: break; case EINPROGRESS: - // This shouldn't be possible to - // encounter, die if we do. + /* + * This shouldn't be possible to + * encounter, die if we do. + */ ASSERT(B_FALSE); zfs_fallthrough; + case EREMOTEIO: + /* + * May be returned by an NVMe device + * which is visible in /dev/ but due + * to a low-level format change, or + * other error, needs to be rescanned. + * Try the slow method. + */ + zfs_fallthrough; case EOPNOTSUPP: case ENOSYS: do_slow = B_TRUE; @@ -1210,13 +1221,26 @@ label_paths(libpc_handle_t *hdl, nvlist_t *label, const char **path, nvlist_t *nvroot; uint64_t pool_guid; uint64_t vdev_guid; + uint64_t state; *path = NULL; *devid = NULL; + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &vdev_guid) != 0) + return (ENOENT); + + /* + * In case of spare or l2cache, we directly return path/devid from the + * label. + */ + if (!(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state)) && + (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE)) { + (void) nvlist_lookup_string(label, ZPOOL_CONFIG_PATH, path); + (void) nvlist_lookup_string(label, ZPOOL_CONFIG_DEVID, devid); + return (0); + } if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || - nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, &pool_guid) || - nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &vdev_guid)) + nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) return (ENOENT); return (label_paths_impl(hdl, nvroot, pool_guid, vdev_guid, path, @@ -1898,6 +1922,104 @@ zpool_find_config(libpc_handle_t *hdl, const char *target, nvlist_t **configp, return (0); } +/* Return if a vdev is a leaf vdev. Note: draid spares are leaf vdevs. */ +static boolean_t +vdev_is_leaf(nvlist_t *nv) +{ + uint_t children = 0; + nvlist_t **child; + + (void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children); + + return (children == 0); +} + +/* Return if a vdev is a leaf vdev and a real device (disk or file) */ +static boolean_t +vdev_is_real_leaf(nvlist_t *nv) +{ + const char *type = NULL; + if (!vdev_is_leaf(nv)) + return (B_FALSE); + + (void) nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type); + if ((strcmp(type, VDEV_TYPE_DISK) == 0) || + (strcmp(type, VDEV_TYPE_FILE) == 0)) { + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * This function is called by our FOR_EACH_VDEV() macros. + * + * state: State machine status (stored inside of a (nvlist_t *)) + * nv: The current vdev nvlist_t we are iterating over. + * last_nv: The previous vdev nvlist_t we returned to the user in + * the last iteration of FOR_EACH_VDEV(). We use it + * to find the next vdev nvlist_t we should return. + * real_leaves_only: Only return leaf vdevs. + * + * Returns 1 if we found the next vdev nvlist_t for this iteration. 0 if + * we're still searching for it. + */ +static int +__for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv, + boolean_t real_leaves_only) +{ + enum {FIRST_NV = 0, NEXT_IS_MATCH = 1, STOP_LOOKING = 2}; + + /* The very first entry in the NV list is a special case */ + if (*((nvlist_t **)state) == (nvlist_t *)FIRST_NV) { + if (real_leaves_only && !vdev_is_real_leaf(nv)) + return (0); + + *((nvlist_t **)last_nv) = nv; + *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING; + return (1); + } + + /* + * We came across our last_nv, meaning the next one is the one we + * want + */ + if (nv == *((nvlist_t **)last_nv)) { + /* Next iteration of this function will return the nvlist_t */ + *((nvlist_t **)state) = (nvlist_t *)NEXT_IS_MATCH; + return (0); + } + + /* + * We marked NEXT_IS_MATCH on the previous iteration, so this is the one + * we want. + */ + if (*(nvlist_t **)state == (nvlist_t *)NEXT_IS_MATCH) { + if (real_leaves_only && !vdev_is_real_leaf(nv)) + return (0); + + *((nvlist_t **)last_nv) = nv; + *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING; + return (1); + } + + return (0); +} + +int +for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv) +{ + return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_FALSE)); +} + +int +for_each_real_leaf_vdev_macro_helper_func(void *state, nvlist_t *nv, + void *last_nv) +{ + return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_TRUE)); +} + /* * Internal function for iterating over the vdevs. * diff --git a/sys/contrib/openzfs/lib/libzutil/zutil_pool.c b/sys/contrib/openzfs/lib/libzutil/zutil_pool.c index 288a0033cd1..86460de3fc6 100644 --- a/sys/contrib/openzfs/lib/libzutil/zutil_pool.c +++ b/sys/contrib/openzfs/lib/libzutil/zutil_pool.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -144,3 +145,33 @@ zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover, *leftover = bytes_read; return (0); } + +/* + * Floating point sleep(). Allows you to pass in a floating point value for + * seconds. + */ +void +fsleep(float sec) +{ + struct timespec req; + req.tv_sec = floor(sec); + req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC; + nanosleep(&req, NULL); +} + +/* + * Get environment variable 'env' and return it as an integer. + * If 'env' is not set, then return 'default_val' instead. + */ +int +zpool_getenv_int(const char *env, int default_val) +{ + char *str; + int val; + str = getenv(env); + if ((str == NULL) || sscanf(str, "%d", &val) != 1 || + val < 0) { + val = default_val; + } + return (val); +} diff --git a/sys/contrib/openzfs/man/man4/spl.4 b/sys/contrib/openzfs/man/man4/spl.4 index 82455fb5325..414a9239485 100644 --- a/sys/contrib/openzfs/man/man4/spl.4 +++ b/sys/contrib/openzfs/man/man4/spl.4 @@ -31,14 +31,6 @@ for use by the kmem caches. For the majority of systems and workloads only a small number of threads are required. . -.It Sy spl_kmem_cache_reclaim Ns = Ns Sy 0 Pq uint -When this is set it prevents Linux from being able to rapidly reclaim all the -memory held by the kmem caches. -This may be useful in circumstances where it's preferable that Linux -reclaim memory from some other subsystem first. -Setting this will increase the likelihood out of memory events on a memory -constrained system. -. .It Sy spl_kmem_cache_obj_per_slab Ns = Ns Sy 8 Pq uint The preferred number of objects per slab in the cache. In general, a larger value will increase the caches memory footprint diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index 4ec52a2fb65..352990e02da 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -1142,6 +1142,15 @@ Enable the experimental block cloning feature. If this setting is 0, then even if feature@block_cloning is enabled, attempts to clone blocks will act as though the feature is disabled. . +.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 0 Ns | Ns 1 Pq int +When set to 1 the FICLONE and FICLONERANGE ioctls wait for dirty data to be +written to disk. +This allows the clone operation to reliably succeed when a file is +modified and then immediately cloned. +For small files this may be slower than making a copy of the file. +Therefore, this setting defaults to 0 which causes a clone operation to +immediately fail when encountering a dirty block. +. .It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string Select a BLAKE3 implementation. .Pp @@ -2280,6 +2289,16 @@ If .Sy 0 , generate a system-dependent value close to 6 threads per taskq. . +.It Sy zio_taskq_read Ns = Ns Sy fixed,1,8 null scale null Pq charp +Set the queue and thread configuration for the IO read queues. +This is an advanced debugging parameter. +Don't change this unless you understand what it does. +. +.It Sy zio_taskq_write Ns = Ns Sy batch fixed,1,5 scale fixed,1,5 Pq charp +Set the queue and thread configuration for the IO write queues. +This is an advanced debugging parameter. +Don't change this unless you understand what it does. +. .It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint Do not create zvol device nodes. This may slightly improve startup time on diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7 index 8ca4bd927b2..8456a9aa764 100644 --- a/sys/contrib/openzfs/man/man7/zpool-features.7 +++ b/sys/contrib/openzfs/man/man7/zpool-features.7 @@ -364,9 +364,12 @@ When this feature is enabled ZFS will use block cloning for operations like Block cloning allows to create multiple references to a single block. It is much faster than copying the data (as the actual data is neither read nor written) and takes no additional space. -Blocks can be cloned across datasets under some conditions (like disabled -encryption and equal -.Nm recordsize ) . +Blocks can be cloned across datasets under some conditions (like equal +.Nm recordsize , +the same master encryption key, etc.). +ZFS tries its best to clone across datasets including encrypted ones. +This is limited for various (nontrivial) reasons depending on the OS +and/or ZFS internals. .Pp This feature becomes .Sy active diff --git a/sys/contrib/openzfs/man/man8/zfs-list.8 b/sys/contrib/openzfs/man/man8/zfs-list.8 index 9f6a73ab956..85bd3fbafce 100644 --- a/sys/contrib/openzfs/man/man8/zfs-list.8 +++ b/sys/contrib/openzfs/man/man8/zfs-list.8 @@ -29,7 +29,7 @@ .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. .\" -.Dd March 16, 2022 +.Dd February 8, 2024 .Dt ZFS-LIST 8 .Os . @@ -155,6 +155,15 @@ or For example, specifying .Fl t Sy snapshot displays only snapshots. +.Sy fs , +.Sy snap , +or +.Sy vol +can be used as aliases for +.Sy filesystem , +.Sy snapshot , +or +.Sy volume . .El . .Sh EXAMPLES diff --git a/sys/contrib/openzfs/man/man8/zpool-clear.8 b/sys/contrib/openzfs/man/man8/zpool-clear.8 index 7b9d40c74eb..c61ecae483a 100644 --- a/sys/contrib/openzfs/man/man8/zpool-clear.8 +++ b/sys/contrib/openzfs/man/man8/zpool-clear.8 @@ -36,6 +36,7 @@ .Sh SYNOPSIS .Nm zpool .Cm clear +.Op Fl -power .Ar pool .Oo Ar device Oc Ns … . @@ -52,6 +53,16 @@ Pools with enabled which have been suspended cannot be resumed. While the pool was suspended, it may have been imported on another host, and resuming I/O could result in pool damage. +.Bl -tag -width Ds +.It Fl -power +Power on the devices's slot in the storage enclosure and wait for the device +to show up before attempting to clear errors. +This is done on all the devices specified. +Alternatively, you can set the +.Sy ZPOOL_AUTO_POWER_ON_SLOT +environment variable to always enable this behavior. +Note: This flag currently works on Linux only. +.El . .Sh SEE ALSO .Xr zdb 8 , diff --git a/sys/contrib/openzfs/man/man8/zpool-iostat.8 b/sys/contrib/openzfs/man/man8/zpool-iostat.8 index 34f7243d5aa..e1d2a4b4ff1 100644 --- a/sys/contrib/openzfs/man/man8/zpool-iostat.8 +++ b/sys/contrib/openzfs/man/man8/zpool-iostat.8 @@ -146,7 +146,7 @@ Specify .Sy u for a printed representation of the internal representation of time. See -.Xr time 2 . +.Xr time 1 . Specify .Sy d for standard date format. diff --git a/sys/contrib/openzfs/man/man8/zpool-list.8 b/sys/contrib/openzfs/man/man8/zpool-list.8 index 9e905d52ddd..c60c47f5eb3 100644 --- a/sys/contrib/openzfs/man/man8/zpool-list.8 +++ b/sys/contrib/openzfs/man/man8/zpool-list.8 @@ -95,7 +95,7 @@ Specify .Sy u for a printed representation of the internal representation of time. See -.Xr time 2 . +.Xr time 1 . Specify .Sy d for standard date format. diff --git a/sys/contrib/openzfs/man/man8/zpool-offline.8 b/sys/contrib/openzfs/man/man8/zpool-offline.8 index edcf1d06ab6..1b6095d6370 100644 --- a/sys/contrib/openzfs/man/man8/zpool-offline.8 +++ b/sys/contrib/openzfs/man/man8/zpool-offline.8 @@ -36,12 +36,13 @@ .Sh SYNOPSIS .Nm zpool .Cm offline -.Op Fl ft +.Op Fl Sy -power Ns | Ns Op Fl Sy ft .Ar pool .Ar device Ns … .Nm zpool .Cm online -.Op Fl e +.Op Fl Sy -power +.Op Fl Sy e .Ar pool .Ar device Ns … . @@ -50,7 +51,7 @@ .It Xo .Nm zpool .Cm offline -.Op Fl ft +.Op Fl Sy -power Ns | Ns Op Fl Sy ft .Ar pool .Ar device Ns … .Xc @@ -60,6 +61,9 @@ While the is offline, no attempt is made to read or write to the device. This command is not applicable to spares. .Bl -tag -width Ds +.It Fl -power +Power off the device's slot in the storage enclosure. +This flag currently works on Linux only .It Fl f Force fault. Instead of offlining the disk, put it into a faulted state. @@ -73,6 +77,7 @@ Upon reboot, the specified physical device reverts to its previous state. .It Xo .Nm zpool .Cm online +.Op Fl -power .Op Fl e .Ar pool .Ar device Ns … @@ -80,6 +85,13 @@ Upon reboot, the specified physical device reverts to its previous state. Brings the specified physical device online. This command is not applicable to spares. .Bl -tag -width Ds +.It Fl -power +Power on the device's slot in the storage enclosure and wait for the device +to show up before attempting to online it. +Alternatively, you can set the +.Sy ZPOOL_AUTO_POWER_ON_SLOT +environment variable to always enable this behavior. +This flag currently works on Linux only .It Fl e Expand the device to use all available space. If the device is part of a mirror or raidz then all devices must be expanded diff --git a/sys/contrib/openzfs/man/man8/zpool-status.8 b/sys/contrib/openzfs/man/man8/zpool-status.8 index 8f9580cf086..24ad6e643ca 100644 --- a/sys/contrib/openzfs/man/man8/zpool-status.8 +++ b/sys/contrib/openzfs/man/man8/zpool-status.8 @@ -36,7 +36,7 @@ .Sh SYNOPSIS .Nm zpool .Cm status -.Op Fl DigLpPstvx +.Op Fl DeigLpPstvx .Op Fl T Sy u Ns | Ns Sy d .Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns … .Oo Ar pool Oc Ns … @@ -57,6 +57,8 @@ and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change. .Bl -tag -width Ds +.It Fl -power +Display vdev enclosure slot power status (on or off). .It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns … Run a script (or scripts) on each vdev and include the output as a new column in the @@ -67,6 +69,8 @@ See the option of .Nm zpool Cm iostat for complete details. +.It Fl e +Only show unhealthy vdevs (not-ONLINE or with errors). .It Fl i Display vdev initialization status. .It Fl g @@ -110,7 +114,7 @@ Specify .Sy u for a printed representation of the internal representation of time. See -.Xr time 2 . +.Xr time 1 . Specify .Sy d for standard date format. diff --git a/sys/contrib/openzfs/man/man8/zpool-wait.8 b/sys/contrib/openzfs/man/man8/zpool-wait.8 index 683b0141425..4fa4cb23564 100644 --- a/sys/contrib/openzfs/man/man8/zpool-wait.8 +++ b/sys/contrib/openzfs/man/man8/zpool-wait.8 @@ -97,7 +97,7 @@ Specify .Sy u for a printed representation of the internal representation of time. See -.Xr time 2 . +.Xr time 1 . Specify .Sy d for standard date format. diff --git a/sys/contrib/openzfs/man/man8/zpool.8 b/sys/contrib/openzfs/man/man8/zpool.8 index 4c4020bdd81..fe44e15cabe 100644 --- a/sys/contrib/openzfs/man/man8/zpool.8 +++ b/sys/contrib/openzfs/man/man8/zpool.8 @@ -444,7 +444,7 @@ rpool 14.6G 54.9G 4 55 250K 2.69M .Ed . .Sh ENVIRONMENT VARIABLES -.Bl -tag -compact -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS" +.Bl -tag -compact -width "ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE" .It Sy ZFS_ABORT Cause .Nm @@ -456,6 +456,23 @@ Use ANSI color in and .Nm zpool Cm iostat output. +.It Sy ZPOOL_AUTO_POWER_ON_SLOT +Automatically attempt to turn on the drives enclosure slot power to a drive when +running the +.Nm zpool Cm online +or +.Nm zpool Cm clear +commands. +This has the same effect as passing the +.Fl -power +option to those commands. +.It Sy ZPOOL_POWER_ON_SLOT_TIMEOUT_MS +The maximum time in milliseconds to wait for a slot power sysfs value +to return the correct value after writing it. +For example, after writing "on" to the sysfs enclosure slot power_control file, +it can take some time for the enclosure to power down the slot and return +"on" if you read back the 'power_control' value. +Defaults to 30 seconds (30000ms) if not set. .It Sy ZPOOL_IMPORT_PATH The search path for devices or files to use with the pool. This is a colon-separated list of directories in which diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in index b9c284a2441..f1a145779dd 100644 --- a/sys/contrib/openzfs/module/Kbuild.in +++ b/sys/contrib/openzfs/module/Kbuild.in @@ -79,6 +79,7 @@ SPL_OBJS := \ spl-kstat.o \ spl-proc.o \ spl-procfs-list.o \ + spl-shrinker.o \ spl-taskq.o \ spl-thread.o \ spl-trace.o \ diff --git a/sys/contrib/openzfs/module/lua/ldebug.c b/sys/contrib/openzfs/module/lua/ldebug.c index 0092474c762..23e321bb124 100644 --- a/sys/contrib/openzfs/module/lua/ldebug.c +++ b/sys/contrib/openzfs/module/lua/ldebug.c @@ -111,10 +111,11 @@ static const char *upvalname (Proto *p, int uv) { static const char *findvararg (CallInfo *ci, int n, StkId *pos) { int nparams = clLvalue(ci->func)->p->numparams; - if (n >= ci->u.l.base - ci->func - nparams) + int nvararg = cast_int(ci->u.l.base - ci->func) - nparams; + if (n <= -nvararg) return NULL; /* no such vararg */ else { - *pos = ci->func + nparams + n; + *pos = ci->func + nparams - n; return "(*vararg)"; /* generic name for any vararg */ } } @@ -126,7 +127,7 @@ static const char *findlocal (lua_State *L, CallInfo *ci, int n, StkId base; if (isLua(ci)) { if (n < 0) /* access to vararg values? */ - return findvararg(ci, -n, pos); + return findvararg(ci, n, pos); else { base = ci->u.l.base; name = luaF_getlocalname(ci_func(ci)->p, n, currentpc(ci)); diff --git a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c index 9f5f92e194e..43cd4da02e3 100644 --- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c +++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c @@ -187,19 +187,18 @@ kstat_sysctl_dataset_string(SYSCTL_HANDLER_ARGS) static int kstat_sysctl_io(SYSCTL_HANDLER_ARGS) { - struct sbuf *sb; + struct sbuf sb; kstat_t *ksp = arg1; kstat_io_t *kip = ksp->ks_data; int rc; - sb = sbuf_new_auto(); - if (sb == NULL) - return (ENOMEM); + sbuf_new_for_sysctl(&sb, NULL, 0, req); + /* Update the aggsums before reading */ (void) ksp->ks_update(ksp, KSTAT_READ); /* though wlentime & friends are signed, they will never be negative */ - sbuf_printf(sb, + sbuf_printf(&sb, "%-8llu %-8llu %-8u %-8u %-8llu %-8llu " "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n", kip->nread, kip->nwritten, @@ -207,25 +206,21 @@ kstat_sysctl_io(SYSCTL_HANDLER_ARGS) kip->wtime, kip->wlentime, kip->wlastupdate, kip->rtime, kip->rlentime, kip->rlastupdate, kip->wcnt, kip->rcnt); - rc = sbuf_finish(sb); - if (rc == 0) - rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); - sbuf_delete(sb); + rc = sbuf_finish(&sb); + sbuf_delete(&sb); return (rc); } static int kstat_sysctl_raw(SYSCTL_HANDLER_ARGS) { - struct sbuf *sb; + struct sbuf sb; void *data; kstat_t *ksp = arg1; void *(*addr_op)(kstat_t *ksp, loff_t index); int n, has_header, rc = 0; - sb = sbuf_new_auto(); - if (sb == NULL) - return (ENOMEM); + sbuf_new_for_sysctl(&sb, NULL, PAGE_SIZE, req); if (ksp->ks_raw_ops.addr) addr_op = ksp->ks_raw_ops.addr; @@ -258,8 +253,10 @@ restart_headers: if (has_header) { if (rc == ENOMEM && !kstat_resize_raw(ksp)) goto restart_headers; - if (rc == 0) - sbuf_printf(sb, "\n%s", ksp->ks_raw_buf); + if (rc == 0) { + sbuf_cat(&sb, "\n"); + sbuf_cat(&sb, ksp->ks_raw_buf); + } } while ((data = addr_op(ksp, n)) != NULL) { @@ -270,22 +267,19 @@ restart: if (rc == ENOMEM && !kstat_resize_raw(ksp)) goto restart; if (rc == 0) - sbuf_printf(sb, "%s", ksp->ks_raw_buf); + sbuf_cat(&sb, ksp->ks_raw_buf); } else { ASSERT3U(ksp->ks_ndata, ==, 1); - sbuf_hexdump(sb, ksp->ks_data, + sbuf_hexdump(&sb, ksp->ks_data, ksp->ks_data_size, NULL, 0); } n++; } free(ksp->ks_raw_buf, M_TEMP); mutex_exit(ksp->ks_lock); - sbuf_trim(sb); - rc = sbuf_finish(sb); - if (rc == 0) - rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); - sbuf_delete(sb); + rc = sbuf_finish(&sb); + sbuf_delete(&sb); return (rc); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c index a5f486b95db..c33ce01ab39 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c @@ -110,7 +110,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); if (tocpy == db->db_size) - dmu_buf_will_fill(db, tx); + dmu_buf_will_fill(db, tx, B_FALSE); else dmu_buf_will_dirty(db, tx); @@ -126,7 +126,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, } if (tocpy == db->db_size) - dmu_buf_fill_done(db, tx); + dmu_buf_fill_done(db, tx, B_FALSE); offset += tocpy; size -= tocpy; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c index 60c9ff0581e..f7f2be2cf95 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c @@ -53,26 +53,65 @@ int zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp) { struct thread *td; - int rc, fd; + struct vnode *vp; + struct file *fp; + struct nameidata nd; + int error; td = curthread; pwd_ensure_dirs(); - /* 12.x doesn't take a const char * */ - rc = kern_openat(td, AT_FDCWD, __DECONST(char *, path), - UIO_SYSSPACE, flags, mode); - if (rc) - return (SET_ERROR(rc)); - fd = td->td_retval[0]; - td->td_retval[0] = 0; - if (fget(curthread, fd, &cap_no_rights, fpp)) - kern_close(td, fd); + + KASSERT((flags & (O_EXEC | O_PATH)) == 0, + ("invalid flags: 0x%x", flags)); + KASSERT((flags & O_ACCMODE) != O_ACCMODE, + ("invalid flags: 0x%x", flags)); + flags = FFLAGS(flags); + + error = falloc_noinstall(td, &fp); + if (error != 0) { + return (error); + } + fp->f_flag = flags & FMASK; + +#if __FreeBSD_version >= 1400043 + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path); +#else + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, td); +#endif + error = vn_open(&nd, &flags, mode, fp); + if (error != 0) { + falloc_abort(td, fp); + return (SET_ERROR(error)); + } + NDFREE_PNBUF(&nd); + vp = nd.ni_vp; + fp->f_vnode = vp; + if (fp->f_ops == &badfileops) { + finit_vnode(fp, flags, NULL, &vnops); + } + VOP_UNLOCK(vp); + if (vp->v_type != VREG) { + zfs_file_close(fp); + return (SET_ERROR(EACCES)); + } + + if (flags & O_TRUNC) { + error = fo_truncate(fp, 0, td->td_ucred, td); + if (error != 0) { + zfs_file_close(fp); + return (SET_ERROR(error)); + } + } + + *fpp = fp; + return (0); } void zfs_file_close(zfs_file_t *fp) { - fo_close(fp, curthread); + fdrop(fp, curthread); } static int @@ -263,7 +302,7 @@ zfs_file_get(int fd) void zfs_file_put(zfs_file_t *fp) { - fdrop(fp, curthread); + zfs_file_close(fp); } loff_t diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c index 23b8da18453..a972c720dfd 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c @@ -89,10 +89,6 @@ int zfs_debug_level; SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, "Debug level"); -int zfs_bclone_enabled = 0; -SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN, - &zfs_bclone_enabled, 0, "Enable block cloning"); - struct zfs_jailparam { int mount_snapshot; }; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c index 024a931d781..b08916b317f 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zio_crypt.c @@ -1251,7 +1251,7 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, iovec_t *dst_iovecs; zil_chain_t *zilc; lr_t *lr; - uint64_t txtype, lr_len; + uint64_t txtype, lr_len, nused; uint_t crypt_len, nr_iovecs, vec; uint_t aad_len = 0, total_len = 0; @@ -1268,7 +1268,10 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, zilc = (zil_chain_t *)src; slrp = src + sizeof (zil_chain_t); aadp = aadbuf; - blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused); + nused = ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused); + ASSERT3U(nused, >=, sizeof (zil_chain_t)); + ASSERT3U(nused, <=, datalen); + blkend = src + nused; /* * Calculate the number of encrypted iovecs we will need. @@ -1287,6 +1290,8 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, txtype = lr->lrc_txtype; lr_len = lr->lrc_reclen; } + ASSERT3U(lr_len, >=, sizeof (lr_t)); + ASSERT3U(lr_len, <=, blkend - slrp); nr_iovecs++; if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t)) diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c index 2520507b98a..b6edac434de 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c @@ -1333,6 +1333,7 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname) } } strlcpy(zv->zv_name, newname, sizeof (zv->zv_name)); + dataset_kstats_rename(&zv->zv_kstat, newname); } /* diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c b/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c index e87954714e3..5898789ad53 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c @@ -20,7 +20,7 @@ * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . * - * Solaris Porting Layer (SPL) Credential Implementation. + * Solaris Porting Layer (SPL) Condition Variables Implementation. */ #include diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c index 3c30dfc577b..42821ad6025 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -76,17 +77,6 @@ module_param(spl_kmem_cache_magazine_size, uint, 0444); MODULE_PARM_DESC(spl_kmem_cache_magazine_size, "Default magazine size (2-256), set automatically (0)"); -/* - * The default behavior is to report the number of objects remaining in the - * cache. This allows the Linux VM to repeatedly reclaim objects from the - * cache when memory is low satisfy other memory allocations. Alternately, - * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache - * is reclaimed. This may increase the likelihood of out of memory events. - */ -static unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */; -module_param(spl_kmem_cache_reclaim, uint, 0644); -MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)"); - static unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB; module_param(spl_kmem_cache_obj_per_slab, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab"); @@ -102,7 +92,8 @@ MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB"); * of 16K was determined to be optimal for architectures using 4K pages and * to also work well on architecutres using larger 64K page sizes. */ -static unsigned int spl_kmem_cache_slab_limit = 16384; +static unsigned int spl_kmem_cache_slab_limit = + SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE; module_param(spl_kmem_cache_slab_limit, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_slab_limit, "Objects less than N bytes use the Linux slab"); @@ -794,7 +785,7 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, } else { unsigned long slabflags = 0; - if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) + if (size > spl_kmem_cache_slab_limit) goto out; #if defined(SLAB_USERCOPY) diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c index 4308581147a..ad553a73a69 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c @@ -32,6 +32,7 @@ #include #include #include +#include static kmutex_t kstat_module_lock; static struct list_head kstat_module_list; diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-shrinker.c b/sys/contrib/openzfs/module/os/linux/spl/spl-shrinker.c new file mode 100644 index 00000000000..d5c8da471cb --- /dev/null +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-shrinker.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. + * Copyright (C) 2007 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * UCRL-CODE-235197 + * + * This file is part of the SPL, Solaris Porting Layer. + * + * The SPL is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * The SPL is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with the SPL. If not, see . + * + * Solaris Porting Layer (SPL) Shrinker Implementation. + */ + +#include +#include + +#ifdef HAVE_SINGLE_SHRINKER_CALLBACK +/* 3.0-3.11: single shrink() callback, which we wrap to carry both functions */ +struct spl_shrinker_wrap { + struct shrinker shrinker; + spl_shrinker_cb countfunc; + spl_shrinker_cb scanfunc; +}; + +static int +spl_shrinker_single_cb(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct spl_shrinker_wrap *sw = (struct spl_shrinker_wrap *)shrinker; + + if (sc->nr_to_scan != 0) + (void) sw->scanfunc(&sw->shrinker, sc); + return (sw->countfunc(&sw->shrinker, sc)); +} +#endif + +struct shrinker * +spl_register_shrinker(const char *name, spl_shrinker_cb countfunc, + spl_shrinker_cb scanfunc, int seek_cost) +{ + struct shrinker *shrinker; + + /* allocate shrinker */ +#if defined(HAVE_SHRINKER_REGISTER) + /* 6.7: kernel will allocate the shrinker for us */ + shrinker = shrinker_alloc(0, name); +#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) + /* 3.12-6.6: we allocate the shrinker */ + shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP); +#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK) + /* 3.0-3.11: allocate a wrapper */ + struct spl_shrinker_wrap *sw = + kmem_zalloc(sizeof (struct spl_shrinker_wrap), KM_SLEEP); + shrinker = &sw->shrinker; +#else + /* 2.x-2.6.22, or a newer shrinker API has been introduced. */ +#error "Unknown shrinker API" +#endif + + if (shrinker == NULL) + return (NULL); + + /* set callbacks */ +#ifdef HAVE_SINGLE_SHRINKER_CALLBACK + sw->countfunc = countfunc; + sw->scanfunc = scanfunc; + shrinker->shrink = spl_shrinker_single_cb; +#else + shrinker->count_objects = countfunc; + shrinker->scan_objects = scanfunc; +#endif + + /* set params */ + shrinker->seeks = seek_cost; + + /* register with kernel */ +#if defined(HAVE_SHRINKER_REGISTER) + shrinker_register(shrinker); +#elif defined(HAVE_REGISTER_SHRINKER_VARARG) + register_shrinker(shrinker, name); +#else + register_shrinker(shrinker); +#endif + + return (shrinker); +} +EXPORT_SYMBOL(spl_register_shrinker); + +void +spl_unregister_shrinker(struct shrinker *shrinker) +{ +#if defined(HAVE_SHRINKER_REGISTER) + shrinker_free(shrinker); +#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) + unregister_shrinker(shrinker); + kmem_free(shrinker, sizeof (struct shrinker)); +#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK) + unregister_shrinker(shrinker); + kmem_free(shrinker, sizeof (struct spl_shrinker_wrap)); +#else +#error "Unknown shrinker API" +#endif +} +EXPORT_SYMBOL(spl_unregister_shrinker); diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c index b4ef86a5e4a..ee3eb4690c3 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-thread.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * Thread interfaces diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c index e821fbb4f3a..d0d0cca154a 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c @@ -30,6 +30,7 @@ #include #include #include +#include #if defined(CONFIG_USER_NS) #include diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c index 13150adbe0c..24390fbbf12 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c @@ -60,8 +60,16 @@ #ifdef _KERNEL #include #include +#endif + +#ifdef _KERNEL +#if defined(MAX_ORDER) +#define ABD_MAX_ORDER (MAX_ORDER) +#elif defined(MAX_PAGE_ORDER) +#define ABD_MAX_ORDER (MAX_PAGE_ORDER) +#endif #else -#define MAX_ORDER 1 +#define ABD_MAX_ORDER (1) #endif typedef struct abd_stats { @@ -71,7 +79,7 @@ typedef struct abd_stats { kstat_named_t abdstat_scatter_cnt; kstat_named_t abdstat_scatter_data_size; kstat_named_t abdstat_scatter_chunk_waste; - kstat_named_t abdstat_scatter_orders[MAX_ORDER]; + kstat_named_t abdstat_scatter_orders[ABD_MAX_ORDER]; kstat_named_t abdstat_scatter_page_multi_chunk; kstat_named_t abdstat_scatter_page_multi_zone; kstat_named_t abdstat_scatter_page_alloc_retry; @@ -139,7 +147,7 @@ static struct { wmsum_t abdstat_scatter_cnt; wmsum_t abdstat_scatter_data_size; wmsum_t abdstat_scatter_chunk_waste; - wmsum_t abdstat_scatter_orders[MAX_ORDER]; + wmsum_t abdstat_scatter_orders[ABD_MAX_ORDER]; wmsum_t abdstat_scatter_page_multi_chunk; wmsum_t abdstat_scatter_page_multi_zone; wmsum_t abdstat_scatter_page_alloc_retry; @@ -222,7 +230,7 @@ abd_free_struct_impl(abd_t *abd) } #ifdef _KERNEL -static unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1; +static unsigned zfs_abd_scatter_max_order = ABD_MAX_ORDER - 1; /* * Mark zfs data pages so they can be excluded from kernel crash dumps @@ -272,7 +280,8 @@ abd_alloc_chunks(abd_t *abd, size_t size) struct page *page, *tmp_page = NULL; gfp_t gfp = __GFP_NOWARN | GFP_NOIO; gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM; - unsigned int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1); + unsigned int max_order = MIN(zfs_abd_scatter_max_order, + ABD_MAX_ORDER - 1); unsigned int nr_pages = abd_chunkcnt_for_bytes(size); unsigned int chunks = 0, zones = 0; size_t remaining_size; @@ -729,7 +738,7 @@ abd_kstats_update(kstat_t *ksp, int rw) wmsum_value(&abd_sums.abdstat_scatter_data_size); as->abdstat_scatter_chunk_waste.value.ui64 = wmsum_value(&abd_sums.abdstat_scatter_chunk_waste); - for (int i = 0; i < MAX_ORDER; i++) { + for (int i = 0; i < ABD_MAX_ORDER; i++) { as->abdstat_scatter_orders[i].value.ui64 = wmsum_value(&abd_sums.abdstat_scatter_orders[i]); } @@ -758,7 +767,7 @@ abd_init(void) wmsum_init(&abd_sums.abdstat_scatter_cnt, 0); wmsum_init(&abd_sums.abdstat_scatter_data_size, 0); wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0); - for (i = 0; i < MAX_ORDER; i++) + for (i = 0; i < ABD_MAX_ORDER; i++) wmsum_init(&abd_sums.abdstat_scatter_orders[i], 0); wmsum_init(&abd_sums.abdstat_scatter_page_multi_chunk, 0); wmsum_init(&abd_sums.abdstat_scatter_page_multi_zone, 0); @@ -768,7 +777,7 @@ abd_init(void) abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (abd_ksp != NULL) { - for (i = 0; i < MAX_ORDER; i++) { + for (i = 0; i < ABD_MAX_ORDER; i++) { snprintf(abd_stats.abdstat_scatter_orders[i].name, KSTAT_STRLEN, "scatter_order_%d", i); abd_stats.abdstat_scatter_orders[i].data_type = @@ -798,7 +807,7 @@ abd_fini(void) wmsum_fini(&abd_sums.abdstat_scatter_cnt); wmsum_fini(&abd_sums.abdstat_scatter_data_size); wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste); - for (int i = 0; i < MAX_ORDER; i++) + for (int i = 0; i < ABD_MAX_ORDER; i++) wmsum_fini(&abd_sums.abdstat_scatter_orders[i]); wmsum_fini(&abd_sums.abdstat_scatter_page_multi_chunk); wmsum_fini(&abd_sums.abdstat_scatter_page_multi_zone); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c index 43ed087e2db..1fa9f3eb3f5 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c @@ -247,8 +247,7 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) return (sc->nr_to_scan); } -SPL_SHRINKER_DECLARE(arc_shrinker, - arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS); +static struct shrinker *arc_shrinker = NULL; int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) @@ -351,14 +350,18 @@ arc_lowmem_init(void) * reclaim from the arc. This is done to prevent kswapd from * swapping out pages when it is preferable to shrink the arc. */ - spl_register_shrinker(&arc_shrinker); + arc_shrinker = spl_register_shrinker("zfs-arc-shrinker", + arc_shrinker_count, arc_shrinker_scan, DEFAULT_SEEKS); + VERIFY(arc_shrinker); + arc_set_sys_free(allmem); } void arc_lowmem_fini(void) { - spl_unregister_shrinker(&arc_shrinker); + spl_unregister_shrinker(arc_shrinker); + arc_shrinker = NULL; } int diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c index 48ac55f0703..b0bda5fa201 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c @@ -41,8 +41,28 @@ #include #endif +/* + * Linux 6.8.x uses a bdev_handle as an instance/refcount for an underlying + * block_device. Since it carries the block_device inside, its convenient to + * just use the handle as a proxy. For pre-6.8, we just emulate this with + * a cast, since we don't need any of the other fields inside the handle. + */ +#ifdef HAVE_BDEV_OPEN_BY_PATH +typedef struct bdev_handle zfs_bdev_handle_t; +#define BDH_BDEV(bdh) ((bdh)->bdev) +#define BDH_IS_ERR(bdh) (IS_ERR(bdh)) +#define BDH_PTR_ERR(bdh) (PTR_ERR(bdh)) +#define BDH_ERR_PTR(err) (ERR_PTR(err)) +#else +typedef void zfs_bdev_handle_t; +#define BDH_BDEV(bdh) ((struct block_device *)bdh) +#define BDH_IS_ERR(bdh) (IS_ERR(BDH_BDEV(bdh))) +#define BDH_PTR_ERR(bdh) (PTR_ERR(BDH_BDEV(bdh))) +#define BDH_ERR_PTR(err) (ERR_PTR(err)) +#endif + typedef struct vdev_disk { - struct block_device *vd_bdev; + zfs_bdev_handle_t *vd_bdh; krwlock_t vd_lock; } vdev_disk_t; @@ -85,7 +105,7 @@ static blk_mode_t #else static fmode_t #endif -vdev_bdev_mode(spa_mode_t spa_mode) +vdev_bdev_mode(spa_mode_t spa_mode, boolean_t exclusive) { #ifdef HAVE_BLK_MODE_T blk_mode_t mode = 0; @@ -95,6 +115,9 @@ vdev_bdev_mode(spa_mode_t spa_mode) if (spa_mode & SPA_MODE_WRITE) mode |= BLK_OPEN_WRITE; + + if (exclusive) + mode |= BLK_OPEN_EXCL; #else fmode_t mode = 0; @@ -103,6 +126,9 @@ vdev_bdev_mode(spa_mode_t spa_mode) if (spa_mode & SPA_MODE_WRITE) mode |= FMODE_WRITE; + + if (exclusive) + mode |= FMODE_EXCL; #endif return (mode); @@ -203,42 +229,39 @@ static void vdev_disk_kobj_evt_post(vdev_t *v) { vdev_disk_t *vd = v->vdev_tsd; - if (vd && vd->vd_bdev) { - spl_signal_kobj_evt(vd->vd_bdev); + if (vd && vd->vd_bdh) { + spl_signal_kobj_evt(BDH_BDEV(vd->vd_bdh)); } else { vdev_dbgmsg(v, "vdev_disk_t is NULL for VDEV:%s\n", v->vdev_path); } } -#if !defined(HAVE_BLKDEV_GET_BY_PATH_4ARG) -/* - * Define a dummy struct blk_holder_ops for kernel versions - * prior to 6.5. - */ -struct blk_holder_ops {}; -#endif - -static struct block_device * -vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder, - const struct blk_holder_ops *hops) +static zfs_bdev_handle_t * +vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder) { -#ifdef HAVE_BLKDEV_GET_BY_PATH_4ARG +#if defined(HAVE_BDEV_OPEN_BY_PATH) + return (bdev_open_by_path(path, + vdev_bdev_mode(mode, B_TRUE), holder, NULL)); +#elif defined(HAVE_BLKDEV_GET_BY_PATH_4ARG) return (blkdev_get_by_path(path, - vdev_bdev_mode(mode) | BLK_OPEN_EXCL, holder, hops)); + vdev_bdev_mode(mode, B_TRUE), holder, NULL)); #else return (blkdev_get_by_path(path, - vdev_bdev_mode(mode) | FMODE_EXCL, holder)); + vdev_bdev_mode(mode, B_TRUE), holder)); #endif } static void -vdev_blkdev_put(struct block_device *bdev, spa_mode_t mode, void *holder) +vdev_blkdev_put(zfs_bdev_handle_t *bdh, spa_mode_t mode, void *holder) { -#ifdef HAVE_BLKDEV_PUT_HOLDER - return (blkdev_put(bdev, holder)); +#if defined(HAVE_BDEV_RELEASE) + return (bdev_release(bdh)); +#elif defined(HAVE_BLKDEV_PUT_HOLDER) + return (blkdev_put(BDH_BDEV(bdh), holder)); #else - return (blkdev_put(bdev, vdev_bdev_mode(mode) | FMODE_EXCL)); + return (blkdev_put(BDH_BDEV(bdh), + vdev_bdev_mode(mode, B_TRUE))); #endif } @@ -246,11 +269,11 @@ static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, uint64_t *logical_ashift, uint64_t *physical_ashift) { - struct block_device *bdev; + zfs_bdev_handle_t *bdh; #ifdef HAVE_BLK_MODE_T - blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa)); + blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE); #else - fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa)); + fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa), B_FALSE); #endif hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms); vdev_disk_t *vd; @@ -276,10 +299,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, boolean_t reread_part = B_FALSE; rw_enter(&vd->vd_lock, RW_WRITER); - bdev = vd->vd_bdev; - vd->vd_bdev = NULL; + bdh = vd->vd_bdh; + vd->vd_bdh = NULL; - if (bdev) { + if (bdh) { + struct block_device *bdev = BDH_BDEV(bdh); if (v->vdev_expanding && bdev != bdev_whole(bdev)) { vdev_bdevname(bdev_whole(bdev), disk_name + 5); /* @@ -301,15 +325,16 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, reread_part = B_TRUE; } - vdev_blkdev_put(bdev, mode, zfs_vdev_holder); + vdev_blkdev_put(bdh, mode, zfs_vdev_holder); } if (reread_part) { - bdev = vdev_blkdev_get_by_path(disk_name, mode, - zfs_vdev_holder, NULL); - if (!IS_ERR(bdev)) { - int error = vdev_bdev_reread_part(bdev); - vdev_blkdev_put(bdev, mode, zfs_vdev_holder); + bdh = vdev_blkdev_get_by_path(disk_name, mode, + zfs_vdev_holder); + if (!BDH_IS_ERR(bdh)) { + int error = + vdev_bdev_reread_part(BDH_BDEV(bdh)); + vdev_blkdev_put(bdh, mode, zfs_vdev_holder); if (error == 0) { timeout = MSEC2NSEC( zfs_vdev_open_timeout_ms * 2); @@ -352,11 +377,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, * subsequent attempts are expected to eventually succeed. */ hrtime_t start = gethrtime(); - bdev = ERR_PTR(-ENXIO); - while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) { - bdev = vdev_blkdev_get_by_path(v->vdev_path, mode, - zfs_vdev_holder, NULL); - if (unlikely(PTR_ERR(bdev) == -ENOENT)) { + bdh = BDH_ERR_PTR(-ENXIO); + while (BDH_IS_ERR(bdh) && ((gethrtime() - start) < timeout)) { + bdh = vdev_blkdev_get_by_path(v->vdev_path, mode, + zfs_vdev_holder); + if (unlikely(BDH_PTR_ERR(bdh) == -ENOENT)) { /* * There is no point of waiting since device is removed * explicitly @@ -365,52 +390,54 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, break; schedule_timeout(MSEC_TO_TICK(10)); - } else if (unlikely(PTR_ERR(bdev) == -ERESTARTSYS)) { + } else if (unlikely(BDH_PTR_ERR(bdh) == -ERESTARTSYS)) { timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms * 10); continue; - } else if (IS_ERR(bdev)) { + } else if (BDH_IS_ERR(bdh)) { break; } } - if (IS_ERR(bdev)) { - int error = -PTR_ERR(bdev); + if (BDH_IS_ERR(bdh)) { + int error = -BDH_PTR_ERR(bdh); vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error, (u_longlong_t)(gethrtime() - start), (u_longlong_t)timeout); - vd->vd_bdev = NULL; + vd->vd_bdh = NULL; v->vdev_tsd = vd; rw_exit(&vd->vd_lock); return (SET_ERROR(error)); } else { - vd->vd_bdev = bdev; + vd->vd_bdh = bdh; v->vdev_tsd = vd; rw_exit(&vd->vd_lock); } + struct block_device *bdev = BDH_BDEV(vd->vd_bdh); + /* Determine the physical block size */ - int physical_block_size = bdev_physical_block_size(vd->vd_bdev); + int physical_block_size = bdev_physical_block_size(bdev); /* Determine the logical block size */ - int logical_block_size = bdev_logical_block_size(vd->vd_bdev); + int logical_block_size = bdev_logical_block_size(bdev); /* Clear the nowritecache bit, causes vdev_reopen() to try again. */ v->vdev_nowritecache = B_FALSE; /* Set when device reports it supports TRIM. */ - v->vdev_has_trim = bdev_discard_supported(vd->vd_bdev); + v->vdev_has_trim = bdev_discard_supported(bdev); /* Set when device reports it supports secure TRIM. */ - v->vdev_has_securetrim = bdev_secure_discard_supported(vd->vd_bdev); + v->vdev_has_securetrim = bdev_secure_discard_supported(bdev); /* Inform the ZIO pipeline that we are non-rotational */ - v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev)); + v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(bdev)); /* Physical volume size in bytes for the partition */ - *psize = bdev_capacity(vd->vd_bdev); + *psize = bdev_capacity(bdev); /* Physical volume size in bytes including possible expansion space */ - *max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk); + *max_psize = bdev_max_capacity(bdev, v->vdev_wholedisk); /* Based on the minimum sector size set the block size */ *physical_ashift = highbit64(MAX(physical_block_size, @@ -430,8 +457,8 @@ vdev_disk_close(vdev_t *v) if (v->vdev_reopening || vd == NULL) return; - if (vd->vd_bdev != NULL) { - vdev_blkdev_put(vd->vd_bdev, spa_mode(v->vdev_spa), + if (vd->vd_bdh != NULL) { + vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa), zfs_vdev_holder); } @@ -835,27 +862,66 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) return (0); } +#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \ + defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC) +BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error) +{ + zio_t *zio = bio->bi_private; +#ifdef HAVE_1ARG_BIO_END_IO_T + zio->io_error = BIO_END_IO_ERROR(bio); +#else + zio->io_error = -error; +#endif + bio_put(bio); + if (zio->io_error) + vdev_disk_error(zio); + zio_interrupt(zio); +} + +static int +vdev_issue_discard_trim(zio_t *zio, unsigned long flags) +{ + int ret; + struct bio *bio = NULL; + +#if defined(BLKDEV_DISCARD_SECURE) + ret = - __blkdev_issue_discard( + BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh), + zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, flags, &bio); +#else + (void) flags; + ret = - __blkdev_issue_discard( + BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh), + zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, &bio); +#endif + if (!ret && bio) { + bio->bi_private = zio; + bio->bi_end_io = vdev_disk_discard_end_io; + vdev_submit_bio(bio); + } + return (ret); +} +#endif + static int vdev_disk_io_trim(zio_t *zio) { - vdev_t *v = zio->io_vd; - vdev_disk_t *vd = v->vdev_tsd; - -#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) - if (zio->io_trim_flags & ZIO_TRIM_SECURE) { - return (-blkdev_issue_secure_erase(vd->vd_bdev, - zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); - } else { - return (-blkdev_issue_discard(vd->vd_bdev, - zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); - } -#elif defined(HAVE_BLKDEV_ISSUE_DISCARD) unsigned long trim_flags = 0; -#if defined(BLKDEV_DISCARD_SECURE) - if (zio->io_trim_flags & ZIO_TRIM_SECURE) + if (zio->io_trim_flags & ZIO_TRIM_SECURE) { +#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) + return (-blkdev_issue_secure_erase( + BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh), + zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS)); +#elif defined(BLKDEV_DISCARD_SECURE) trim_flags |= BLKDEV_DISCARD_SECURE; #endif - return (-blkdev_issue_discard(vd->vd_bdev, + } +#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) || \ + defined(HAVE_BLKDEV_ISSUE_DISCARD_ASYNC) + return (vdev_issue_discard_trim(zio, trim_flags)); +#elif defined(HAVE_BLKDEV_ISSUE_DISCARD) + return (-blkdev_issue_discard( + BDH_BDEV(((vdev_disk_t *)zio->io_vd->vdev_tsd)->vd_bdh), zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, trim_flags)); #else #error "Unsupported kernel" @@ -885,7 +951,7 @@ vdev_disk_io_start(zio_t *zio) * If the vdev is closed, it's likely due to a failed reopen and is * in the UNAVAIL state. Nothing to be done here but return failure. */ - if (vd->vd_bdev == NULL) { + if (vd->vd_bdh == NULL) { rw_exit(&vd->vd_lock); zio->io_error = ENXIO; zio_interrupt(zio); @@ -913,7 +979,7 @@ vdev_disk_io_start(zio_t *zio) break; } - error = vdev_disk_io_flush(vd->vd_bdev, zio); + error = vdev_disk_io_flush(BDH_BDEV(vd->vd_bdh), zio); if (error == 0) { rw_exit(&vd->vd_lock); return; @@ -941,7 +1007,12 @@ vdev_disk_io_start(zio_t *zio) case ZIO_TYPE_TRIM: zio->io_error = vdev_disk_io_trim(zio); rw_exit(&vd->vd_lock); +#if defined(HAVE_BLKDEV_ISSUE_SECURE_ERASE) + if (zio->io_trim_flags & ZIO_TRIM_SECURE) + zio_interrupt(zio); +#elif defined(HAVE_BLKDEV_ISSUE_DISCARD) zio_interrupt(zio); +#endif return; default: @@ -952,7 +1023,7 @@ vdev_disk_io_start(zio_t *zio) } zio->io_target_timestamp = zio_handle_io_delay(zio); - error = __vdev_disk_physio(vd->vd_bdev, zio, + error = __vdev_disk_physio(BDH_BDEV(vd->vd_bdh), zio, zio->io_size, zio->io_offset, rw, 0); rw_exit(&vd->vd_lock); @@ -975,8 +1046,8 @@ vdev_disk_io_done(zio_t *zio) vdev_t *v = zio->io_vd; vdev_disk_t *vd = v->vdev_tsd; - if (!zfs_check_disk_status(vd->vd_bdev)) { - invalidate_bdev(vd->vd_bdev); + if (!zfs_check_disk_status(BDH_BDEV(vd->vd_bdh))) { + invalidate_bdev(BDH_BDEV(vd->vd_bdh)); v->vdev_remove_wanted = B_TRUE; spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c index 94e25fa0ae8..54ed70d0394 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c @@ -520,8 +520,8 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, ip->i_uid = SUID_TO_KUID(0); ip->i_gid = SGID_TO_KGID(0); ip->i_blkbits = SPA_MINBLOCKSHIFT; - ip->i_atime = now; - ip->i_mtime = now; + zpl_inode_set_atime_to_ts(ip, now); + zpl_inode_set_mtime_to_ts(ip, now); zpl_inode_set_ctime_to_ts(ip, now); ip->i_fop = fops; ip->i_op = ops; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c index 2792bc02721..2015c20d734 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c @@ -1240,12 +1240,18 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan) * and inode caches. This can occur when the ARC needs to free meta data * blocks but can't because they are all pinned by entries in these caches. */ +#if defined(HAVE_SUPER_BLOCK_S_SHRINK) +#define S_SHRINK(sb) (&(sb)->s_shrink) +#elif defined(HAVE_SUPER_BLOCK_S_SHRINK_PTR) +#define S_SHRINK(sb) ((sb)->s_shrink) +#endif + int zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) { zfsvfs_t *zfsvfs = sb->s_fs_info; int error = 0; - struct shrinker *shrinker = &sb->s_shrink; + struct shrinker *shrinker = S_SHRINK(sb); struct shrink_control sc = { .nr_to_scan = nr_to_scan, .gfp_mask = GFP_KERNEL, @@ -1257,7 +1263,7 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) #if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \ defined(SHRINK_CONTROL_HAS_NID) && \ defined(SHRINKER_NUMA_AWARE) - if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) { + if (shrinker->flags & SHRINKER_NUMA_AWARE) { *objects = 0; for_each_online_node(sc.nid) { *objects += (*shrinker->scan_objects)(shrinker, &sc); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c index b464f615cdd..c06a75662bf 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c @@ -1853,7 +1853,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns) { struct inode *ip; zfsvfs_t *zfsvfs = ZTOZSB(zp); - objset_t *os = zfsvfs->z_os; + objset_t *os; zilog_t *zilog; dmu_tx_t *tx; vattr_t oldva; @@ -1885,6 +1885,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns) if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (err); ip = ZTOI(zp); + os = zfsvfs->z_os; /* * If this is a xvattr_t, then get a pointer to the structure of @@ -2435,15 +2436,16 @@ top: if ((mask & ATTR_ATIME) || zp->z_atime_dirty) { zp->z_atime_dirty = B_FALSE; - ZFS_TIME_ENCODE(&ip->i_atime, atime); + inode_timespec_t tmp_atime = zpl_inode_get_atime(ip); + ZFS_TIME_ENCODE(&tmp_atime, atime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, sizeof (atime)); } if (mask & (ATTR_MTIME | ATTR_SIZE)) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); - ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate( - vap->va_mtime, ZTOI(zp)); + zpl_inode_set_mtime_to_ts(ZTOI(zp), + zpl_inode_timestamp_truncate(vap->va_mtime, ZTOI(zp))); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, sizeof (mtime)); @@ -3657,7 +3659,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, caddr_t va; int err = 0; uint64_t mtime[2], ctime[2]; - inode_timespec_t tmp_ctime; + inode_timespec_t tmp_ts; sa_bulk_attr_t bulk[3]; int cnt = 0; struct address_space *mapping; @@ -3821,9 +3823,10 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, &zp->z_pflags, 8); /* Preserve the mtime and ctime provided by the inode */ - ZFS_TIME_ENCODE(&ip->i_mtime, mtime); - tmp_ctime = zpl_inode_get_ctime(ip); - ZFS_TIME_ENCODE(&tmp_ctime, ctime); + tmp_ts = zpl_inode_get_mtime(ip); + ZFS_TIME_ENCODE(&tmp_ts, mtime); + tmp_ts = zpl_inode_get_ctime(ip); + ZFS_TIME_ENCODE(&tmp_ts, ctime); zp->z_atime_dirty = B_FALSE; zp->z_seq++; @@ -3873,7 +3876,7 @@ zfs_dirty_inode(struct inode *ip, int flags) zfsvfs_t *zfsvfs = ITOZSB(ip); dmu_tx_t *tx; uint64_t mode, atime[2], mtime[2], ctime[2]; - inode_timespec_t tmp_ctime; + inode_timespec_t tmp_ts; sa_bulk_attr_t bulk[4]; int error = 0; int cnt = 0; @@ -3918,10 +3921,12 @@ zfs_dirty_inode(struct inode *ip, int flags) SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); /* Preserve the mode, mtime and ctime provided by the inode */ - ZFS_TIME_ENCODE(&ip->i_atime, atime); - ZFS_TIME_ENCODE(&ip->i_mtime, mtime); - tmp_ctime = zpl_inode_get_ctime(ip); - ZFS_TIME_ENCODE(&tmp_ctime, ctime); + tmp_ts = zpl_inode_get_atime(ip); + ZFS_TIME_ENCODE(&tmp_ts, atime); + tmp_ts = zpl_inode_get_mtime(ip); + ZFS_TIME_ENCODE(&tmp_ts, mtime); + tmp_ts = zpl_inode_get_ctime(ip); + ZFS_TIME_ENCODE(&tmp_ts, ctime); mode = ip->i_mode; zp->z_mode = mode; @@ -3964,7 +3969,9 @@ zfs_inactive(struct inode *ip) if (error) { dmu_tx_abort(tx); } else { - ZFS_TIME_ENCODE(&ip->i_atime, atime); + inode_timespec_t tmp_atime; + tmp_atime = zpl_inode_get_atime(ip); + ZFS_TIME_ENCODE(&tmp_atime, atime); mutex_enter(&zp->z_lock); (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), (void *)&atime, sizeof (atime), tx); @@ -4241,9 +4248,4 @@ EXPORT_SYMBOL(zfs_map); /* CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); - -/* CSTYLED */ -module_param(zfs_bclone_enabled, uint, 0644); -MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning"); - #endif diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c index f71026da83c..b99df188c64 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c @@ -542,7 +542,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, uint64_t links; uint64_t z_uid, z_gid; uint64_t atime[2], mtime[2], ctime[2], btime[2]; - inode_timespec_t tmp_ctime; + inode_timespec_t tmp_ts; uint64_t projid = ZFS_DEFAULT_PROJID; sa_bulk_attr_t bulk[12]; int count = 0; @@ -614,10 +614,12 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, if (zp->z_pflags & ZFS_XATTR) zp->z_xattr_parent = parent; - ZFS_TIME_DECODE(&ip->i_atime, atime); - ZFS_TIME_DECODE(&ip->i_mtime, mtime); - ZFS_TIME_DECODE(&tmp_ctime, ctime); - zpl_inode_set_ctime_to_ts(ip, tmp_ctime); + ZFS_TIME_DECODE(&tmp_ts, atime); + zpl_inode_set_atime_to_ts(ip, tmp_ts); + ZFS_TIME_DECODE(&tmp_ts, mtime); + zpl_inode_set_mtime_to_ts(ip, tmp_ts); + ZFS_TIME_DECODE(&tmp_ts, ctime); + zpl_inode_set_ctime_to_ts(ip, tmp_ts); ZFS_TIME_DECODE(&zp->z_btime, btime); ip->i_ino = zp->z_id; @@ -1197,7 +1199,7 @@ zfs_rezget(znode_t *zp) uint64_t gen; uint64_t z_uid, z_gid; uint64_t atime[2], mtime[2], ctime[2], btime[2]; - inode_timespec_t tmp_ctime; + inode_timespec_t tmp_ts; uint64_t projid = ZFS_DEFAULT_PROJID; znode_hold_t *zh; @@ -1290,10 +1292,12 @@ zfs_rezget(znode_t *zp) zfs_uid_write(ZTOI(zp), z_uid); zfs_gid_write(ZTOI(zp), z_gid); - ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime); - ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime); - ZFS_TIME_DECODE(&tmp_ctime, ctime); - zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime); + ZFS_TIME_DECODE(&tmp_ts, atime); + zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_ts); + ZFS_TIME_DECODE(&tmp_ts, mtime); + zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts); + ZFS_TIME_DECODE(&tmp_ts, ctime); + zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts); ZFS_TIME_DECODE(&zp->z_btime, btime); if ((uint32_t)gen != ZTOI(zp)->i_generation) { @@ -1401,22 +1405,24 @@ zfs_zinactive(znode_t *zp) boolean_t zfs_relatime_need_update(const struct inode *ip) { - inode_timespec_t now, tmp_ctime; + inode_timespec_t now, tmp_atime, tmp_ts; gethrestime(&now); + tmp_atime = zpl_inode_get_atime(ip); /* * In relatime mode, only update the atime if the previous atime * is earlier than either the ctime or mtime or if at least a day * has passed since the last update of atime. */ - if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0) + tmp_ts = zpl_inode_get_mtime(ip); + if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0) return (B_TRUE); - tmp_ctime = zpl_inode_get_ctime(ip); - if (zfs_compare_timespec(&tmp_ctime, &ip->i_atime) >= 0) + tmp_ts = zpl_inode_get_ctime(ip); + if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0) return (B_TRUE); - if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60) + if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60) return (B_TRUE); return (B_FALSE); @@ -1439,7 +1445,7 @@ void zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], uint64_t ctime[2]) { - inode_timespec_t now, tmp_ctime; + inode_timespec_t now, tmp_ts; gethrestime(&now); @@ -1447,7 +1453,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], if (flag & ATTR_MTIME) { ZFS_TIME_ENCODE(&now, mtime); - ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime); + ZFS_TIME_DECODE(&tmp_ts, mtime); + zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts); if (ZTOZSB(zp)->z_use_fuids) { zp->z_pflags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED); @@ -1456,8 +1463,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], if (flag & ATTR_CTIME) { ZFS_TIME_ENCODE(&now, ctime); - ZFS_TIME_DECODE(&tmp_ctime, ctime); - zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime); + ZFS_TIME_DECODE(&tmp_ts, ctime); + zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts); if (ZTOZSB(zp)->z_use_fuids) zp->z_pflags |= ZFS_ARCHIVE; } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c index 775ab8efbcd..2114be28190 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c @@ -1405,7 +1405,7 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, boolean_t *no_crypt) { int ret; - uint64_t txtype, lr_len; + uint64_t txtype, lr_len, nused; uint_t nr_src, nr_dst, crypt_len; uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; @@ -1432,7 +1432,10 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, zilc = (zil_chain_t *)src; slrp = src + sizeof (zil_chain_t); aadp = aadbuf; - blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused); + nused = ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused); + ASSERT3U(nused, >=, sizeof (zil_chain_t)); + ASSERT3U(nused, <=, datalen); + blkend = src + nused; /* calculate the number of encrypted iovecs we will need */ for (; slrp < blkend; slrp += lr_len) { @@ -1445,6 +1448,8 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, txtype = BSWAP_64(lr->lrc_txtype); lr_len = BSWAP_64(lr->lrc_reclen); } + ASSERT3U(lr_len, >=, sizeof (lr_t)); + ASSERT3U(lr_len, <=, blkend - slrp); nr_iovecs++; if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t)) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c index 139c51cf46d..3065d54fa9d 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c @@ -31,8 +31,6 @@ #include #include -int zfs_bclone_enabled = 0; - /* * Clone part of a file via block cloning. * @@ -40,7 +38,7 @@ int zfs_bclone_enabled = 0; * care of that depending on how it was called. */ static ssize_t -__zpl_clone_file_range(struct file *src_file, loff_t src_off, +zpl_clone_file_range_impl(struct file *src_file, loff_t src_off, struct file *dst_file, loff_t dst_off, size_t len) { struct inode *src_i = file_inode(src_file); @@ -96,11 +94,12 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off, { ssize_t ret; + /* Flags is reserved for future extensions and must be zero. */ if (flags != 0) return (-EINVAL); - /* Try to do it via zfs_clone_range() */ - ret = __zpl_clone_file_range(src_file, src_off, + /* Try to do it via zfs_clone_range() and allow shortening. */ + ret = zpl_clone_file_range_impl(src_file, src_off, dst_file, dst_off, len); #ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE @@ -137,6 +136,11 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off, * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the * range in both files and if they're the same, arrange for them to be backed * by the same storage. + * + * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range + * if we want. It's designed for filesystems that may need to shorten the + * length for alignment, EOF, or any other requirement. ZFS may shorten the + * request when there is outstanding dirty data which hasn't been written. */ loff_t zpl_remap_file_range(struct file *src_file, loff_t src_off, @@ -145,24 +149,21 @@ zpl_remap_file_range(struct file *src_file, loff_t src_off, if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN)) return (-EINVAL); - /* - * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given - * range if we want. Its designed for filesystems that make data past - * EOF available, and don't want it to be visible in both files. ZFS - * doesn't do that, so we just turn the flag off. - */ - flags &= ~REMAP_FILE_CAN_SHORTEN; - + /* No support for dedup yet */ if (flags & REMAP_FILE_DEDUP) - /* No support for dedup yet */ return (-EOPNOTSUPP); /* Zero length means to clone everything to the end of the file */ if (len == 0) len = i_size_read(file_inode(src_file)) - src_off; - return (__zpl_clone_file_range(src_file, src_off, - dst_file, dst_off, len)); + ssize_t ret = zpl_clone_file_range_impl(src_file, src_off, + dst_file, dst_off, len); + + if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len) + ret = -EINVAL; + + return (ret); } #endif /* HAVE_VFS_REMAP_FILE_RANGE */ @@ -179,8 +180,14 @@ zpl_clone_file_range(struct file *src_file, loff_t src_off, if (len == 0) len = i_size_read(file_inode(src_file)) - src_off; - return (__zpl_clone_file_range(src_file, src_off, - dst_file, dst_off, len)); + /* The entire length must be cloned or this is an error. */ + ssize_t ret = zpl_clone_file_range_impl(src_file, src_off, + dst_file, dst_off, len); + + if (ret >= 0 && ret != len) + ret = -EINVAL; + + return (ret); } #endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ @@ -214,8 +221,7 @@ zpl_ioctl_ficlone(struct file *dst_file, void *arg) size_t len = i_size_read(file_inode(src_file)); - ssize_t ret = - __zpl_clone_file_range(src_file, 0, dst_file, 0, len); + ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len); fput(src_file); @@ -253,7 +259,7 @@ zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg) if (len == 0) len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset; - ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset, + ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset, dst_file, fcr.fcr_dest_offset, len); fput(src_file); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c index 96f65b9e94e..ad1753f7a07 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c @@ -526,7 +526,8 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia) vap->va_ctime = ia->ia_ctime; if (vap->va_mask & ATTR_ATIME) - ip->i_atime = zpl_inode_timestamp_truncate(ia->ia_atime, ip); + zpl_inode_set_atime_to_ts(ip, + zpl_inode_timestamp_truncate(ia->ia_atime, ip)); cookie = spl_fstrans_mark(); #ifdef HAVE_USERNS_IOPS_SETATTR diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index f94ce69fb9e..8562e989738 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -1521,6 +1521,8 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname) */ set_disk_ro(zv->zv_zso->zvo_disk, !readonly); set_disk_ro(zv->zv_zso->zvo_disk, readonly); + + dataset_kstats_rename(&zv->zv_kstat, newname); } void diff --git a/sys/contrib/openzfs/module/zfs/abd.c b/sys/contrib/openzfs/module/zfs/abd.c index 745ee8f02ed..d982f201c93 100644 --- a/sys/contrib/openzfs/module/zfs/abd.c +++ b/sys/contrib/openzfs/module/zfs/abd.c @@ -802,13 +802,10 @@ abd_iterate_func(abd_t *abd, size_t off, size_t size, abd_verify(abd); ASSERT3U(off + size, <=, abd->abd_size); - boolean_t gang = abd_is_gang(abd); abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off); while (size > 0) { - /* If we are at the end of the gang ABD we are done */ - if (gang && !c_abd) - break; + IMPLY(abd_is_gang(abd), c_abd != NULL); abd_iter_map(&aiter); @@ -930,7 +927,6 @@ abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, { int ret = 0; struct abd_iter daiter, saiter; - boolean_t dabd_is_gang_abd, sabd_is_gang_abd; abd_t *c_dabd, *c_sabd; if (size == 0) @@ -942,16 +938,12 @@ abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, ASSERT3U(doff + size, <=, dabd->abd_size); ASSERT3U(soff + size, <=, sabd->abd_size); - dabd_is_gang_abd = abd_is_gang(dabd); - sabd_is_gang_abd = abd_is_gang(sabd); c_dabd = abd_init_abd_iter(dabd, &daiter, doff); c_sabd = abd_init_abd_iter(sabd, &saiter, soff); while (size > 0) { - /* if we are at the end of the gang ABD we are done */ - if ((dabd_is_gang_abd && !c_dabd) || - (sabd_is_gang_abd && !c_sabd)) - break; + IMPLY(abd_is_gang(dabd), c_dabd != NULL); + IMPLY(abd_is_gang(sabd), c_sabd != NULL); abd_iter_map(&daiter); abd_iter_map(&saiter); @@ -1032,66 +1024,40 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, int i; ssize_t len, dlen; struct abd_iter caiters[3]; - struct abd_iter daiter = {0}; + struct abd_iter daiter; void *caddrs[3]; unsigned long flags __maybe_unused = 0; abd_t *c_cabds[3]; abd_t *c_dabd = NULL; - boolean_t cabds_is_gang_abd[3]; - boolean_t dabd_is_gang_abd = B_FALSE; ASSERT3U(parity, <=, 3); - for (i = 0; i < parity; i++) { - cabds_is_gang_abd[i] = abd_is_gang(cabds[i]); + abd_verify(cabds[i]); + ASSERT3U(csize, <=, cabds[i]->abd_size); c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], 0); } - if (dabd) { - dabd_is_gang_abd = abd_is_gang(dabd); + ASSERT3S(dsize, >=, 0); + if (dsize > 0) { + ASSERT(dabd); + abd_verify(dabd); + ASSERT3U(dsize, <=, dabd->abd_size); c_dabd = abd_init_abd_iter(dabd, &daiter, 0); } - ASSERT3S(dsize, >=, 0); - abd_enter_critical(flags); while (csize > 0) { - /* if we are at the end of the gang ABD we are done */ - if (dabd_is_gang_abd && !c_dabd) - break; - + len = csize; for (i = 0; i < parity; i++) { - /* - * If we are at the end of the gang ABD we are - * done. - */ - if (cabds_is_gang_abd[i] && !c_cabds[i]) - break; + IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL); abd_iter_map(&caiters[i]); caddrs[i] = caiters[i].iter_mapaddr; + len = MIN(caiters[i].iter_mapsize, len); } - len = csize; - - if (dabd && dsize > 0) + if (dsize > 0) { + IMPLY(abd_is_gang(dabd), c_dabd != NULL); abd_iter_map(&daiter); - - switch (parity) { - case 3: - len = MIN(caiters[2].iter_mapsize, len); - zfs_fallthrough; - case 2: - len = MIN(caiters[1].iter_mapsize, len); - zfs_fallthrough; - case 1: - len = MIN(caiters[0].iter_mapsize, len); - } - - /* must be progressive */ - ASSERT3S(len, >, 0); - - if (dabd && dsize > 0) { - /* this needs precise iter.length */ len = MIN(daiter.iter_mapsize, len); dlen = len; } else @@ -1114,7 +1080,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, &caiters[i], len); } - if (dabd && dsize > 0) { + if (dsize > 0) { abd_iter_unmap(&daiter); c_dabd = abd_advance_abd_iter(dabd, c_dabd, &daiter, @@ -1153,16 +1119,16 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, struct abd_iter xiters[3]; void *caddrs[3], *xaddrs[3]; unsigned long flags __maybe_unused = 0; - boolean_t cabds_is_gang_abd[3]; - boolean_t tabds_is_gang_abd[3]; abd_t *c_cabds[3]; abd_t *c_tabds[3]; ASSERT3U(parity, <=, 3); for (i = 0; i < parity; i++) { - cabds_is_gang_abd[i] = abd_is_gang(cabds[i]); - tabds_is_gang_abd[i] = abd_is_gang(tabds[i]); + abd_verify(cabds[i]); + abd_verify(tabds[i]); + ASSERT3U(tsize, <=, cabds[i]->abd_size); + ASSERT3U(tsize, <=, tabds[i]->abd_size); c_cabds[i] = abd_init_abd_iter(cabds[i], &citers[i], 0); c_tabds[i] = @@ -1171,36 +1137,18 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, abd_enter_critical(flags); while (tsize > 0) { - + len = tsize; for (i = 0; i < parity; i++) { - /* - * If we are at the end of the gang ABD we - * are done. - */ - if (cabds_is_gang_abd[i] && !c_cabds[i]) - break; - if (tabds_is_gang_abd[i] && !c_tabds[i]) - break; + IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL); + IMPLY(abd_is_gang(tabds[i]), c_tabds[i] != NULL); abd_iter_map(&citers[i]); abd_iter_map(&xiters[i]); caddrs[i] = citers[i].iter_mapaddr; xaddrs[i] = xiters[i].iter_mapaddr; + len = MIN(citers[i].iter_mapsize, len); + len = MIN(xiters[i].iter_mapsize, len); } - len = tsize; - switch (parity) { - case 3: - len = MIN(xiters[2].iter_mapsize, len); - len = MIN(citers[2].iter_mapsize, len); - zfs_fallthrough; - case 2: - len = MIN(xiters[1].iter_mapsize, len); - len = MIN(citers[1].iter_mapsize, len); - zfs_fallthrough; - case 1: - len = MIN(xiters[0].iter_mapsize, len); - len = MIN(citers[0].iter_mapsize, len); - } /* must be progressive */ ASSERT3S(len, >, 0); /* diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index dfea15b7439..4db6c06148b 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -8031,9 +8031,8 @@ l2arc_write_size(l2arc_dev_t *dev) */ size = l2arc_write_max; if (size == 0) { - cmn_err(CE_NOTE, "Bad value for l2arc_write_max, value must " - "be greater than zero, resetting it to the default (%d)", - L2ARC_WRITE_SIZE); + cmn_err(CE_NOTE, "l2arc_write_max must be greater than zero, " + "resetting it to the default (%d)", L2ARC_WRITE_SIZE); size = l2arc_write_max = L2ARC_WRITE_SIZE; } @@ -8056,30 +8055,9 @@ l2arc_write_size(l2arc_dev_t *dev) * device. This is important in l2arc_evict(), otherwise infinite * iteration can occur. */ - if (size > dev->l2ad_end - dev->l2ad_start) { - cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost " - "plus the overhead of log blocks (persistent L2ARC, " - "%llu bytes) exceeds the size of the cache device " - "(guid %llu), resetting them to the default (%d)", - (u_longlong_t)l2arc_log_blk_overhead(size, dev), - (u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE); + size = MIN(size, (dev->l2ad_end - dev->l2ad_start) / 4); - size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE; - - if (l2arc_trim_ahead > 1) { - cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1"); - l2arc_trim_ahead = 1; - } - - if (arc_warm == B_FALSE) - size += l2arc_write_boost; - - size += l2arc_log_blk_overhead(size, dev); - if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) { - size += MAX(64 * 1024 * 1024, - (size * l2arc_trim_ahead) / 100); - } - } + size = P2ROUNDUP(size, 1ULL << dev->l2ad_vdev->vdev_ashift); return (size); diff --git a/sys/contrib/openzfs/module/zfs/brt.c b/sys/contrib/openzfs/module/zfs/brt.c index 759bc8d2e2b..225ddaca1e5 100644 --- a/sys/contrib/openzfs/module/zfs/brt.c +++ b/sys/contrib/openzfs/module/zfs/brt.c @@ -157,10 +157,8 @@ * (copying the file content to the new dataset and removing the source file). * In that case Block Cloning will only be used briefly, because the BRT entries * will be removed when the source is removed. - * Note: currently it is not possible to clone blocks between encrypted - * datasets, even if those datasets use the same encryption key (this includes - * snapshots of encrypted datasets). Cloning blocks between datasets that use - * the same keys should be possible and should be implemented in the future. + * Block Cloning across encrypted datasets is supported as long as both + * datasets share the same master key (e.g. snapshots and clones) * * Block Cloning flow through ZFS layers. * @@ -344,7 +342,7 @@ brt_vdev_entcount_get(const brt_vdev_t *brtvd, uint64_t idx) ASSERT3U(idx, <, brtvd->bv_size); - if (brtvd->bv_need_byteswap) { + if (unlikely(brtvd->bv_need_byteswap)) { return (BSWAP_16(brtvd->bv_entcount[idx])); } else { return (brtvd->bv_entcount[idx]); @@ -357,7 +355,7 @@ brt_vdev_entcount_set(brt_vdev_t *brtvd, uint64_t idx, uint16_t entcnt) ASSERT3U(idx, <, brtvd->bv_size); - if (brtvd->bv_need_byteswap) { + if (unlikely(brtvd->bv_need_byteswap)) { brtvd->bv_entcount[idx] = BSWAP_16(entcnt); } else { brtvd->bv_entcount[idx] = entcnt; @@ -392,55 +390,39 @@ brt_vdev_entcount_dec(brt_vdev_t *brtvd, uint64_t idx) #ifdef ZFS_DEBUG static void -brt_vdev_dump(brt_t *brt) +brt_vdev_dump(brt_vdev_t *brtvd) { - brt_vdev_t *brtvd; - uint64_t vdevid; + uint64_t idx; - if ((zfs_flags & ZFS_DEBUG_BRT) == 0) { - return; - } - - if (brt->brt_nvdevs == 0) { - zfs_dbgmsg("BRT empty"); - return; - } - - zfs_dbgmsg("BRT vdev dump:"); - for (vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) { - uint64_t idx; - - brtvd = &brt->brt_vdevs[vdevid]; - zfs_dbgmsg(" vdevid=%llu/%llu meta_dirty=%d entcount_dirty=%d " - "size=%llu totalcount=%llu nblocks=%llu bitmapsize=%zu\n", - (u_longlong_t)vdevid, (u_longlong_t)brtvd->bv_vdevid, - brtvd->bv_meta_dirty, brtvd->bv_entcount_dirty, - (u_longlong_t)brtvd->bv_size, - (u_longlong_t)brtvd->bv_totalcount, - (u_longlong_t)brtvd->bv_nblocks, - (size_t)BT_SIZEOFMAP(brtvd->bv_nblocks)); - if (brtvd->bv_totalcount > 0) { - zfs_dbgmsg(" entcounts:"); - for (idx = 0; idx < brtvd->bv_size; idx++) { - if (brt_vdev_entcount_get(brtvd, idx) > 0) { - zfs_dbgmsg(" [%04llu] %hu", - (u_longlong_t)idx, - brt_vdev_entcount_get(brtvd, idx)); - } + zfs_dbgmsg(" BRT vdevid=%llu meta_dirty=%d entcount_dirty=%d " + "size=%llu totalcount=%llu nblocks=%llu bitmapsize=%zu\n", + (u_longlong_t)brtvd->bv_vdevid, + brtvd->bv_meta_dirty, brtvd->bv_entcount_dirty, + (u_longlong_t)brtvd->bv_size, + (u_longlong_t)brtvd->bv_totalcount, + (u_longlong_t)brtvd->bv_nblocks, + (size_t)BT_SIZEOFMAP(brtvd->bv_nblocks)); + if (brtvd->bv_totalcount > 0) { + zfs_dbgmsg(" entcounts:"); + for (idx = 0; idx < brtvd->bv_size; idx++) { + uint16_t entcnt = brt_vdev_entcount_get(brtvd, idx); + if (entcnt > 0) { + zfs_dbgmsg(" [%04llu] %hu", + (u_longlong_t)idx, entcnt); } } - if (brtvd->bv_entcount_dirty) { - char *bitmap; + } + if (brtvd->bv_entcount_dirty) { + char *bitmap; - bitmap = kmem_alloc(brtvd->bv_nblocks + 1, KM_SLEEP); - for (idx = 0; idx < brtvd->bv_nblocks; idx++) { - bitmap[idx] = - BT_TEST(brtvd->bv_bitmap, idx) ? 'x' : '.'; - } - bitmap[idx] = '\0'; - zfs_dbgmsg(" bitmap: %s", bitmap); - kmem_free(bitmap, brtvd->bv_nblocks + 1); + bitmap = kmem_alloc(brtvd->bv_nblocks + 1, KM_SLEEP); + for (idx = 0; idx < brtvd->bv_nblocks; idx++) { + bitmap[idx] = + BT_TEST(brtvd->bv_bitmap, idx) ? 'x' : '.'; } + bitmap[idx] = '\0'; + zfs_dbgmsg(" dirty: %s", bitmap); + kmem_free(bitmap, brtvd->bv_nblocks + 1); } } #endif @@ -769,7 +751,8 @@ brt_vdev_addref(brt_t *brt, brt_vdev_t *brtvd, const brt_entry_t *bre, BT_SET(brtvd->bv_bitmap, idx); #ifdef ZFS_DEBUG - brt_vdev_dump(brt); + if (zfs_flags & ZFS_DEBUG_BRT) + brt_vdev_dump(brtvd); #endif } @@ -805,7 +788,8 @@ brt_vdev_decref(brt_t *brt, brt_vdev_t *brtvd, const brt_entry_t *bre, BT_SET(brtvd->bv_bitmap, idx); #ifdef ZFS_DEBUG - brt_vdev_dump(brt); + if (zfs_flags & ZFS_DEBUG_BRT) + brt_vdev_dump(brtvd); #endif } diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c index 767a461e002..2ac058fd2c9 100644 --- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c +++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c @@ -198,6 +198,18 @@ dataset_kstats_destroy(dataset_kstats_t *dk) zil_sums_fini(&dk->dk_zil_sums); } +void +dataset_kstats_rename(dataset_kstats_t *dk, const char *name) +{ + dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data; + char *ds_name; + + ds_name = KSTAT_NAMED_STR_PTR(&dkv->dkv_ds_name); + ASSERT3S(ds_name, !=, NULL); + (void) strlcpy(ds_name, name, + KSTAT_NAMED_STR_BUFLEN(&dkv->dkv_ds_name)); +} + void dataset_kstats_update_write_kstats(dataset_kstats_t *dk, int64_t nwritten) diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 5a7fe42b602..280001bc34b 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -1619,8 +1619,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, */ if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) { spa_log_error(db->db_objset->os_spa, &zb, &bpp->blk_birth); - zfs_panic_recover("unencrypted block in encrypted " - "object set %llu", dmu_objset_id(db->db_objset)); err = SET_ERROR(EIO); goto early_unlock; } @@ -1904,7 +1902,6 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) dmu_buf_impl_t *db = dr->dr_dbuf; blkptr_t *bp = &dr->dt.dl.dr_overridden_by; uint64_t txg = dr->dr_txg; - boolean_t release; ASSERT(MUTEX_HELD(&db->db_mtx)); /* @@ -1925,7 +1922,10 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) zio_free(db->db_objset->os_spa, txg, bp); - release = !dr->dt.dl.dr_brtwrite; + if (dr->dt.dl.dr_brtwrite) { + ASSERT0(dr->dt.dl.dr_data); + dr->dt.dl.dr_data = db->db_buf; + } dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_nopwrite = B_FALSE; dr->dt.dl.dr_brtwrite = B_FALSE; @@ -1939,7 +1939,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) * the buf thawed to save the effort of freezing & * immediately re-thawing it. */ - if (release) + if (dr->dt.dl.dr_data) arc_release(dr->dt.dl.dr_data, db); } @@ -2734,7 +2734,7 @@ dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) } void -dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) +dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; @@ -2752,8 +2752,14 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) * Block cloning: We will be completely overwriting a block * cloned in this transaction group, so let's undirty the * pending clone and mark the block as uncached. This will be - * as if the clone was never done. + * as if the clone was never done. But if the fill can fail + * we should have a way to return back to the cloned data. */ + if (canfail && dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) { + mutex_exit(&db->db_mtx); + dmu_buf_will_dirty(db_fake, tx); + return; + } VERIFY(!dbuf_undirty(db, tx)); db->db_state = DB_UNCACHED; } @@ -2814,32 +2820,41 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx) dl->dr_overridden_by.blk_birth = dr->dr_txg; } -void -dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx) +boolean_t +dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed) { (void) tx; dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; - dbuf_states_t old_state; mutex_enter(&db->db_mtx); DBUF_VERIFY(db); - old_state = db->db_state; - db->db_state = DB_CACHED; - if (old_state == DB_FILL) { + if (db->db_state == DB_FILL) { if (db->db_level == 0 && db->db_freed_in_flight) { ASSERT(db->db_blkid != DMU_BONUS_BLKID); /* we were freed while filling */ /* XXX dbuf_undirty? */ memset(db->db.db_data, 0, db->db.db_size); db->db_freed_in_flight = FALSE; + db->db_state = DB_CACHED; DTRACE_SET_STATE(db, "fill done handling freed in flight"); + failed = B_FALSE; + } else if (failed) { + VERIFY(!dbuf_undirty(db, tx)); + db->db_buf = NULL; + dbuf_clear_data(db); + DTRACE_SET_STATE(db, "fill failed"); } else { + db->db_state = DB_CACHED; DTRACE_SET_STATE(db, "fill done"); } cv_broadcast(&db->db_changed); + } else { + db->db_state = DB_CACHED; + failed = B_FALSE; } mutex_exit(&db->db_mtx); + return (failed); } void @@ -2930,7 +2945,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) while (db->db_state == DB_READ || db->db_state == DB_FILL) cv_wait(&db->db_changed, &db->db_mtx); - ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED); + ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED || + db->db_state == DB_NOFILL); if (db->db_state == DB_CACHED && zfs_refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { @@ -2967,6 +2983,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) arc_buf_destroy(db->db_buf, db); } db->db_buf = NULL; + } else if (db->db_state == DB_NOFILL) { + /* + * We will be completely replacing the cloned block. In case + * it was cloned in this transaction group, let's undirty the + * pending clone and mark the block as uncached. This will be + * as if the clone was never done. + */ + VERIFY(!dbuf_undirty(db, tx)); + db->db_state = DB_UNCACHED; } ASSERT(db->db_buf == NULL); dbuf_set_data(db, buf); @@ -2974,7 +2999,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) DTRACE_SET_STATE(db, "filling assigned arcbuf"); mutex_exit(&db->db_mtx); (void) dbuf_dirty(db, tx); - dmu_buf_fill_done(&db->db, tx); + dmu_buf_fill_done(&db->db, tx, B_FALSE); } void diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index 3f626031de5..3215ab1c2a1 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -1115,14 +1115,14 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size, ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); if (tocpy == db->db_size) - dmu_buf_will_fill(db, tx); + dmu_buf_will_fill(db, tx, B_FALSE); else dmu_buf_will_dirty(db, tx); (void) memcpy((char *)db->db_data + bufoff, buf, tocpy); if (tocpy == db->db_size) - dmu_buf_fill_done(db, tx); + dmu_buf_fill_done(db, tx, B_FALSE); offset += tocpy; size -= tocpy; @@ -1330,27 +1330,24 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) ASSERT(size > 0); - bufoff = zfs_uio_offset(uio) - db->db_offset; + offset_t off = zfs_uio_offset(uio); + bufoff = off - db->db_offset; tocpy = MIN(db->db_size - bufoff, size); ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); if (tocpy == db->db_size) - dmu_buf_will_fill(db, tx); + dmu_buf_will_fill(db, tx, B_TRUE); else dmu_buf_will_dirty(db, tx); - /* - * XXX zfs_uiomove could block forever (eg.nfs-backed - * pages). There needs to be a uiolockdown() function - * to lock the pages in memory, so that zfs_uiomove won't - * block. - */ err = zfs_uio_fault_move((char *)db->db_data + bufoff, tocpy, UIO_WRITE, uio); - if (tocpy == db->db_size) - dmu_buf_fill_done(db, tx); + if (tocpy == db->db_size && dmu_buf_fill_done(db, tx, err)) { + /* The fill was reverted. Undo any uio progress. */ + zfs_uio_advance(uio, off - zfs_uio_offset(uio)); + } if (err) break; @@ -1482,9 +1479,9 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, rw_enter(&dn->dn_struct_rwlock, RW_READER); blkid = dbuf_whichblock(dn, 0, offset); db = dbuf_hold(dn, blkid, FTAG); + rw_exit(&dn->dn_struct_rwlock); if (db == NULL) return (SET_ERROR(EIO)); - rw_exit(&dn->dn_struct_rwlock); /* * We can only assign if the offset is aligned and the arc buf is the @@ -2255,6 +2252,21 @@ dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, goto out; } + /* + * If the block was allocated in transaction group that is not + * yet synced, we could clone it, but we couldn't write this + * operation into ZIL, or it may be impossible to replay, since + * the block may appear not yet allocated at that point. + */ + if (BP_PHYSICAL_BIRTH(bp) > spa_freeze_txg(os->os_spa)) { + error = SET_ERROR(EINVAL); + goto out; + } + if (BP_PHYSICAL_BIRTH(bp) > spa_last_synced_txg(os->os_spa)) { + error = SET_ERROR(EAGAIN); + goto out; + } + bps[i] = *bp; } diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c index 05ca91717c2..54aa60259ea 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_recv.c +++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c @@ -2532,7 +2532,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, * size of the provided arc_buf_t. */ if (db_spill->db_size != drrs->drr_length) { - dmu_buf_will_fill(db_spill, tx); + dmu_buf_will_fill(db_spill, tx, B_FALSE); VERIFY0(dbuf_spill_set_blksz(db_spill, drrs->drr_length, tx)); } diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c index 2d37ed2cdfb..37c68528bf9 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_send.c +++ b/sys/contrib/openzfs/module/zfs/dmu_send.c @@ -1124,8 +1124,6 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (sta->os->os_encrypted && !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) { spa_log_error(spa, zb, &bp->blk_birth); - zfs_panic_recover("unencrypted block in encrypted " - "object set %llu", dmu_objset_id(sta->os)); return (SET_ERROR(EIO)); } diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c index 5e6e4e3d6c3..8e1055d9bcb 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c +++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c @@ -266,6 +266,40 @@ spa_crypto_key_compare(const void *a, const void *b) return (0); } +/* + * this compares a crypto key based on zk_guid. See comment on + * spa_crypto_key_compare for more information. + */ +boolean_t +dmu_objset_crypto_key_equal(objset_t *osa, objset_t *osb) +{ + dsl_crypto_key_t *dcka = NULL; + dsl_crypto_key_t *dckb = NULL; + uint64_t obja, objb; + boolean_t equal; + spa_t *spa; + + spa = dmu_objset_spa(osa); + if (spa != dmu_objset_spa(osb)) + return (B_FALSE); + obja = dmu_objset_ds(osa)->ds_object; + objb = dmu_objset_ds(osb)->ds_object; + + if (spa_keystore_lookup_key(spa, obja, FTAG, &dcka) != 0) + return (B_FALSE); + if (spa_keystore_lookup_key(spa, objb, FTAG, &dckb) != 0) { + spa_keystore_dsl_key_rele(spa, dcka, FTAG); + return (B_FALSE); + } + + equal = (dcka->dck_key.zk_guid == dckb->dck_key.zk_guid); + + spa_keystore_dsl_key_rele(spa, dcka, FTAG); + spa_keystore_dsl_key_rele(spa, dckb, FTAG); + + return (equal); +} + static int spa_key_mapping_compare(const void *a, const void *b) { diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c index 47c234f76c4..ac30a370813 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c +++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c @@ -1000,8 +1000,6 @@ livelist_compare(const void *larg, const void *rarg) /* if vdevs are equal, sort by offsets. */ uint64_t l_dva0_offset = DVA_GET_OFFSET(&l->blk_dva[0]); uint64_t r_dva0_offset = DVA_GET_OFFSET(&r->blk_dva[0]); - if (l_dva0_offset == r_dva0_offset) - ASSERT3U(l->blk_birth, ==, r->blk_birth); return (TREE_CMP(l_dva0_offset, r_dva0_offset)); } @@ -1016,9 +1014,9 @@ struct livelist_iter_arg { * and used to match up ALLOC/FREE pairs. ALLOC'd blkptrs without a * corresponding FREE are stored in the supplied bplist. * - * Note that multiple FREE and ALLOC entries for the same blkptr may - * be encountered when dedup is involved. For this reason we keep a - * refcount for all the FREE entries of each blkptr and ensure that + * Note that multiple FREE and ALLOC entries for the same blkptr may be + * encountered when dedup or block cloning is involved. For this reason we + * keep a refcount for all the FREE entries of each blkptr and ensure that * each of those FREE entries has a corresponding ALLOC preceding it. */ static int @@ -1037,6 +1035,13 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed, livelist_entry_t node; node.le_bp = *bp; livelist_entry_t *found = avl_find(avl, &node, NULL); + if (found) { + ASSERT3U(BP_GET_PSIZE(bp), ==, BP_GET_PSIZE(&found->le_bp)); + ASSERT3U(BP_GET_CHECKSUM(bp), ==, + BP_GET_CHECKSUM(&found->le_bp)); + ASSERT3U(BP_PHYSICAL_BIRTH(bp), ==, + BP_PHYSICAL_BIRTH(&found->le_bp)); + } if (bp_freed) { if (found == NULL) { /* first free entry for this blkptr */ @@ -1046,10 +1051,10 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed, e->le_refcnt = 1; avl_add(avl, e); } else { - /* dedup block free */ - ASSERT(BP_GET_DEDUP(bp)); - ASSERT3U(BP_GET_CHECKSUM(bp), ==, - BP_GET_CHECKSUM(&found->le_bp)); + /* + * Deduped or cloned block free. We could assert D bit + * for dedup, but there is no such one for cloning. + */ ASSERT3U(found->le_refcnt + 1, >, found->le_refcnt); found->le_refcnt++; } @@ -1065,14 +1070,6 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed, /* all tracked free pairs have been matched */ avl_remove(avl, found); kmem_free(found, sizeof (livelist_entry_t)); - } else { - /* - * This is definitely a deduped blkptr so - * let's validate it. - */ - ASSERT(BP_GET_DEDUP(bp)); - ASSERT3U(BP_GET_CHECKSUM(bp), ==, - BP_GET_CHECKSUM(&found->le_bp)); } } } diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c index 599d7ffa0cf..5809a832bcb 100644 --- a/sys/contrib/openzfs/module/zfs/metaslab.c +++ b/sys/contrib/openzfs/module/zfs/metaslab.c @@ -5061,7 +5061,6 @@ metaslab_group_alloc(metaslab_group_t *mg, zio_alloc_list_t *zal, int allocator, boolean_t try_hard) { uint64_t offset; - ASSERT(mg->mg_initialized); offset = metaslab_group_alloc_normal(mg, zal, asize, txg, want_unique, dva, d, allocator, try_hard); @@ -5212,8 +5211,6 @@ top: goto next; } - ASSERT(mg->mg_initialized); - /* * Avoid writing single-copy data to an unhealthy, * non-redundant vdev, unless we've already tried all diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index 1410651c63c..d7fe96cde6a 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -151,7 +151,7 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that * need to be handled with minimum delay. */ -static const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { +static zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ { ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */ @@ -1164,6 +1164,275 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) tqs->stqs_taskq = NULL; } +#ifdef _KERNEL +/* + * The READ and WRITE rows of zio_taskqs are configurable at module load time + * by setting zio_taskq_read or zio_taskq_write. + * + * Example (the defaults for READ and WRITE) + * zio_taskq_read='fixed,1,8 null scale null' + * zio_taskq_write='batch fixed,1,5 scale fixed,1,5' + * + * Each sets the entire row at a time. + * + * 'fixed' is parameterised: fixed,Q,T where Q is number of taskqs, T is number + * of threads per taskq. + * + * 'null' can only be set on the high-priority queues (queue selection for + * high-priority queues will fall back to the regular queue if the high-pri + * is NULL. + */ +static const char *const modes[ZTI_NMODES] = { + "fixed", "batch", "scale", "null" +}; + +/* Parse the incoming config string. Modifies cfg */ +static int +spa_taskq_param_set(zio_type_t t, char *cfg) +{ + int err = 0; + + zio_taskq_info_t row[ZIO_TASKQ_TYPES] = {{0}}; + + char *next = cfg, *tok, *c; + + /* + * Parse out each element from the string and fill `row`. The entire + * row has to be set at once, so any errors are flagged by just + * breaking out of this loop early. + */ + uint_t q; + for (q = 0; q < ZIO_TASKQ_TYPES; q++) { + /* `next` is the start of the config */ + if (next == NULL) + break; + + /* Eat up leading space */ + while (isspace(*next)) + next++; + if (*next == '\0') + break; + + /* Mode ends at space or end of string */ + tok = next; + next = strchr(tok, ' '); + if (next != NULL) *next++ = '\0'; + + /* Parameters start after a comma */ + c = strchr(tok, ','); + if (c != NULL) *c++ = '\0'; + + /* Match mode string */ + uint_t mode; + for (mode = 0; mode < ZTI_NMODES; mode++) + if (strcmp(tok, modes[mode]) == 0) + break; + if (mode == ZTI_NMODES) + break; + + /* Invalid canary */ + row[q].zti_mode = ZTI_NMODES; + + /* Per-mode setup */ + switch (mode) { + + /* + * FIXED is parameterised: number of queues, and number of + * threads per queue. + */ + case ZTI_MODE_FIXED: { + /* No parameters? */ + if (c == NULL || *c == '\0') + break; + + /* Find next parameter */ + tok = c; + c = strchr(tok, ','); + if (c == NULL) + break; + + /* Take digits and convert */ + unsigned long long nq; + if (!(isdigit(*tok))) + break; + err = ddi_strtoull(tok, &tok, 10, &nq); + /* Must succeed and also end at the next param sep */ + if (err != 0 || tok != c) + break; + + /* Move past the comma */ + tok++; + /* Need another number */ + if (!(isdigit(*tok))) + break; + /* Remember start to make sure we moved */ + c = tok; + + /* Take digits */ + unsigned long long ntpq; + err = ddi_strtoull(tok, &tok, 10, &ntpq); + /* Must succeed, and moved forward */ + if (err != 0 || tok == c || *tok != '\0') + break; + + /* + * sanity; zero queues/threads make no sense, and + * 16K is almost certainly more than anyone will ever + * need and avoids silly numbers like UINT32_MAX + */ + if (nq == 0 || nq >= 16384 || + ntpq == 0 || ntpq >= 16384) + break; + + const zio_taskq_info_t zti = ZTI_P(ntpq, nq); + row[q] = zti; + break; + } + + case ZTI_MODE_BATCH: { + const zio_taskq_info_t zti = ZTI_BATCH; + row[q] = zti; + break; + } + + case ZTI_MODE_SCALE: { + const zio_taskq_info_t zti = ZTI_SCALE; + row[q] = zti; + break; + } + + case ZTI_MODE_NULL: { + /* + * Can only null the high-priority queues; the general- + * purpose ones have to exist. + */ + if (q != ZIO_TASKQ_ISSUE_HIGH && + q != ZIO_TASKQ_INTERRUPT_HIGH) + break; + + const zio_taskq_info_t zti = ZTI_NULL; + row[q] = zti; + break; + } + + default: + break; + } + + /* Ensure we set a mode */ + if (row[q].zti_mode == ZTI_NMODES) + break; + } + + /* Didn't get a full row, fail */ + if (q < ZIO_TASKQ_TYPES) + return (SET_ERROR(EINVAL)); + + /* Eat trailing space */ + if (next != NULL) + while (isspace(*next)) + next++; + + /* If there's anything left over then fail */ + if (next != NULL && *next != '\0') + return (SET_ERROR(EINVAL)); + + /* Success! Copy it into the real config */ + for (q = 0; q < ZIO_TASKQ_TYPES; q++) + zio_taskqs[t][q] = row[q]; + + return (0); +} + +static int +spa_taskq_param_get(zio_type_t t, char *buf, boolean_t add_newline) +{ + int pos = 0; + + /* Build paramater string from live config */ + const char *sep = ""; + for (uint_t q = 0; q < ZIO_TASKQ_TYPES; q++) { + const zio_taskq_info_t *zti = &zio_taskqs[t][q]; + if (zti->zti_mode == ZTI_MODE_FIXED) + pos += sprintf(&buf[pos], "%s%s,%u,%u", sep, + modes[zti->zti_mode], zti->zti_count, + zti->zti_value); + else + pos += sprintf(&buf[pos], "%s%s", sep, + modes[zti->zti_mode]); + sep = " "; + } + + if (add_newline) + buf[pos++] = '\n'; + buf[pos] = '\0'; + + return (pos); +} + +#ifdef __linux__ +static int +spa_taskq_read_param_set(const char *val, zfs_kernel_param_t *kp) +{ + char *cfg = kmem_strdup(val); + int err = spa_taskq_param_set(ZIO_TYPE_READ, cfg); + kmem_free(cfg, strlen(val)+1); + return (-err); +} +static int +spa_taskq_read_param_get(char *buf, zfs_kernel_param_t *kp) +{ + return (spa_taskq_param_get(ZIO_TYPE_READ, buf, TRUE)); +} + +static int +spa_taskq_write_param_set(const char *val, zfs_kernel_param_t *kp) +{ + char *cfg = kmem_strdup(val); + int err = spa_taskq_param_set(ZIO_TYPE_WRITE, cfg); + kmem_free(cfg, strlen(val)+1); + return (-err); +} +static int +spa_taskq_write_param_get(char *buf, zfs_kernel_param_t *kp) +{ + return (spa_taskq_param_get(ZIO_TYPE_WRITE, buf, TRUE)); +} +#else +/* + * On FreeBSD load-time parameters can be set up before malloc() is available, + * so we have to do all the parsing work on the stack. + */ +#define SPA_TASKQ_PARAM_MAX (128) + +static int +spa_taskq_read_param(ZFS_MODULE_PARAM_ARGS) +{ + char buf[SPA_TASKQ_PARAM_MAX]; + int err; + + (void) spa_taskq_param_get(ZIO_TYPE_READ, buf, FALSE); + err = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (err || req->newptr == NULL) + return (err); + return (spa_taskq_param_set(ZIO_TYPE_READ, buf)); +} + +static int +spa_taskq_write_param(ZFS_MODULE_PARAM_ARGS) +{ + char buf[SPA_TASKQ_PARAM_MAX]; + int err; + + (void) spa_taskq_param_get(ZIO_TYPE_WRITE, buf, FALSE); + err = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (err || req->newptr == NULL) + return (err); + return (spa_taskq_param_set(ZIO_TYPE_WRITE, buf)); +} +#endif +#endif /* _KERNEL */ + /* * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. * Note that a type may have multiple discrete taskqs to avoid lock contention @@ -10210,4 +10479,13 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT, ZMOD_RW, "Whether extra ALLOC blkptrs were added to a livelist entry while it " "was being condensed"); + +#ifdef _KERNEL +ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read, + spa_taskq_read_param_set, spa_taskq_read_param_get, ZMOD_RD, + "Configure IO queues for read IO"); +ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write, + spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RD, + "Configure IO queues for write IO"); +#endif /* END CSTYLED */ diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index 72b690162d6..24f038ad7f4 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -1822,7 +1822,8 @@ spa_get_slop_space(spa_t *spa) * deduplicated data, so since it's not useful to reserve more * space with more deduplicated data, we subtract that out here. */ - space = spa_get_dspace(spa) - spa->spa_dedup_dspace; + space = + spa_get_dspace(spa) - spa->spa_dedup_dspace - brt_get_dspace(spa); slop = MIN(space >> spa_slop_shift, spa_max_slop); /* diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index afb01c0ef7f..e1ca1aecc90 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -2484,23 +2484,37 @@ vdev_validate(vdev_t *vd) return (0); } +static void +vdev_update_path(const char *prefix, char *svd, char **dvd, uint64_t guid) +{ + if (svd != NULL && *dvd != NULL) { + if (strcmp(svd, *dvd) != 0) { + zfs_dbgmsg("vdev_copy_path: vdev %llu: %s changed " + "from '%s' to '%s'", (u_longlong_t)guid, prefix, + *dvd, svd); + spa_strfree(*dvd); + *dvd = spa_strdup(svd); + } + } else if (svd != NULL) { + *dvd = spa_strdup(svd); + zfs_dbgmsg("vdev_copy_path: vdev %llu: path set to '%s'", + (u_longlong_t)guid, *dvd); + } +} + static void vdev_copy_path_impl(vdev_t *svd, vdev_t *dvd) { char *old, *new; - if (svd->vdev_path != NULL && dvd->vdev_path != NULL) { - if (strcmp(svd->vdev_path, dvd->vdev_path) != 0) { - zfs_dbgmsg("vdev_copy_path: vdev %llu: path changed " - "from '%s' to '%s'", (u_longlong_t)dvd->vdev_guid, - dvd->vdev_path, svd->vdev_path); - spa_strfree(dvd->vdev_path); - dvd->vdev_path = spa_strdup(svd->vdev_path); - } - } else if (svd->vdev_path != NULL) { - dvd->vdev_path = spa_strdup(svd->vdev_path); - zfs_dbgmsg("vdev_copy_path: vdev %llu: path set to '%s'", - (u_longlong_t)dvd->vdev_guid, dvd->vdev_path); - } + + vdev_update_path("vdev_path", svd->vdev_path, &dvd->vdev_path, + dvd->vdev_guid); + + vdev_update_path("vdev_devid", svd->vdev_devid, &dvd->vdev_devid, + dvd->vdev_guid); + + vdev_update_path("vdev_physpath", svd->vdev_physpath, + &dvd->vdev_physpath, dvd->vdev_guid); /* * Our enclosure sysfs path may have changed between imports diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c index a2e5524a839..737d8b33e18 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_label.c +++ b/sys/contrib/openzfs/module/zfs/vdev_label.c @@ -1023,6 +1023,10 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) int error; uint64_t spare_guid = 0, l2cache_guid = 0; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + boolean_t reason_spare = (reason == VDEV_LABEL_SPARE || (reason == + VDEV_LABEL_REMOVE && vd->vdev_isspare)); + boolean_t reason_l2cache = (reason == VDEV_LABEL_L2CACHE || (reason == + VDEV_LABEL_REMOVE && vd->vdev_isl2cache)); ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); @@ -1108,34 +1112,20 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) * really part of an active pool just yet. The labels will * be written again with a meaningful txg by spa_sync(). */ - if (reason == VDEV_LABEL_SPARE || - (reason == VDEV_LABEL_REMOVE && vd->vdev_isspare)) { + if (reason_spare || reason_l2cache) { /* - * For inactive hot spares, we generate a special label that - * identifies as a mutually shared hot spare. We write the - * label if we are adding a hot spare, or if we are removing an - * active hot spare (in which case we want to revert the - * labels). + * For inactive hot spares and level 2 ARC devices, we generate + * a special label that identifies as a mutually shared hot + * spare or l2cache device. We write the label in case of + * addition or removal of hot spare or l2cache vdev (in which + * case we want to revert the labels). */ VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, - POOL_STATE_SPARE) == 0); - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, - vd->vdev_guid) == 0); - } else if (reason == VDEV_LABEL_L2CACHE || - (reason == VDEV_LABEL_REMOVE && vd->vdev_isl2cache)) { - /* - * For level 2 ARC devices, add a special label. - */ - VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, - spa_version(spa)) == 0); - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, - POOL_STATE_L2CACHE) == 0); + reason_spare ? POOL_STATE_SPARE : POOL_STATE_L2CACHE) == 0); VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); @@ -1146,8 +1136,34 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) * spa->spa_l2cache->sav_config (populated in * spa_ld_open_aux_vdevs()). */ - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT, - vd->vdev_ashift) == 0); + if (reason_l2cache) { + VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_ASHIFT, + vd->vdev_ashift) == 0); + } + + /* + * Add path information to help find it during pool import + */ + if (vd->vdev_path != NULL) { + VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PATH, + vd->vdev_path) == 0); + } + if (vd->vdev_devid != NULL) { + VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_DEVID, + vd->vdev_devid) == 0); + } + if (vd->vdev_physpath != NULL) { + VERIFY(nvlist_add_string(label, ZPOOL_CONFIG_PHYS_PATH, + vd->vdev_physpath) == 0); + } + + /* + * When spare or l2cache (aux) vdev is added during pool + * creation, spa->spa_uberblock is not written until this + * point. Write it on next config sync. + */ + if (uberblock_verify(&spa->spa_uberblock)) + spa->spa_aux_sync_uber = B_TRUE; } else { uint64_t txg = 0ULL; @@ -1749,6 +1765,16 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) for (int v = 0; v < svdcount; v++) vdev_uberblock_sync(zio, &good_writes, ub, svd[v], flags); + if (spa->spa_aux_sync_uber) { + for (int v = 0; v < spa->spa_spares.sav_count; v++) { + vdev_uberblock_sync(zio, &good_writes, ub, + spa->spa_spares.sav_vdevs[v], flags); + } + for (int v = 0; v < spa->spa_l2cache.sav_count; v++) { + vdev_uberblock_sync(zio, &good_writes, ub, + spa->spa_l2cache.sav_vdevs[v], flags); + } + } (void) zio_wait(zio); /* @@ -1763,6 +1789,19 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) zio_flush(zio, svd[v]); } } + if (spa->spa_aux_sync_uber) { + spa->spa_aux_sync_uber = B_FALSE; + for (int v = 0; v < spa->spa_spares.sav_count; v++) { + if (vdev_writeable(spa->spa_spares.sav_vdevs[v])) { + zio_flush(zio, spa->spa_spares.sav_vdevs[v]); + } + } + for (int v = 0; v < spa->spa_l2cache.sav_count; v++) { + if (vdev_writeable(spa->spa_l2cache.sav_vdevs[v])) { + zio_flush(zio, spa->spa_l2cache.sav_vdevs[v]); + } + } + } (void) zio_wait(zio); diff --git a/sys/contrib/openzfs/module/zfs/vdev_trim.c b/sys/contrib/openzfs/module/zfs/vdev_trim.c index 03e17db024e..d96b75e5edf 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_trim.c +++ b/sys/contrib/openzfs/module/zfs/vdev_trim.c @@ -194,7 +194,8 @@ vdev_autotrim_wait_kick(vdev_t *vd, int num_of_kick) for (int i = 0; i < num_of_kick; i++) { if (vd->vdev_autotrim_exit_wanted) break; - cv_wait(&vd->vdev_autotrim_kick_cv, &vd->vdev_autotrim_lock); + cv_wait_idle(&vd->vdev_autotrim_kick_cv, + &vd->vdev_autotrim_lock); } boolean_t exit_wanted = vd->vdev_autotrim_exit_wanted; mutex_exit(&vd->vdev_autotrim_lock); diff --git a/sys/contrib/openzfs/module/zfs/zfs_replay.c b/sys/contrib/openzfs/module/zfs/zfs_replay.c index 09c7be853bf..2e0af60f6db 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_replay.c +++ b/sys/contrib/openzfs/module/zfs/zfs_replay.c @@ -309,6 +309,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap) uint64_t dnodesize; int error; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lracl)); + txtype = (lr->lr_common.lrc_txtype & ~TX_CI); if (byteswap) { byteswap_uint64_array(lracl, sizeof (*lracl)); @@ -470,6 +472,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) uint64_t dnodesize; int error; + ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); + txtype = (lr->lr_common.lrc_txtype & ~TX_CI); if (byteswap) { byteswap_uint64_array(lr, sizeof (*lr)); @@ -613,6 +617,8 @@ zfs_replay_remove(void *arg1, void *arg2, boolean_t byteswap) int error; int vflg = 0; + ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); + if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -648,6 +654,8 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap) int error; int vflg = 0; + ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); + if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -715,12 +723,14 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) { zfsvfs_t *zfsvfs = arg1; lr_rename_t *lr = arg2; - char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ - char *tname = sname + strlen(sname) + 1; + + ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); + char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ + char *tname = sname + strlen(sname) + 1; return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL)); } @@ -730,12 +740,14 @@ zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap) #ifdef __linux__ zfsvfs_t *zfsvfs = arg1; lr_rename_t *lr = arg2; - char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ - char *tname = sname + strlen(sname) + 1; + + ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr)); if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); + char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ + char *tname = sname + strlen(sname) + 1; return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE, NULL)); #else @@ -750,14 +762,13 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) zfsvfs_t *zfsvfs = arg1; lr_rename_whiteout_t *lr = arg2; int error; - /* sname and tname follow lr_rename_whiteout_t */ - char *sname = (char *)(lr + 1); - char *tname = sname + strlen(sname) + 1; /* For the whiteout file. */ xvattr_t xva; uint64_t objid; uint64_t dnodesize; + ASSERT3U(lr->lr_rename.lr_common.lrc_reclen, >, sizeof (*lr)); + if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -783,6 +794,9 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) if (error) return (error); + /* sname and tname follow lr_rename_whiteout_t */ + char *sname = (char *)(lr + 1); + char *tname = sname + strlen(sname) + 1; return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname, RENAME_WHITEOUT, &xva.xva_vattr)); #else @@ -800,6 +814,8 @@ zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap) int error; uint64_t eod, offset, length; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -863,6 +879,8 @@ zfs_replay_write2(void *arg1, void *arg2, boolean_t byteswap) int error; uint64_t end; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -910,6 +928,8 @@ zfs_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) flock64_t fl = {0}; int error; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -940,6 +960,8 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) int error; void *start; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + xva_init(&xva); if (byteswap) { byteswap_uint64_array(lr, sizeof (*lr)); @@ -1002,6 +1024,9 @@ zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap) size_t size; int error = 0; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr) + lr->lr_size); + ASSERT(spa_feature_is_active(zfsvfs->z_os->os_spa, SPA_FEATURE_ZILSAXATTR)); if (byteswap) @@ -1079,6 +1104,10 @@ zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap) znode_t *zp; int error; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr) + + sizeof (ace_t) * lr->lr_aclcnt); + if (byteswap) { byteswap_uint64_array(lr, sizeof (*lr)); zfs_oldace_byteswap(ace, lr->lr_aclcnt); @@ -1124,6 +1153,9 @@ zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap) znode_t *zp; int error; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr) + lr->lr_acl_bytes); + if (byteswap) { byteswap_uint64_array(lr, sizeof (*lr)); zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE); @@ -1171,6 +1203,10 @@ zfs_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap) znode_t *zp; int error; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + ASSERT3U(lr->lr_common.lrc_reclen, >=, offsetof(lr_clone_range_t, + lr_bps[lr->lr_nbps])); + if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c index 3a5fa75df2e..2b37834d5c5 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c +++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,26 @@ #include #include +/* + * Enable the experimental block cloning feature. If this setting is 0, then + * even if feature@block_cloning is enabled, attempts to clone blocks will act + * as though the feature is disabled. + */ +int zfs_bclone_enabled = 0; + +/* + * When set zfs_clone_range() waits for dirty data to be written to disk. + * This allows the clone operation to reliably succeed when a file is modified + * and then immediately cloned. For small files this may be slower than making + * a copy of the file and is therefore not the default. However, in certain + * scenarios this behavior may be desirable so a tunable is provided. + */ +static int zfs_bclone_wait_dirty = 0; + +/* + * Maximum bytes to read per chunk in zfs_read(). + */ +static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; static ulong_t zfs_fsync_sync_cnt = 4; @@ -188,8 +209,6 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr) return (error); } -static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */ - /* * Read bytes from specified file into supplied buffer. * @@ -800,11 +819,11 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr) zfsvfs_t *zfsvfs = ZTOZSB(zp); int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - zilog_t *zilog = zfsvfs->z_log; + zilog_t *zilog; if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); - + zilog = zfsvfs->z_log; error = zfs_setacl(zp, vsecp, skipaclchk, cr); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) @@ -1054,6 +1073,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, size_t maxblocks, nbps; uint_t inblksz; uint64_t clear_setid_bits_txg = 0; + uint64_t last_synced_txg = 0; inoff = *inoffp; outoff = *outoffp; @@ -1103,6 +1123,16 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, return (SET_ERROR(EXDEV)); } + /* + * Cloning across encrypted datasets is possible only if they + * share the same master key. + */ + if (inos != outos && inos->os_encrypted && + !dmu_objset_crypto_key_equal(inos, outos)) { + zfs_exit_two(inzfsvfs, outzfsvfs, FTAG); + return (SET_ERROR(EXDEV)); + } + error = zfs_verify_zp(inzp); if (error == 0) error = zfs_verify_zp(outzp); @@ -1181,11 +1211,18 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, inblksz = inzp->z_blksz; /* - * We cannot clone into files with different block size if we can't - * grow it (block size is already bigger or more than one block). + * We cannot clone into a file with different block size if we can't + * grow it (block size is already bigger, has more than one block, or + * not locked for growth). There are other possible reasons for the + * grow to fail, but we cover what we can before opening transaction + * and the rest detect after we try to do it. */ + if (inblksz < outzp->z_blksz) { + error = SET_ERROR(EINVAL); + goto unlock; + } if (inblksz != outzp->z_blksz && (outzp->z_size > outzp->z_blksz || - outzp->z_size > inblksz)) { + outlr->lr_length != UINT64_MAX)) { error = SET_ERROR(EINVAL); goto unlock; } @@ -1275,30 +1312,24 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, } nbps = maxblocks; + last_synced_txg = spa_last_synced_txg(dmu_objset_spa(inos)); error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps, &nbps); if (error != 0) { /* * If we are trying to clone a block that was created - * in the current transaction group, error will be - * EAGAIN here, which we can just return to the caller - * so it can fallback if it likes. + * in the current transaction group, the error will be + * EAGAIN here. Based on zfs_bclone_wait_dirty either + * return a shortened range to the caller so it can + * fallback, or wait for the next TXG and check again. */ - break; - } - /* - * Encrypted data is fine as long as it comes from the same - * dataset. - * TODO: We want to extend it in the future to allow cloning to - * datasets with the same keys, like clones or to be able to - * clone a file from a snapshot of an encrypted dataset into the - * dataset itself. - */ - if (BP_IS_PROTECTED(&bps[0])) { - if (inzfsvfs != outzfsvfs) { - error = SET_ERROR(EXDEV); - break; + if (error == EAGAIN && zfs_bclone_wait_dirty) { + txg_wait_synced(dmu_objset_pool(inos), + last_synced_txg + 1); + continue; } + + break; } /* @@ -1318,12 +1349,24 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, } /* - * Copy source znode's block size. This only happens on the - * first iteration since zfs_rangelock_reduce() will shrink down - * lr_len to the appropriate size. + * Copy source znode's block size. This is done only if the + * whole znode is locked (see zfs_rangelock_cb()) and only + * on the first iteration since zfs_rangelock_reduce() will + * shrink down lr_length to the appropriate size. */ if (outlr->lr_length == UINT64_MAX) { zfs_grow_blocksize(outzp, inblksz, tx); + + /* + * Block growth may fail for many reasons we can not + * predict here. If it happen the cloning is doomed. + */ + if (inblksz != outzp->z_blksz) { + error = SET_ERROR(EINVAL); + dmu_tx_abort(tx); + break; + } + /* * Round range lock up to the block boundary, so we * prevent appends until we are done. @@ -1339,6 +1382,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, break; } + if (zn_has_cached_data(outzp, outoff, outoff + size - 1)) { + update_pages(outzp, outoff, size, outos); + } + zfs_clear_setid_bits_if_necessary(outzfsvfs, outzp, cr, &clear_setid_bits_txg, tx); @@ -1503,3 +1550,9 @@ EXPORT_SYMBOL(zfs_clone_range_replay); ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW, "Bytes to read per chunk"); + +ZFS_MODULE_PARAM(zfs, zfs_, bclone_enabled, INT, ZMOD_RW, + "Enable block cloning"); + +ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW, + "Wait for dirty blocks when cloning"); diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c index a1188613699..5642f082bdb 100644 --- a/sys/contrib/openzfs/module/zfs/zil.c +++ b/sys/contrib/openzfs/module/zfs/zil.c @@ -522,6 +522,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, lr_t *lr = (lr_t *)lrp; reclen = lr->lrc_reclen; ASSERT3U(reclen, >=, sizeof (lr_t)); + ASSERT3U(reclen, <=, end - lrp); if (lr->lrc_seq > claim_lr_seq) { arc_buf_destroy(abuf, &abuf); goto done; @@ -604,7 +605,7 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg) lr_write_t *lr = (lr_write_t *)lrc; int error; - ASSERT(lrc->lrc_txtype == TX_WRITE); + ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr)); /* * If the block is not readable, don't claim it. This can happen @@ -624,14 +625,17 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg) } static int -zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx) +zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx, + uint64_t first_txg) { const lr_clone_range_t *lr = (const lr_clone_range_t *)lrc; const blkptr_t *bp; - spa_t *spa; + spa_t *spa = zilog->zl_spa; uint_t ii; - ASSERT(lrc->lrc_txtype == TX_CLONE_RANGE); + ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr)); + ASSERT3U(lrc->lrc_reclen, >=, offsetof(lr_clone_range_t, + lr_bps[lr->lr_nbps])); if (tx == NULL) { return (0); @@ -641,19 +645,36 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx) * XXX: Do we need to byteswap lr? */ - spa = zilog->zl_spa; - for (ii = 0; ii < lr->lr_nbps; ii++) { bp = &lr->lr_bps[ii]; /* - * When data in embedded into BP there is no need to create - * BRT entry as there is no data block. Just copy the BP as - * it contains the data. + * When data is embedded into the BP there is no need to create + * BRT entry as there is no data block. Just copy the BP as it + * contains the data. */ - if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) { + if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) + continue; + + /* + * We can not handle block pointers from the future, since they + * are not yet allocated. It should not normally happen, but + * just in case lets be safe and just stop here now instead of + * corrupting the pool. + */ + if (BP_PHYSICAL_BIRTH(bp) >= first_txg) + return (SET_ERROR(ENOENT)); + + /* + * Assert the block is really allocated before we reference it. + */ + metaslab_check_free(spa, bp); + } + + for (ii = 0; ii < lr->lr_nbps; ii++) { + bp = &lr->lr_bps[ii]; + if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) brt_pending_add(spa, bp, tx); - } } return (0); @@ -668,7 +689,7 @@ zil_claim_log_record(zilog_t *zilog, const lr_t *lrc, void *tx, case TX_WRITE: return (zil_claim_write(zilog, lrc, tx, first_txg)); case TX_CLONE_RANGE: - return (zil_claim_clone_range(zilog, lrc, tx)); + return (zil_claim_clone_range(zilog, lrc, tx, first_txg)); default: return (0); } @@ -691,7 +712,7 @@ zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg) lr_write_t *lr = (lr_write_t *)lrc; blkptr_t *bp = &lr->lr_blkptr; - ASSERT(lrc->lrc_txtype == TX_WRITE); + ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr)); /* * If we previously claimed it, we need to free it. @@ -712,7 +733,9 @@ zil_free_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx) spa_t *spa; uint_t ii; - ASSERT(lrc->lrc_txtype == TX_CLONE_RANGE); + ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr)); + ASSERT3U(lrc->lrc_reclen, >=, offsetof(lr_clone_range_t, + lr_bps[lr->lr_nbps])); if (tx == NULL) { return (0); @@ -1710,8 +1733,6 @@ static const struct { { 8192 + 4096, 8192 + 4096 }, /* database */ { 32768 + 4096, 32768 + 4096 }, /* NFS writes */ { 65536 + 4096, 65536 + 4096 }, /* 64KB writes */ - { 131072, 131072 }, /* < 128KB writes */ - { 131072 +4096, 65536 + 4096 }, /* 128KB writes */ { UINT64_MAX, SPA_OLD_MAXBLOCKSIZE}, /* > 128KB writes */ }; @@ -1794,6 +1815,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) itx = list_next(&lwb->lwb_itxs, itx)) zil_lwb_commit(zilog, lwb, itx); lwb->lwb_nused = lwb->lwb_nfilled; + ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax); lwb->lwb_root_zio = zio_root(spa, zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL); @@ -2023,13 +2045,16 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs) return (lwb); } + reclen = lr->lrc_reclen; if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) { + ASSERT3U(reclen, ==, sizeof (lr_write_t)); dlen = P2ROUNDUP_TYPED( lrw->lr_length, sizeof (uint64_t), uint64_t); } else { + ASSERT3U(reclen, >=, sizeof (lr_t)); dlen = 0; } - reclen = lr->lrc_reclen; + ASSERT3U(reclen, <=, zil_max_log_data(zilog, 0)); zilog->zl_cur_used += (reclen + dlen); cont: @@ -2048,19 +2073,19 @@ cont: if (lwb == NULL) return (NULL); lwb_sp = lwb->lwb_nmax - lwb->lwb_nused; - - /* - * There must be enough space in the new, empty log block to - * hold reclen. For WR_COPIED, we need to fit the whole - * record in one block, and reclen is the header size + the - * data size. For WR_NEED_COPY, we can create multiple - * records, splitting the data into multiple blocks, so we - * only need to fit one word of data per block; in this case - * reclen is just the header size (no data). - */ - ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp); } + /* + * There must be enough space in the log block to hold reclen. + * For WR_COPIED, we need to fit the whole record in one block, + * and reclen is the write record header size + the data size. + * For WR_NEED_COPY, we can create multiple records, splitting + * the data into multiple blocks, so we only need to fit one + * word of data per block; in this case reclen is just the header + * size (no data). + */ + ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp); + dnow = MIN(dlen, lwb_sp - reclen); if (dlen > dnow) { ASSERT3U(lr->lrc_txtype, ==, TX_WRITE); @@ -2236,7 +2261,9 @@ zil_itx_create(uint64_t txtype, size_t olrsize) size_t itxsize, lrsize; itx_t *itx; + ASSERT3U(olrsize, >=, sizeof (lr_t)); lrsize = P2ROUNDUP_TYPED(olrsize, sizeof (uint64_t), size_t); + ASSERT3U(lrsize, >=, olrsize); itxsize = offsetof(itx_t, itx_lr) + lrsize; itx = zio_data_buf_alloc(itxsize); @@ -2255,6 +2282,10 @@ zil_itx_create(uint64_t txtype, size_t olrsize) static itx_t * zil_itx_clone(itx_t *oitx) { + ASSERT3U(oitx->itx_size, >=, sizeof (itx_t)); + ASSERT3U(oitx->itx_size, ==, + offsetof(itx_t, itx_lr) + oitx->itx_lr.lrc_reclen); + itx_t *itx = zio_data_buf_alloc(oitx->itx_size); memcpy(itx, oitx, oitx->itx_size); itx->itx_callback = NULL; @@ -2265,6 +2296,9 @@ zil_itx_clone(itx_t *oitx) void zil_itx_destroy(itx_t *itx) { + ASSERT3U(itx->itx_size, >=, sizeof (itx_t)); + ASSERT3U(itx->itx_lr.lrc_reclen, ==, + itx->itx_size - offsetof(itx_t, itx_lr)); IMPLY(itx->itx_lr.lrc_txtype == TX_COMMIT, itx->itx_callback == NULL); IMPLY(itx->itx_callback != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT); @@ -2348,7 +2382,7 @@ void zil_remove_async(zilog_t *zilog, uint64_t oid) { uint64_t otxg, txg; - itx_async_node_t *ian; + itx_async_node_t *ian, ian_search; avl_tree_t *t; avl_index_t where; list_t clean_list; @@ -2375,7 +2409,8 @@ zil_remove_async(zilog_t *zilog, uint64_t oid) * Locate the object node and append its list. */ t = &itxg->itxg_itxs->i_async_tree; - ian = avl_find(t, &oid, &where); + ian_search.ia_foid = oid; + ian = avl_find(t, &ian_search, &where); if (ian != NULL) list_move_tail(&clean_list, &ian->ia_list); mutex_exit(&itxg->itxg_lock); @@ -2573,7 +2608,7 @@ void zil_async_to_sync(zilog_t *zilog, uint64_t foid) { uint64_t otxg, txg; - itx_async_node_t *ian; + itx_async_node_t *ian, ian_search; avl_tree_t *t; avl_index_t where; @@ -2603,7 +2638,8 @@ zil_async_to_sync(zilog_t *zilog, uint64_t foid) */ t = &itxg->itxg_itxs->i_async_tree; if (foid != 0) { - ian = avl_find(t, &foid, &where); + ian_search.ia_foid = foid; + ian = avl_find(t, &ian_search, &where); if (ian != NULL) { list_move_tail(&itxg->itxg_itxs->i_sync_list, &ian->ia_list); diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 3b3b40fa73d..d8eb075eef5 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -306,6 +306,53 @@ zio_fini(void) * ========================================================================== */ +#ifdef ZFS_DEBUG +static const ulong_t zio_buf_canary = (ulong_t)0xdeadc0dedead210b; +#endif + +/* + * Use empty space after the buffer to detect overflows. + * + * Since zio_init() creates kmem caches only for certain set of buffer sizes, + * allocations of different sizes may have some unused space after the data. + * Filling part of that space with a known pattern on allocation and checking + * it on free should allow us to detect some buffer overflows. + */ +static void +zio_buf_put_canary(ulong_t *p, size_t size, kmem_cache_t **cache, size_t c) +{ +#ifdef ZFS_DEBUG + size_t off = P2ROUNDUP(size, sizeof (ulong_t)); + ulong_t *canary = p + off / sizeof (ulong_t); + size_t asize = (c + 1) << SPA_MINBLOCKSHIFT; + if (c + 1 < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT && + cache[c] == cache[c + 1]) + asize = (c + 2) << SPA_MINBLOCKSHIFT; + for (; off < asize; canary++, off += sizeof (ulong_t)) + *canary = zio_buf_canary; +#endif +} + +static void +zio_buf_check_canary(ulong_t *p, size_t size, kmem_cache_t **cache, size_t c) +{ +#ifdef ZFS_DEBUG + size_t off = P2ROUNDUP(size, sizeof (ulong_t)); + ulong_t *canary = p + off / sizeof (ulong_t); + size_t asize = (c + 1) << SPA_MINBLOCKSHIFT; + if (c + 1 < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT && + cache[c] == cache[c + 1]) + asize = (c + 2) << SPA_MINBLOCKSHIFT; + for (; off < asize; canary++, off += sizeof (ulong_t)) { + if (unlikely(*canary != zio_buf_canary)) { + PANIC("ZIO buffer overflow %p (%zu) + %zu %#lx != %#lx", + p, size, (canary - p) * sizeof (ulong_t), + *canary, zio_buf_canary); + } + } +#endif +} + /* * Use zio_buf_alloc to allocate ZFS metadata. This data will appear in a * crashdump if the kernel panics, so use it judiciously. Obviously, it's @@ -322,7 +369,9 @@ zio_buf_alloc(size_t size) atomic_add_64(&zio_buf_cache_allocs[c], 1); #endif - return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE)); + void *p = kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE); + zio_buf_put_canary(p, size, zio_buf_cache, c); + return (p); } /* @@ -338,7 +387,9 @@ zio_data_buf_alloc(size_t size) VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); - return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE)); + void *p = kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE); + zio_buf_put_canary(p, size, zio_data_buf_cache, c); + return (p); } void @@ -351,6 +402,7 @@ zio_buf_free(void *buf, size_t size) atomic_add_64(&zio_buf_cache_frees[c], 1); #endif + zio_buf_check_canary(buf, size, zio_buf_cache, c); kmem_cache_free(zio_buf_cache[c], buf); } @@ -361,6 +413,7 @@ zio_data_buf_free(void *buf, size_t size) VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + zio_buf_check_canary(buf, size, zio_data_buf_cache, c); kmem_cache_free(zio_data_buf_cache[c], buf); } diff --git a/sys/contrib/openzfs/module/zfs/zio_checksum.c b/sys/contrib/openzfs/module/zfs/zio_checksum.c index 9de515e8767..e511b31fee6 100644 --- a/sys/contrib/openzfs/module/zfs/zio_checksum.c +++ b/sys/contrib/openzfs/module/zfs/zio_checksum.c @@ -363,11 +363,14 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, zil_chain_t zilc; abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t)); - size = P2ROUNDUP_TYPED(zilc.zc_nused, ZIL_MIN_BLKSZ, - uint64_t); + uint64_t nused = P2ROUNDUP_TYPED(zilc.zc_nused, + ZIL_MIN_BLKSZ, uint64_t); + ASSERT3U(size, >=, nused); + size = nused; eck = zilc.zc_eck; eck_offset = offsetof(zil_chain_t, zc_eck); } else { + ASSERT3U(size, >=, sizeof (zio_eck_t)); eck_offset = size - sizeof (zio_eck_t); abd_copy_to_buf_off(&eck, abd, eck_offset, sizeof (zio_eck_t)); @@ -448,12 +451,13 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp, return (SET_ERROR(ECKSUM)); } - if (nused > size) { + nused = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); + if (size < nused) return (SET_ERROR(ECKSUM)); - } - - size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); + size = nused; } else { + if (size < sizeof (zio_eck_t)) + return (SET_ERROR(ECKSUM)); eck_offset = size - sizeof (zio_eck_t); abd_copy_to_buf_off(&eck, abd, eck_offset, sizeof (zio_eck_t)); diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c index 53dcb4dee44..20ea71f2337 100644 --- a/sys/contrib/openzfs/module/zfs/zvol.c +++ b/sys/contrib/openzfs/module/zfs/zvol.c @@ -417,6 +417,8 @@ zvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) lr_truncate_t *lr = arg2; uint64_t offset, length; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -453,6 +455,8 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap) dmu_tx_t *tx; int error; + ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr)); + if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -482,60 +486,6 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap) return (error); } -/* - * Replay a TX_CLONE_RANGE ZIL transaction that didn't get committed - * after a system failure. - * - * TODO: For now we drop block cloning transations for ZVOLs as they are - * unsupported, but we still need to inform BRT about that as we - * claimed them during pool import. - * This situation can occur when we try to import a pool from a ZFS - * version supporting block cloning for ZVOLs into a system that - * has this ZFS version, that doesn't support block cloning for ZVOLs. - */ -static int -zvol_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap) -{ - char name[ZFS_MAX_DATASET_NAME_LEN]; - zvol_state_t *zv = arg1; - objset_t *os = zv->zv_objset; - lr_clone_range_t *lr = arg2; - blkptr_t *bp; - dmu_tx_t *tx; - spa_t *spa; - uint_t ii; - int error; - - dmu_objset_name(os, name); - cmn_err(CE_WARN, "ZFS dropping block cloning transaction for %s.", - name); - - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - - tx = dmu_tx_create(os); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - return (error); - } - - spa = os->os_spa; - - for (ii = 0; ii < lr->lr_nbps; ii++) { - bp = &lr->lr_bps[ii]; - - if (!BP_IS_HOLE(bp)) { - zio_free(spa, dmu_tx_get_txg(tx), bp); - } - } - - (void) zil_replaying(zv->zv_zilog, tx); - dmu_tx_commit(tx); - - return (0); -} - static int zvol_replay_err(void *arg1, void *arg2, boolean_t byteswap) { @@ -570,7 +520,7 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* TX_SETSAXATTR */ zvol_replay_err, /* TX_RENAME_EXCHANGE */ zvol_replay_err, /* TX_RENAME_WHITEOUT */ - zvol_replay_clone_range /* TX_CLONE_RANGE */ + zvol_replay_err, /* TX_CLONE_RANGE */ }; /* diff --git a/sys/contrib/openzfs/rpm/generic/zfs-kmod.spec.in b/sys/contrib/openzfs/rpm/generic/zfs-kmod.spec.in index 3c73e2ff2d6..4cc075585d4 100644 --- a/sys/contrib/openzfs/rpm/generic/zfs-kmod.spec.in +++ b/sys/contrib/openzfs/rpm/generic/zfs-kmod.spec.in @@ -150,6 +150,30 @@ for kernel_version in %{?kernel_versions}; do done +# Module signing (modsign) +# +# This must be run _after_ find-debuginfo.sh runs, otherwise that will strip +# the signature off of the modules. +# (Based on Fedora's kernel.spec workaround) +%define __modsign_install_post \ + sign_pem="%{ksrc}/certs/signing_key.pem"; \ + sign_x509="%{ksrc}/certs/signing_key.x509"; \ + if [ -f "${sign_x509}" ]\ + then \ + echo "Signing kernel modules ..."; \ + for kmod in $(find ${RPM_BUILD_ROOT}%{kmodinstdir_prefix}/*/extra/ -name \*.ko); do \ + %{ksrc}/scripts/sign-file sha256 ${sign_pem} ${sign_x509} ${kmod}; \ + done \ + fi \ +%{nil} + +# hack to ensure signing happens after find-debuginfo.sh runs +%define __spec_install_post \ + %{?__debug_package:%{__debug_install_post}}\ + %{__arch_install_post}\ + %{__os_install_post}\ + %{__modsign_install_post} + %install rm -rf ${RPM_BUILD_ROOT} diff --git a/sys/contrib/openzfs/rpm/redhat/zfs-kmod.spec.in b/sys/contrib/openzfs/rpm/redhat/zfs-kmod.spec.in index f59551c0b43..9c836786bae 100644 --- a/sys/contrib/openzfs/rpm/redhat/zfs-kmod.spec.in +++ b/sys/contrib/openzfs/rpm/redhat/zfs-kmod.spec.in @@ -72,6 +72,30 @@ fi %{?kernel_llvm} make %{?_smp_mflags} +# Module signing (modsign) +# +# This must be run _after_ find-debuginfo.sh runs, otherwise that will strip +# the signature off of the modules. +# (Based on Fedora's kernel.spec workaround) +%define __modsign_install_post \ + sign_pem="%{ksrc}/certs/signing_key.pem"; \ + sign_x509="%{ksrc}/certs/signing_key.x509"; \ + if [ -f "${sign_x509}" ]\ + then \ + echo "Signing kernel modules ..."; \ + for kmod in $(find %{buildroot}/lib/modules/%{kverrel}/extra/ -name \*.ko); do \ + %{ksrc}/scripts/sign-file sha256 ${sign_pem} ${sign_x509} ${kmod}; \ + done \ + fi \ +%{nil} + +# hack to ensure signing happens after find-debuginfo.sh runs +%define __spec_install_post \ + %{?__debug_package:%{__debug_install_post}}\ + %{__arch_install_post}\ + %{__os_install_post}\ + %{__modsign_install_post} + %install make install \ DESTDIR=${RPM_BUILD_ROOT} \ diff --git a/sys/contrib/openzfs/tests/Makefile.am b/sys/contrib/openzfs/tests/Makefile.am index 2e633041ab5..12e9c9f9daf 100644 --- a/sys/contrib/openzfs/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/Makefile.am @@ -16,6 +16,7 @@ dist_scripts_test_runner_include_DATA = \ scripts_runfilesdir = $(datadir)/$(PACKAGE)/runfiles dist_scripts_runfiles_DATA = \ + %D%/runfiles/bclone.run \ %D%/runfiles/common.run \ %D%/runfiles/freebsd.run \ %D%/runfiles/linux.run \ diff --git a/sys/contrib/openzfs/tests/runfiles/bclone.run b/sys/contrib/openzfs/tests/runfiles/bclone.run new file mode 100644 index 00000000000..3d0f545d922 --- /dev/null +++ b/sys/contrib/openzfs/tests/runfiles/bclone.run @@ -0,0 +1,46 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# This run file contains all of the common functional tests. When +# adding a new test consider also adding it to the sanity.run file +# if the new test runs to completion in only a few seconds. +# +# Approximate run time: 5 hours +# + +[DEFAULT] +pre = setup +quiet = False +pre_user = root +user = root +timeout = 28800 +post_user = root +post = cleanup +failsafe_user = root +failsafe = callbacks/zfs_failsafe +outputdir = /var/tmp/test_results +tags = ['bclone'] + +[tests/functional/bclone] +tests = ['bclone_crossfs_corner_cases', + 'bclone_crossfs_data', + 'bclone_crossfs_embedded', + 'bclone_crossfs_hole', + 'bclone_diffprops_all', + 'bclone_diffprops_checksum', + 'bclone_diffprops_compress', + 'bclone_diffprops_copies', + 'bclone_diffprops_recordsize', + 'bclone_prop_sync', + 'bclone_samefs_corner_cases', + 'bclone_samefs_data', + 'bclone_samefs_embedded', + 'bclone_samefs_hole'] +tags = ['bclone'] diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index ef787c65c0f..7331244515f 100644 --- a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -53,6 +53,37 @@ tags = ['functional', 'arc'] tests = ['atime_001_pos', 'atime_002_neg', 'root_atime_off', 'root_atime_on'] tags = ['functional', 'atime'] +[tests/functional/bclone] +tests = ['bclone_crossfs_corner_cases_limited', + 'bclone_crossfs_data', + 'bclone_crossfs_embedded', + 'bclone_crossfs_hole', + 'bclone_diffprops_all', + 'bclone_diffprops_checksum', + 'bclone_diffprops_compress', + 'bclone_diffprops_copies', + 'bclone_diffprops_recordsize', + 'bclone_prop_sync', + 'bclone_samefs_corner_cases_limited', + 'bclone_samefs_data', + 'bclone_samefs_embedded', + 'bclone_samefs_hole'] +tags = ['functional', 'bclone'] +timeout = 7200 + +[tests/functional/block_cloning] +tests = ['block_cloning_clone_mmap_cached', + 'block_cloning_copyfilerange', + 'block_cloning_copyfilerange_partial', + 'block_cloning_copyfilerange_fallback', + 'block_cloning_disabled_copyfilerange', + 'block_cloning_copyfilerange_cross_dataset', + 'block_cloning_cross_enc_dataset', + 'block_cloning_copyfilerange_fallback_same_txg', + 'block_cloning_replay', 'block_cloning_replay_encrypted', + 'block_cloning_lwb_buffer_overflow', 'block_cloning_clone_mmap_write'] +tags = ['functional', 'block_cloning'] + [tests/functional/bootfs] tests = ['bootfs_001_pos', 'bootfs_002_neg', 'bootfs_003_pos', 'bootfs_004_neg', 'bootfs_005_neg', 'bootfs_006_pos', 'bootfs_007_pos', @@ -132,6 +163,7 @@ tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos', pre = post = tags = ['functional', 'cli_root', 'zdb'] +timeout = 1200 [tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos'] @@ -287,7 +319,8 @@ tags = ['functional', 'cli_root', 'zfs_set'] [tests/functional/cli_root/zfs_share] tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', 'zfs_share_004_pos', 'zfs_share_006_pos', 'zfs_share_008_neg', - 'zfs_share_010_neg', 'zfs_share_011_pos', 'zfs_share_concurrent_shares'] + 'zfs_share_010_neg', 'zfs_share_011_pos', 'zfs_share_concurrent_shares', + 'zfs_share_after_mount'] tags = ['functional', 'cli_root', 'zfs_share'] [tests/functional/cli_root/zfs_snapshot] @@ -502,7 +535,8 @@ tags = ['functional', 'cli_root', 'zpool_split'] tests = ['zpool_status_001_pos', 'zpool_status_002_pos', 'zpool_status_003_pos', 'zpool_status_004_pos', 'zpool_status_005_pos', 'zpool_status_006_pos', - 'zpool_status_007_pos', 'zpool_status_features_001_pos'] + 'zpool_status_007_pos', 'zpool_status_008_pos', + 'zpool_status_features_001_pos'] tags = ['functional', 'cli_root', 'zpool_status'] [tests/functional/cli_root/zpool_sync] @@ -597,7 +631,7 @@ tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', tags = ['functional', 'compression'] [tests/functional/cp_files] -tests = ['cp_files_001_pos'] +tests = ['cp_files_001_pos', 'cp_files_002_pos', 'cp_stress'] tags = ['functional', 'cp_files'] [tests/functional/crtime] diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run index 8bc55a1b4b4..6a4cd3fe691 100644 --- a/sys/contrib/openzfs/tests/runfiles/linux.run +++ b/sys/contrib/openzfs/tests/runfiles/linux.run @@ -35,14 +35,9 @@ tests = ['atime_003_pos', 'root_relatime_on'] tags = ['functional', 'atime'] [tests/functional/block_cloning:Linux] -tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', - 'block_cloning_copyfilerange_fallback', - 'block_cloning_ficlone', 'block_cloning_ficlonerange', - 'block_cloning_ficlonerange_partial', - 'block_cloning_disabled_copyfilerange', 'block_cloning_disabled_ficlone', - 'block_cloning_disabled_ficlonerange', - 'block_cloning_copyfilerange_cross_dataset', - 'block_cloning_copyfilerange_fallback_same_txg'] +tests = ['block_cloning_ficlone', 'block_cloning_ficlonerange', + 'block_cloning_ficlonerange_partial', 'block_cloning_disabled_ficlone', + 'block_cloning_disabled_ficlonerange'] tags = ['functional', 'block_cloning'] [tests/functional/chattr:Linux] diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in index 4608e87522a..ecc50f48715 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in @@ -138,7 +138,11 @@ idmap_reason = 'Idmapped mount needs kernel 5.12+' # copy_file_range() is not supported by all kernels # cfr_reason = 'Kernel copy_file_range support required' -cfr_cross_reason = 'copy_file_range(2) cross-filesystem needs kernel 5.3+' + +if sys.platform.startswith('freebsd'): + cfr_cross_reason = 'copy_file_range(2) cross-filesystem needs FreeBSD 14+' +else: + cfr_cross_reason = 'copy_file_range(2) cross-filesystem needs kernel 5.3+' # # These tests are known to fail, thus we use this list to prevent these @@ -176,6 +180,7 @@ if sys.platform.startswith('freebsd'): 'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason], 'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason], 'cli_root/zfs_unshare/zfs_unshare_008_pos': ['SKIP', na_reason], + 'cp_files/cp_files_002_pos': ['SKIP', na_reason], 'link_count/link_count_001': ['SKIP', na_reason], 'casenorm/mixed_create_failure': ['FAIL', 13215], 'mmap/mmap_sync_001_pos': ['SKIP', na_reason], @@ -263,51 +268,95 @@ if sys.platform.startswith('freebsd'): 'cli_root/zpool_import/zpool_import_012_pos': ['FAIL', known_reason], 'delegate/zfs_allow_003_pos': ['FAIL', known_reason], 'inheritance/inherit_001_pos': ['FAIL', 11829], - 'resilver/resilver_restart_001': ['FAIL', known_reason], 'pool_checkpoint/checkpoint_big_rewind': ['FAIL', 12622], 'pool_checkpoint/checkpoint_indirect': ['FAIL', 12623], + 'resilver/resilver_restart_001': ['FAIL', known_reason], 'snapshot/snapshot_002_pos': ['FAIL', '14831'], + 'bclone/bclone_crossfs_corner_cases': ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_corner_cases_limited': + ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_data': ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_embedded': ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_hole': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_all': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_checksum': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_compress': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_copies': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_recordsize': ['SKIP', cfr_cross_reason], + 'bclone/bclone_prop_sync': ['SKIP', cfr_cross_reason], + 'block_cloning/block_cloning_cross_enc_dataset': + ['SKIP', cfr_cross_reason], + 'block_cloning/block_cloning_copyfilerange_cross_dataset': + ['SKIP', cfr_cross_reason] }) elif sys.platform.startswith('linux'): maybe.update({ + 'bclone/bclone_crossfs_corner_cases': ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_corner_cases_limited': + ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_data': ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_embedded': ['SKIP', cfr_cross_reason], + 'bclone/bclone_crossfs_hole': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_all': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_checksum': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_compress': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_copies': ['SKIP', cfr_cross_reason], + 'bclone/bclone_diffprops_recordsize': ['SKIP', cfr_cross_reason], + 'bclone/bclone_prop_sync': ['SKIP', cfr_cross_reason], + 'bclone/bclone_samefs_corner_cases': ['SKIP', cfr_reason], + 'bclone/bclone_samefs_corner_cases_limited': ['SKIP', cfr_reason], + 'bclone/bclone_samefs_data': ['SKIP', cfr_reason], + 'bclone/bclone_samefs_embedded': ['SKIP', cfr_reason], + 'bclone/bclone_samefs_hole': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_clone_mmap_cached': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_clone_mmap_write': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_copyfilerange': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_copyfilerange_cross_dataset': + ['SKIP', cfr_cross_reason], + 'block_cloning/block_cloning_copyfilerange_fallback': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_copyfilerange_fallback_same_txg': + ['SKIP', cfr_cross_reason], + 'block_cloning/block_cloning_copyfilerange_partial': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_cross_enc_dataset': + ['SKIP', cfr_cross_reason], + 'block_cloning/block_cloning_disabled_copyfilerange': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_lwb_buffer_overflow': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_replay': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_replay_encrypted': + ['SKIP', cfr_reason], 'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason], 'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason], + 'cp_files/cp_files_002_pos': ['SKIP', cfr_reason], 'fault/auto_online_002_pos': ['FAIL', 11889], 'fault/auto_replace_001_pos': ['FAIL', 14851], 'fault/auto_spare_002_pos': ['FAIL', 11889], 'fault/auto_spare_multiple': ['FAIL', 11889], 'fault/auto_spare_shared': ['FAIL', 11889], 'fault/decompress_fault': ['FAIL', 11889], + 'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason], + 'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason], + 'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason], + 'idmap_mount/idmap_mount_004': ['SKIP', idmap_reason], + 'idmap_mount/idmap_mount_005': ['SKIP', idmap_reason], 'io/io_uring': ['SKIP', 'io_uring support required'], 'limits/filesystem_limit': ['SKIP', known_reason], 'limits/snapshot_limit': ['SKIP', known_reason], 'mmp/mmp_active_import': ['FAIL', known_reason], 'mmp/mmp_exported_import': ['FAIL', known_reason], 'mmp/mmp_inactive_import': ['FAIL', known_reason], - 'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621], - 'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason], 'zvol/zvol_misc/zvol_misc_fua': ['SKIP', 14872], + 'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621], 'zvol/zvol_misc/zvol_misc_trim': ['SKIP', 14872], - 'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason], - 'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason], - 'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason], - 'idmap_mount/idmap_mount_004': ['SKIP', idmap_reason], - 'idmap_mount/idmap_mount_005': ['SKIP', idmap_reason], - 'block_cloning/block_cloning_disabled_copyfilerange': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_copyfilerange': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_copyfilerange_partial': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_copyfilerange_fallback': - ['SKIP', cfr_reason], - 'block_cloning/block_cloning_copyfilerange_cross_dataset': - ['SKIP', cfr_cross_reason], - 'block_cloning/block_cloning_copyfilerange_fallback_same_txg': - ['SKIP', cfr_cross_reason], + 'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason], }) - # Not all Github actions runners have scsi_debug module, so we may skip # some tests which use it. if os.environ.get('CI') == 'true': diff --git a/sys/contrib/openzfs/tests/zfs-tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/Makefile.am index f8166352489..3dd1a645272 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/Makefile.am @@ -13,6 +13,9 @@ scripts_zfs_tests_functional_hkdf_PROGRAMS = %D%/tests/functional/hkdf/hkdf_test %C%_tests_functional_hkdf_hkdf_test_LDADD = \ libzpool.la +scripts_zfs_tests_functional_cp_filesdir = $(datadir)/$(PACKAGE)/zfs-tests/tests/functional/cp_files +scripts_zfs_tests_functional_cp_files_PROGRAMS = %D%/tests/functional/cp_files/seekflood + if BUILD_LINUX scripts_zfs_tests_functional_tmpfiledir = $(datadir)/$(PACKAGE)/zfs-tests/tests/functional/tmpfile scripts_zfs_tests_functional_tmpfile_PROGRAMS = \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore index 5f53b687191..0ed0a69eb01 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore @@ -2,6 +2,8 @@ /btree_test /chg_usr_exec /clonefile +/clone_mmap_cached +/clone_mmap_write /devname2devid /dir_rd_update /draid diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am index 9bdb3c20975..23848a82ffb 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am @@ -2,6 +2,9 @@ scripts_zfs_tests_bindir = $(datadir)/$(PACKAGE)/zfs-tests/bin scripts_zfs_tests_bin_PROGRAMS = %D%/chg_usr_exec +scripts_zfs_tests_bin_PROGRAMS += %D%/clonefile +scripts_zfs_tests_bin_PROGRAMS += %D%/clone_mmap_cached +scripts_zfs_tests_bin_PROGRAMS += %D%/clone_mmap_write scripts_zfs_tests_bin_PROGRAMS += %D%/cp_files scripts_zfs_tests_bin_PROGRAMS += %D%/ctime scripts_zfs_tests_bin_PROGRAMS += %D%/dir_rd_update @@ -119,7 +122,6 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/renameat2 scripts_zfs_tests_bin_PROGRAMS += %D%/xattrtest scripts_zfs_tests_bin_PROGRAMS += %D%/zed_fd_spill-zedlet scripts_zfs_tests_bin_PROGRAMS += %D%/idmap_util -scripts_zfs_tests_bin_PROGRAMS += %D%/clonefile %C%_idmap_util_LDADD = libspl.la diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/clone_mmap_cached.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/clone_mmap_cached.c new file mode 100644 index 00000000000..c1cdf796cfb --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/clone_mmap_cached.c @@ -0,0 +1,146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2024 by Pawel Jakub Dawidek + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __FreeBSD__ +#define loff_t off_t +#endif + +ssize_t +copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int) + __attribute__((weak)); + +static void * +mmap_file(int fd, size_t size) +{ + void *p; + + p = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) { + (void) fprintf(stderr, "mmap failed: %s\n", strerror(errno)); + exit(2); + } + + return (p); +} + +static void +usage(const char *progname) +{ + + /* + * -i cache input before copy_file_range(2). + * -o cache input before copy_file_range(2). + */ + (void) fprintf(stderr, "usage: %s [-io] \n", progname); + exit(3); +} + +int +main(int argc, char *argv[]) +{ + int dfd, sfd; + size_t dsize, ssize; + void *dmem, *smem, *ptr; + off_t doff, soff; + struct stat sb; + bool cache_input, cache_output; + const char *progname; + int c; + + progname = argv[0]; + cache_input = cache_output = false; + + while ((c = getopt(argc, argv, "io")) != -1) { + switch (c) { + case 'i': + cache_input = true; + break; + case 'o': + cache_output = true; + break; + default: + usage(progname); + } + } + argc -= optind; + argv += optind; + + if (argc != 2) { + usage(progname); + } + + sfd = open(argv[0], O_RDONLY); + if (fstat(sfd, &sb) == -1) { + (void) fprintf(stderr, "fstat failed: %s\n", strerror(errno)); + exit(2); + } + ssize = sb.st_size; + smem = mmap_file(sfd, ssize); + + dfd = open(argv[1], O_RDWR); + if (fstat(dfd, &sb) == -1) { + (void) fprintf(stderr, "fstat failed: %s\n", strerror(errno)); + exit(2); + } + dsize = sb.st_size; + dmem = mmap_file(dfd, dsize); + + /* + * Hopefully it won't be compiled out. + */ + if (cache_input) { + ptr = malloc(ssize); + assert(ptr != NULL); + memcpy(ptr, smem, ssize); + free(ptr); + } + if (cache_output) { + ptr = malloc(ssize); + assert(ptr != NULL); + memcpy(ptr, dmem, dsize); + free(ptr); + } + + soff = doff = 0; + if (copy_file_range(sfd, &soff, dfd, &doff, ssize, 0) < 0) { + (void) fprintf(stderr, "copy_file_range failed: %s\n", + strerror(errno)); + exit(2); + } + + exit(memcmp(smem, dmem, ssize) == 0 ? 0 : 1); +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/clone_mmap_write.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/clone_mmap_write.c new file mode 100644 index 00000000000..6a5cd8721c5 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/clone_mmap_write.c @@ -0,0 +1,123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * This program clones the file, mmap it, and writes from the map into + * file. This scenario triggers a panic on Linux in dbuf_redirty(), + * which is fixed under PR#15656. On FreeBSD, the same test causes data + * corruption, which is fixed by PR#15665. + * + * It would be good to test for this scenario in ZTS. This program and + * issue was initially produced by @robn. + */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __FreeBSD__ +#define loff_t off_t +#endif + +ssize_t +copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int) + __attribute__((weak)); + +static int +open_file(const char *source) +{ + int fd; + if ((fd = open(source, O_RDWR | O_APPEND)) < 0) { + (void) fprintf(stderr, "Error opening %s\n", source); + exit(1); + } + sync(); + return (fd); +} + +static int +clone_file(int sfd, long long size, const char *dest) +{ + int dfd; + + if ((dfd = open(dest, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) < 0) { + (void) fprintf(stderr, "Error opening %s\n", dest); + exit(1); + } + + if (copy_file_range(sfd, 0, dfd, 0, size, 0) < 0) { + (void) fprintf(stderr, "copy_file_range failed\n"); + exit(1); + } + + return (dfd); +} + +static void * +map_file(int fd, long long size) +{ + void *p = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) { + (void) fprintf(stderr, "mmap failed\n"); + exit(1); + } + + return (p); +} + +static void +map_write(void *p, int fd) +{ + if (pwrite(fd, p, 1024*128, 0) < 0) { + (void) fprintf(stderr, "write failed\n"); + exit(1); + } +} + +int +main(int argc, char **argv) +{ + int sfd, dfd; + void *p; + struct stat sb; + if (argc != 3) { + (void) printf("usage: %s " + "\n", argv[0]); + exit(1); + } + sfd = open_file(argv[1]); + if (fstat(sfd, &sb) == -1) { + (void) fprintf(stderr, "fstat failed\n"); + exit(1); + } + dfd = clone_file(sfd, sb.st_size, argv[2]); + p = map_file(dfd, sb.st_size); + map_write(p, dfd); + return (0); +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/clonefile.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/clonefile.c index 696dc471d8c..bc30bb7798e 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/clonefile.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/clonefile.c @@ -59,6 +59,10 @@ #endif #endif /* __NR_copy_file_range */ +#ifdef __FreeBSD__ +#define loff_t off_t +#endif + ssize_t copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int) __attribute__((weak)); @@ -140,7 +144,7 @@ usage(void) " FICLONERANGE:\n" " clonefile -r \n" " copy_file_range:\n" - " clonefile -f \n" + " clonefile -f [ ]\n" " FIDEDUPERANGE:\n" " clonefile -d \n"); return (1); @@ -158,7 +162,7 @@ main(int argc, char **argv) { cf_mode_t mode = CF_MODE_NONE; - char c; + int c; while ((c = getopt(argc, argv, "crfdq")) != -1) { switch (c) { case 'c': @@ -179,13 +183,29 @@ main(int argc, char **argv) } } - if (mode == CF_MODE_NONE || (argc-optind) < 2 || - (mode != CF_MODE_CLONE && (argc-optind) < 5)) - return (usage()); + switch (mode) { + case CF_MODE_NONE: + return (usage()); + case CF_MODE_CLONE: + if ((argc-optind) != 2) + return (usage()); + break; + case CF_MODE_CLONERANGE: + case CF_MODE_DEDUPERANGE: + if ((argc-optind) != 5) + return (usage()); + break; + case CF_MODE_COPYFILERANGE: + if ((argc-optind) != 2 && (argc-optind) != 5) + return (usage()); + break; + default: + abort(); + } loff_t soff = 0, doff = 0; - size_t len = 0; - if (mode != CF_MODE_CLONE) { + size_t len = SSIZE_MAX; + if ((argc-optind) == 5) { soff = strtoull(argv[optind+2], NULL, 10); if (soff == ULLONG_MAX) { fprintf(stderr, "invalid source offset"); @@ -196,10 +216,15 @@ main(int argc, char **argv) fprintf(stderr, "invalid dest offset"); return (1); } - len = strtoull(argv[optind+4], NULL, 10); - if (len == ULLONG_MAX) { - fprintf(stderr, "invalid length"); - return (1); + if (mode == CF_MODE_COPYFILERANGE && + strcmp(argv[optind+4], "all") == 0) { + len = SSIZE_MAX; + } else { + len = strtoull(argv[optind+4], NULL, 10); + if (len == ULLONG_MAX) { + fprintf(stderr, "invalid length"); + return (1); + } } } @@ -237,13 +262,15 @@ main(int argc, char **argv) abort(); } - off_t spos = lseek(sfd, 0, SEEK_CUR); - off_t slen = lseek(sfd, 0, SEEK_END); - off_t dpos = lseek(dfd, 0, SEEK_CUR); - off_t dlen = lseek(dfd, 0, SEEK_END); + if (!quiet) { + off_t spos = lseek(sfd, 0, SEEK_CUR); + off_t slen = lseek(sfd, 0, SEEK_END); + off_t dpos = lseek(dfd, 0, SEEK_CUR); + off_t dlen = lseek(dfd, 0, SEEK_END); - fprintf(stderr, "file offsets: src=%lu/%lu; dst=%lu/%lu\n", spos, slen, - dpos, dlen); + fprintf(stderr, "file offsets: src=%lu/%lu; dst=%lu/%lu\n", + spos, slen, dpos, dlen); + } close(dfd); close(sfd); @@ -254,7 +281,8 @@ main(int argc, char **argv) int do_clone(int sfd, int dfd) { - fprintf(stderr, "using FICLONE\n"); + if (!quiet) + fprintf(stderr, "using FICLONE\n"); int err = ioctl(dfd, CF_FICLONE, sfd); if (err < 0) { fprintf(stderr, "ioctl(FICLONE): %s\n", strerror(errno)); @@ -266,7 +294,8 @@ do_clone(int sfd, int dfd) int do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) { - fprintf(stderr, "using FICLONERANGE\n"); + if (!quiet) + fprintf(stderr, "using FICLONERANGE\n"); cf_file_clone_range_t fcr = { .src_fd = sfd, .src_offset = soff, @@ -284,12 +313,22 @@ do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) int do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) { - fprintf(stderr, "using copy_file_range\n"); + if (!quiet) + fprintf(stderr, "using copy_file_range\n"); ssize_t copied = cf_copy_file_range(sfd, &soff, dfd, &doff, len, 0); if (copied < 0) { fprintf(stderr, "copy_file_range: %s\n", strerror(errno)); return (1); } + if (len == SSIZE_MAX) { + struct stat sb; + + if (fstat(sfd, &sb) < 0) { + fprintf(stderr, "fstat(sfd): %s\n", strerror(errno)); + return (1); + } + len = sb.st_size; + } if (copied != len) { fprintf(stderr, "copy_file_range: copied less than requested: " "requested=%lu; copied=%lu\n", len, copied); @@ -301,7 +340,8 @@ do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) int do_deduperange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len) { - fprintf(stderr, "using FIDEDUPERANGE\n"); + if (!quiet) + fprintf(stderr, "using FIDEDUPERANGE\n"); char buf[sizeof (cf_file_dedupe_range_t)+ sizeof (cf_file_dedupe_range_info_t)] = {0}; diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/ctime.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/ctime.c index 0f5d81aea61..5ff1cea8a86 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/ctime.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/ctime.c @@ -362,12 +362,20 @@ main(void) return (1); } - if (t1 == t2) { - (void) fprintf(stderr, "%s: t1(%ld) == t2(%ld)\n", + + /* + * Ideally, time change would be exactly two seconds, but allow + * a little slack in case of scheduling delays or similar. + */ + long delta = (long)t2 - (long)t1; + if (delta < 2 || delta > 4) { + (void) fprintf(stderr, + "%s: BAD time change: t1(%ld), t2(%ld)\n", timetest_table[i].name, (long)t1, (long)t2); return (1); } else { - (void) fprintf(stderr, "%s: t1(%ld) != t2(%ld)\n", + (void) fprintf(stderr, + "%s: good time change: t1(%ld), t2(%ld)\n", timetest_table[i].name, (long)t1, (long)t2); } } diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg index 648f2203dfb..daa79455168 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg @@ -98,7 +98,8 @@ export SYSTEM_FILES_COMMON='awk uname uniq vmstat - wc' + wc + xargs' export SYSTEM_FILES_FREEBSD='chflags compress @@ -184,6 +185,8 @@ export ZFSTEST_FILES='badsend btree_test chg_usr_exec clonefile + clone_mmap_cached + clone_mmap_write devname2devid dir_rd_update draid diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib index b4d2b91dd47..dfab48d2cda 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib +++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib @@ -61,13 +61,8 @@ function compare_version_gte [ "$(printf "$1\n$2" | sort -V | tail -n1)" = "$1" ] } -# Linux kernel version comparison function -# -# $1 Linux version ("4.10", "2.6.32") or blank for installed Linux version -# -# Used for comparison: if [ $(linux_version) -ge $(linux_version "2.6.32") ] -# -function linux_version +# Helper function used by linux_version() and freebsd_version() +function kernel_version { typeset ver="$1" @@ -83,6 +78,24 @@ function linux_version echo $((version * 100000 + major * 1000 + minor)) } +# Linux kernel version comparison function +# +# $1 Linux version ("4.10", "2.6.32") or blank for installed Linux version +# +# Used for comparison: if [ $(linux_version) -ge $(linux_version "2.6.32") ] +function linux_version { + kernel_version "$1" +} + +# FreeBSD version comparison function +# +# $1 FreeBSD version ("13.2", "14.0") or blank for installed FreeBSD version +# +# Used for comparison: if [ $(freebsd_version) -ge $(freebsd_version "13.2") ] +function freebsd_version { + kernel_version "$1" +} + # Determine if this is a Linux test system # # Return 0 if platform Linux, 1 if otherwise diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/math.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/math.shlib index da1e77e5fb9..2b5e60180f5 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/math.shlib +++ b/sys/contrib/openzfs/tests/zfs-tests/include/math.shlib @@ -123,10 +123,21 @@ function verify_ne # # # $1 lower bound # $2 upper bound +# [$3 how many] function random_int_between { typeset -i min=$1 typeset -i max=$2 + typeset -i count + typeset -i i - echo $(( (RANDOM % (max - min + 1)) + min )) + if [[ -z "$3" ]]; then + count=1 + else + count=$3 + fi + + for (( i = 0; i < $count; i++ )); do + echo $(( (RANDOM % (max - min + 1)) + min )) + done } diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg index a0edad14d02..718c4cf2d8a 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg @@ -90,7 +90,8 @@ VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev VOL_MODE vol.mode zvol_volmode VOL_RECURSIVE vol.recursive UNSUPPORTED VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq -BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled +BCLONE_ENABLED bclone_enabled zfs_bclone_enabled +BCLONE_WAIT_DIRTY bclone_wait_dirty zfs_bclone_wait_dirty XATTR_COMPAT xattr_compat zfs_xattr_compat ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index 87b50f59ca7..e2824ee065e 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -90,6 +90,9 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/alloc_class/alloc_class.kshlib \ functional/atime/atime.cfg \ functional/atime/atime_common.kshlib \ + functional/bclone/bclone.cfg \ + functional/bclone/bclone_common.kshlib \ + functional/bclone/bclone_corner_cases.kshlib \ functional/block_cloning/block_cloning.kshlib \ functional/cache/cache.cfg \ functional/cache/cache.kshlib \ @@ -438,8 +441,28 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/atime/root_atime_on.ksh \ functional/atime/root_relatime_on.ksh \ functional/atime/setup.ksh \ + functional/bclone/bclone_crossfs_corner_cases.ksh \ + functional/bclone/bclone_crossfs_corner_cases_limited.ksh \ + functional/bclone/bclone_crossfs_data.ksh \ + functional/bclone/bclone_crossfs_embedded.ksh \ + functional/bclone/bclone_crossfs_hole.ksh \ + functional/bclone/bclone_diffprops_all.ksh \ + functional/bclone/bclone_diffprops_checksum.ksh \ + functional/bclone/bclone_diffprops_compress.ksh \ + functional/bclone/bclone_diffprops_copies.ksh \ + functional/bclone/bclone_diffprops_recordsize.ksh \ + functional/bclone/bclone_prop_sync.ksh \ + functional/bclone/bclone_samefs_corner_cases.ksh \ + functional/bclone/bclone_samefs_corner_cases_limited.ksh \ + functional/bclone/bclone_samefs_data.ksh \ + functional/bclone/bclone_samefs_embedded.ksh \ + functional/bclone/bclone_samefs_hole.ksh \ + functional/bclone/cleanup.ksh \ + functional/bclone/setup.ksh \ functional/block_cloning/cleanup.ksh \ functional/block_cloning/setup.ksh \ + functional/block_cloning/block_cloning_clone_mmap_cached.ksh \ + functional/block_cloning/block_cloning_clone_mmap_write.ksh \ functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh \ functional/block_cloning/block_cloning_copyfilerange_fallback.ksh \ functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh \ @@ -451,6 +474,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/block_cloning/block_cloning_ficlone.ksh \ functional/block_cloning/block_cloning_ficlonerange.ksh \ functional/block_cloning/block_cloning_ficlonerange_partial.ksh \ + functional/block_cloning/block_cloning_cross_enc_dataset.ksh \ + functional/block_cloning/block_cloning_replay.ksh \ + functional/block_cloning/block_cloning_replay_encrypted.ksh \ + functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh \ functional/bootfs/bootfs_001_pos.ksh \ functional/bootfs/bootfs_002_neg.ksh \ functional/bootfs/bootfs_003_pos.ksh \ @@ -887,6 +914,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zfs_share/zfs_share_012_pos.ksh \ functional/cli_root/zfs_share/zfs_share_013_pos.ksh \ functional/cli_root/zfs_share/zfs_share_concurrent_shares.ksh \ + functional/cli_root/zfs_share/zfs_share_after_mount.ksh \ functional/cli_root/zfs_snapshot/cleanup.ksh \ functional/cli_root/zfs_snapshot/setup.ksh \ functional/cli_root/zfs_snapshot/zfs_snapshot_001_neg.ksh \ @@ -1210,6 +1238,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_status/zpool_status_005_pos.ksh \ functional/cli_root/zpool_status/zpool_status_006_pos.ksh \ functional/cli_root/zpool_status/zpool_status_007_pos.ksh \ + functional/cli_root/zpool_status/zpool_status_008_pos.ksh \ functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \ functional/cli_root/zpool_sync/cleanup.ksh \ functional/cli_root/zpool_sync/setup.ksh \ @@ -1365,6 +1394,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/compression/setup.ksh \ functional/cp_files/cleanup.ksh \ functional/cp_files/cp_files_001_pos.ksh \ + functional/cp_files/cp_files_002_pos.ksh \ + functional/cp_files/cp_stress.ksh \ functional/cp_files/setup.ksh \ functional/crtime/cleanup.ksh \ functional/crtime/crtime_001_pos.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/TODO b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/TODO new file mode 100644 index 00000000000..7cd4ee898fc --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/TODO @@ -0,0 +1,4 @@ +- If dedup enabled, block_cloning uses dedup. +- check when block cloning doesn't suppose to work +- check block cloning between two different pools +- block cloning from a snapshot diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone.cfg new file mode 100644 index 00000000000..f72d17c1bec --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone.cfg @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +# TODO: We should calculate that based on ashift. +export MINBLOCKSIZE=512 + +export TESTSRCFS="$TESTPOOL/$TESTFS/src" +export TESTDSTFS="$TESTPOOL/$TESTFS/dst" +export TESTSRCDIR="$TESTDIR/src" +export TESTDSTDIR="$TESTDIR/dst" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib new file mode 100644 index 00000000000..3b8eaea5bb5 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_common.kshlib @@ -0,0 +1,286 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/tests/functional/bclone/bclone.cfg + +export RECORDSIZE=$(zfs get -Hp -o value recordsize $TESTPOOL/$TESTFS) + +MINBLKSIZE1=512 +MINBLKSIZE2=1024 + +function verify_block_cloning +{ + if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" + fi +} + +function verify_crossfs_block_cloning +{ + if is_linux && [[ $(linux_version) -lt $(linux_version "5.3") ]]; then + log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3" + fi + + # Cross dataset block cloning only supported on FreeBSD 14+ + # https://github.com/freebsd/freebsd-src/commit/969071be938c + if is_freebsd && [ $(freebsd_version) -lt $(freebsd_version 14.0) ] ; then + log_unsupported "Cloning across datasets not supported in $(uname -r)" + fi +} + +# Unused. +function size_to_dsize +{ + typeset -r size=$1 + typeset -r dir=$2 + + typeset -r dataset=$(df $dir | tail -1 | awk '{print $1}') + typeset -r recordsize=$(get_prop recordsize $dataset) + typeset -r copies=$(get_prop copies $dataset) + typeset dsize + + if [[ $size -le $recordsize ]]; then + dsize=$(( ((size - 1) / MINBLOCKSIZE + 1) * MINBLOCKSIZE )) + else + dsize=$(( ((size - 1) / recordsize + 1) * recordsize )) + fi + dsize=$((dsize*copies)) + + echo $dsize +} + +function test_file_integrity +{ + typeset -r original_checksum=$1 + typeset -r clone=$2 + typeset -r filesize=$3 + + typeset -r clone_checksum=$(sha256digest $clone) + + if [[ $original_checksum != $clone_checksum ]]; then + log_fail "Clone $clone is corrupted with file size $filesize" + fi +} + +function verify_pool_prop_eq +{ + typeset -r prop=$1 + typeset -r expected=$2 + + typeset -r value=$(get_pool_prop $prop $TESTPOOL) + if [[ $value != $expected ]]; then + log_fail "Pool property $prop is incorrect: expected $expected, got $value" + fi +} + +function verify_pool_props +{ + typeset -r dsize=$1 + typeset -r ratio=$2 + + if [[ $dsize -eq 0 ]]; then + verify_pool_prop_eq bcloneused 0 + verify_pool_prop_eq bclonesaved 0 + verify_pool_prop_eq bcloneratio 1.00 + else + if [[ $ratio -eq 1 ]]; then + verify_pool_prop_eq bcloneused 0 + else + verify_pool_prop_eq bcloneused $dsize + fi + verify_pool_prop_eq bclonesaved $((dsize*(ratio-1))) + verify_pool_prop_eq bcloneratio "${ratio}.00" + fi +} + +# Function to test file copying and integrity check. +function bclone_test +{ + typeset -r datatype=$1 + typeset filesize=$2 + typeset -r embedded=$3 + typeset -r srcdir=$4 + typeset -r dstdir=$5 + typeset dsize + + typeset -r original="${srcdir}/original" + typeset -r clone="${dstdir}/clone" + + log_note "Testing file copy with datatype $datatype, file size $filesize, embedded $embedded" + + # Create a test file with known content. + case $datatype in + random|text) + sync_pool $TESTPOOL + if [[ $datatype = "random" ]]; then + dd if=/dev/urandom of=$original bs=$filesize count=1 2>/dev/null + else + filesize=$(((filesize/4)*4)) + dd if=/dev/urandom bs=$(((filesize/4)*3)) count=1 | \ + openssl base64 -A > $original + fi + sync_pool $TESTPOOL + clonefile -f $original "${clone}-tmp" + sync_pool $TESTPOOL + # It is hard to predict block sizes that will be used, + # so just do one clone and take it from bcloneused. + filesize=$(zpool get -Hp -o value bcloneused $TESTPOOL) + if [[ $embedded = "false" ]]; then + log_must test $filesize -gt 0 + fi + rm -f "${clone}-tmp" + sync_pool $TESTPOOL + dsize=$filesize + ;; + hole) + log_must truncate_test -s $filesize -f $original + dsize=0 + ;; + *) + log_fail "Unknown datatype $datatype" + ;; + esac + if [[ $embedded = "true" ]]; then + dsize=0 + fi + + typeset -r original_checksum=$(sha256digest $original) + + sync_pool $TESTPOOL + + # Create a first clone of the entire file. + clonefile -f $original "${clone}0" + # Try to clone the clone in the same transaction group. + clonefile -f "${clone}0" "${clone}2" + + # Clone the original again... + clonefile -f $original "${clone}1" + # ...and overwrite it in the same transaction group. + clonefile -f $original "${clone}1" + + # Clone the clone... + clonefile -f "${clone}1" "${clone}3" + sync_pool $TESTPOOL + # ...and overwrite in the new transaction group. + clonefile -f "${clone}1" "${clone}3" + + sync_pool $TESTPOOL + + # Test removal of the pending clones (before they are committed to disk). + clonefile -f $original "${clone}4" + clonefile -f "${clone}4" "${clone}5" + rm -f "${clone}4" "${clone}5" + + # Clone into one file, but remove another file, but with the same data in + # the same transaction group. + clonefile -f $original "${clone}5" + sync_pool $TESTPOOL + clonefile -f $original "${clone}4" + rm -f "${clone}5" + test_file_integrity $original_checksum "${clone}4" $filesize + sync_pool $TESTPOOL + test_file_integrity $original_checksum "${clone}4" $filesize + + clonefile -f "${clone}4" "${clone}5" + # Verify integrity of the cloned file before it is committed to disk. + test_file_integrity $original_checksum "${clone}5" $filesize + + sync_pool $TESTPOOL + + # Verify integrity in the new transaction group. + test_file_integrity $original_checksum "${clone}0" $filesize + test_file_integrity $original_checksum "${clone}1" $filesize + test_file_integrity $original_checksum "${clone}2" $filesize + test_file_integrity $original_checksum "${clone}3" $filesize + test_file_integrity $original_checksum "${clone}4" $filesize + test_file_integrity $original_checksum "${clone}5" $filesize + + verify_pool_props $dsize 7 + + # Clear cache and test after fresh import. + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + # Cloned uncached file. + clonefile -f $original "${clone}6" + # Cloned uncached clone. + clonefile -f "${clone}6" "${clone}7" + + # Cache the file. + cat $original >/dev/null + clonefile -f $original "${clone}8" + clonefile -f "${clone}8" "${clone}9" + + test_file_integrity $original_checksum "${clone}6" $filesize + test_file_integrity $original_checksum "${clone}7" $filesize + test_file_integrity $original_checksum "${clone}8" $filesize + test_file_integrity $original_checksum "${clone}9" $filesize + + sync_pool $TESTPOOL + + verify_pool_props $dsize 11 + + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + test_file_integrity $original_checksum "${clone}0" $filesize + test_file_integrity $original_checksum "${clone}1" $filesize + test_file_integrity $original_checksum "${clone}2" $filesize + test_file_integrity $original_checksum "${clone}3" $filesize + test_file_integrity $original_checksum "${clone}4" $filesize + test_file_integrity $original_checksum "${clone}5" $filesize + test_file_integrity $original_checksum "${clone}6" $filesize + test_file_integrity $original_checksum "${clone}7" $filesize + test_file_integrity $original_checksum "${clone}8" $filesize + test_file_integrity $original_checksum "${clone}9" $filesize + + rm -f $original + rm -f "${clone}1" "${clone}3" "${clone}5" "${clone}7" + + sync_pool $TESTPOOL + + test_file_integrity $original_checksum "${clone}0" $filesize + test_file_integrity $original_checksum "${clone}2" $filesize + test_file_integrity $original_checksum "${clone}4" $filesize + test_file_integrity $original_checksum "${clone}6" $filesize + test_file_integrity $original_checksum "${clone}8" $filesize + test_file_integrity $original_checksum "${clone}9" $filesize + + verify_pool_props $dsize 6 + + rm -f "${clone}0" "${clone}2" "${clone}4" "${clone}8" "${clone}9" + + sync_pool $TESTPOOL + + test_file_integrity $original_checksum "${clone}6" $filesize + + verify_pool_props $dsize 1 + + rm -f "${clone}6" + + sync_pool $TESTPOOL + + verify_pool_props $dsize 1 +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib new file mode 100644 index 00000000000..ddfbfc999c4 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_corner_cases.kshlib @@ -0,0 +1,315 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +function first_half_checksum +{ + typeset -r file=$1 + + dd if=$file bs=$HALFRECORDSIZE count=1 2>/dev/null | sha256digest +} + +function second_half_checksum +{ + typeset -r file=$1 + + dd if=$file bs=$HALFRECORDSIZE count=1 skip=1 2>/dev/null | sha256digest +} + +function bclone_corner_cases_init +{ + typeset -r srcdir=$1 + typeset -r dstdir=$2 + + export RECORDSIZE=4096 + export HALFRECORDSIZE=$((RECORDSIZE / 2)) + + export CLONE="$dstdir/clone0" + export ORIG0="$srcdir/orig0" + export ORIG1="$srcdir/orig1" + export ORIG2="$srcdir/orig2" + + # Create source files. + log_must dd if=/dev/urandom of="$ORIG0" bs=$RECORDSIZE count=1 + log_must dd if=/dev/urandom of="$ORIG1" bs=$RECORDSIZE count=1 + log_must dd if=/dev/urandom of="$ORIG2" bs=$RECORDSIZE count=1 + + export FIRST_HALF_ORIG0_CHECKSUM=$(first_half_checksum $ORIG0) + export FIRST_HALF_ORIG1_CHECKSUM=$(first_half_checksum $ORIG1) + export FIRST_HALF_ORIG2_CHECKSUM=$(first_half_checksum $ORIG2) + export SECOND_HALF_ORIG0_CHECKSUM=$(second_half_checksum $ORIG0) + export SECOND_HALF_ORIG1_CHECKSUM=$(second_half_checksum $ORIG1) + export SECOND_HALF_ORIG2_CHECKSUM=$(second_half_checksum $ORIG2) + export ZEROS_CHECKSUM=$(dd if=/dev/zero bs=$HALFRECORDSIZE count=1 | sha256digest) + export FIRST_HALF_CHECKSUM="" + export SECOND_HALF_CHECKSUM="" +} + +function cache_clone +{ + typeset -r cached=$1 + + case "$cached" in + "cached") + dd if=$CLONE of=/dev/null bs=$RECORDSIZE 2>/dev/null + ;; + "uncached") + ;; + *) + log_fail "invalid cached: $cached" + ;; + esac +} + +function create_existing +{ + typeset -r existing=$1 + + case "$existing" in + "no") + ;; + "small empty") + log_must truncate_test -s $HALFRECORDSIZE -f $CLONE + ;; + "full empty") + log_must truncate_test -s $RECORDSIZE -f $CLONE + ;; + "small data") + log_must dd if=/dev/urandom of=$CLONE bs=$HALFRECORDSIZE count=1 \ + 2>/dev/null + ;; + "full data") + log_must dd if=/dev/urandom of=$CLONE bs=$RECORDSIZE count=1 2>/dev/null + ;; + *) + log_fail "invalid existing: $existing" + ;; + esac +} + +function create_clone +{ + typeset -r clone=$1 + typeset -r file=$2 + + case "$clone" in + "no") + ;; + "yes") + clonefile -f $file $CLONE + case "$file" in + $ORIG0) + FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG0_CHECKSUM + SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG0_CHECKSUM + ;; + $ORIG2) + FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG2_CHECKSUM + SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG2_CHECKSUM + ;; + *) + log_fail "invalid file: $file" + ;; + esac + ;; + *) + log_fail "invalid clone: $clone" + ;; + esac +} + +function overwrite_clone +{ + typeset -r overwrite=$1 + + case "$overwrite" in + "no") + ;; + "free") + log_must truncate_test -s 0 -f $CLONE + log_must truncate_test -s $RECORDSIZE -f $CLONE + FIRST_HALF_CHECKSUM=$ZEROS_CHECKSUM + SECOND_HALF_CHECKSUM=$ZEROS_CHECKSUM + ;; + "full") + log_must dd if=$ORIG1 of=$CLONE bs=$RECORDSIZE count=1 2>/dev/null + FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG1_CHECKSUM + SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG1_CHECKSUM + ;; + "first half") + log_must dd if=$ORIG1 of=$CLONE bs=$HALFRECORDSIZE skip=0 seek=0 \ + count=1 conv=notrunc 2>/dev/null + FIRST_HALF_CHECKSUM=$FIRST_HALF_ORIG1_CHECKSUM + ;; + "second half") + log_must dd if=$ORIG1 of=$CLONE bs=$HALFRECORDSIZE skip=1 seek=1 \ + count=1 conv=notrunc 2>/dev/null + SECOND_HALF_CHECKSUM=$SECOND_HALF_ORIG1_CHECKSUM + ;; + *) + log_fail "invalid overwrite: $overwrite" + ;; + esac +} + +function checksum_compare +{ + typeset -r compare=$1 + typeset first_half_calculated_checksum second_half_calculated_checksum + + case "$compare" in + "no") + ;; + "yes") + first_half_calculated_checksum=$(first_half_checksum $CLONE) + second_half_calculated_checksum=$(second_half_checksum $CLONE) + + if [[ $first_half_calculated_checksum != $FIRST_HALF_CHECKSUM ]] || \ + [[ $second_half_calculated_checksum != $SECOND_HALF_CHECKSUM ]]; then + return 1 + fi + ;; + *) + log_fail "invalid compare: $compare" + ;; + esac +} + +function bclone_corner_cases_test +{ + typeset cached existing + typeset first_clone first_overwrite + typeset read_after read_before + typeset second_clone second_overwrite + typeset -r srcdir=$1 + typeset -r dstdir=$2 + typeset limit=$3 + typeset -i count=0 + + if [[ $srcdir != "count" ]]; then + if [[ -n "$limit" ]]; then + typeset -r total_count=$(bclone_corner_cases_test count) + limit=$(random_int_between 1 $total_count $((limit*2)) | sort -nu | head -n $limit | xargs) + fi + bclone_corner_cases_init $srcdir $dstdir + fi + + # + # (create) / (cache) / (clone) / (overwrite) / (read) / (clone) / (overwrite) / (read) / read next txg + # + for existing in "no" "small empty" "full empty" "small data" "full data"; do + for cached in "uncached" "cached"; do + for first_clone in "no" "yes"; do + for first_overwrite in "no" "free" "full" "first half" "second half"; do + for read_before in "no" "yes"; do + for second_clone in "no" "yes"; do + for second_overwrite in "no" "free" "full" "first half" "second half"; do + for read_after in "no" "yes"; do + if [[ $first_clone = "no" ]] && \ + [[ $second_clone = "no" ]]; then + continue + fi + if [[ $first_clone = "no" ]] && \ + [[ $read_before = "yes" ]]; then + continue + fi + if [[ $second_clone = "no" ]] && \ + [[ $read_before = "yes" ]] && \ + [[ $read_after = "yes" ]]; then + continue + fi + + count=$((count+1)) + + if [[ $srcdir = "count" ]]; then + # Just counting. + continue + fi + + if [[ -n "$limit" ]]; then + if ! echo " $limit " | grep -q " $count "; then + continue + fi + fi + + FIRST_HALF_CHECKSUM="" + SECOND_HALF_CHECKSUM="" + + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + create_existing "$existing" + + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + cache_clone "$cached" + + create_clone "$first_clone" "$ORIG0" + + overwrite_clone "$first_overwrite" + + if checksum_compare $read_before; then + log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before" + else + log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before" + fi + + create_clone "$second_clone" "$ORIG2" + + overwrite_clone "$second_overwrite" + + if checksum_compare $read_after; then + log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after" + else + log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after" + fi + + log_must zpool export $TESTPOOL + log_must zpool import $TESTPOOL + + if checksum_compare "yes"; then + log_note "existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after / read_next_txg" + else + log_fail "FAIL: existing: $existing / cached: $cached / first_clone: $first_clone / first_overwrite: $first_overwrite / read_before: $read_before / second_clone: $second_clone / read_after: $read_after / read_next_txg" + fi + + rm -f "$CLONE" + done + done + done + done + done + done + done + done + + if [[ $srcdir = "count" ]]; then + echo $count + fi +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh new file mode 100755 index 00000000000..35188cddb06 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh @@ -0,0 +1,45 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify various corner cases in block cloning across datasets" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS +log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS +log_must zfs set recordsize=$RECORDSIZE $TESTDSTFS + +bclone_corner_cases_test $TESTSRCDIR $TESTDSTDIR + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases_limited.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases_limited.ksh new file mode 100755 index 00000000000..1fc1bbd07fd --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases_limited.ksh @@ -0,0 +1,45 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify various corner cases in block cloning across datasets" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS +log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS +log_must zfs set recordsize=$RECORDSIZE $TESTDSTFS + +bclone_corner_cases_test $TESTSRCDIR $TESTDSTDIR 100 + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh new file mode 100755 index 00000000000..e2fe25d451d --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh @@ -0,0 +1,46 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning properly clones regular files across datasets" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS + +for filesize in 1 107 113 511 512 513 4095 4096 4097 131071 131072 131073 \ + 1048575 1048576 1048577 4194303 4194304 4194305; do + bclone_test random $filesize false $TESTSRCDIR $TESTDSTDIR +done + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_embedded.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_embedded.ksh new file mode 100755 index 00000000000..6a6fe1d309a --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_embedded.ksh @@ -0,0 +1,50 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning properly clones small files (with embedded blocks) across datasets" + +# Enable ZLE compression to make sure what is the maximum amount of data we +# can store in BP. +log_must zfs set compress=zle $TESTSRCFS +log_must zfs set compress=zle $TESTDSTFS + +# Test BP_IS_EMBEDDED(). +# Maximum embedded payload size is 112 bytes, but the buffer is extended to +# 512 bytes first and then compressed. 107 random bytes followed by 405 zeros +# gives exactly 112 bytes after compression with ZLE. +for filesize in 1 2 4 8 16 32 64 96 107; do + bclone_test random $filesize true $TESTSRCDIR $TESTDSTDIR +done + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_hole.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_hole.ksh new file mode 100755 index 00000000000..d4c33d6da30 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_hole.ksh @@ -0,0 +1,45 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning properly clones sparse files (files with holes) across datasets" + +# Compression doesn't matter here. + +# Test BP_IS_HOLE(). +for filesize in 1 511 512 513 4095 4096 4097 131071 131072 131073 \ + 1048575 1048576 1048577 4194303 4194304 4194305; do + bclone_test hole $filesize false $TESTSRCDIR $TESTDSTDIR +done + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_all.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_all.ksh new file mode 100755 index 00000000000..a5e7282fe6a --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_all.ksh @@ -0,0 +1,86 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different properties" + +log_must zfs set checksum=off $TESTSRCFS +log_must zfs set compress=off $TESTSRCFS +log_must zfs set copies=1 $TESTSRCFS +log_must zfs set recordsize=131072 $TESTSRCFS +log_must zfs set checksum=fletcher2 $TESTDSTFS +log_must zfs set compress=lz4 $TESTDSTFS +log_must zfs set copies=3 $TESTDSTFS +log_must zfs set recordsize=8192 $TESTDSTFS + +FILESIZE=$(random_int_between 2 32767) +FILESIZE=$((FILESIZE * 64)) +bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR + +log_must zfs set checksum=sha256 $TESTSRCFS +log_must zfs set compress=zstd $TESTSRCFS +log_must zfs set copies=2 $TESTSRCFS +log_must zfs set recordsize=262144 $TESTSRCFS +log_must zfs set checksum=off $TESTDSTFS +log_must zfs set compress=off $TESTDSTFS +log_must zfs set copies=1 $TESTDSTFS +log_must zfs set recordsize=131072 $TESTDSTFS + +FILESIZE=$(random_int_between 2 32767) +FILESIZE=$((FILESIZE * 64)) +bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR + +log_must zfs set checksum=sha512 $TESTSRCFS +log_must zfs set compress=gzip $TESTSRCFS +log_must zfs set copies=2 $TESTSRCFS +log_must zfs set recordsize=512 $TESTSRCFS +log_must zfs set checksum=fletcher4 $TESTDSTFS +log_must zfs set compress=lzjb $TESTDSTFS +log_must zfs set copies=3 $TESTDSTFS +log_must zfs set recordsize=16384 $TESTDSTFS + +FILESIZE=$(random_int_between 2 32767) +FILESIZE=$((FILESIZE * 64)) +bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR + +log_must zfs inherit checksum $TESTSRCFS +log_must zfs inherit compress $TESTSRCFS +log_must zfs inherit copies $TESTSRCFS +log_must zfs inherit recordsize $TESTSRCFS +log_must zfs inherit checksum $TESTDSTFS +log_must zfs inherit compress $TESTDSTFS +log_must zfs inherit copies $TESTDSTFS +log_must zfs inherit recordsize $TESTDSTFS + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_checksum.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_checksum.ksh new file mode 100755 index 00000000000..7e064a0dfd7 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_checksum.ksh @@ -0,0 +1,62 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different checksum properties" + +log_must zfs set compress=off $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS + +for srcprop in "${checksum_prop_vals[@]}"; do + for dstprop in "${checksum_prop_vals[@]}"; do + if [[ $srcprop == $dstprop ]]; then + continue + fi + log_must zfs set checksum=$srcprop $TESTSRCFS + log_must zfs set checksum=$dstprop $TESTDSTFS + # 15*8=120, which is greater than 113, so we are sure the data won't + # be embedded into BP. + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. + FILESIZE=$(random_int_between 15 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit checksum $TESTSRCFS +log_must zfs inherit checksum $TESTDSTFS + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_compress.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_compress.ksh new file mode 100755 index 00000000000..e1d6e594921 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_compress.ksh @@ -0,0 +1,59 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different compression properties" + +for srcprop in "${compress_prop_vals[@]}"; do + for dstprop in "${compress_prop_vals[@]}"; do + if [[ $srcprop == $dstprop ]]; then + continue + fi + log_must zfs set compress=$srcprop $TESTSRCFS + log_must zfs set compress=$dstprop $TESTDSTFS + # 15*8=120, which is greater than 113, so we are sure the data won't + # be embedded into BP. + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. + FILESIZE=$(random_int_between 15 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test text $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit compress $TESTSRCFS +log_must zfs inherit compress $TESTDSTFS + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_copies.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_copies.ksh new file mode 100755 index 00000000000..ac823e1ec39 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_copies.ksh @@ -0,0 +1,59 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different copies properties" + +log_must zfs set compress=off $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS + +for srcprop in "${copies_prop_vals[@]}"; do + for dstprop in "${copies_prop_vals[@]}"; do + log_must zfs set copies=$srcprop $TESTSRCFS + log_must zfs set copies=$dstprop $TESTDSTFS + # 15*8=120, which is greater than 113, so we are sure the data won't + # be embedded into BP. + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. + FILESIZE=$(random_int_between 15 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit copies $TESTSRCFS +log_must zfs inherit copies $TESTDSTFS + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_recordsize.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_recordsize.ksh new file mode 100755 index 00000000000..d833e612310 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_diffprops_recordsize.ksh @@ -0,0 +1,65 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning across datasets with different recordsize properties" + +log_must zfs set compress=off $TESTSRCFS +log_must zfs set compress=off $TESTDSTFS + +# recsize_prop_vals[] array contains too many entries and the tests take too +# long. Let's use only a subset of them. +typeset -a bclone_recsize_prop_vals=('512' '4096' '131072' '1048576') + +for srcprop in "${bclone_recsize_prop_vals[@]}"; do + for dstprop in "${bclone_recsize_prop_vals[@]}"; do + if [[ $srcprop == $dstprop ]]; then + continue + fi + log_must zfs set recordsize=$srcprop $TESTSRCFS + log_must zfs set recordsize=$dstprop $TESTDSTFS + # 2*64=128, which is greater than 113, so we are sure the data won't + # be embedded into BP. + # 32767*64=2097088, which is larger than the largest recordsize (1MB). + FILESIZE=$(random_int_between 2 32767) + FILESIZE=$((FILESIZE * 64)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit recordsize $TESTSRCFS +log_must zfs inherit recordsize $TESTDSTFS + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_prop_sync.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_prop_sync.ksh new file mode 100755 index 00000000000..f8aa1c875c6 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_prop_sync.ksh @@ -0,0 +1,66 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning +verify_crossfs_block_cloning + +log_assert "Verify block cloning with all sync property settings" + +log_must zfs set compress=zle $TESTSRCFS +log_must zfs set compress=zle $TESTDSTFS + +for prop in "${sync_prop_vals[@]}"; do + log_must zfs set sync=$prop $TESTSRCFS + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. + FILESIZE=$(random_int_between 1 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTSRCDIR +done + +for srcprop in "${sync_prop_vals[@]}"; do + log_must zfs set sync=$srcprop $TESTSRCFS + for dstprop in "${sync_prop_vals[@]}"; do + log_must zfs set sync=$dstprop $TESTDSTFS + # 32767*8=262136, which is larger than a single default recordsize of + # 131072. + FILESIZE=$(random_int_between 1 32767) + FILESIZE=$((FILESIZE * 8)) + bclone_test random $FILESIZE false $TESTSRCDIR $TESTDSTDIR + done +done + +log_must zfs inherit sync $TESTSRCFS +log_must zfs inherit sync $TESTDSTFS + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh new file mode 100755 index 00000000000..4aa2914da29 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh @@ -0,0 +1,42 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify various corner cases in block cloning within the same dataset" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS +log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS + +bclone_corner_cases_test $TESTSRCDIR $TESTSRCDIR + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases_limited.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases_limited.ksh new file mode 100755 index 00000000000..b4737700eb7 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases_limited.ksh @@ -0,0 +1,42 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_corner_cases.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify various corner cases in block cloning within the same dataset" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS +log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS + +bclone_corner_cases_test $TESTSRCDIR $TESTSRCDIR 100 + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh new file mode 100755 index 00000000000..e964f7bbf64 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh @@ -0,0 +1,44 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify block cloning properly clones regular files within the same dataset" + +# Disable compression to make sure we won't use embedded blocks. +log_must zfs set compress=off $TESTSRCFS + +for filesize in 1 107 113 511 512 513 4095 4096 4097 131071 131072 131073 \ + 1048575 1048576 1048577 4194303 4194304 4194305; do + bclone_test random $filesize false $TESTSRCDIR $TESTSRCDIR +done + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_embedded.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_embedded.ksh new file mode 100755 index 00000000000..df393a87801 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_embedded.ksh @@ -0,0 +1,48 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify block cloning properly clones small files (with embedded blocks) within the same dataset" + +# Enable ZLE compression to make sure what is the maximum amount of data we +# can store in BP. +log_must zfs set compress=zle $TESTSRCFS + +# Test BP_IS_EMBEDDED(). +# Maximum embedded payload size is 112 bytes, but the buffer is extended to +# 512 bytes first and then compressed. 107 random bytes followed by 405 zeros +# gives exactly 112 bytes after compression with ZLE. +for filesize in 1 2 4 8 16 32 64 96 107; do + bclone_test random $filesize true $TESTSRCDIR $TESTSRCDIR +done + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_hole.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_hole.ksh new file mode 100755 index 00000000000..3c6e345e6e6 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_hole.ksh @@ -0,0 +1,44 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "both" + +verify_block_cloning + +log_assert "Verify block cloning properly clones sparse files (files with holes) within the same dataset" + +# Compression doesn't matter here. + +# Test BP_IS_HOLE(). +for filesize in 1 511 512 513 4095 4096 4097 131071 131072 131073 \ + 1048575 1048576 1048577 4194303 4194304 4194305; do + bclone_test hole $filesize false $TESTSRCDIR $TESTSRCDIR +done + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/cleanup.ksh new file mode 100755 index 00000000000..0021ccb57ae --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/cleanup.ksh @@ -0,0 +1,44 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone.cfg + +log_must zfs destroy $TESTSRCFS +log_must zfs destroy $TESTDSTFS + +default_cleanup_noexit + +if tunable_exists BCLONE_ENABLED ; then + log_must restore_tunable BCLONE_ENABLED +fi + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/setup.ksh new file mode 100755 index 00000000000..9d26088c5a8 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/setup.ksh @@ -0,0 +1,50 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2023 by Pawel Jakub Dawidek +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone.cfg + +if ! command -v clonefile > /dev/null ; then + log_unsupported "clonefile program required to test block cloning" +fi + +if tunable_exists BCLONE_ENABLED ; then + log_must save_tunable BCLONE_ENABLED + log_must set_tunable32 BCLONE_ENABLED 1 +fi + +DISK=${DISKS%% *} + +default_setup_noexit $DISK "true" +log_must zpool set feature@block_cloning=enabled $TESTPOOL +log_must zfs create $TESTSRCFS +log_must zfs create $TESTDSTFS +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib index 8e16366b4cd..50f3a3d262c 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib @@ -28,8 +28,8 @@ function have_same_content { - typeset hash1=$(cat $1 | md5sum) - typeset hash2=$(cat $2 | md5sum) + typeset hash1=$(md5digest $1) + typeset hash2=$(md5digest $2) log_must [ "$hash1" = "$hash2" ] } @@ -44,11 +44,15 @@ function have_same_content # function get_same_blocks { + KEY=$5 + if [ ${#KEY} -gt 0 ]; then + KEY="--key=$KEY" + fi typeset zdbout=${TMPDIR:-$TEST_BASE_DIR}/zdbout.$$ - zdb -vvvvv $1 -O $2 | \ + zdb $KEY -vvvvv $1 -O $2 | \ awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.a - zdb -vvvvv $3 -O $4 | \ + zdb $KEY -vvvvv $3 -O $4 | \ awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.b - echo $(sort $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ') + echo $(sort -n $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ') } diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_cached.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_cached.ksh new file mode 100755 index 00000000000..b0ef8ec9953 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_cached.ksh @@ -0,0 +1,86 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# When the destination file is mmaped and is already cached we need to +# update mmaped pages after successful clone. +# +# STRATEGY: +# 1. Create a pool. +# 2. Create a two test files with random content. +# 3. mmap the files, read them and clone from one to the other using +# clone_mmap_cached. +# 4. clone_mmap_cached also verifies if the content of the destination +# file was updated while reading it from mmaped memory. +# + +verify_runnable "global" + +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +VDIR=$TEST_BASE_DIR/disk-bclone +VDEV="$VDIR/a" + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $VDIR +} + +log_onexit cleanup + +log_assert "Test for clone into mmaped and cached file" + +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s 1G $VDEV + +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV +log_must zfs create $TESTPOOL/$TESTFS + +for opts in "--" "-i" "-o" "-io" +do + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/src bs=1M count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/dst bs=1M count=1 + + # Clear cache. + log_must zpool export $TESTPOOL + log_must zpool import -d $VDIR $TESTPOOL + + log_must clone_mmap_cached $opts /$TESTPOOL/$TESTFS/src /$TESTPOOL/$TESTFS/dst + + sync_pool $TESTPOOL + log_must sync + + log_must have_same_content /$TESTPOOL/$TESTFS/src /$TESTPOOL/$TESTFS/dst + blocks=$(get_same_blocks $TESTPOOL/$TESTFS src $TESTPOOL/$TESTFS dst) + # FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). + log_must [ "$blocks" = "$(seq -s " " 0 7 | sed 's/ $//')" ] +done + +log_pass "Clone properly updates mmapped and cached pages" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_write.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_write.ksh new file mode 100755 index 00000000000..6215b3178e7 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_clone_mmap_write.ksh @@ -0,0 +1,79 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# A PANIC is triggered in dbuf_redirty() if we clone a file, mmap it +# and write from the map into the file. PR#15656 fixes this scenario. +# This scenario also causes data corruption on FreeBSD, which is fixed +# by PR#15665. +# +# STRATEGY: +# 1. Create a pool +# 2. Create a test file +# 3. Clone, mmap and write to the file using clone_mmap_write +# 5. Synchronize cached writes +# 6. Verfiy data is correctly written to the disk +# + +verify_runnable "global" + +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +VDIR=$TEST_BASE_DIR/disk-bclone +VDEV="$VDIR/a" + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $VDIR +} + +log_onexit cleanup + +log_assert "Test for clone, mmap and write scenario" + +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s 1G $VDEV + +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV +log_must zfs create $TESTPOOL/$TESTFS + +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file bs=1M count=512 +log_must clone_mmap_write /$TESTPOOL/$TESTFS/file /$TESTPOOL/$TESTFS/clone + +sync_pool $TESTPOOL +log_must sync + +log_must have_same_content /$TESTPOOL/$TESTFS/file /$TESTPOOL/$TESTFS/clone +blocks=$(get_same_blocks $TESTPOOL/$TESTFS file $TESTPOOL/$TESTFS clone) +# FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). +log_must [ "$blocks" = "$(seq -s " " 1 4095 | sed 's/ $//')" ] + +log_pass "Clone, mmap and write does not cause data corruption or " \ + "trigger panic" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange.ksh index 43ea47b0ef1..0599739abee 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange.ksh @@ -29,7 +29,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh index 74e6b04903a..ad83d30291a 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh @@ -26,12 +26,11 @@ . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "5.3") ]]; then - log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3" -fi +verify_crossfs_block_cloning claim="The copy_file_range syscall can clone across datasets." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback.ksh index 9a96eacd60a..475910be747 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback.ksh @@ -30,7 +30,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh index e52b34ec8a5..00982f68db8 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh @@ -30,7 +30,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_partial.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_partial.ksh index a5da0a0bd35..38c46e4741c 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_partial.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_copyfilerange_partial.ksh @@ -29,7 +29,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh new file mode 100755 index 00000000000..702e23267f7 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_cross_enc_dataset.ksh @@ -0,0 +1,169 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023, Kay Pedersen +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +verify_runnable "global" + +verify_crossfs_block_cloning + +claim="Block cloning across encrypted datasets." + +log_assert $claim + +DS1="$TESTPOOL/encrypted1" +DS2="$TESTPOOL/encrypted2" +DS1_NC="$TESTPOOL/notcrypted1" +PASSPHRASE="top_secret" + +function prepare_enc +{ + log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS + log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $DS1" + log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $DS2" + log_must zfs create $DS1/child1 + log_must zfs create $DS1/child2 + log_must zfs create $DS1_NC + + log_note "Create test file" + # we must wait until the src file txg is written to the disk otherwise we + # will fallback to normal copy. See "dmu_read_l0_bps" in + # "zfs/module/zfs/dmu.c" and "zfs_clone_range" in + # "zfs/module/zfs/zfs_vnops.c" + log_must dd if=/dev/urandom of=/$DS1/file bs=128K count=4 + log_must dd if=/dev/urandom of=/$DS1/child1/file bs=128K count=4 + log_must dd if=/dev/urandom of=/$DS1_NC/file bs=128K count=4 + log_must sync_pool $TESTPOOL +} + +function cleanup_enc +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL +} + +function clone_and_check +{ + I_FILE="$1" + O_FILE=$2 + I_DS=$3 + O_DS=$4 + SAME_BLOCKS=$5 + # the CLONE option provides a choice between copy_file_range + # which should clone and a dd which is a copy no matter what + CLONE=$6 + SNAPSHOT=$7 + if [ ${#SNAPSHOT} -gt 0 ]; then + I_FILE=".zfs/snapshot/$SNAPSHOT/$1" + fi + if [ $CLONE ]; then + log_must clonefile -f "/$I_DS/$I_FILE" "/$O_DS/$O_FILE" 0 0 524288 + else + log_must dd if="/$I_DS/$I_FILE" of="/$O_DS/$O_FILE" bs=128K + fi + log_must sync_pool $TESTPOOL + + log_must have_same_content "/$I_DS/$I_FILE" "/$O_DS/$O_FILE" + + if [ ${#SNAPSHOT} -gt 0 ]; then + I_DS="$I_DS@$SNAPSHOT" + I_FILE="$1" + fi + typeset blocks=$(get_same_blocks \ + $I_DS $I_FILE $O_DS $O_FILE $PASSPHRASE) + log_must [ "$blocks" = "$SAME_BLOCKS" ] +} + +log_onexit cleanup_enc + +prepare_enc + +log_note "Cloning entire file with copy_file_range across different enc" \ + "roots, should fallback" +# we are expecting no same block map. +clone_and_check "file" "clone" $DS1 $DS2 "" true +log_note "check if the file is still readable and the same after" \ + "unmount and key unload, shouldn't fail" +typeset hash1=$(md5digest "/$DS1/file") +log_must zfs umount $DS1 && zfs unload-key $DS1 +typeset hash2=$(md5digest "/$DS2/clone") +log_must [ "$hash1" = "$hash2" ] + +cleanup_enc +prepare_enc + +log_note "Cloning entire file with copy_file_range across different child datasets" +# clone shouldn't work because of deriving a new master key for the child +# we are expecting no same block map. +clone_and_check "file" "clone" $DS1 "$DS1/child1" "" true +clone_and_check "file" "clone" "$DS1/child1" "$DS1/child2" "" true + +cleanup_enc +prepare_enc + +log_note "Copying entire file with copy_file_range across same snapshot" +log_must zfs snapshot -r $DS1@s1 +log_must sync_pool $TESTPOOL +log_must rm -f "/$DS1/file" +log_must sync_pool $TESTPOOL +clone_and_check "file" "clone" "$DS1" "$DS1" "0 1 2 3" true "s1" + +cleanup_enc +prepare_enc + +log_note "Copying entire file with copy_file_range across different snapshot" +clone_and_check "file" "file" $DS1 $DS2 "" true +log_must zfs snapshot -r $DS2@s1 +log_must sync_pool $TESTPOOL +log_must rm -f "/$DS1/file" "/$DS2/file" +log_must sync_pool $TESTPOOL +clone_and_check "file" "clone" "$DS2" "$DS1" "" true "s1" +typeset hash1=$(md5digest "/$DS1/.zfs/snapshot/s1/file") +log_note "destroy the snapshot and check if the file is still readable and" \ + "has the same content" +log_must zfs destroy -r $DS2@s1 +log_must sync_pool $TESTPOOL +typeset hash2=$(md5digest "/$DS1/file") +log_must [ "$hash1" = "$hash2" ] + +cleanup_enc +prepare_enc + +log_note "Copying with copy_file_range from non encrypted to encrypted" +clone_and_check "file" "copy" $DS1_NC $DS1 "" true + +cleanup_enc +prepare_enc + +log_note "Copying with copy_file_range from encrypted to non encrypted" +clone_and_check "file" "copy" $DS1 $DS1_NC "" true + +log_must sync_pool $TESTPOOL + +log_pass $claim diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_disabled_copyfilerange.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_disabled_copyfilerange.ksh index d21b6251134..3d916ab9216 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_disabled_copyfilerange.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_disabled_copyfilerange.ksh @@ -29,7 +29,7 @@ verify_runnable "global" -if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then log_unsupported "copy_file_range not available before Linux 4.5" fi diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh new file mode 100755 index 00000000000..919f320dea3 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh @@ -0,0 +1,90 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by iXsystems, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Test for LWB buffer overflow with multiple VDEVs ZIL when 128KB +# block write is split into two 68KB ones, trying to write maximum +# sizes 128KB TX_CLONE_RANGE record with 1022 block pointers into +# 68KB buffer. +# +# STRATEGY: +# 1. Create a pool with multiple VDEVs ZIL +# 2. Write maximum sizes TX_CLONE_RANGE record with 1022 block +# pointers into 68KB buffer +# 3. Sync TXG +# 4. Clone the file +# 5. Synchronize cached writes +# + +verify_runnable "global" + +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +VDIR=$TEST_BASE_DIR/disk-bclone +VDEV="$VDIR/a $VDIR/b $VDIR/c" +LDEV="$VDIR/e $VDIR/f" + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $VDIR +} + +log_onexit cleanup + +log_assert "Test for LWB buffer overflow with multiple VDEVs ZIL" + +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s $MINVDEVSIZE $VDEV $LDEV + +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \ + log mirror $LDEV +log_must zfs create -o recordsize=32K $TESTPOOL/$TESTFS +# Each ZIL log entry can fit 130816 bytes for a block cloning operation, +# so it can store 1022 block pointers. When LWB optimization is enabled, +# an assert is hit when 128KB block write is split into two 68KB ones +# for 2 SLOG devices +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 bs=32K count=1022 \ + conv=fsync +sync_pool $TESTPOOL +log_must clonefile -f /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2 +log_must sync + +sync_pool $TESTPOOL +log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2 +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2) +# FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). +log_must [ "$blocks" = "$(seq -s " " 0 1021 | sed 's/ $//')" ] + +log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL" + diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh new file mode 100755 index 00000000000..53015200468 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh @@ -0,0 +1,132 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Verify slogs are replayed correctly for cloned files. This +# test is ported from slog_replay tests for block cloning. +# +# STRATEGY: +# 1. Create an empty file system (TESTFS) +# 2. Create regular files and sync +# 3. Freeze TESTFS +# 4. Clone the file +# 5. Unmount filesystem +# +# 6. Remount TESTFS +# 7. Compare clone file with the original file +# + +verify_runnable "global" + +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +export VDIR=$TEST_BASE_DIR/disk-bclone +export VDEV="$VDIR/a $VDIR/b $VDIR/c" +export LDEV="$VDIR/e $VDIR/f" +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s $MINVDEVSIZE $VDEV $LDEV + +claim="The slogs are replayed correctly for cloned files." + +log_assert $claim + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $TESTDIR $VDIR $VDIR2 +} + +log_onexit cleanup + +# +# 1. Create an empty file system (TESTFS) +# +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \ + log mirror $LDEV +log_must zfs create $TESTPOOL/$TESTFS + +# +# 2. TX_WRITE: Create two files and sync txg +# +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 \ + oflag=sync bs=128k count=4 +log_must zfs set recordsize=16K $TESTPOOL/$TESTFS +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file2 \ + oflag=sync bs=16K count=2048 +sync_pool $TESTPOOL + +# +# 3. Checkpoint for ZIL Replay +# +log_must zpool freeze $TESTPOOL + +# +# 4. TX_CLONE_RANGE: Clone the file +# +log_must clonefile -f /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must clonefile -f /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is frozen, the intent log contains a complete set +# of deltas to replay for clone files. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# 7. Compare clone file with the original file +# +log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must have_same_content /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 \ + $TESTPOOL/$TESTFS clone1) +log_must [ "$blocks" = "0 1 2 3" ] + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ + $TESTPOOL/$TESTFS clone2) +# FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). +log_must [ "$blocks" = "$(seq -s " " 0 2047 | sed 's/ $//')" ] + +log_pass $claim diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh new file mode 100755 index 00000000000..0967415b7b7 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh @@ -0,0 +1,134 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Verify slogs are replayed correctly for encrypted cloned files. +# This test is ported from slog_replay tests for block cloning. +# +# STRATEGY: +# 1. Create an encrypted file system (TESTFS) +# 2. Create regular files and sync +# 3. Freeze TESTFS +# 4. Clone the file +# 5. Unmount filesystem +# +# 6. Remount encrypted TESTFS +# 7. Compare clone file with the original file +# + +verify_runnable "global" + +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +export VDIR=$TEST_BASE_DIR/disk-bclone +export VDEV="$VDIR/a $VDIR/b $VDIR/c" +export LDEV="$VDIR/e $VDIR/f" +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s $MINVDEVSIZE $VDEV $LDEV +export PASSPHRASE="password" + +claim="The slogs are replayed correctly for encrypted cloned files." + +log_assert $claim + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $TESTDIR $VDIR $VDIR2 +} + +log_onexit cleanup + +# +# 1. Create an encrypted file system (TESTFS) +# +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \ + log mirror $LDEV +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS" + +# +# 2. TX_WRITE: Create two files and sync txg +# +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 \ + oflag=sync bs=128k count=4 +log_must zfs set recordsize=16K $TESTPOOL/$TESTFS +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file2 \ + oflag=sync bs=16K count=2048 +sync_pool $TESTPOOL + +# +# 3. Checkpoint for ZIL Replay +# +log_must zpool freeze $TESTPOOL + +# +# 4. TX_CLONE_RANGE: Clone the file +# +log_must clonefile -f /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must clonefile -f /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is frozen, the intent log contains a complete set +# of deltas to replay for clone files. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must eval "echo $PASSPHRASE | zpool import -l -f -d $VDIR $TESTPOOL" + +# +# 7. Compare clone file with the original file +# +log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must have_same_content /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 \ + $TESTPOOL/$TESTFS clone1 $PASSPHRASE) +log_must [ "$blocks" = "0 1 2 3" ] + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ + $TESTPOOL/$TESTFS clone2 $PASSPHRASE) +# FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). +log_must [ "$blocks" = "$(seq -s " " 0 2047 | sed 's/ $//')" ] + +log_pass $claim diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/setup.ksh index 58441bf8f3a..a9b13f062a4 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/setup.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/setup.ksh @@ -30,6 +30,9 @@ if ! command -v clonefile > /dev/null ; then log_unsupported "clonefile program required to test block cloning" fi +if ! command -v clone_mmap_cached > /dev/null ; then + log_unsupported "clone_mmap_cached program required to test block cloning" +fi verify_runnable "global" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh index 74caa12a9cc..945db71bf11 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh @@ -31,15 +31,13 @@ # 2. Set l2arc_write_max to a value larger than the cache device. # 3. Create a file larger than the cache device and random read # for 10 sec. -# 4. Verify that l2arc_write_max is set back to the default. -# 5. Set l2arc_write_max to a value less than the cache device size but +# 4. Set l2arc_write_max to a value less than the cache device size but # larger than the default (256MB). -# 6. Record the l2_size. -# 7. Random read for 1 sec. -# 8. Record the l2_size again. -# 9. If (6) <= (8) then we have not looped around yet. -# 10. If (6) > (8) then we looped around. Break out of the loop and test. -# 11. Destroy pool. +# 5. Record the l2_size. +# 6. Random read for 1 sec. +# 7. Record the l2_size again. +# 8. If (5) <= (7) then we have not looped around yet. +# 9. Destroy pool. # verify_runnable "global" @@ -93,10 +91,6 @@ log_must zfs set relatime=off $TESTPOOL log_must fio $FIO_SCRIPTS/mkfiles.fio log_must fio $FIO_SCRIPTS/random_reads.fio -typeset write_max2=$(get_tunable L2ARC_WRITE_MAX) - -log_must test $write_max2 -eq $write_max - log_must set_tunable32 L2ARC_WRITE_MAX $(( 256 * 1024 * 1024 )) export RUNTIME=1 @@ -108,8 +102,6 @@ while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do do_once=false done -log_must test $l2_size1 -gt $l2_size2 - log_must zpool destroy $TESTPOOL log_pass "Looping around a cache device succeeds." diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_after_mount.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_after_mount.ksh new file mode 100755 index 00000000000..0d4b66ea854 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share_after_mount.ksh @@ -0,0 +1,62 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Proxmox. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# DESCRIPTION: +# Verify that nfs shares persist after zfs mount -a +# +# STRATEGY: +# 1. Verify that the filesystem is not shared. +# 2. Enable the 'sharenfs' property +# 3. Verify filesystem is shared +# 4. Invoke 'zfs mount -a' +# 5. Verify filesystem is still shared + +verify_runnable "global" + +function cleanup +{ + log_must zfs set sharenfs=off $TESTPOOL/$TESTFS + is_shared $TESTPOOL/$TESTFS && \ + log_must unshare_fs $TESTPOOL/$TESTFS + log_must zfs share -a +} + + +log_onexit cleanup + +cleanup + +log_must zfs set sharenfs="on" $TESTPOOL/$TESTFS +log_must is_shared $TESTPOOL/$TESTFS +log_must is_exported $TESTPOOL/$TESTFS + +log_must zfs mount -a +log_must is_shared $TESTPOOL/$TESTFS +log_must is_exported $TESTPOOL/$TESTFS + +log_pass "Verify that nfs shares persist after zfs mount -a" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_002_pos.ksh index 3bdd7db649f..d6f32cdc7ac 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_002_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_002_pos.ksh @@ -51,7 +51,7 @@ else fi set -A args "" "-x" "-v" "-x $testpool" "-v $testpool" "-xv $testpool" \ - "-vx $testpool" + "-vx $testpool" "-e $testpool" "-es $testpool" log_assert "Executing 'zpool status' with correct options succeeds" @@ -64,4 +64,6 @@ while [[ $i -lt ${#args[*]} ]]; do (( i = i + 1 )) done +cleanup + log_pass "'zpool status' with correct options succeeded" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh index b501aac5ad6..52b22dd833f 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_003_pos.ksh @@ -37,6 +37,7 @@ # 3. Read the file # 4. Take a snapshot and make a clone # 5. Verify we see "snapshot, clone and filesystem" output in 'zpool status -v' +# and 'zpool status -ev' function cleanup { @@ -68,6 +69,7 @@ log_must zpool status -v $TESTPOOL2 log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" +log_must eval "zpool status -ev | grep '$TESTPOOL2/10m_file'" log_mustnot eval "zpool status -v | grep '$TESTFS1'" log_pass "'zpool status -v' outputs affected filesystem, snapshot & clone" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh new file mode 100755 index 00000000000..6be2ad5a741 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_status/zpool_status_008_pos.ksh @@ -0,0 +1,104 @@ +#!/bin/ksh -p + +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2024 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify 'zpool status -e' only shows unhealthy devices. +# +# STRATEGY: +# 1. Create zpool +# 2. Force DEGRADE, FAULT, or inject slow IOs for vdevs +# 3. Verify vdevs are reported correctly with -e and -s +# 4. Verify parents are reported as DEGRADED +# 5. Verify healthy children are not reported +# + +function cleanup +{ + log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO + zinject -c all + poolexists $TESTPOOL2 && destroy_pool $TESTPOOL2 + log_must rm -f $all_vdevs +} + +log_assert "Verify 'zpool status -e'" + +log_onexit cleanup + +all_vdevs=$(echo $TESTDIR/vdev{1..6}) +log_must mkdir -p $TESTDIR +log_must truncate -s $MINVDEVSIZE $all_vdevs + +OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS) + +for raid_type in "draid2:3d:6c:1s" "raidz2"; do + + log_must zpool create -f $TESTPOOL2 $raid_type $all_vdevs + + # Check DEGRADED vdevs are shown. + log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev4 "ONLINE" + log_must zinject -d $TESTDIR/vdev4 -A degrade $TESTPOOL2 + log_must eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev4 | grep DEGRADED" + + # Check FAULTED vdevs are shown. + log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev5 "ONLINE" + log_must zinject -d $TESTDIR/vdev5 -A fault $TESTPOOL2 + log_must eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev5 | grep FAULTED" + + # Check no ONLINE vdevs are shown + log_mustnot eval "zpool status -e $TESTPOOL2 | grep ONLINE" + + # Check no ONLINE slow vdevs are show. Then mark IOs greater than + # 10ms slow, delay IOs 20ms to vdev6, check slow IOs. + log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev6 "ONLINE" + log_mustnot eval "zpool status -es $TESTPOOL2 | grep ONLINE" + + log_must set_tunable64 ZIO_SLOW_IO_MS 10 + log_must zinject -d $TESTDIR/vdev6 -D20:100 $TESTPOOL2 + log_must mkfile 1048576 /$TESTPOOL2/testfile + sync_pool $TESTPOOL2 + log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO + + # Check vdev6 slow IOs are only shown when requested with -s. + log_mustnot eval "zpool status -e $TESTPOOL2 | grep $TESTDIR/vdev6 | grep ONLINE" + log_must eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev6 | grep ONLINE" + + # Pool level and top-vdev level status must be DEGRADED. + log_must eval "zpool status -e $TESTPOOL2 | grep $TESTPOOL2 | grep DEGRADED" + log_must eval "zpool status -e $TESTPOOL2 | grep $raid_type | grep DEGRADED" + + # Check that healthy vdevs[1-3] aren't shown with -e. + log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev1 "ONLINE" + log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev2 "ONLINE" + log_must check_vdev_state $TESTPOOL2 $TESTDIR/vdev3 "ONLINE" + log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev1 | grep ONLINE" + log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev2 | grep ONLINE" + log_mustnot eval "zpool status -es $TESTPOOL2 | grep $TESTDIR/vdev3 | grep ONLINE" + + log_must zinject -c all + log_must zpool status -es $TESTPOOL2 + + zpool destroy $TESTPOOL2 +done + +log_pass "Verify zpool status -e shows only unhealthy vdevs" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/.gitignore b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/.gitignore new file mode 100644 index 00000000000..d15225ac842 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/.gitignore @@ -0,0 +1 @@ +seekflood diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh index 42fe70042d6..c0bccab1221 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh @@ -32,3 +32,7 @@ . $STF_SUITE/include/libtest.shlib default_cleanup + +if tunable_exists BCLONE_ENABLED ; then + log_must restore_tunable BCLONE_ENABLED +fi diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh new file mode 100755 index 00000000000..60817449ab0 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_files_002_pos.ksh @@ -0,0 +1,161 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2024 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib + +# +# DESCRIPTION: +# Verify all cp --reflink modes work with modified file. +# +# STRATEGY: +# 1. Verify "cp --reflink=never|auto|always" behaves as expected. +# Two different modes of operation are tested. +# +# a. zfs_bclone_wait_dirty=0: FICLONE and FICLONERANGE fail with EINVAL +# when there are dirty blocks which cannot be immediately cloned. +# This is the default behavior. +# +# b. zfs_bclone_wait_dirty=1: FICLONE and FICLONERANGE wait for +# dirty blocks to be written to disk allowing the clone to succeed. +# The downside to this is it may be slow which depending on the +# situtation may defeat the point of making a clone. +# + +verify_runnable "global" +verify_block_cloning + +if ! is_linux; then + log_unsupported "cp --reflink is a GNU coreutils option" +fi + +function cleanup +{ + datasetexists $TESTPOOL/cp-reflink && \ + destroy_dataset $$TESTPOOL/cp-reflink -f + log_must set_tunable32 BCLONE_WAIT_DIRTY 0 +} + +function verify_copy +{ + src_cksum=$(sha256digest $1) + dst_cksum=$(sha256digest $2) + + if [[ "$src_cksum" != "$dst_cksum" ]]; then + log_must ls -l $CP_TESTDIR + log_fail "checksum mismatch ($src_cksum != $dst_cksum)" + fi +} + +log_assert "Verify all cp --reflink modes work with modified file" + +log_onexit cleanup + +SRC_FILE=src.data +DST_FILE=dst.data +SRC_SIZE=$(($RANDOM % 2048)) + +# A smaller recordsize is used merely to speed up the test. +RECORDSIZE=4096 + +log_must zfs create -o recordsize=$RECORDSIZE $TESTPOOL/cp-reflink +CP_TESTDIR=$(get_prop mountpoint $TESTPOOL/cp-reflink) + +log_must cd $CP_TESTDIR + +# Never wait on dirty blocks (zfs_bclone_wait_dirty=0) +log_must set_tunable32 BCLONE_WAIT_DIRTY 0 + +for mode in "never" "auto" "always"; do + log_note "Checking 'cp --reflink=$mode'" + + # Create a new file and immediately copy it. + log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE + + if [[ "$mode" == "always" ]]; then + log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE + log_must ls -l $CP_TESTDIR + else + log_must cp --reflink=$mode $SRC_FILE $DST_FILE + verify_copy $SRC_FILE $DST_FILE + fi + log_must rm -f $DST_FILE + + # Append to an existing file and immediately copy it. + sync_pool $TESTPOOL + log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \ + count=1 conv=notrunc + if [[ "$mode" == "always" ]]; then + log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE + log_must ls -l $CP_TESTDIR + else + log_must cp --reflink=$mode $SRC_FILE $DST_FILE + verify_copy $SRC_FILE $DST_FILE + fi + log_must rm -f $DST_FILE + + # Overwrite a random range of an existing file and immediately copy it. + sync_pool $TESTPOOL + log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \ + seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc + if [[ "$mode" == "always" ]]; then + log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE + log_must ls -l $CP_TESTDIR + else + log_must cp --reflink=$mode $SRC_FILE $DST_FILE + verify_copy $SRC_FILE $DST_FILE + fi + log_must rm -f $SRC_FILE $DST_FILE +done + +# Wait on dirty blocks (zfs_bclone_wait_dirty=1) +log_must set_tunable32 BCLONE_WAIT_DIRTY 1 + +for mode in "never" "auto" "always"; do + log_note "Checking 'cp --reflink=$mode'" + + # Create a new file and immediately copy it. + log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE + log_must cp --reflink=$mode $SRC_FILE $DST_FILE + verify_copy $SRC_FILE $DST_FILE + log_must rm -f $DST_FILE + + # Append to an existing file and immediately copy it. + log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \ + count=1 conv=notrunc + log_must cp --reflink=$mode $SRC_FILE $DST_FILE + verify_copy $SRC_FILE $DST_FILE + log_must rm -f $DST_FILE + + # Overwrite a random range of an existing file and immediately copy it. + log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \ + seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc + log_must cp --reflink=$mode $SRC_FILE $DST_FILE + verify_copy $SRC_FILE $DST_FILE + log_must rm -f $SRC_FILE $DST_FILE +done + +log_pass diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh new file mode 100755 index 00000000000..43bb8ab572d --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh @@ -0,0 +1,73 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2023 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# +# https://github.com/openzfs/zfs/issues/15526 identified a dirty dnode +# SEEK_HOLE/SEEK_DATA bug. https://github.com/openzfs/zfs/pull/15571 +# fixed the bug, and was backported to 2.1.14 and 2.2.2. +# +# This test is to ensure that the bug, as understood, will not recur. +# +# STRATEGY: +# +# 1. Run the 'seekflood' binary, for creation of files with timing +# characteristics that can trigger #15526. +# 2. A single run is not always a trigger, so run repeatedly. + +verify_runnable "global" + +function cleanup +{ + rm -rf /$TESTDIR/cp_stress +} + +log_assert "Run the 'seekflood' binary repeatedly to try to trigger #15526" + +log_onexit cleanup + +log_must mkdir /$TESTPOOL/cp_stress + +MYPWD="$PWD" +cd /$TESTPOOL/cp_stress +CPUS=$(get_num_cpus) + +if is_freebsd ; then + # 'seekflood' takes longer on FreeBSD and can timeout the test + RUNS=3 +else + RUNS=10 +fi + +for i in $(seq 1 $RUNS) ; do + # Each run takes around 12 seconds. + log_must $STF_SUITE/tests/functional/cp_files/seekflood 2000 $CPUS +done +cd "$MYPWD" + +log_pass "No corruption detected" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/seekflood.c b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/seekflood.c new file mode 100644 index 00000000000..02c2c8e6eca --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/seekflood.c @@ -0,0 +1,180 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright (c) 2023, Rob Norris + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DATASIZE (4096) +char data[DATASIZE]; + +static int +_open_file(int n, int wr) +{ + char buf[256]; + int fd; + + snprintf(buf, sizeof (buf), "testdata_%d_%d", getpid(), n); + + if ((fd = open(buf, wr ? (O_WRONLY | O_CREAT) : O_RDONLY, + wr ? (S_IRUSR | S_IWUSR) : 0)) < 0) { + fprintf(stderr, "Error: open '%s' (%s): %s\n", + buf, wr ? "write" : "read", strerror(errno)); + exit(1); + } + + return (fd); +} + +static void +_write_file(int n, int fd) +{ + /* write a big ball of stuff */ + ssize_t nwr = write(fd, data, DATASIZE); + if (nwr < 0) { + fprintf(stderr, "Error: write '%d_%d': %s\n", + getpid(), n, strerror(errno)); + exit(1); + } else if (nwr < DATASIZE) { + fprintf(stderr, "Error: write '%d_%d': short write\n", getpid(), + n); + exit(1); + } +} + +static int +_seek_file(int n, int fd) +{ + struct stat st; + if (fstat(fd, &st) < 0) { + fprintf(stderr, "Error: fstat '%d_%d': %s\n", getpid(), n, + strerror(errno)); + exit(1); + } + + /* + * A zero-sized file correctly has no data, so seeking the file is + * pointless. + */ + if (st.st_size == 0) + return (0); + + /* size is real, and we only write, so SEEK_DATA must find something */ + if (lseek(fd, 0, SEEK_DATA) < 0) { + if (errno == ENXIO) + return (1); + fprintf(stderr, "Error: lseek '%d_%d': %s\n", + getpid(), n, strerror(errno)); + exit(2); + } + + return (0); +} + +int +main(int argc, char **argv) +{ + int nfiles = 0; + int nthreads = 0; + + if (argc < 3 || (nfiles = atoi(argv[1])) == 0 || + (nthreads = atoi(argv[2])) == 0) { + printf("usage: seekflood \n"); + exit(1); + } + + memset(data, 0x5a, DATASIZE); + + /* fork off some flood threads */ + for (int i = 0; i < nthreads; i++) { + if (!fork()) { + /* thread main */ + + /* create zero file */ + int fd = _open_file(0, 1); + _write_file(0, fd); + close(fd); + + int count = 0; + + int h = 0, i, j, rfd, wfd; + for (i = 0; i < nfiles; i += 2, h++) { + j = i+1; + + /* seek h, write i */ + rfd = _open_file(h, 0); + wfd = _open_file(i, 1); + count += _seek_file(h, rfd); + _write_file(i, wfd); + close(rfd); + close(wfd); + + /* seek i, write j */ + rfd = _open_file(i, 0); + wfd = _open_file(j, 1); + count += _seek_file(i, rfd); + _write_file(j, wfd); + close(rfd); + close(wfd); + } + + /* return count of failed seeks to parent */ + exit(count < 256 ? count : 255); + } + } + + /* wait for threads, take their seek fail counts from exit code */ + int count = 0, crashed = 0; + for (int i = 0; i < nthreads; i++) { + int wstatus; + wait(&wstatus); + if (WIFEXITED(wstatus)) + count += WEXITSTATUS(wstatus); + else + crashed++; + } + + if (crashed) { + fprintf(stderr, "Error: child crashed; test failed\n"); + exit(1); + } + + if (count) { + fprintf(stderr, "Error: %d seek failures; test failed\n", + count); + exit(1); + } + + exit(0); +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/setup.ksh index b756d4e76c8..4223386b361 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/setup.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cp_files/setup.ksh @@ -32,4 +32,10 @@ . $STF_SUITE/include/libtest.shlib DISK=${DISKS%% *} + +if tunable_exists BCLONE_ENABLED ; then + log_must save_tunable BCLONE_ENABLED + log_must set_tunable32 BCLONE_ENABLED 1 +fi + default_setup $DISK diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh index 47e439d0f4d..2fa14655635 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/io/io_uring.ksh @@ -44,6 +44,13 @@ if ! $(grep -q "CONFIG_IO_URING=y" /boot/config-$(uname -r)); then log_unsupported "Requires io_uring support" fi +if [ -e /etc/os-release ] ; then + source /etc/os-release + if [ -n "$REDHAT_SUPPORT_PRODUCT_VERSION" ] && ((floor($REDHAT_SUPPORT_PRODUCT_VERSION) == 9)) ; then + log_unsupported "Disabled on CentOS 9, fails with 'Operation not permitted'" + fi +fi + fio --ioengine=io_uring --parse-only || log_unsupported "fio io_uring support required" function cleanup diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib index 30818050a07..297c6a073bb 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib @@ -44,28 +44,6 @@ function cleanup done } -# -# Get random number between min and max number. -# -# $1 Minimal value -# $2 Maximal value -# -function random -{ - typeset -i min=$1 - typeset -i max=$2 - typeset -i value - - while true; do - ((value = RANDOM % (max + 1))) - if ((value >= min)); then - break - fi - done - - echo $value -} - # # Get the number of checksum errors for the pool. # diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 8fc8a954174..fe858026318 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -108,6 +108,12 @@ /* bdev_max_secure_erase_sectors() is available */ /* #undef HAVE_BDEV_MAX_SECURE_ERASE_SECTORS */ +/* bdev_open_by_path() exists */ +/* #undef HAVE_BDEV_OPEN_BY_PATH */ + +/* bdev_release() exists */ +/* #undef HAVE_BDEV_RELEASE */ + /* block_device_operations->submit_bio() returns void */ /* #undef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID */ @@ -156,6 +162,9 @@ /* blkdev_issue_discard() is available */ /* #undef HAVE_BLKDEV_ISSUE_DISCARD */ +/* __blkdev_issue_discard() is available */ +/* #undef HAVE_BLKDEV_ISSUE_DISCARD_ASYNC */ + /* blkdev_issue_secure_erase() is available */ /* #undef HAVE_BLKDEV_ISSUE_SECURE_ERASE */ @@ -424,15 +433,24 @@ /* APIs for idmapped mount are present */ /* #undef HAVE_IDMAP_MNT_API */ +/* mnt_idmap does not have user_namespace */ +/* #undef HAVE_IDMAP_NO_USERNS */ + /* Define if compiler supports -Wimplicit-fallthrough */ /* #undef HAVE_IMPLICIT_FALLTHROUGH */ /* Define if compiler supports -Winfinite-recursion */ /* #undef HAVE_INFINITE_RECURSION */ +/* inode_get_atime() exists in linux/fs.h */ +/* #undef HAVE_INODE_GET_ATIME */ + /* inode_get_ctime() exists in linux/fs.h */ /* #undef HAVE_INODE_GET_CTIME */ +/* inode_get_mtime() exists in linux/fs.h */ +/* #undef HAVE_INODE_GET_MTIME */ + /* yes */ /* #undef HAVE_INODE_LOCK_SHARED */ @@ -445,6 +463,9 @@ /* inode_owner_or_capable() takes user_ns */ /* #undef HAVE_INODE_OWNER_OR_CAPABLE_USERNS */ +/* inode_set_atime_to_ts() exists in linux/fs.h */ +/* #undef HAVE_INODE_SET_ATIME_TO_TS */ + /* inode_set_ctime_to_ts() exists in linux/fs.h */ /* #undef HAVE_INODE_SET_CTIME_TO_TS */ @@ -454,6 +475,9 @@ /* inode_set_iversion() exists */ /* #undef HAVE_INODE_SET_IVERSION */ +/* inode_set_mtime_to_ts() exists in linux/fs.h */ +/* #undef HAVE_INODE_SET_MTIME_TO_TS */ + /* inode->i_*time's are timespec64 */ /* #undef HAVE_INODE_TIMESPEC64_TIMES */ @@ -550,6 +574,9 @@ /* Define if compiler supports -Winfinite-recursion */ /* #undef HAVE_KERNEL_INFINITE_RECURSION */ +/* kernel has kernel_neon_* functions */ +/* #undef HAVE_KERNEL_NEON */ + /* kernel does stack verification */ /* #undef HAVE_KERNEL_OBJTOOL */ @@ -559,6 +586,12 @@ /* kernel_read() take loff_t pointer */ /* #undef HAVE_KERNEL_READ_PPOS */ +/* strlcpy() exists */ +/* #undef HAVE_KERNEL_STRLCPY */ + +/* strscpy() exists */ +/* #undef HAVE_KERNEL_STRSCPY */ + /* timer_list.function gets a timer_list */ /* #undef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST */ @@ -769,6 +802,9 @@ /* set_special_state() exists */ /* #undef HAVE_SET_SPECIAL_STATE */ +/* shrinker_register exists */ +/* #undef HAVE_SHRINKER_REGISTER */ + /* struct shrink_control exists */ /* #undef HAVE_SHRINK_CONTROL_STRUCT */ @@ -834,6 +870,12 @@ /* submit_bio is member of struct block_device_operations */ /* #undef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ +/* have super_block s_shrink */ +/* #undef HAVE_SUPER_BLOCK_S_SHRINK */ + +/* have super_block s_shrink pointer */ +/* #undef HAVE_SUPER_BLOCK_S_SHRINK_PTR */ + /* super_setup_bdi_name() exits */ /* #undef HAVE_SUPER_SETUP_BDI_NAME */ @@ -1110,7 +1152,7 @@ /* #undef ZFS_IS_GPL_COMPATIBLE */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.2.2-FreeBSD_g494aaaed8" +#define ZFS_META_ALIAS "zfs-2.2.3-FreeBSD_gc883088df" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -1119,7 +1161,7 @@ /* #undef ZFS_META_DATA */ /* Define the maximum compatible kernel version. */ -#define ZFS_META_KVER_MAX "6.6" +#define ZFS_META_KVER_MAX "6.7" /* Define the minimum compatible kernel version. */ #define ZFS_META_KVER_MIN "3.10" @@ -1140,10 +1182,10 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "FreeBSD_g494aaaed8" +#define ZFS_META_RELEASE "FreeBSD_gc883088df" /* Define the project version. */ -#define ZFS_META_VERSION "2.2.2" +#define ZFS_META_VERSION "2.2.3" /* count is located in percpu_ref.data */ /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */ diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 82745ed7414..70b3c3310d8 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.2.2-0-g494aaaed8" +#define ZFS_META_GITREV "zfs-2.2.3-0-gc883088df"