From 069bf406b4619597df1519171edf5fc1736218ee Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Sun, 13 Jun 2021 10:48:53 -0700 Subject: [PATCH 1/5] vdev_draid_min_asize() ignores reserved space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vdev_draid_min_asize() returns the minimum size of a child vdev. This is used when determining if a disk is big enough to replace a child. It's also used by zdb to determine how big of a child to make to test replacement. vdev_draid_min_asize() says that the child’s asize has to be at least 1/Nth of the entire draid’s asize, which is the same logic as raidz. However, this contradicts the code in vdev_draid_open(), which calculates the draid’s asize based on a reduced child size: An additional 32MB of scratch space is reserved at the end of each child for use by the dRAID expansion feature So the problem is that you can replace a draid disk with one that’s vdev_draid_min_asize(), but it actually needs to be larger to accommodate the additional 32MB. The replacement is allowed and everything works at first (since the reserved space is at the end, and we don’t try to use it yet), but when you try to close and reopen the pool, vdev_draid_open() calculates a smaller asize for the draid, because of the smaller leaf, which is not allowed. I think the confusion is that vdev_draid_min_asize() is correctly returning the amount of required *allocatable* space in a leaf, but the actual *size* of the leaf needs to be at least 32MB more than that. ztest_vdev_attach_detach() assumes that it can attach that size of device, and it actually can (the kernel/libzpool accepts it), but it then later causes zdb to not be able to open the pool. This commit changes vdev_draid_min_asize() to return the required size of the leaf, not the size that draid will make available to the metaslab allocator. Reviewed-by: Brian Behlendorf Reviewed-by: Mark Maybee Signed-off-by: Matthew Ahrens Closes #11459 Closes #12221 --- module/zfs/vdev_draid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c index 20b1457f0ce..b8f82d52e8f 100644 --- a/module/zfs/vdev_draid.c +++ b/module/zfs/vdev_draid.c @@ -1132,7 +1132,8 @@ vdev_draid_min_asize(vdev_t *vd) ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops); - return ((vd->vdev_min_asize + vdc->vdc_ndisks - 1) / (vdc->vdc_ndisks)); + return (VDEV_DRAID_REFLOW_RESERVE + + (vd->vdev_min_asize + vdc->vdc_ndisks - 1) / (vdc->vdc_ndisks)); } /* From 83ba91adf6c0b4b1a5b0f0a6f3e84d267e689915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Mon, 14 Jun 2021 17:48:53 +0200 Subject: [PATCH 2/5] systemd: import: expand $ZPOOL_IMPORT_OPTS correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out $ZPOOL_IMPORT_OPTS expands in a shell-like fashion, yielding 'import' '-aN' '-o' 'cachefile=none' for an unset variable, and 'import' '-aN' '-o' 'cachefile=none' 'word1' 'word2' for a white-spaced one, but ${ZPOOL_IMPORT_OPTS} expands like "${Z_I_O}" would in a shell, yielding 'import' '-aN' '-o' 'cachefile=none' '' (empty) and 'import' '-aN' '-o' 'cachefile=none' 'word1 word2' (spaced) Fixes eec5ba113e1d285d445333079a3e8184872ad00a "dracut: 90zfs: respect zfs_force=1 on systemd systems" Reviewed-by: Brian Behlendorf Reviewed-by: John Kennedy Signed-off-by: Ahelenia Ziemiańska Closes: #12231 --- etc/systemd/system/zfs-import-cache.service.in | 2 +- etc/systemd/system/zfs-import-scan.service.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/etc/systemd/system/zfs-import-cache.service.in b/etc/systemd/system/zfs-import-cache.service.in index b09961a8bb8..5e5c6281c9d 100644 --- a/etc/systemd/system/zfs-import-cache.service.in +++ b/etc/systemd/system/zfs-import-cache.service.in @@ -14,7 +14,7 @@ ConditionPathIsDirectory=/sys/module/zfs [Service] Type=oneshot RemainAfterExit=yes -ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN ${ZPOOL_IMPORT_OPTS} +ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN $ZPOOL_IMPORT_OPTS [Install] WantedBy=zfs-import.target diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in index c1111c73a60..d3c083f7e9c 100644 --- a/etc/systemd/system/zfs-import-scan.service.in +++ b/etc/systemd/system/zfs-import-scan.service.in @@ -13,7 +13,7 @@ ConditionPathIsDirectory=/sys/module/zfs [Service] Type=oneshot RemainAfterExit=yes -ExecStart=@sbindir@/zpool import -aN -o cachefile=none ${ZPOOL_IMPORT_OPTS} +ExecStart=@sbindir@/zpool import -aN -o cachefile=none $ZPOOL_IMPORT_OPTS [Install] WantedBy=zfs-import.target From ff3175040572baae1aacf5d9062192c5df849030 Mon Sep 17 00:00:00 2001 From: Rich Ercolani <214141+rincebrain@users.noreply.github.com> Date: Mon, 14 Jun 2021 12:59:54 -0400 Subject: [PATCH 3/5] Fix importing with symlinks It turns out that symlinks are heavily used on Linux in /dev/disk. So let's allow importing from them. Reviewed-by: Brian Behlendorf Reviewed-by: Ryan Moeller Signed-off-by: Rich Ercolani Closes #12238 --- lib/libzutil/zutil_import.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 9bd12973fc5..b5b2d7dbed9 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1260,6 +1260,7 @@ zpool_find_import_scan_dir(libpc_handle_t *hdl, pthread_mutex_t *lock, switch (dp->d_type) { case DT_UNKNOWN: case DT_BLK: + case DT_LNK: #ifdef __FreeBSD__ case DT_CHR: #endif From 9ffcaa370aee6871c92c7c84aa65942fba63a884 Mon Sep 17 00:00:00 2001 From: George Amanakis Date: Thu, 17 Jun 2021 03:17:42 +0300 Subject: [PATCH 4/5] Avoid deadlock when removing L2ARC devices under I/O In case we have I/O and try to remove an L2ARC device a deadlock might occur. arc_read()->zio_read()->zfs_blkptr_verify() waits for SCL_VDEV to be dropped while holding the hash_lock. However, spa_l2cache_load() holds SCL_ALL and waits for the hash_lock in l2arc_evict(). Fix this by moving zfs_blkptr_verify() to the top top arc_read() before the hash_lock is taken. Verify the block pointer and return a checksum error if damaged rather than halting the system, by using BLK_VERIFY_LOG instead of BLK_VERIFY_HALT. Reviewed-by: Brian Behlendorf Reviewed-by: Mark Maybee Signed-off-by: George Amanakis Closes #12054 --- module/zfs/arc.c | 17 ++++++----------- module/zfs/zio.c | 3 --- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 737904f3485..11285aa46ee 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -5944,6 +5944,12 @@ top: * Embedded BP's have no DVA and require no I/O to "read". * Create an anonymous arc buf to back it. */ + if (!zfs_blkptr_verify(spa, bp, zio_flags & + ZIO_FLAG_CONFIG_WRITER, BLK_VERIFY_LOG)) { + rc = SET_ERROR(ECKSUM); + goto out; + } + hdr = buf_hash_find(guid, bp, &hash_lock); } @@ -6115,17 +6121,6 @@ top: goto out; } - /* - * Gracefully handle a damaged logical block size as a - * checksum error. - */ - if (lsize > spa_maxblocksize(spa)) { - rc = SET_ERROR(ECKSUM); - if (hash_lock != NULL) - mutex_exit(hash_lock); - goto out; - } - if (hdr == NULL) { /* * This block is not in the cache or it has diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 87ccb686185..66ac545c798 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1107,9 +1107,6 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, { zio_t *zio; - (void) zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER, - BLK_VERIFY_HALT); - zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, data, size, size, done, private, ZIO_TYPE_READ, priority, flags, NULL, 0, zb, From c4c162c1e8ff9ce8833014711875d18df520096c Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 16 Jun 2021 20:19:34 -0400 Subject: [PATCH 5/5] Use wmsum for arc, abd, dbuf and zfetch statistics. (#12172) wmsum was designed exactly for cases like these with many updates and rare reads. It allows to completely avoid atomic operations on congested global variables. Reviewed-by: Brian Behlendorf Reviewed-by: Mark Maybee Signed-off-by: Alexander Motin Sponsored-By: iXsystems, Inc. Closes #12172 --- include/sys/abd_impl.h | 4 +- include/sys/arc_impl.h | 119 +++++-- module/os/freebsd/zfs/abd_os.c | 46 +++ module/os/linux/zfs/abd_os.c | 75 +++++ module/os/linux/zfs/arc_os.c | 2 +- module/zfs/arc.c | 587 ++++++++++++++++++++++++--------- module/zfs/dbuf.c | 112 +++++-- module/zfs/dmu_zfetch.c | 47 ++- 8 files changed, 777 insertions(+), 215 deletions(-) diff --git a/include/sys/abd_impl.h b/include/sys/abd_impl.h index 435a8dc6d9c..6bce08cfa34 100644 --- a/include/sys/abd_impl.h +++ b/include/sys/abd_impl.h @@ -27,6 +27,7 @@ #define _ABD_IMPL_H #include +#include #ifdef __cplusplus extern "C" { @@ -82,9 +83,8 @@ void abd_iter_unmap(struct abd_iter *); /* * Helper macros */ -#define ABDSTAT(stat) (abd_stats.stat.value.ui64) #define ABDSTAT_INCR(stat, val) \ - atomic_add_64(&abd_stats.stat.value.ui64, (val)) + wmsum_add(&abd_sums.stat, (val)) #define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1) #define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1) diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index c01da46e01e..1f341ec94fa 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -563,7 +564,6 @@ typedef struct arc_stats { kstat_named_t arcstat_c; kstat_named_t arcstat_c_min; kstat_named_t arcstat_c_max; - /* Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_size; /* * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd. @@ -592,14 +592,12 @@ typedef struct arc_stats { * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only * caches), and arc_buf_t structures (allocated via arc_buf_t * cache). - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_hdr_size; /* * Number of bytes consumed by ARC buffers of type equal to * ARC_BUFC_DATA. This is generally consumed by buffers backing * on disk user data (e.g. plain file contents). - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_data_size; /* @@ -607,22 +605,18 @@ typedef struct arc_stats { * ARC_BUFC_METADATA. This is generally consumed by buffers * backing on disk data that is used for internal ZFS * structures (e.g. ZAP, dnode, indirect blocks, etc). - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_metadata_size; /* * Number of bytes consumed by dmu_buf_impl_t objects. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_dbuf_size; /* * Number of bytes consumed by dnode_t objects. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_dnode_size; /* * Number of bytes consumed by bonus buffers. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_bonus_size; #if defined(COMPAT_FREEBSD11) @@ -637,7 +631,6 @@ typedef struct arc_stats { * arc_anon state. This includes *all* buffers in the arc_anon * state; e.g. data, metadata, evictable, and unevictable buffers * are all included in this value. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_anon_size; /* @@ -645,7 +638,6 @@ typedef struct arc_stats { * following criteria: backing buffers of type ARC_BUFC_DATA, * residing in the arc_anon state, and are eligible for eviction * (e.g. have no outstanding holds on the buffer). - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_anon_evictable_data; /* @@ -653,7 +645,6 @@ typedef struct arc_stats { * following criteria: backing buffers of type ARC_BUFC_METADATA, * residing in the arc_anon state, and are eligible for eviction * (e.g. have no outstanding holds on the buffer). - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_anon_evictable_metadata; /* @@ -661,7 +652,6 @@ typedef struct arc_stats { * arc_mru state. This includes *all* buffers in the arc_mru * state; e.g. data, metadata, evictable, and unevictable buffers * are all included in this value. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mru_size; /* @@ -669,7 +659,6 @@ typedef struct arc_stats { * following criteria: backing buffers of type ARC_BUFC_DATA, * residing in the arc_mru state, and are eligible for eviction * (e.g. have no outstanding holds on the buffer). - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mru_evictable_data; /* @@ -677,7 +666,6 @@ typedef struct arc_stats { * following criteria: backing buffers of type ARC_BUFC_METADATA, * residing in the arc_mru state, and are eligible for eviction * (e.g. have no outstanding holds on the buffer). - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mru_evictable_metadata; /* @@ -688,21 +676,18 @@ typedef struct arc_stats { * don't actually have ARC buffers linked off of these headers. * Thus, *if* the headers had associated ARC buffers, these * buffers *would have* consumed this number of bytes. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mru_ghost_size; /* * Number of bytes that *would have been* consumed by ARC * buffers that are eligible for eviction, of type * ARC_BUFC_DATA, and linked off the arc_mru_ghost state. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mru_ghost_evictable_data; /* * Number of bytes that *would have been* consumed by ARC * buffers that are eligible for eviction, of type * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mru_ghost_evictable_metadata; /* @@ -710,42 +695,36 @@ typedef struct arc_stats { * arc_mfu state. This includes *all* buffers in the arc_mfu * state; e.g. data, metadata, evictable, and unevictable buffers * are all included in this value. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mfu_size; /* * Number of bytes consumed by ARC buffers that are eligible for * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu * state. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mfu_evictable_data; /* * Number of bytes consumed by ARC buffers that are eligible for * eviction, of type ARC_BUFC_METADATA, and reside in the * arc_mfu state. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mfu_evictable_metadata; /* * Total number of bytes that *would have been* consumed by ARC * buffers in the arc_mfu_ghost state. See the comment above * arcstat_mru_ghost_size for more details. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mfu_ghost_size; /* * Number of bytes that *would have been* consumed by ARC * buffers that are eligible for eviction, of type * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mfu_ghost_evictable_data; /* * Number of bytes that *would have been* consumed by ARC * buffers that are eligible for eviction, of type * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. - * Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_mfu_ghost_evictable_metadata; kstat_named_t arcstat_l2_hits; @@ -779,7 +758,6 @@ typedef struct arc_stats { kstat_named_t arcstat_l2_io_error; kstat_named_t arcstat_l2_lsize; kstat_named_t arcstat_l2_psize; - /* Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_l2_hdr_size; /* * Number of L2ARC log blocks written. These are used for restoring the @@ -860,7 +838,6 @@ typedef struct arc_stats { kstat_named_t arcstat_tempreserve; kstat_named_t arcstat_loaned_bytes; kstat_named_t arcstat_prune; - /* Not updated directly; only synced in arc_kstat_update. */ kstat_named_t arcstat_meta_used; kstat_named_t arcstat_meta_limit; kstat_named_t arcstat_dnode_limit; @@ -876,6 +853,96 @@ typedef struct arc_stats { kstat_named_t arcstat_abd_chunk_waste_size; } arc_stats_t; +typedef struct arc_sums { + wmsum_t arcstat_hits; + wmsum_t arcstat_misses; + wmsum_t arcstat_demand_data_hits; + wmsum_t arcstat_demand_data_misses; + wmsum_t arcstat_demand_metadata_hits; + wmsum_t arcstat_demand_metadata_misses; + wmsum_t arcstat_prefetch_data_hits; + wmsum_t arcstat_prefetch_data_misses; + wmsum_t arcstat_prefetch_metadata_hits; + wmsum_t arcstat_prefetch_metadata_misses; + wmsum_t arcstat_mru_hits; + wmsum_t arcstat_mru_ghost_hits; + wmsum_t arcstat_mfu_hits; + wmsum_t arcstat_mfu_ghost_hits; + wmsum_t arcstat_deleted; + wmsum_t arcstat_mutex_miss; + wmsum_t arcstat_access_skip; + wmsum_t arcstat_evict_skip; + wmsum_t arcstat_evict_not_enough; + wmsum_t arcstat_evict_l2_cached; + wmsum_t arcstat_evict_l2_eligible; + wmsum_t arcstat_evict_l2_eligible_mfu; + wmsum_t arcstat_evict_l2_eligible_mru; + wmsum_t arcstat_evict_l2_ineligible; + wmsum_t arcstat_evict_l2_skip; + wmsum_t arcstat_hash_collisions; + wmsum_t arcstat_hash_chains; + aggsum_t arcstat_size; + wmsum_t arcstat_compressed_size; + wmsum_t arcstat_uncompressed_size; + wmsum_t arcstat_overhead_size; + wmsum_t arcstat_hdr_size; + wmsum_t arcstat_data_size; + wmsum_t arcstat_metadata_size; + wmsum_t arcstat_dbuf_size; + aggsum_t arcstat_dnode_size; + wmsum_t arcstat_bonus_size; + wmsum_t arcstat_l2_hits; + wmsum_t arcstat_l2_misses; + wmsum_t arcstat_l2_prefetch_asize; + wmsum_t arcstat_l2_mru_asize; + wmsum_t arcstat_l2_mfu_asize; + wmsum_t arcstat_l2_bufc_data_asize; + wmsum_t arcstat_l2_bufc_metadata_asize; + wmsum_t arcstat_l2_feeds; + wmsum_t arcstat_l2_rw_clash; + wmsum_t arcstat_l2_read_bytes; + wmsum_t arcstat_l2_write_bytes; + wmsum_t arcstat_l2_writes_sent; + wmsum_t arcstat_l2_writes_done; + wmsum_t arcstat_l2_writes_error; + wmsum_t arcstat_l2_writes_lock_retry; + wmsum_t arcstat_l2_evict_lock_retry; + wmsum_t arcstat_l2_evict_reading; + wmsum_t arcstat_l2_evict_l1cached; + wmsum_t arcstat_l2_free_on_write; + wmsum_t arcstat_l2_abort_lowmem; + wmsum_t arcstat_l2_cksum_bad; + wmsum_t arcstat_l2_io_error; + wmsum_t arcstat_l2_lsize; + wmsum_t arcstat_l2_psize; + aggsum_t arcstat_l2_hdr_size; + wmsum_t arcstat_l2_log_blk_writes; + wmsum_t arcstat_l2_log_blk_asize; + wmsum_t arcstat_l2_log_blk_count; + wmsum_t arcstat_l2_rebuild_success; + wmsum_t arcstat_l2_rebuild_abort_unsupported; + wmsum_t arcstat_l2_rebuild_abort_io_errors; + wmsum_t arcstat_l2_rebuild_abort_dh_errors; + wmsum_t arcstat_l2_rebuild_abort_cksum_lb_errors; + wmsum_t arcstat_l2_rebuild_abort_lowmem; + wmsum_t arcstat_l2_rebuild_size; + wmsum_t arcstat_l2_rebuild_asize; + wmsum_t arcstat_l2_rebuild_bufs; + wmsum_t arcstat_l2_rebuild_bufs_precached; + wmsum_t arcstat_l2_rebuild_log_blks; + wmsum_t arcstat_memory_throttle_count; + wmsum_t arcstat_memory_direct_count; + wmsum_t arcstat_memory_indirect_count; + wmsum_t arcstat_prune; + aggsum_t arcstat_meta_used; + wmsum_t arcstat_async_upgrade_sync; + wmsum_t arcstat_demand_hit_predictive_prefetch; + wmsum_t arcstat_demand_hit_prescient_prefetch; + wmsum_t arcstat_raw_size; + wmsum_t arcstat_cached_only_in_progress; + wmsum_t arcstat_abd_chunk_waste_size; +} arc_sums_t; + typedef struct arc_evict_waiter { list_node_t aew_node; kcondvar_t aew_cv; @@ -885,7 +952,7 @@ typedef struct arc_evict_waiter { #define ARCSTAT(stat) (arc_stats.stat.value.ui64) #define ARCSTAT_INCR(stat, val) \ - atomic_add_64(&arc_stats.stat.value.ui64, (val)) + wmsum_add(&arc_sums.stat, (val)) #define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) #define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) @@ -899,6 +966,7 @@ typedef struct arc_evict_waiter { extern taskq_t *arc_prune_taskq; extern arc_stats_t arc_stats; +extern arc_sums_t arc_sums; extern hrtime_t arc_growtime; extern boolean_t arc_warm; extern int arc_grow_retry; @@ -906,7 +974,6 @@ extern int arc_no_grow_shift; extern int arc_shrink_shift; extern kmutex_t arc_prune_mtx; extern list_t arc_prune_list; -extern aggsum_t arc_size; extern arc_state_t *arc_mfu; extern arc_state_t *arc_mru; extern uint_t zfs_arc_pc_percent; diff --git a/module/os/freebsd/zfs/abd_os.c b/module/os/freebsd/zfs/abd_os.c index 00aa21c6c9e..47adc2278df 100644 --- a/module/os/freebsd/zfs/abd_os.c +++ b/module/os/freebsd/zfs/abd_os.c @@ -69,6 +69,15 @@ static abd_stats_t abd_stats = { { "linear_data_size", KSTAT_DATA_UINT64 }, }; +struct { + wmsum_t abdstat_struct_size; + wmsum_t abdstat_scatter_cnt; + wmsum_t abdstat_scatter_data_size; + wmsum_t abdstat_scatter_chunk_waste; + wmsum_t abdstat_linear_cnt; + wmsum_t abdstat_linear_data_size; +} abd_sums; + /* * The size of the chunks ABD allocates. Because the sizes allocated from the * kmem_cache can't change, this tunable can only be modified at boot. Changing @@ -271,16 +280,46 @@ abd_free_zero_scatter(void) kmem_free(abd_zero_buf, zfs_abd_chunk_size); } +static int +abd_kstats_update(kstat_t *ksp, int rw) +{ + abd_stats_t *as = ksp->ks_data; + + if (rw == KSTAT_WRITE) + return (EACCES); + as->abdstat_struct_size.value.ui64 = + wmsum_value(&abd_sums.abdstat_struct_size); + as->abdstat_scatter_cnt.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_cnt); + as->abdstat_scatter_data_size.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_data_size); + as->abdstat_scatter_chunk_waste.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_chunk_waste); + as->abdstat_linear_cnt.value.ui64 = + wmsum_value(&abd_sums.abdstat_linear_cnt); + as->abdstat_linear_data_size.value.ui64 = + wmsum_value(&abd_sums.abdstat_linear_data_size); + return (0); +} + void abd_init(void) { abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0, NULL, NULL, NULL, NULL, 0, KMC_NODEBUG); + wmsum_init(&abd_sums.abdstat_struct_size, 0); + wmsum_init(&abd_sums.abdstat_scatter_cnt, 0); + wmsum_init(&abd_sums.abdstat_scatter_data_size, 0); + wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0); + wmsum_init(&abd_sums.abdstat_linear_cnt, 0); + wmsum_init(&abd_sums.abdstat_linear_data_size, 0); + abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (abd_ksp != NULL) { abd_ksp->ks_data = &abd_stats; + abd_ksp->ks_update = abd_kstats_update; kstat_install(abd_ksp); } @@ -297,6 +336,13 @@ abd_fini(void) abd_ksp = NULL; } + wmsum_fini(&abd_sums.abdstat_struct_size); + wmsum_fini(&abd_sums.abdstat_scatter_cnt); + wmsum_fini(&abd_sums.abdstat_scatter_data_size); + wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste); + wmsum_fini(&abd_sums.abdstat_linear_cnt); + wmsum_fini(&abd_sums.abdstat_linear_data_size); + kmem_cache_destroy(abd_chunk_cache); abd_chunk_cache = NULL; } diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 551a3cc8d1d..af543d6e3f7 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -132,6 +132,20 @@ static abd_stats_t abd_stats = { { "scatter_sg_table_retry", KSTAT_DATA_UINT64 }, }; +struct { + wmsum_t abdstat_struct_size; + wmsum_t abdstat_linear_cnt; + wmsum_t abdstat_linear_data_size; + wmsum_t abdstat_scatter_cnt; + wmsum_t abdstat_scatter_data_size; + wmsum_t abdstat_scatter_chunk_waste; + wmsum_t abdstat_scatter_orders[MAX_ORDER]; + wmsum_t abdstat_scatter_page_multi_chunk; + wmsum_t abdstat_scatter_page_multi_zone; + wmsum_t abdstat_scatter_page_alloc_retry; + wmsum_t abdstat_scatter_sg_table_retry; +} abd_sums; + #define abd_for_each_sg(abd, sg, n, i) \ for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i) @@ -687,6 +701,40 @@ abd_free_zero_scatter(void) #endif /* _KERNEL */ } +static int +abd_kstats_update(kstat_t *ksp, int rw) +{ + abd_stats_t *as = ksp->ks_data; + + if (rw == KSTAT_WRITE) + return (EACCES); + as->abdstat_struct_size.value.ui64 = + wmsum_value(&abd_sums.abdstat_struct_size); + as->abdstat_linear_cnt.value.ui64 = + wmsum_value(&abd_sums.abdstat_linear_cnt); + as->abdstat_linear_data_size.value.ui64 = + wmsum_value(&abd_sums.abdstat_linear_data_size); + as->abdstat_scatter_cnt.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_cnt); + as->abdstat_scatter_data_size.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_data_size); + as->abdstat_scatter_chunk_waste.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_chunk_waste); + for (int i = 0; i < MAX_ORDER; i++) { + as->abdstat_scatter_orders[i].value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_orders[i]); + } + as->abdstat_scatter_page_multi_chunk.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_page_multi_chunk); + as->abdstat_scatter_page_multi_zone.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_page_multi_zone); + as->abdstat_scatter_page_alloc_retry.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_page_alloc_retry); + as->abdstat_scatter_sg_table_retry.value.ui64 = + wmsum_value(&abd_sums.abdstat_scatter_sg_table_retry); + return (0); +} + void abd_init(void) { @@ -695,6 +743,19 @@ abd_init(void) abd_cache = kmem_cache_create("abd_t", sizeof (abd_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + wmsum_init(&abd_sums.abdstat_struct_size, 0); + wmsum_init(&abd_sums.abdstat_linear_cnt, 0); + wmsum_init(&abd_sums.abdstat_linear_data_size, 0); + wmsum_init(&abd_sums.abdstat_scatter_cnt, 0); + wmsum_init(&abd_sums.abdstat_scatter_data_size, 0); + wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0); + for (i = 0; i < MAX_ORDER; i++) + wmsum_init(&abd_sums.abdstat_scatter_orders[i], 0); + wmsum_init(&abd_sums.abdstat_scatter_page_multi_chunk, 0); + wmsum_init(&abd_sums.abdstat_scatter_page_multi_zone, 0); + wmsum_init(&abd_sums.abdstat_scatter_page_alloc_retry, 0); + wmsum_init(&abd_sums.abdstat_scatter_sg_table_retry, 0); + abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (abd_ksp != NULL) { @@ -705,6 +766,7 @@ abd_init(void) KSTAT_DATA_UINT64; } abd_ksp->ks_data = &abd_stats; + abd_ksp->ks_update = abd_kstats_update; kstat_install(abd_ksp); } @@ -721,6 +783,19 @@ abd_fini(void) abd_ksp = NULL; } + wmsum_fini(&abd_sums.abdstat_struct_size); + wmsum_fini(&abd_sums.abdstat_linear_cnt); + wmsum_fini(&abd_sums.abdstat_linear_data_size); + wmsum_fini(&abd_sums.abdstat_scatter_cnt); + wmsum_fini(&abd_sums.abdstat_scatter_data_size); + wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste); + for (int i = 0; i < MAX_ORDER; i++) + wmsum_fini(&abd_sums.abdstat_scatter_orders[i]); + wmsum_fini(&abd_sums.abdstat_scatter_page_multi_chunk); + wmsum_fini(&abd_sums.abdstat_scatter_page_multi_zone); + wmsum_fini(&abd_sums.abdstat_scatter_page_alloc_retry); + wmsum_fini(&abd_sums.abdstat_scatter_sg_table_retry); + if (abd_cache) { kmem_cache_destroy(abd_cache); abd_cache = NULL; diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c index 465775a6748..b03ad8318d1 100644 --- a/module/os/linux/zfs/arc_os.c +++ b/module/os/linux/zfs/arc_os.c @@ -135,7 +135,7 @@ arc_available_memory(void) static uint64_t arc_evictable_memory(void) { - int64_t asize = aggsum_value(&arc_size); + int64_t asize = aggsum_value(&arc_sums.arcstat_size); uint64_t arc_clean = zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) + zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) + diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 11285aa46ee..b0468159d2e 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -600,6 +600,8 @@ arc_stats_t arc_stats = { { "abd_chunk_waste_size", KSTAT_DATA_UINT64 }, }; +arc_sums_t arc_sums; + #define ARCSTAT_MAX(stat, val) { \ uint64_t m; \ while ((val) > (m = arc_stats.stat.value.ui64) && \ @@ -607,9 +609,6 @@ arc_stats_t arc_stats = { continue; \ } -#define ARCSTAT_MAXSTAT(stat) \ - ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64) - /* * We define a macro to allow ARC hits/misses to be easily broken down by * two separate conditions, giving a total of four different subtypes for @@ -671,37 +670,8 @@ arc_state_t *arc_mfu; /* max size for dnodes */ #define arc_dnode_size_limit ARCSTAT(arcstat_dnode_limit) #define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */ -#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */ #define arc_need_free ARCSTAT(arcstat_need_free) /* waiting to be evicted */ -/* size of all b_rabd's in entire arc */ -#define arc_raw_size ARCSTAT(arcstat_raw_size) -/* compressed size of entire arc */ -#define arc_compressed_size ARCSTAT(arcstat_compressed_size) -/* uncompressed size of entire arc */ -#define arc_uncompressed_size ARCSTAT(arcstat_uncompressed_size) -/* number of bytes in the arc from arc_buf_t's */ -#define arc_overhead_size ARCSTAT(arcstat_overhead_size) - -/* - * There are also some ARC variables that we want to export, but that are - * updated so often that having the canonical representation be the statistic - * variable causes a performance bottleneck. We want to use aggsum_t's for these - * instead, but still be able to export the kstat in the same way as before. - * The solution is to always use the aggsum version, except in the kstat update - * callback. - */ -aggsum_t arc_size; -aggsum_t arc_meta_used; -wmsum_t astat_data_size; -wmsum_t astat_metadata_size; -wmsum_t astat_dbuf_size; -aggsum_t astat_dnode_size; -wmsum_t astat_bonus_size; -wmsum_t astat_hdr_size; -aggsum_t astat_l2_hdr_size; -wmsum_t astat_abd_chunk_waste_size; - hrtime_t arc_growtime; list_t arc_prune_list; kmutex_t arc_prune_mtx; @@ -819,9 +789,6 @@ uint64_t zfs_crc64_table[256]; */ #define L2ARC_FEED_TYPES 4 -#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent) -#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done) - /* L2ARC Performance Tunables */ unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ @@ -1085,9 +1052,9 @@ buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp) ARCSTAT_MAX(arcstat_hash_chain_max, i); } - - ARCSTAT_BUMP(arcstat_hash_elements); - ARCSTAT_MAXSTAT(arcstat_hash_elements); + uint64_t he = atomic_inc_64_nv( + &arc_stats.arcstat_hash_elements.value.ui64); + ARCSTAT_MAX(arcstat_hash_elements_max, he); return (NULL); } @@ -1111,7 +1078,7 @@ buf_hash_remove(arc_buf_hdr_t *hdr) arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE); /* collect some hash table performance data */ - ARCSTAT_BUMPDOWN(arcstat_hash_elements); + atomic_dec_64(&arc_stats.arcstat_hash_elements.value.ui64); if (buf_hash_table.ht_table[idx] && buf_hash_table.ht_table[idx]->b_hash_next == NULL) @@ -2646,25 +2613,25 @@ arc_space_consume(uint64_t space, arc_space_type_t type) default: break; case ARC_SPACE_DATA: - wmsum_add(&astat_data_size, space); + ARCSTAT_INCR(arcstat_data_size, space); break; case ARC_SPACE_META: - wmsum_add(&astat_metadata_size, space); + ARCSTAT_INCR(arcstat_metadata_size, space); break; case ARC_SPACE_BONUS: - wmsum_add(&astat_bonus_size, space); + ARCSTAT_INCR(arcstat_bonus_size, space); break; case ARC_SPACE_DNODE: - aggsum_add(&astat_dnode_size, space); + aggsum_add(&arc_sums.arcstat_dnode_size, space); break; case ARC_SPACE_DBUF: - wmsum_add(&astat_dbuf_size, space); + ARCSTAT_INCR(arcstat_dbuf_size, space); break; case ARC_SPACE_HDRS: - wmsum_add(&astat_hdr_size, space); + ARCSTAT_INCR(arcstat_hdr_size, space); break; case ARC_SPACE_L2HDRS: - aggsum_add(&astat_l2_hdr_size, space); + aggsum_add(&arc_sums.arcstat_l2_hdr_size, space); break; case ARC_SPACE_ABD_CHUNK_WASTE: /* @@ -2673,14 +2640,14 @@ arc_space_consume(uint64_t space, arc_space_type_t type) * scatter ABD's come from the ARC, because other users are * very short-lived. */ - wmsum_add(&astat_abd_chunk_waste_size, space); + ARCSTAT_INCR(arcstat_abd_chunk_waste_size, space); break; } if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) - aggsum_add(&arc_meta_used, space); + aggsum_add(&arc_sums.arcstat_meta_used, space); - aggsum_add(&arc_size, space); + aggsum_add(&arc_sums.arcstat_size, space); } void @@ -2692,45 +2659,41 @@ arc_space_return(uint64_t space, arc_space_type_t type) default: break; case ARC_SPACE_DATA: - wmsum_add(&astat_data_size, -space); + ARCSTAT_INCR(arcstat_data_size, -space); break; case ARC_SPACE_META: - wmsum_add(&astat_metadata_size, -space); + ARCSTAT_INCR(arcstat_metadata_size, -space); break; case ARC_SPACE_BONUS: - wmsum_add(&astat_bonus_size, -space); + ARCSTAT_INCR(arcstat_bonus_size, -space); break; case ARC_SPACE_DNODE: - aggsum_add(&astat_dnode_size, -space); + aggsum_add(&arc_sums.arcstat_dnode_size, -space); break; case ARC_SPACE_DBUF: - wmsum_add(&astat_dbuf_size, -space); + ARCSTAT_INCR(arcstat_dbuf_size, -space); break; case ARC_SPACE_HDRS: - wmsum_add(&astat_hdr_size, -space); + ARCSTAT_INCR(arcstat_hdr_size, -space); break; case ARC_SPACE_L2HDRS: - aggsum_add(&astat_l2_hdr_size, -space); + aggsum_add(&arc_sums.arcstat_l2_hdr_size, -space); break; case ARC_SPACE_ABD_CHUNK_WASTE: - wmsum_add(&astat_abd_chunk_waste_size, -space); + ARCSTAT_INCR(arcstat_abd_chunk_waste_size, -space); break; } if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) { - ASSERT(aggsum_compare(&arc_meta_used, space) >= 0); - /* - * We use the upper bound here rather than the precise value - * because the arc_meta_max value doesn't need to be - * precise. It's only consumed by humans via arcstats. - */ - if (arc_meta_max < aggsum_upper_bound(&arc_meta_used)) - arc_meta_max = aggsum_upper_bound(&arc_meta_used); - aggsum_add(&arc_meta_used, -space); + ASSERT(aggsum_compare(&arc_sums.arcstat_meta_used, + space) >= 0); + ARCSTAT_MAX(arcstat_meta_max, + aggsum_upper_bound(&arc_sums.arcstat_meta_used)); + aggsum_add(&arc_sums.arcstat_meta_used, -space); } - ASSERT(aggsum_compare(&arc_size, space) >= 0); - aggsum_add(&arc_size, -space); + ASSERT(aggsum_compare(&arc_sums.arcstat_size, space) >= 0); + aggsum_add(&arc_sums.arcstat_size, -space); } /* @@ -4246,9 +4209,10 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes, * Request that 10% of the LRUs be scanned by the superblock * shrinker. */ - if (type == ARC_BUFC_DATA && aggsum_compare(&astat_dnode_size, - arc_dnode_size_limit) > 0) { - arc_prune_async((aggsum_upper_bound(&astat_dnode_size) - + if (type == ARC_BUFC_DATA && aggsum_compare( + &arc_sums.arcstat_dnode_size, arc_dnode_size_limit) > 0) { + arc_prune_async((aggsum_upper_bound( + &arc_sums.arcstat_dnode_size) - arc_dnode_size_limit) / sizeof (dnode_t) / zfs_arc_dnode_reduce_percent); } @@ -4478,7 +4442,7 @@ restart: } /* - * Evict metadata buffers from the cache, such that arc_meta_used is + * Evict metadata buffers from the cache, such that arcstat_meta_used is * capped by the arc_meta_limit tunable. */ static uint64_t @@ -4599,7 +4563,7 @@ arc_evict_type(arc_state_t *state) } /* - * Evict buffers from the cache, such that arc_size is capped by arc_c. + * Evict buffers from the cache, such that arcstat_size is capped by arc_c. */ static uint64_t arc_evict(void) @@ -4607,8 +4571,8 @@ arc_evict(void) uint64_t total_evicted = 0; uint64_t bytes; int64_t target; - uint64_t asize = aggsum_value(&arc_size); - uint64_t ameta = aggsum_value(&arc_meta_used); + uint64_t asize = aggsum_value(&arc_sums.arcstat_size); + uint64_t ameta = aggsum_value(&arc_sums.arcstat_meta_used); /* * If we're over arc_meta_limit, we want to correct that before @@ -4668,8 +4632,8 @@ arc_evict(void) /* * Re-sum ARC stats after the first round of evictions. */ - asize = aggsum_value(&arc_size); - ameta = aggsum_value(&arc_meta_used); + asize = aggsum_value(&arc_sums.arcstat_size); + ameta = aggsum_value(&arc_sums.arcstat_meta_used); /* @@ -4783,7 +4747,7 @@ arc_flush(spa_t *spa, boolean_t retry) void arc_reduce_target_size(int64_t to_free) { - uint64_t asize = aggsum_value(&arc_size); + uint64_t asize = aggsum_value(&arc_sums.arcstat_size); /* * All callers want the ARC to actually evict (at least) this much @@ -4836,8 +4800,8 @@ arc_kmem_reap_soon(void) extern kmem_cache_t *zio_data_buf_cache[]; #ifdef _KERNEL - if ((aggsum_compare(&arc_meta_used, arc_meta_limit) >= 0) && - zfs_arc_meta_prune) { + if ((aggsum_compare(&arc_sums.arcstat_meta_used, + arc_meta_limit) >= 0) && zfs_arc_meta_prune) { /* * We are exceeding our meta-data cache limit. * Prune some entries to release holds on meta-data. @@ -4940,7 +4904,7 @@ arc_evict_cb(void *arg, zthr_t *zthr) */ mutex_enter(&arc_evict_lock); arc_evict_needed = !zthr_iscancelled(arc_evict_zthr) && - evicted > 0 && aggsum_compare(&arc_size, arc_c) > 0; + evicted > 0 && aggsum_compare(&arc_sums.arcstat_size, arc_c) > 0; if (!arc_evict_needed) { /* * We're either no longer overflowing, or we @@ -5153,7 +5117,7 @@ arc_adapt(int bytes, arc_state_t *state) * cache size, increment the target cache size */ ASSERT3U(arc_c, >=, 2ULL << SPA_MAXBLOCKSHIFT); - if (aggsum_upper_bound(&arc_size) >= + if (aggsum_upper_bound(&arc_sums.arcstat_size) >= arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) { atomic_add_64(&arc_c, (int64_t)bytes); if (arc_c > arc_c_max) @@ -5186,7 +5150,8 @@ arc_is_overflowing(void) * in the ARC. In practice, that's in the tens of MB, which is low * enough to be safe. */ - return (aggsum_lower_bound(&arc_size) >= (int64_t)arc_c + overflow); + return (aggsum_lower_bound(&arc_sums.arcstat_size) >= + (int64_t)arc_c + overflow); } static abd_t * @@ -5355,7 +5320,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag, * If we are growing the cache, and we are adding anonymous * data, and we have outgrown arc_p, update arc_p */ - if (aggsum_upper_bound(&arc_size) < arc_c && + if (aggsum_upper_bound(&arc_sums.arcstat_size) < arc_c && hdr->b_l1hdr.b_state == arc_anon && (zfs_refcount_count(&arc_anon->arcs_size) + zfs_refcount_count(&arc_mru->arcs_size) > arc_p)) @@ -7240,55 +7205,219 @@ arc_kstat_update(kstat_t *ksp, int rw) { arc_stats_t *as = ksp->ks_data; - if (rw == KSTAT_WRITE) { + if (rw == KSTAT_WRITE) return (SET_ERROR(EACCES)); - } else { - arc_kstat_update_state(arc_anon, - &as->arcstat_anon_size, - &as->arcstat_anon_evictable_data, - &as->arcstat_anon_evictable_metadata); - arc_kstat_update_state(arc_mru, - &as->arcstat_mru_size, - &as->arcstat_mru_evictable_data, - &as->arcstat_mru_evictable_metadata); - arc_kstat_update_state(arc_mru_ghost, - &as->arcstat_mru_ghost_size, - &as->arcstat_mru_ghost_evictable_data, - &as->arcstat_mru_ghost_evictable_metadata); - arc_kstat_update_state(arc_mfu, - &as->arcstat_mfu_size, - &as->arcstat_mfu_evictable_data, - &as->arcstat_mfu_evictable_metadata); - arc_kstat_update_state(arc_mfu_ghost, - &as->arcstat_mfu_ghost_size, - &as->arcstat_mfu_ghost_evictable_data, - &as->arcstat_mfu_ghost_evictable_metadata); - ARCSTAT(arcstat_size) = aggsum_value(&arc_size); - ARCSTAT(arcstat_meta_used) = aggsum_value(&arc_meta_used); - ARCSTAT(arcstat_data_size) = wmsum_value(&astat_data_size); - ARCSTAT(arcstat_metadata_size) = - wmsum_value(&astat_metadata_size); - ARCSTAT(arcstat_hdr_size) = wmsum_value(&astat_hdr_size); - ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size); - ARCSTAT(arcstat_dbuf_size) = wmsum_value(&astat_dbuf_size); + as->arcstat_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_hits); + as->arcstat_misses.value.ui64 = + wmsum_value(&arc_sums.arcstat_misses); + as->arcstat_demand_data_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_data_hits); + as->arcstat_demand_data_misses.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_data_misses); + as->arcstat_demand_metadata_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_metadata_hits); + as->arcstat_demand_metadata_misses.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_metadata_misses); + as->arcstat_prefetch_data_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_prefetch_data_hits); + as->arcstat_prefetch_data_misses.value.ui64 = + wmsum_value(&arc_sums.arcstat_prefetch_data_misses); + as->arcstat_prefetch_metadata_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_prefetch_metadata_hits); + as->arcstat_prefetch_metadata_misses.value.ui64 = + wmsum_value(&arc_sums.arcstat_prefetch_metadata_misses); + as->arcstat_mru_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_mru_hits); + as->arcstat_mru_ghost_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_mru_ghost_hits); + as->arcstat_mfu_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_mfu_hits); + as->arcstat_mfu_ghost_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_mfu_ghost_hits); + as->arcstat_deleted.value.ui64 = + wmsum_value(&arc_sums.arcstat_deleted); + as->arcstat_mutex_miss.value.ui64 = + wmsum_value(&arc_sums.arcstat_mutex_miss); + as->arcstat_access_skip.value.ui64 = + wmsum_value(&arc_sums.arcstat_access_skip); + as->arcstat_evict_skip.value.ui64 = + wmsum_value(&arc_sums.arcstat_evict_skip); + as->arcstat_evict_not_enough.value.ui64 = + wmsum_value(&arc_sums.arcstat_evict_not_enough); + as->arcstat_evict_l2_cached.value.ui64 = + wmsum_value(&arc_sums.arcstat_evict_l2_cached); + as->arcstat_evict_l2_eligible.value.ui64 = + wmsum_value(&arc_sums.arcstat_evict_l2_eligible); + as->arcstat_evict_l2_eligible_mfu.value.ui64 = + wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mfu); + as->arcstat_evict_l2_eligible_mru.value.ui64 = + wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mru); + as->arcstat_evict_l2_ineligible.value.ui64 = + wmsum_value(&arc_sums.arcstat_evict_l2_ineligible); + as->arcstat_evict_l2_skip.value.ui64 = + wmsum_value(&arc_sums.arcstat_evict_l2_skip); + as->arcstat_hash_collisions.value.ui64 = + wmsum_value(&arc_sums.arcstat_hash_collisions); + as->arcstat_hash_chains.value.ui64 = + wmsum_value(&arc_sums.arcstat_hash_chains); + as->arcstat_size.value.ui64 = + aggsum_value(&arc_sums.arcstat_size); + as->arcstat_compressed_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_compressed_size); + as->arcstat_uncompressed_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_uncompressed_size); + as->arcstat_overhead_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_overhead_size); + as->arcstat_hdr_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_hdr_size); + as->arcstat_data_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_data_size); + as->arcstat_metadata_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_metadata_size); + as->arcstat_dbuf_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_dbuf_size); #if defined(COMPAT_FREEBSD11) - ARCSTAT(arcstat_other_size) = wmsum_value(&astat_bonus_size) + - aggsum_value(&astat_dnode_size) + - wmsum_value(&astat_dbuf_size); + as->arcstat_other_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_bonus_size) + + aggsum_value(&arc_sums.arcstat_dnode_size) + + wmsum_value(&arc_sums.arcstat_dbuf_size); #endif - ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size); - ARCSTAT(arcstat_bonus_size) = wmsum_value(&astat_bonus_size); - ARCSTAT(arcstat_abd_chunk_waste_size) = - wmsum_value(&astat_abd_chunk_waste_size); - as->arcstat_memory_all_bytes.value.ui64 = - arc_all_memory(); - as->arcstat_memory_free_bytes.value.ui64 = - arc_free_memory(); - as->arcstat_memory_available_bytes.value.i64 = - arc_available_memory(); - } + arc_kstat_update_state(arc_anon, + &as->arcstat_anon_size, + &as->arcstat_anon_evictable_data, + &as->arcstat_anon_evictable_metadata); + arc_kstat_update_state(arc_mru, + &as->arcstat_mru_size, + &as->arcstat_mru_evictable_data, + &as->arcstat_mru_evictable_metadata); + arc_kstat_update_state(arc_mru_ghost, + &as->arcstat_mru_ghost_size, + &as->arcstat_mru_ghost_evictable_data, + &as->arcstat_mru_ghost_evictable_metadata); + arc_kstat_update_state(arc_mfu, + &as->arcstat_mfu_size, + &as->arcstat_mfu_evictable_data, + &as->arcstat_mfu_evictable_metadata); + arc_kstat_update_state(arc_mfu_ghost, + &as->arcstat_mfu_ghost_size, + &as->arcstat_mfu_ghost_evictable_data, + &as->arcstat_mfu_ghost_evictable_metadata); + + as->arcstat_dnode_size.value.ui64 = + aggsum_value(&arc_sums.arcstat_dnode_size); + as->arcstat_bonus_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_bonus_size); + as->arcstat_l2_hits.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_hits); + as->arcstat_l2_misses.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_misses); + as->arcstat_l2_prefetch_asize.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_prefetch_asize); + as->arcstat_l2_mru_asize.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_mru_asize); + as->arcstat_l2_mfu_asize.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_mfu_asize); + as->arcstat_l2_bufc_data_asize.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_bufc_data_asize); + as->arcstat_l2_bufc_metadata_asize.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_bufc_metadata_asize); + as->arcstat_l2_feeds.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_feeds); + as->arcstat_l2_rw_clash.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rw_clash); + as->arcstat_l2_read_bytes.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_read_bytes); + as->arcstat_l2_write_bytes.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_write_bytes); + as->arcstat_l2_writes_sent.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_writes_sent); + as->arcstat_l2_writes_done.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_writes_done); + as->arcstat_l2_writes_error.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_writes_error); + as->arcstat_l2_writes_lock_retry.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_writes_lock_retry); + as->arcstat_l2_evict_lock_retry.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_evict_lock_retry); + as->arcstat_l2_evict_reading.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_evict_reading); + as->arcstat_l2_evict_l1cached.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_evict_l1cached); + as->arcstat_l2_free_on_write.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_free_on_write); + as->arcstat_l2_abort_lowmem.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_abort_lowmem); + as->arcstat_l2_cksum_bad.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_cksum_bad); + as->arcstat_l2_io_error.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_io_error); + as->arcstat_l2_lsize.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_lsize); + as->arcstat_l2_psize.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_psize); + as->arcstat_l2_hdr_size.value.ui64 = + aggsum_value(&arc_sums.arcstat_l2_hdr_size); + as->arcstat_l2_log_blk_writes.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_log_blk_writes); + as->arcstat_l2_log_blk_asize.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_log_blk_asize); + as->arcstat_l2_log_blk_count.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_log_blk_count); + as->arcstat_l2_rebuild_success.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_success); + as->arcstat_l2_rebuild_abort_unsupported.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_unsupported); + as->arcstat_l2_rebuild_abort_io_errors.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_io_errors); + as->arcstat_l2_rebuild_abort_dh_errors.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_dh_errors); + as->arcstat_l2_rebuild_abort_cksum_lb_errors.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors); + as->arcstat_l2_rebuild_abort_lowmem.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_lowmem); + as->arcstat_l2_rebuild_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_size); + as->arcstat_l2_rebuild_asize.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_asize); + as->arcstat_l2_rebuild_bufs.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs); + as->arcstat_l2_rebuild_bufs_precached.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs_precached); + as->arcstat_l2_rebuild_log_blks.value.ui64 = + wmsum_value(&arc_sums.arcstat_l2_rebuild_log_blks); + as->arcstat_memory_throttle_count.value.ui64 = + wmsum_value(&arc_sums.arcstat_memory_throttle_count); + as->arcstat_memory_direct_count.value.ui64 = + wmsum_value(&arc_sums.arcstat_memory_direct_count); + as->arcstat_memory_indirect_count.value.ui64 = + wmsum_value(&arc_sums.arcstat_memory_indirect_count); + + as->arcstat_memory_all_bytes.value.ui64 = + arc_all_memory(); + as->arcstat_memory_free_bytes.value.ui64 = + arc_free_memory(); + as->arcstat_memory_available_bytes.value.i64 = + arc_available_memory(); + + as->arcstat_prune.value.ui64 = + wmsum_value(&arc_sums.arcstat_prune); + as->arcstat_meta_used.value.ui64 = + aggsum_value(&arc_sums.arcstat_meta_used); + as->arcstat_async_upgrade_sync.value.ui64 = + wmsum_value(&arc_sums.arcstat_async_upgrade_sync); + as->arcstat_demand_hit_predictive_prefetch.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_hit_predictive_prefetch); + as->arcstat_demand_hit_prescient_prefetch.value.ui64 = + wmsum_value(&arc_sums.arcstat_demand_hit_prescient_prefetch); + as->arcstat_raw_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_raw_size); + as->arcstat_cached_only_in_progress.value.ui64 = + wmsum_value(&arc_sums.arcstat_cached_only_in_progress); + as->arcstat_abd_chunk_waste_size.value.ui64 = + wmsum_value(&arc_sums.arcstat_abd_chunk_waste_size); return (0); } @@ -7511,16 +7640,93 @@ arc_state_init(void) zfs_refcount_create(&arc_mfu_ghost->arcs_size); zfs_refcount_create(&arc_l2c_only->arcs_size); - aggsum_init(&arc_meta_used, 0); - aggsum_init(&arc_size, 0); - wmsum_init(&astat_data_size, 0); - wmsum_init(&astat_metadata_size, 0); - wmsum_init(&astat_hdr_size, 0); - aggsum_init(&astat_l2_hdr_size, 0); - wmsum_init(&astat_bonus_size, 0); - aggsum_init(&astat_dnode_size, 0); - wmsum_init(&astat_dbuf_size, 0); - wmsum_init(&astat_abd_chunk_waste_size, 0); + wmsum_init(&arc_sums.arcstat_hits, 0); + wmsum_init(&arc_sums.arcstat_misses, 0); + wmsum_init(&arc_sums.arcstat_demand_data_hits, 0); + wmsum_init(&arc_sums.arcstat_demand_data_misses, 0); + wmsum_init(&arc_sums.arcstat_demand_metadata_hits, 0); + wmsum_init(&arc_sums.arcstat_demand_metadata_misses, 0); + wmsum_init(&arc_sums.arcstat_prefetch_data_hits, 0); + wmsum_init(&arc_sums.arcstat_prefetch_data_misses, 0); + wmsum_init(&arc_sums.arcstat_prefetch_metadata_hits, 0); + wmsum_init(&arc_sums.arcstat_prefetch_metadata_misses, 0); + wmsum_init(&arc_sums.arcstat_mru_hits, 0); + wmsum_init(&arc_sums.arcstat_mru_ghost_hits, 0); + wmsum_init(&arc_sums.arcstat_mfu_hits, 0); + wmsum_init(&arc_sums.arcstat_mfu_ghost_hits, 0); + wmsum_init(&arc_sums.arcstat_deleted, 0); + wmsum_init(&arc_sums.arcstat_mutex_miss, 0); + wmsum_init(&arc_sums.arcstat_access_skip, 0); + wmsum_init(&arc_sums.arcstat_evict_skip, 0); + wmsum_init(&arc_sums.arcstat_evict_not_enough, 0); + wmsum_init(&arc_sums.arcstat_evict_l2_cached, 0); + wmsum_init(&arc_sums.arcstat_evict_l2_eligible, 0); + wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mfu, 0); + wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mru, 0); + wmsum_init(&arc_sums.arcstat_evict_l2_ineligible, 0); + wmsum_init(&arc_sums.arcstat_evict_l2_skip, 0); + wmsum_init(&arc_sums.arcstat_hash_collisions, 0); + wmsum_init(&arc_sums.arcstat_hash_chains, 0); + aggsum_init(&arc_sums.arcstat_size, 0); + wmsum_init(&arc_sums.arcstat_compressed_size, 0); + wmsum_init(&arc_sums.arcstat_uncompressed_size, 0); + wmsum_init(&arc_sums.arcstat_overhead_size, 0); + wmsum_init(&arc_sums.arcstat_hdr_size, 0); + wmsum_init(&arc_sums.arcstat_data_size, 0); + wmsum_init(&arc_sums.arcstat_metadata_size, 0); + wmsum_init(&arc_sums.arcstat_dbuf_size, 0); + aggsum_init(&arc_sums.arcstat_dnode_size, 0); + wmsum_init(&arc_sums.arcstat_bonus_size, 0); + wmsum_init(&arc_sums.arcstat_l2_hits, 0); + wmsum_init(&arc_sums.arcstat_l2_misses, 0); + wmsum_init(&arc_sums.arcstat_l2_prefetch_asize, 0); + wmsum_init(&arc_sums.arcstat_l2_mru_asize, 0); + wmsum_init(&arc_sums.arcstat_l2_mfu_asize, 0); + wmsum_init(&arc_sums.arcstat_l2_bufc_data_asize, 0); + wmsum_init(&arc_sums.arcstat_l2_bufc_metadata_asize, 0); + wmsum_init(&arc_sums.arcstat_l2_feeds, 0); + wmsum_init(&arc_sums.arcstat_l2_rw_clash, 0); + wmsum_init(&arc_sums.arcstat_l2_read_bytes, 0); + wmsum_init(&arc_sums.arcstat_l2_write_bytes, 0); + wmsum_init(&arc_sums.arcstat_l2_writes_sent, 0); + wmsum_init(&arc_sums.arcstat_l2_writes_done, 0); + wmsum_init(&arc_sums.arcstat_l2_writes_error, 0); + wmsum_init(&arc_sums.arcstat_l2_writes_lock_retry, 0); + wmsum_init(&arc_sums.arcstat_l2_evict_lock_retry, 0); + wmsum_init(&arc_sums.arcstat_l2_evict_reading, 0); + wmsum_init(&arc_sums.arcstat_l2_evict_l1cached, 0); + wmsum_init(&arc_sums.arcstat_l2_free_on_write, 0); + wmsum_init(&arc_sums.arcstat_l2_abort_lowmem, 0); + wmsum_init(&arc_sums.arcstat_l2_cksum_bad, 0); + wmsum_init(&arc_sums.arcstat_l2_io_error, 0); + wmsum_init(&arc_sums.arcstat_l2_lsize, 0); + wmsum_init(&arc_sums.arcstat_l2_psize, 0); + aggsum_init(&arc_sums.arcstat_l2_hdr_size, 0); + wmsum_init(&arc_sums.arcstat_l2_log_blk_writes, 0); + wmsum_init(&arc_sums.arcstat_l2_log_blk_asize, 0); + wmsum_init(&arc_sums.arcstat_l2_log_blk_count, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_success, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_unsupported, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_io_errors, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_dh_errors, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_lowmem, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_size, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_asize, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs_precached, 0); + wmsum_init(&arc_sums.arcstat_l2_rebuild_log_blks, 0); + wmsum_init(&arc_sums.arcstat_memory_throttle_count, 0); + wmsum_init(&arc_sums.arcstat_memory_direct_count, 0); + wmsum_init(&arc_sums.arcstat_memory_indirect_count, 0); + wmsum_init(&arc_sums.arcstat_prune, 0); + aggsum_init(&arc_sums.arcstat_meta_used, 0); + wmsum_init(&arc_sums.arcstat_async_upgrade_sync, 0); + wmsum_init(&arc_sums.arcstat_demand_hit_predictive_prefetch, 0); + wmsum_init(&arc_sums.arcstat_demand_hit_prescient_prefetch, 0); + wmsum_init(&arc_sums.arcstat_raw_size, 0); + wmsum_init(&arc_sums.arcstat_cached_only_in_progress, 0); + wmsum_init(&arc_sums.arcstat_abd_chunk_waste_size, 0); arc_anon->arcs_state = ARC_STATE_ANON; arc_mru->arcs_state = ARC_STATE_MRU; @@ -7564,16 +7770,93 @@ arc_state_fini(void) multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]); - aggsum_fini(&arc_meta_used); - aggsum_fini(&arc_size); - wmsum_fini(&astat_data_size); - wmsum_fini(&astat_metadata_size); - wmsum_fini(&astat_hdr_size); - aggsum_fini(&astat_l2_hdr_size); - wmsum_fini(&astat_bonus_size); - aggsum_fini(&astat_dnode_size); - wmsum_fini(&astat_dbuf_size); - wmsum_fini(&astat_abd_chunk_waste_size); + wmsum_fini(&arc_sums.arcstat_hits); + wmsum_fini(&arc_sums.arcstat_misses); + wmsum_fini(&arc_sums.arcstat_demand_data_hits); + wmsum_fini(&arc_sums.arcstat_demand_data_misses); + wmsum_fini(&arc_sums.arcstat_demand_metadata_hits); + wmsum_fini(&arc_sums.arcstat_demand_metadata_misses); + wmsum_fini(&arc_sums.arcstat_prefetch_data_hits); + wmsum_fini(&arc_sums.arcstat_prefetch_data_misses); + wmsum_fini(&arc_sums.arcstat_prefetch_metadata_hits); + wmsum_fini(&arc_sums.arcstat_prefetch_metadata_misses); + wmsum_fini(&arc_sums.arcstat_mru_hits); + wmsum_fini(&arc_sums.arcstat_mru_ghost_hits); + wmsum_fini(&arc_sums.arcstat_mfu_hits); + wmsum_fini(&arc_sums.arcstat_mfu_ghost_hits); + wmsum_fini(&arc_sums.arcstat_deleted); + wmsum_fini(&arc_sums.arcstat_mutex_miss); + wmsum_fini(&arc_sums.arcstat_access_skip); + wmsum_fini(&arc_sums.arcstat_evict_skip); + wmsum_fini(&arc_sums.arcstat_evict_not_enough); + wmsum_fini(&arc_sums.arcstat_evict_l2_cached); + wmsum_fini(&arc_sums.arcstat_evict_l2_eligible); + wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mfu); + wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mru); + wmsum_fini(&arc_sums.arcstat_evict_l2_ineligible); + wmsum_fini(&arc_sums.arcstat_evict_l2_skip); + wmsum_fini(&arc_sums.arcstat_hash_collisions); + wmsum_fini(&arc_sums.arcstat_hash_chains); + aggsum_fini(&arc_sums.arcstat_size); + wmsum_fini(&arc_sums.arcstat_compressed_size); + wmsum_fini(&arc_sums.arcstat_uncompressed_size); + wmsum_fini(&arc_sums.arcstat_overhead_size); + wmsum_fini(&arc_sums.arcstat_hdr_size); + wmsum_fini(&arc_sums.arcstat_data_size); + wmsum_fini(&arc_sums.arcstat_metadata_size); + wmsum_fini(&arc_sums.arcstat_dbuf_size); + aggsum_fini(&arc_sums.arcstat_dnode_size); + wmsum_fini(&arc_sums.arcstat_bonus_size); + wmsum_fini(&arc_sums.arcstat_l2_hits); + wmsum_fini(&arc_sums.arcstat_l2_misses); + wmsum_fini(&arc_sums.arcstat_l2_prefetch_asize); + wmsum_fini(&arc_sums.arcstat_l2_mru_asize); + wmsum_fini(&arc_sums.arcstat_l2_mfu_asize); + wmsum_fini(&arc_sums.arcstat_l2_bufc_data_asize); + wmsum_fini(&arc_sums.arcstat_l2_bufc_metadata_asize); + wmsum_fini(&arc_sums.arcstat_l2_feeds); + wmsum_fini(&arc_sums.arcstat_l2_rw_clash); + wmsum_fini(&arc_sums.arcstat_l2_read_bytes); + wmsum_fini(&arc_sums.arcstat_l2_write_bytes); + wmsum_fini(&arc_sums.arcstat_l2_writes_sent); + wmsum_fini(&arc_sums.arcstat_l2_writes_done); + wmsum_fini(&arc_sums.arcstat_l2_writes_error); + wmsum_fini(&arc_sums.arcstat_l2_writes_lock_retry); + wmsum_fini(&arc_sums.arcstat_l2_evict_lock_retry); + wmsum_fini(&arc_sums.arcstat_l2_evict_reading); + wmsum_fini(&arc_sums.arcstat_l2_evict_l1cached); + wmsum_fini(&arc_sums.arcstat_l2_free_on_write); + wmsum_fini(&arc_sums.arcstat_l2_abort_lowmem); + wmsum_fini(&arc_sums.arcstat_l2_cksum_bad); + wmsum_fini(&arc_sums.arcstat_l2_io_error); + wmsum_fini(&arc_sums.arcstat_l2_lsize); + wmsum_fini(&arc_sums.arcstat_l2_psize); + aggsum_fini(&arc_sums.arcstat_l2_hdr_size); + wmsum_fini(&arc_sums.arcstat_l2_log_blk_writes); + wmsum_fini(&arc_sums.arcstat_l2_log_blk_asize); + wmsum_fini(&arc_sums.arcstat_l2_log_blk_count); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_success); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_unsupported); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_io_errors); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_dh_errors); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_lowmem); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_size); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_asize); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs_precached); + wmsum_fini(&arc_sums.arcstat_l2_rebuild_log_blks); + wmsum_fini(&arc_sums.arcstat_memory_throttle_count); + wmsum_fini(&arc_sums.arcstat_memory_direct_count); + wmsum_fini(&arc_sums.arcstat_memory_indirect_count); + wmsum_fini(&arc_sums.arcstat_prune); + aggsum_fini(&arc_sums.arcstat_meta_used); + wmsum_fini(&arc_sums.arcstat_async_upgrade_sync); + wmsum_fini(&arc_sums.arcstat_demand_hit_predictive_prefetch); + wmsum_fini(&arc_sums.arcstat_demand_hit_prescient_prefetch); + wmsum_fini(&arc_sums.arcstat_raw_size); + wmsum_fini(&arc_sums.arcstat_cached_only_in_progress); + wmsum_fini(&arc_sums.arcstat_abd_chunk_waste_size); } uint64_t @@ -7623,8 +7906,6 @@ arc_init(void) /* Set min to 1/2 of arc_c_min */ arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT; - /* Initialize maximum observed usage to zero */ - arc_meta_max = 0; /* * Set arc_meta_limit to a percent of arc_c_max with a floor of * arc_meta_min, and a ceiling of arc_c_max. @@ -8350,7 +8631,7 @@ top: } } - atomic_inc_64(&l2arc_writes_done); + ARCSTAT_BUMP(arcstat_l2_writes_done); list_remove(buflist, head); ASSERT(!HDR_HAS_L1HDR(head)); kmem_cache_free(hdr_l2only_cache, head); @@ -9322,7 +9603,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) static boolean_t l2arc_hdr_limit_reached(void) { - int64_t s = aggsum_upper_bound(&astat_l2_hdr_size); + int64_t s = aggsum_upper_bound(&arc_sums.arcstat_l2_hdr_size); return (arc_reclaim_needed() || (s > arc_meta_limit * 3 / 4) || (s > (arc_warm ? arc_c : arc_c_max) * l2arc_meta_percent / 100)); @@ -9656,8 +9937,6 @@ l2arc_init(void) { l2arc_thread_exit = 0; l2arc_ndev = 0; - l2arc_writes_sent = 0; - l2arc_writes_done = 0; mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 9d741545fae..5b072f02613 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -52,6 +52,7 @@ #include #include #include +#include kstat_t *dbuf_ksp; @@ -135,8 +136,22 @@ dbuf_stats_t dbuf_stats = { { "metadata_cache_overflow", KSTAT_DATA_UINT64 } }; +struct { + wmsum_t cache_count; + wmsum_t cache_total_evicts; + wmsum_t cache_levels[DN_MAX_LEVELS]; + wmsum_t cache_levels_bytes[DN_MAX_LEVELS]; + wmsum_t hash_hits; + wmsum_t hash_misses; + wmsum_t hash_collisions; + wmsum_t hash_chains; + wmsum_t hash_insert_race; + wmsum_t metadata_cache_count; + wmsum_t metadata_cache_overflow; +} dbuf_sums; + #define DBUF_STAT_INCR(stat, val) \ - atomic_add_64(&dbuf_stats.stat.value.ui64, (val)); + wmsum_add(&dbuf_sums.stat, val); #define DBUF_STAT_DECR(stat, val) \ DBUF_STAT_INCR(stat, -(val)); #define DBUF_STAT_BUMP(stat) \ @@ -297,8 +312,6 @@ dbuf_dest(void *vdb, void *unused) */ static dbuf_hash_table_t dbuf_hash_table; -static uint64_t dbuf_hash_count; - /* * We use Cityhash for this. It's fast, and has good hash properties without * requiring any large static buffers. @@ -409,8 +422,8 @@ dbuf_hash_insert(dmu_buf_impl_t *db) db->db_hash_next = h->hash_table[idx]; h->hash_table[idx] = db; mutex_exit(DBUF_HASH_MUTEX(h, idx)); - atomic_inc_64(&dbuf_hash_count); - DBUF_STAT_MAX(hash_elements_max, dbuf_hash_count); + uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64); + DBUF_STAT_MAX(hash_elements_max, he); return (NULL); } @@ -483,7 +496,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db) h->hash_table[idx]->db_hash_next == NULL) DBUF_STAT_BUMPDOWN(hash_chains); mutex_exit(DBUF_HASH_MUTEX(h, idx)); - atomic_dec_64(&dbuf_hash_count); + atomic_dec_64(&dbuf_stats.hash_elements.value.ui64); } typedef enum { @@ -767,19 +780,40 @@ dbuf_kstat_update(kstat_t *ksp, int rw) { dbuf_stats_t *ds = ksp->ks_data; - if (rw == KSTAT_WRITE) { + if (rw == KSTAT_WRITE) return (SET_ERROR(EACCES)); - } else { - ds->metadata_cache_size_bytes.value.ui64 = zfs_refcount_count( - &dbuf_caches[DB_DBUF_METADATA_CACHE].size); - ds->cache_size_bytes.value.ui64 = - zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size); - ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes(); - ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes(); - ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes(); - ds->hash_elements.value.ui64 = dbuf_hash_count; - } + ds->cache_count.value.ui64 = + wmsum_value(&dbuf_sums.cache_count); + ds->cache_size_bytes.value.ui64 = + zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size); + ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes(); + ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes(); + ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes(); + ds->cache_total_evicts.value.ui64 = + wmsum_value(&dbuf_sums.cache_total_evicts); + for (int i = 0; i < DN_MAX_LEVELS; i++) { + ds->cache_levels[i].value.ui64 = + wmsum_value(&dbuf_sums.cache_levels[i]); + ds->cache_levels_bytes[i].value.ui64 = + wmsum_value(&dbuf_sums.cache_levels_bytes[i]); + } + ds->hash_hits.value.ui64 = + wmsum_value(&dbuf_sums.hash_hits); + ds->hash_misses.value.ui64 = + wmsum_value(&dbuf_sums.hash_misses); + ds->hash_collisions.value.ui64 = + wmsum_value(&dbuf_sums.hash_collisions); + ds->hash_chains.value.ui64 = + wmsum_value(&dbuf_sums.hash_chains); + ds->hash_insert_race.value.ui64 = + wmsum_value(&dbuf_sums.hash_insert_race); + ds->metadata_cache_count.value.ui64 = + wmsum_value(&dbuf_sums.metadata_cache_count); + ds->metadata_cache_size_bytes.value.ui64 = zfs_refcount_count( + &dbuf_caches[DB_DBUF_METADATA_CACHE].size); + ds->metadata_cache_overflow.value.ui64 = + wmsum_value(&dbuf_sums.metadata_cache_overflow); return (0); } @@ -846,6 +880,20 @@ retry: dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread, NULL, 0, &p0, TS_RUN, minclsyspri); + wmsum_init(&dbuf_sums.cache_count, 0); + wmsum_init(&dbuf_sums.cache_total_evicts, 0); + for (i = 0; i < DN_MAX_LEVELS; i++) { + wmsum_init(&dbuf_sums.cache_levels[i], 0); + wmsum_init(&dbuf_sums.cache_levels_bytes[i], 0); + } + wmsum_init(&dbuf_sums.hash_hits, 0); + wmsum_init(&dbuf_sums.hash_misses, 0); + wmsum_init(&dbuf_sums.hash_collisions, 0); + wmsum_init(&dbuf_sums.hash_chains, 0); + wmsum_init(&dbuf_sums.hash_insert_race, 0); + wmsum_init(&dbuf_sums.metadata_cache_count, 0); + wmsum_init(&dbuf_sums.metadata_cache_overflow, 0); + dbuf_ksp = kstat_create("zfs", 0, "dbufstats", "misc", KSTAT_TYPE_NAMED, sizeof (dbuf_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); @@ -908,6 +956,20 @@ dbuf_fini(void) kstat_delete(dbuf_ksp); dbuf_ksp = NULL; } + + wmsum_fini(&dbuf_sums.cache_count); + wmsum_fini(&dbuf_sums.cache_total_evicts); + for (i = 0; i < DN_MAX_LEVELS; i++) { + wmsum_fini(&dbuf_sums.cache_levels[i]); + wmsum_fini(&dbuf_sums.cache_levels_bytes[i]); + } + wmsum_fini(&dbuf_sums.hash_hits); + wmsum_fini(&dbuf_sums.hash_misses); + wmsum_fini(&dbuf_sums.hash_collisions); + wmsum_fini(&dbuf_sums.hash_chains); + wmsum_fini(&dbuf_sums.hash_insert_race); + wmsum_fini(&dbuf_sums.metadata_cache_count); + wmsum_fini(&dbuf_sums.metadata_cache_overflow); } /* @@ -3708,9 +3770,11 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting) db->db_caching_status = dcs; multilist_insert(&dbuf_caches[dcs].cache, db); + uint64_t db_size = db->db.db_size; size = zfs_refcount_add_many( - &dbuf_caches[dcs].size, - db->db.db_size, db); + &dbuf_caches[dcs].size, db_size, db); + uint8_t db_level = db->db_level; + mutex_exit(&db->db_mtx); if (dcs == DB_DBUF_METADATA_CACHE) { DBUF_STAT_BUMP(metadata_cache_count); @@ -3718,16 +3782,14 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting) metadata_cache_size_bytes_max, size); } else { - DBUF_STAT_BUMP( - cache_levels[db->db_level]); DBUF_STAT_BUMP(cache_count); - DBUF_STAT_INCR( - cache_levels_bytes[db->db_level], - db->db.db_size); DBUF_STAT_MAX(cache_size_bytes_max, size); + DBUF_STAT_BUMP(cache_levels[db_level]); + DBUF_STAT_INCR( + cache_levels_bytes[db_level], + db_size); } - mutex_exit(&db->db_mtx); if (dcs == DB_DBUF_CACHE && !evicting) dbuf_evict_notify(size); diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c index 3d7407016d2..4a323fa990f 100644 --- a/module/zfs/dmu_zfetch.c +++ b/module/zfs/dmu_zfetch.c @@ -34,6 +34,7 @@ #include #include #include +#include /* * This tunable disables predictive prefetch. Note that it leaves "prescient" @@ -69,27 +70,54 @@ static zfetch_stats_t zfetch_stats = { { "io_issued", KSTAT_DATA_UINT64 }, }; -#define ZFETCHSTAT_BUMP(stat) \ - atomic_inc_64(&zfetch_stats.stat.value.ui64) +struct { + wmsum_t zfetchstat_hits; + wmsum_t zfetchstat_misses; + wmsum_t zfetchstat_max_streams; + wmsum_t zfetchstat_io_issued; +} zfetch_sums; + +#define ZFETCHSTAT_BUMP(stat) \ + wmsum_add(&zfetch_sums.stat, 1) #define ZFETCHSTAT_ADD(stat, val) \ - atomic_add_64(&zfetch_stats.stat.value.ui64, val) -#define ZFETCHSTAT_SET(stat, val) \ - zfetch_stats.stat.value.ui64 = val -#define ZFETCHSTAT_GET(stat) \ - zfetch_stats.stat.value.ui64 + wmsum_add(&zfetch_sums.stat, val) kstat_t *zfetch_ksp; +static int +zfetch_kstats_update(kstat_t *ksp, int rw) +{ + zfetch_stats_t *zs = ksp->ks_data; + + if (rw == KSTAT_WRITE) + return (EACCES); + zs->zfetchstat_hits.value.ui64 = + wmsum_value(&zfetch_sums.zfetchstat_hits); + zs->zfetchstat_misses.value.ui64 = + wmsum_value(&zfetch_sums.zfetchstat_misses); + zs->zfetchstat_max_streams.value.ui64 = + wmsum_value(&zfetch_sums.zfetchstat_max_streams); + zs->zfetchstat_io_issued.value.ui64 = + wmsum_value(&zfetch_sums.zfetchstat_io_issued); + return (0); +} + void zfetch_init(void) { + wmsum_init(&zfetch_sums.zfetchstat_hits, 0); + wmsum_init(&zfetch_sums.zfetchstat_misses, 0); + wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0); + wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0); + zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc", KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (zfetch_ksp != NULL) { zfetch_ksp->ks_data = &zfetch_stats; + zfetch_ksp->ks_update = zfetch_kstats_update; kstat_install(zfetch_ksp); } } @@ -101,6 +129,11 @@ zfetch_fini(void) kstat_delete(zfetch_ksp); zfetch_ksp = NULL; } + + wmsum_fini(&zfetch_sums.zfetchstat_hits); + wmsum_fini(&zfetch_sums.zfetchstat_misses); + wmsum_fini(&zfetch_sums.zfetchstat_max_streams); + wmsum_fini(&zfetch_sums.zfetchstat_io_issued); } /*