diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h index 836ed679dba..9d67dab06ca 100644 --- a/sys/contrib/openzfs/include/sys/arc.h +++ b/sys/contrib/openzfs/include/sys/arc.h @@ -304,9 +304,8 @@ int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp, arc_write_done_func_t *ready, arc_write_done_func_t *child_ready, - arc_write_done_func_t *physdone, arc_write_done_func_t *done, - void *priv, zio_priority_t priority, int zio_flags, - const zbookmark_phys_t *zb); + arc_write_done_func_t *done, void *priv, zio_priority_t priority, + int zio_flags, const zbookmark_phys_t *zb); arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv); void arc_remove_prune_callback(arc_prune_t *p); diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h index fd24d2f3c8b..78774792f36 100644 --- a/sys/contrib/openzfs/include/sys/arc_impl.h +++ b/sys/contrib/openzfs/include/sys/arc_impl.h @@ -123,7 +123,6 @@ struct arc_write_callback { void *awcb_private; arc_write_done_func_t *awcb_ready; arc_write_done_func_t *awcb_children_ready; - arc_write_done_func_t *awcb_physdone; arc_write_done_func_t *awcb_done; arc_buf_t *awcb_buf; }; diff --git a/sys/contrib/openzfs/include/sys/zfs_refcount.h b/sys/contrib/openzfs/include/sys/zfs_refcount.h index 4efa266a53c..77965a0aa58 100644 --- a/sys/contrib/openzfs/include/sys/zfs_refcount.h +++ b/sys/contrib/openzfs/include/sys/zfs_refcount.h @@ -27,6 +27,7 @@ #define _SYS_ZFS_REFCOUNT_H #include +#include #include #include @@ -43,19 +44,22 @@ extern "C" { #ifdef ZFS_DEBUG typedef struct reference { - list_node_t ref_link; + union { + avl_node_t a; + list_node_t l; + } ref_link; const void *ref_holder; uint64_t ref_number; - uint8_t *ref_removed; + boolean_t ref_search; } reference_t; typedef struct refcount { - 
kmutex_t rc_mtx; - boolean_t rc_tracked; - list_t rc_list; - list_t rc_removed; uint64_t rc_count; - uint64_t rc_removed_count; + kmutex_t rc_mtx; + avl_tree_t rc_tree; + list_t rc_removed; + uint_t rc_removed_count; + boolean_t rc_tracked; } zfs_refcount_t; /* diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h index 6b1352a72b9..ec32211f690 100644 --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -460,7 +460,6 @@ struct zio { /* Callback info */ zio_done_func_t *io_ready; zio_done_func_t *io_children_ready; - zio_done_func_t *io_physdone; zio_done_func_t *io_done; void *io_private; int64_t io_prev_space_delta; /* DMU private */ @@ -503,9 +502,6 @@ struct zio { int io_error; int io_child_error[ZIO_CHILD_TYPES]; uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES]; - uint64_t io_child_count; - uint64_t io_phys_children; - uint64_t io_parent_count; uint64_t *io_stall; zio_t *io_gang_leader; zio_gang_node_t *io_gang_tree; @@ -553,9 +549,8 @@ extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp, zio_done_func_t *ready, zio_done_func_t *children_ready, - zio_done_func_t *physdone, zio_done_func_t *done, - void *priv, zio_priority_t priority, zio_flag_t flags, - const zbookmark_phys_t *zb); + zio_done_func_t *done, void *priv, zio_priority_t priority, + zio_flag_t flags, const zbookmark_phys_t *zb); extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, struct abd *data, uint64_t size, zio_done_func_t *done, void *priv, diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index a23715309f2..7023f448182 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -6675,18 +6675,6 @@ arc_write_children_ready(zio_t *zio) 
callback->awcb_children_ready(zio, buf, callback->awcb_private); } -/* - * The SPA calls this callback for each physical write that happens on behalf - * of a logical write. See the comment in dbuf_write_physdone() for details. - */ -static void -arc_write_physdone(zio_t *zio) -{ - arc_write_callback_t *cb = zio->io_private; - if (cb->awcb_physdone != NULL) - cb->awcb_physdone(zio, cb->awcb_buf, cb->awcb_private); -} - static void arc_write_done(zio_t *zio) { @@ -6776,9 +6764,9 @@ zio_t * arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp, arc_write_done_func_t *ready, - arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone, - arc_write_done_func_t *done, void *private, zio_priority_t priority, - int zio_flags, const zbookmark_phys_t *zb) + arc_write_done_func_t *children_ready, arc_write_done_func_t *done, + void *private, zio_priority_t priority, int zio_flags, + const zbookmark_phys_t *zb) { arc_buf_hdr_t *hdr = buf->b_hdr; arc_write_callback_t *callback; @@ -6825,7 +6813,6 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP); callback->awcb_ready = ready; callback->awcb_children_ready = children_ready; - callback->awcb_physdone = physdone; callback->awcb_done = done; callback->awcb_private = private; callback->awcb_buf = buf; @@ -6862,8 +6849,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)), HDR_GET_LSIZE(hdr), arc_buf_size(buf), &localprop, arc_write_ready, (children_ready != NULL) ? 
arc_write_children_ready : NULL, - arc_write_physdone, arc_write_done, callback, - priority, zio_flags, zb); + arc_write_done, callback, priority, zio_flags, zb); return (zio); } diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c index 272e712586f..1ea075217fb 100644 --- a/sys/contrib/openzfs/module/zfs/dbuf.c +++ b/sys/contrib/openzfs/module/zfs/dbuf.c @@ -4369,22 +4369,6 @@ dbuf_lightweight_ready(zio_t *zio) rw_exit(&parent_db->db_rwlock); } -static void -dbuf_lightweight_physdone(zio_t *zio) -{ - dbuf_dirty_record_t *dr = zio->io_private; - dsl_pool_t *dp = spa_get_dsl(zio->io_spa); - ASSERT3U(dr->dr_txg, ==, zio->io_txg); - - /* - * The callback will be called io_phys_children times. Retire one - * portion of our dirty space each time we are called. Any rounding - * error will be cleaned up by dbuf_lightweight_done(). - */ - int delta = dr->dr_accounted / zio->io_phys_children; - dsl_pool_undirty_space(dp, delta, zio->io_txg); -} - static void dbuf_lightweight_done(zio_t *zio) { @@ -4403,16 +4387,8 @@ dbuf_lightweight_done(zio_t *zio) dsl_dataset_block_born(ds, zio->io_bp, tx); } - /* - * See comment in dbuf_write_done(). 
- */ - if (zio->io_phys_children == 0) { - dsl_pool_undirty_space(dmu_objset_pool(os), - dr->dr_accounted, zio->io_txg); - } else { - dsl_pool_undirty_space(dmu_objset_pool(os), - dr->dr_accounted % zio->io_phys_children, zio->io_txg); - } + dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted, + zio->io_txg); abd_free(dr->dt.dll.dr_abd); kmem_free(dr, sizeof (*dr)); @@ -4446,8 +4422,7 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx) dmu_tx_get_txg(tx), &dr->dr_bp_copy, dr->dt.dll.dr_abd, dn->dn_datablksz, abd_get_size(dr->dt.dll.dr_abd), &dr->dt.dll.dr_props, dbuf_lightweight_ready, NULL, - dbuf_lightweight_physdone, dbuf_lightweight_done, dr, - ZIO_PRIORITY_ASYNC_WRITE, + dbuf_lightweight_done, dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED | dr->dt.dll.dr_flags, &zb); zio_nowait(dr->dr_zio); @@ -4789,37 +4764,6 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb) DB_DNODE_EXIT(db); } -/* - * The SPA will call this callback several times for each zio - once - * for every physical child i/o (zio->io_phys_children times). This - * allows the DMU to monitor the progress of each logical i/o. For example, - * there may be 2 copies of an indirect block, or many fragments of a RAID-Z - * block. There may be a long delay before all copies/fragments are completed, - * so this callback allows us to retire dirty space gradually, as the physical - * i/os complete. - */ -static void -dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg) -{ - (void) buf; - dmu_buf_impl_t *db = arg; - objset_t *os = db->db_objset; - dsl_pool_t *dp = dmu_objset_pool(os); - dbuf_dirty_record_t *dr; - int delta = 0; - - dr = db->db_data_pending; - ASSERT3U(dr->dr_txg, ==, zio->io_txg); - - /* - * The callback will be called io_phys_children times. Retire one - * portion of our dirty space each time we are called. Any rounding - * error will be cleaned up by dbuf_write_done(). 
- */ - delta = dr->dr_accounted / zio->io_phys_children; - dsl_pool_undirty_space(dp, delta, zio->io_txg); -} - static void dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) { @@ -4894,27 +4838,8 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) db->db_data_pending = NULL; dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE); - /* - * If we didn't do a physical write in this ZIO and we - * still ended up here, it means that the space of the - * dbuf that we just released (and undirtied) above hasn't - * been marked as undirtied in the pool's accounting. - * - * Thus, we undirty that space in the pool's view of the - * world here. For physical writes this type of update - * happens in dbuf_write_physdone(). - * - * If we did a physical write, cleanup any rounding errors - * that came up due to writing multiple copies of a block - * on disk [see dbuf_write_physdone()]. - */ - if (zio->io_phys_children == 0) { - dsl_pool_undirty_space(dmu_objset_pool(os), - dr->dr_accounted, zio->io_txg); - } else { - dsl_pool_undirty_space(dmu_objset_pool(os), - dr->dr_accounted % zio->io_phys_children, zio->io_txg); - } + dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted, + zio->io_txg); kmem_free(dr, sizeof (dbuf_dirty_record_t)); } @@ -5162,7 +5087,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) dr->dr_zio = zio_write(pio, os->os_spa, txg, &dr->dr_bp_copy, contents, db->db.db_size, db->db.db_size, &zp, - dbuf_write_override_ready, NULL, NULL, + dbuf_write_override_ready, NULL, dbuf_write_override_done, dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); mutex_enter(&db->db_mtx); @@ -5176,7 +5101,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) zp.zp_checksum == ZIO_CHECKSUM_NOPARITY); dr->dr_zio = zio_write(pio, os->os_spa, txg, &dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp, - dbuf_write_nofill_ready, NULL, NULL, + dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db, 
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb); @@ -5195,9 +5120,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) dr->dr_zio = arc_write(pio, os->os_spa, txg, &dr->dr_bp_copy, data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db), &zp, dbuf_write_ready, - children_ready_cb, dbuf_write_physdone, - dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE, - ZIO_FLAG_MUSTSUCCEED, &zb); + children_ready_cb, dbuf_write_done, db, + ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); } } diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index 8a13b8f410a..dda869287c7 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -1698,7 +1698,7 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size), zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp, - dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done, + dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); return (0); @@ -1864,7 +1864,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp, dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db), - &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa, + &zp, dmu_sync_ready, NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); return (0); diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index 778b18817ee..d134d4958f7 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -1698,7 +1698,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) zio = arc_write(pio, os->os_spa, tx->tx_txg, blkptr_copy, os->os_phys_buf, 
B_FALSE, dmu_os_is_l2cacheable(os), - &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, + &zp, dmu_objset_write_ready, NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); /* diff --git a/sys/contrib/openzfs/module/zfs/refcount.c b/sys/contrib/openzfs/module/zfs/refcount.c index 601d27f8c47..718bbb34a8d 100644 --- a/sys/contrib/openzfs/module/zfs/refcount.c +++ b/sys/contrib/openzfs/module/zfs/refcount.c @@ -36,33 +36,40 @@ int reference_tracking_enable = B_FALSE; static uint_t reference_history = 3; /* tunable */ static kmem_cache_t *reference_cache; -static kmem_cache_t *reference_history_cache; void zfs_refcount_init(void) { reference_cache = kmem_cache_create("reference_cache", sizeof (reference_t), 0, NULL, NULL, NULL, NULL, NULL, 0); - - reference_history_cache = kmem_cache_create("reference_history_cache", - sizeof (uint64_t), 0, NULL, NULL, NULL, NULL, NULL, 0); } void zfs_refcount_fini(void) { kmem_cache_destroy(reference_cache); - kmem_cache_destroy(reference_history_cache); +} + +static int +zfs_refcount_compare(const void *x1, const void *x2) +{ + const reference_t *r1 = (const reference_t *)x1; + const reference_t *r2 = (const reference_t *)x2; + + int cmp1 = TREE_CMP(r1->ref_holder, r2->ref_holder); + int cmp2 = TREE_CMP(r1->ref_number, r2->ref_number); + int cmp = cmp1 ? cmp1 : cmp2; + return ((cmp || r1->ref_search) ? 
cmp : TREE_PCMP(r1, r2)); } void zfs_refcount_create(zfs_refcount_t *rc) { mutex_init(&rc->rc_mtx, NULL, MUTEX_DEFAULT, NULL); - list_create(&rc->rc_list, sizeof (reference_t), - offsetof(reference_t, ref_link)); + avl_create(&rc->rc_tree, zfs_refcount_compare, sizeof (reference_t), + offsetof(reference_t, ref_link.a)); list_create(&rc->rc_removed, sizeof (reference_t), - offsetof(reference_t, ref_link)); + offsetof(reference_t, ref_link.l)); rc->rc_count = 0; rc->rc_removed_count = 0; rc->rc_tracked = reference_tracking_enable; @@ -86,16 +93,15 @@ void zfs_refcount_destroy_many(zfs_refcount_t *rc, uint64_t number) { reference_t *ref; + void *cookie = NULL; ASSERT3U(rc->rc_count, ==, number); - while ((ref = list_remove_head(&rc->rc_list))) + while ((ref = avl_destroy_nodes(&rc->rc_tree, &cookie)) != NULL) kmem_cache_free(reference_cache, ref); - list_destroy(&rc->rc_list); + avl_destroy(&rc->rc_tree); - while ((ref = list_remove_head(&rc->rc_removed))) { - kmem_cache_free(reference_history_cache, ref->ref_removed); + while ((ref = list_remove_head(&rc->rc_removed))) kmem_cache_free(reference_cache, ref); - } list_destroy(&rc->rc_removed); mutex_destroy(&rc->rc_mtx); } @@ -121,10 +127,10 @@ zfs_refcount_count(zfs_refcount_t *rc) int64_t zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder) { - reference_t *ref = NULL; + reference_t *ref; int64_t count; - if (!rc->rc_tracked) { + if (likely(!rc->rc_tracked)) { count = atomic_add_64_nv(&(rc)->rc_count, number); ASSERT3U(count, >=, number); return (count); @@ -133,8 +139,9 @@ zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder) ref = kmem_cache_alloc(reference_cache, KM_SLEEP); ref->ref_holder = holder; ref->ref_number = number; + ref->ref_search = B_FALSE; mutex_enter(&rc->rc_mtx); - list_insert_head(&rc->rc_list, ref); + avl_add(&rc->rc_tree, ref); rc->rc_count += number; count = rc->rc_count; mutex_exit(&rc->rc_mtx); @@ -151,7 +158,7 @@ 
zfs_refcount_add(zfs_refcount_t *rc, const void *holder) void zfs_refcount_add_few(zfs_refcount_t *rc, uint64_t number, const void *holder) { - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) (void) zfs_refcount_add_many(rc, number, holder); else for (; number > 0; number--) (void) zfs_refcount_add(rc, holder); @@ -161,47 +168,42 @@ int64_t zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, const void *holder) { - reference_t *ref; + reference_t *ref, s; int64_t count; - if (!rc->rc_tracked) { + if (likely(!rc->rc_tracked)) { count = atomic_add_64_nv(&(rc)->rc_count, -number); ASSERT3S(count, >=, 0); return (count); } + s.ref_holder = holder; + s.ref_number = number; + s.ref_search = B_TRUE; mutex_enter(&rc->rc_mtx); ASSERT3U(rc->rc_count, >=, number); - for (ref = list_head(&rc->rc_list); ref; - ref = list_next(&rc->rc_list, ref)) { - if (ref->ref_holder == holder && ref->ref_number == number) { - list_remove(&rc->rc_list, ref); - if (reference_history > 0) { - ref->ref_removed = - kmem_cache_alloc(reference_history_cache, - KM_SLEEP); - list_insert_head(&rc->rc_removed, ref); - rc->rc_removed_count++; - if (rc->rc_removed_count > reference_history) { - ref = list_tail(&rc->rc_removed); - list_remove(&rc->rc_removed, ref); - kmem_cache_free(reference_history_cache, - ref->ref_removed); - kmem_cache_free(reference_cache, ref); - rc->rc_removed_count--; - } - } else { - kmem_cache_free(reference_cache, ref); - } - rc->rc_count -= number; - count = rc->rc_count; - mutex_exit(&rc->rc_mtx); - return (count); - } + ref = avl_find(&rc->rc_tree, &s, NULL); + if (unlikely(ref == NULL)) { + panic("No such hold %p on refcount %llx", holder, + (u_longlong_t)(uintptr_t)rc); + return (-1); } - panic("No such hold %p on refcount %llx", holder, - (u_longlong_t)(uintptr_t)rc); - return (-1); + avl_remove(&rc->rc_tree, ref); + if (reference_history > 0) { + list_insert_head(&rc->rc_removed, ref); + if (rc->rc_removed_count >= reference_history) { + ref = 
list_remove_tail(&rc->rc_removed); + kmem_cache_free(reference_cache, ref); + } else { + rc->rc_removed_count++; + } + } else { + kmem_cache_free(reference_cache, ref); + } + rc->rc_count -= number; + count = rc->rc_count; + mutex_exit(&rc->rc_mtx); + return (count); } int64_t @@ -213,7 +215,7 @@ zfs_refcount_remove(zfs_refcount_t *rc, const void *holder) void zfs_refcount_remove_few(zfs_refcount_t *rc, uint64_t number, const void *holder) { - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) (void) zfs_refcount_remove_many(rc, number, holder); else for (; number > 0; number--) (void) zfs_refcount_remove(rc, holder); @@ -222,31 +224,38 @@ zfs_refcount_remove_few(zfs_refcount_t *rc, uint64_t number, const void *holder) void zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src) { - int64_t count, removed_count; - list_t list, removed; + avl_tree_t tree; + list_t removed; + reference_t *ref; + void *cookie = NULL; + uint64_t count; + uint_t removed_count; - list_create(&list, sizeof (reference_t), - offsetof(reference_t, ref_link)); + avl_create(&tree, zfs_refcount_compare, sizeof (reference_t), + offsetof(reference_t, ref_link.a)); list_create(&removed, sizeof (reference_t), - offsetof(reference_t, ref_link)); + offsetof(reference_t, ref_link.l)); mutex_enter(&src->rc_mtx); count = src->rc_count; removed_count = src->rc_removed_count; src->rc_count = 0; src->rc_removed_count = 0; - list_move_tail(&list, &src->rc_list); + avl_swap(&tree, &src->rc_tree); list_move_tail(&removed, &src->rc_removed); mutex_exit(&src->rc_mtx); mutex_enter(&dst->rc_mtx); dst->rc_count += count; dst->rc_removed_count += removed_count; - list_move_tail(&dst->rc_list, &list); + if (avl_is_empty(&dst->rc_tree)) + avl_swap(&dst->rc_tree, &tree); + else while ((ref = avl_destroy_nodes(&tree, &cookie)) != NULL) + avl_add(&dst->rc_tree, ref); list_move_tail(&dst->rc_removed, &removed); mutex_exit(&dst->rc_mtx); - list_destroy(&list); + avl_destroy(&tree); list_destroy(&removed); } @@ 
-254,23 +263,19 @@ void zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, const void *current_holder, const void *new_holder) { - reference_t *ref; - boolean_t found = B_FALSE; + reference_t *ref, s; - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) return; + s.ref_holder = current_holder; + s.ref_number = number; + s.ref_search = B_TRUE; mutex_enter(&rc->rc_mtx); - for (ref = list_head(&rc->rc_list); ref; - ref = list_next(&rc->rc_list, ref)) { - if (ref->ref_holder == current_holder && - ref->ref_number == number) { - ref->ref_holder = new_holder; - found = B_TRUE; - break; - } - } - ASSERT(found); + ref = avl_find(&rc->rc_tree, &s, NULL); + ASSERT(ref); + ref->ref_holder = new_holder; + avl_update(&rc->rc_tree, ref); mutex_exit(&rc->rc_mtx); } @@ -290,21 +295,23 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, const void *current_holder, boolean_t zfs_refcount_held(zfs_refcount_t *rc, const void *holder) { - reference_t *ref; + reference_t *ref, s; + avl_index_t idx; + boolean_t res; - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) return (zfs_refcount_count(rc) > 0); + s.ref_holder = holder; + s.ref_number = 0; + s.ref_search = B_TRUE; mutex_enter(&rc->rc_mtx); - for (ref = list_head(&rc->rc_list); ref; - ref = list_next(&rc->rc_list, ref)) { - if (ref->ref_holder == holder) { - mutex_exit(&rc->rc_mtx); - return (B_TRUE); - } - } + ref = avl_find(&rc->rc_tree, &s, &idx); + if (likely(ref == NULL)) + ref = avl_nearest(&rc->rc_tree, idx, AVL_AFTER); + res = ref && ref->ref_holder == holder; mutex_exit(&rc->rc_mtx); - return (B_FALSE); + return (res); } /* @@ -315,21 +322,23 @@ zfs_refcount_held(zfs_refcount_t *rc, const void *holder) boolean_t zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder) { - reference_t *ref; + reference_t *ref, s; + avl_index_t idx; + boolean_t res; - if (!rc->rc_tracked) + if (likely(!rc->rc_tracked)) return (B_TRUE); mutex_enter(&rc->rc_mtx); - for (ref = list_head(&rc->rc_list); ref; - 
ref = list_next(&rc->rc_list, ref)) { - if (ref->ref_holder == holder) { - mutex_exit(&rc->rc_mtx); - return (B_FALSE); - } - } + s.ref_holder = holder; + s.ref_number = 0; + s.ref_search = B_TRUE; + ref = avl_find(&rc->rc_tree, &s, &idx); + if (likely(ref == NULL)) + ref = avl_nearest(&rc->rc_tree, idx, AVL_AFTER); + res = ref == NULL || ref->ref_holder != holder; mutex_exit(&rc->rc_mtx); - return (B_TRUE); + return (res); } EXPORT_SYMBOL(zfs_refcount_create); diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c index 85c7134ca4c..a5c76808f2d 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_label.c +++ b/sys/contrib/openzfs/module/zfs/vdev_label.c @@ -486,6 +486,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (vd->vdev_isspare) fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1); + if (flags & VDEV_CONFIG_L2CACHE) + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift); + if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) && vd == vd->vdev_top) { fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index d7b2217623e..fb8164f0aea 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -650,9 +650,6 @@ zio_add_child(zio_t *pio, zio_t *cio) list_insert_head(&pio->io_child_list, zl); list_insert_head(&cio->io_parent_list, zl); - pio->io_child_count++; - cio->io_parent_count++; - mutex_exit(&cio->io_lock); mutex_exit(&pio->io_lock); } @@ -669,9 +666,6 @@ zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl) list_remove(&pio->io_child_list, zl); list_remove(&cio->io_parent_list, zl); - pio->io_child_count--; - cio->io_parent_count--; - mutex_exit(&cio->io_lock); mutex_exit(&pio->io_lock); kmem_cache_free(zio_link_cache, zl); @@ -1162,9 +1156,8 @@ zio_t * zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data, uint64_t lsize, uint64_t 
psize, const zio_prop_t *zp, zio_done_func_t *ready, zio_done_func_t *children_ready, - zio_done_func_t *physdone, zio_done_func_t *done, - void *private, zio_priority_t priority, zio_flag_t flags, - const zbookmark_phys_t *zb) + zio_done_func_t *done, void *private, zio_priority_t priority, + zio_flag_t flags, const zbookmark_phys_t *zb) { zio_t *zio; @@ -1184,7 +1177,6 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, zio->io_ready = ready; zio->io_children_ready = children_ready; - zio->io_physdone = physdone; zio->io_prop = *zp; /* @@ -1517,16 +1509,11 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, flags &= ~ZIO_FLAG_IO_ALLOCATING; } - zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, size, done, private, type, priority, flags, vd, offset, &pio->io_bookmark, ZIO_STAGE_VDEV_IO_START >> 1, pipeline); ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV); - zio->io_physdone = pio->io_physdone; - if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL) - zio->io_logical->io_phys_children++; - return (zio); } @@ -2711,7 +2698,7 @@ zio_gang_tree_assemble_done(zio_t *zio) blkptr_t *bp = zio->io_bp; ASSERT(gio == zio_unique_parent(zio)); - ASSERT(zio->io_child_count == 0); + ASSERT(list_is_empty(&zio->io_child_list)); if (zio->io_error) return; @@ -2969,7 +2956,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], has_data ? 
abd_get_offset(pio->io_abd, pio->io_size - resid) : NULL, lsize, lsize, &zp, - zio_write_gang_member_ready, NULL, NULL, + zio_write_gang_member_ready, NULL, zio_write_gang_done, &gn->gn_child[g], pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); @@ -3431,7 +3418,7 @@ zio_ddt_write(zio_t *zio) } else { cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd, zio->io_orig_size, zio->io_orig_size, zp, - zio_ddt_child_write_ready, NULL, NULL, + zio_ddt_child_write_ready, NULL, zio_ddt_child_write_done, dde, zio->io_priority, ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); @@ -4134,13 +4121,6 @@ zio_vdev_io_assess(zio_t *zio) if (zio->io_error) zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; - if (vd != NULL && vd->vdev_ops->vdev_op_leaf && - zio->io_physdone != NULL) { - ASSERT(!(zio->io_flags & ZIO_FLAG_DELEGATED)); - ASSERT(zio->io_child_type == ZIO_CHILD_VDEV); - zio->io_physdone(zio->io_logical); - } - return (zio); } @@ -4890,7 +4870,7 @@ zio_done(zio_t *zio) return (NULL); } - ASSERT(zio->io_child_count == 0); + ASSERT(list_is_empty(&zio->io_child_list)); ASSERT(zio->io_reexecute == 0); ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL)); diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in index 9517ce8073a..cf438e0e649 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in @@ -173,6 +173,7 @@ if sys.platform.startswith('freebsd'): 'link_count/link_count_001': ['SKIP', na_reason], 'casenorm/mixed_create_failure': ['FAIL', 13215], 'mmap/mmap_sync_001_pos': ['SKIP', na_reason], + 'rsend/send_raw_ashift': ['SKIP', 14961], }) elif sys.platform.startswith('linux'): known.update({ diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib index 133f8387dda..844caa17d8e 100644 --- 
a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib +++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib @@ -3706,7 +3706,7 @@ function arcstat_quiescence # stat echo while $do_once || [ $stat1 -ne $stat2 ] || [ $stat2 -eq 0 ]; do typeset stat1=$(get_arcstat $stat) - sleep 2 + sleep 0.5 typeset stat2=$(get_arcstat $stat) do_once=false done diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh index 6f7b9aff7c3..a9968723c3c 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh @@ -27,15 +27,14 @@ # # STRATEGY: # 1. Create pool with a cache device. -# 2. Export and re-import pool without writing any data. -# 3. Create a random file in that pool and random read for 10 sec. -# 4. Export pool. -# 5. Read the amount of log blocks written from the header of the +# 2. Create a random file in that pool and random read for 10 sec. +# 3. Export pool. +# 4. Read the amount of log blocks written from the header of the # L2ARC device. -# 6. Import pool. -# 7. Read the amount of log blocks rebuilt in arcstats and compare to +# 5. Import pool. +# 6. Read the amount of log blocks rebuilt in arcstats and compare to -# (5). +# (4). -# 8. Check if the labels of the L2ARC device are intact. +# 7. Check if the labels of the L2ARC device are intact. 
# # * We can predict the minimum bytes of L2ARC restored if we subtract # from the effective size of the cache device the bytes l2arc_evict() @@ -77,10 +76,8 @@ export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))M log_must truncate -s ${cache_sz}M $VDEV_CACHE -log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE - -log_must zpool export $TESTPOOL -log_must zpool import -d $VDIR $TESTPOOL +log_must zpool create -f -o ashift=12 $TESTPOOL $VDEV +log_must zpool add $TESTPOOL cache $VDEV_CACHE log_must fio $FIO_SCRIPTS/mkfiles.fio log_must fio $FIO_SCRIPTS/random_reads.fio diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh index 3cea334495d..f238c361134 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh @@ -37,6 +37,10 @@ verify_runnable "both" log_assert "Verify raw sending to pools with greater ashift succeeds" +if is_freebsd; then + log_unsupported "Runs too long on FreeBSD 14 (Issue #14961)" +fi + function cleanup { rm -f $BACKDIR/fs@* diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index dc40483fe78..caf1b68f872 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -1051,7 +1051,7 @@ /* #undef ZFS_IS_GPL_COMPATIBLE */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gfeff9dfed" +#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g10e36e176" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -1081,7 +1081,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "FreeBSD_gfeff9dfed" +#define ZFS_META_RELEASE "FreeBSD_g10e36e176" /* Define the project version. 
*/ #define ZFS_META_VERSION "2.1.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 932bf9730c7..668f15e8533 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.1.99-1993-gfeff9dfed" +#define ZFS_META_GITREV "zfs-2.1.99-1998-g10e36e176"