mirror of
https://github.com/opnsense/src.git
synced 2026-06-11 01:30:30 -04:00
zfs: merge openzfs/zfs@bdd11cbb9 (master) into main
Notable upstream pull request merges:
#12274 Optimize txg_kick() process
#12281 Move gethrtime() calls out of vdev queue lock
#12287 Remove refcount from spa_config_*()
#12289 Compact dbuf/buf hashes and lock arrays
#12290 Remove avl_size field from struct avl_tree
#12294 Upstream: dmu_zfetch_stream_fini leaks refcount
#12295 Fix abd leak, kmem_free correct size of abd_t
#12328 FreeBSD: Hardcode abd_chunk_size to PAGE_SIZE
Obtained from: OpenZFS
OpenZFS commit: bdd11cbb90
This commit is contained in:
commit
7cd22ac434
26 changed files with 303 additions and 199 deletions
|
|
@ -640,6 +640,27 @@ devid_iter(const char *devid, zfs_process_func_t func, boolean_t is_slice)
|
|||
return (data.dd_found);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a device guid, find any vdevs with a matching guid.
|
||||
*/
|
||||
static boolean_t
|
||||
guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
|
||||
zfs_process_func_t func, boolean_t is_slice)
|
||||
{
|
||||
dev_data_t data = { 0 };
|
||||
|
||||
data.dd_func = func;
|
||||
data.dd_found = B_FALSE;
|
||||
data.dd_pool_guid = pool_guid;
|
||||
data.dd_vdev_guid = vdev_guid;
|
||||
data.dd_islabeled = is_slice;
|
||||
data.dd_new_devid = devid;
|
||||
|
||||
(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
|
||||
|
||||
return (data.dd_found);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle a EC_DEV_ADD.ESC_DISK event.
|
||||
*
|
||||
|
|
@ -663,15 +684,18 @@ static int
|
|||
zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
|
||||
{
|
||||
char *devpath = NULL, *devid;
|
||||
uint64_t pool_guid = 0, vdev_guid = 0;
|
||||
boolean_t is_slice;
|
||||
|
||||
/*
|
||||
* Expecting a devid string and an optional physical location
|
||||
* Expecting a devid string and an optional physical location and guid
|
||||
*/
|
||||
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
|
||||
return (-1);
|
||||
|
||||
(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
|
||||
(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
|
||||
(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
|
||||
|
||||
is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0);
|
||||
|
||||
|
|
@ -682,12 +706,16 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
|
|||
* Iterate over all vdevs looking for a match in the following order:
|
||||
* 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
|
||||
* 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
|
||||
*
|
||||
* For disks, we only want to pay attention to vdevs marked as whole
|
||||
* disks or are a multipath device.
|
||||
* 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
|
||||
*/
|
||||
if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL)
|
||||
(void) devphys_iter(devpath, devid, zfs_process_add, is_slice);
|
||||
if (devid_iter(devid, zfs_process_add, is_slice))
|
||||
return (0);
|
||||
if (devpath != NULL && devphys_iter(devpath, devid, zfs_process_add,
|
||||
is_slice))
|
||||
return (0);
|
||||
if (vdev_guid != 0)
|
||||
(void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
|
||||
is_slice);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,6 +72,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
|
|||
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
|
||||
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
|
||||
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
|
||||
if (nvlist_lookup_boolean(nvl, DEV_IS_PART) == B_TRUE)
|
||||
zed_log_msg(LOG_INFO, "\t%s: B_TRUE", DEV_IS_PART);
|
||||
if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
|
||||
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
|
||||
if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
|
||||
|
|
|
|||
|
|
@ -38,40 +38,39 @@
|
|||
static int
|
||||
ioctl_get_msg(char *var, int fd)
|
||||
{
|
||||
int error = 0;
|
||||
int ret;
|
||||
char msg[ZFS_MAX_DATASET_NAME_LEN];
|
||||
|
||||
error = ioctl(fd, BLKZNAME, msg);
|
||||
if (error < 0) {
|
||||
return (error);
|
||||
ret = ioctl(fd, BLKZNAME, msg);
|
||||
if (ret < 0) {
|
||||
return (ret);
|
||||
}
|
||||
|
||||
snprintf(var, ZFS_MAX_DATASET_NAME_LEN, "%s", msg);
|
||||
return (error);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int fd, error = 0;
|
||||
int fd = -1, ret = 0, status = EXIT_FAILURE;
|
||||
char zvol_name[ZFS_MAX_DATASET_NAME_LEN];
|
||||
char *zvol_name_part = NULL;
|
||||
char *dev_name;
|
||||
struct stat64 statbuf;
|
||||
int dev_minor, dev_part;
|
||||
int i;
|
||||
int rc;
|
||||
|
||||
if (argc < 2) {
|
||||
printf("Usage: %s /dev/zvol_device_node\n", argv[0]);
|
||||
return (EINVAL);
|
||||
fprintf(stderr, "Usage: %s /dev/zvol_device_node\n", argv[0]);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
dev_name = argv[1];
|
||||
error = stat64(dev_name, &statbuf);
|
||||
if (error != 0) {
|
||||
printf("Unable to access device file: %s\n", dev_name);
|
||||
return (errno);
|
||||
ret = stat64(dev_name, &statbuf);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Unable to access device file: %s\n", dev_name);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
dev_minor = minor(statbuf.st_rdev);
|
||||
|
|
@ -79,23 +78,23 @@ main(int argc, char **argv)
|
|||
|
||||
fd = open(dev_name, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
printf("Unable to open device file: %s\n", dev_name);
|
||||
return (errno);
|
||||
fprintf(stderr, "Unable to open device file: %s\n", dev_name);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
error = ioctl_get_msg(zvol_name, fd);
|
||||
if (error < 0) {
|
||||
printf("ioctl_get_msg failed:%s\n", strerror(errno));
|
||||
return (errno);
|
||||
ret = ioctl_get_msg(zvol_name, fd);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "ioctl_get_msg failed: %s\n", strerror(errno));
|
||||
goto fail;
|
||||
}
|
||||
if (dev_part > 0)
|
||||
rc = asprintf(&zvol_name_part, "%s-part%d", zvol_name,
|
||||
ret = asprintf(&zvol_name_part, "%s-part%d", zvol_name,
|
||||
dev_part);
|
||||
else
|
||||
rc = asprintf(&zvol_name_part, "%s", zvol_name);
|
||||
ret = asprintf(&zvol_name_part, "%s", zvol_name);
|
||||
|
||||
if (rc == -1 || zvol_name_part == NULL)
|
||||
goto error;
|
||||
if (ret == -1 || zvol_name_part == NULL)
|
||||
goto fail;
|
||||
|
||||
for (i = 0; i < strlen(zvol_name_part); i++) {
|
||||
if (isblank(zvol_name_part[i]))
|
||||
|
|
@ -103,8 +102,13 @@ main(int argc, char **argv)
|
|||
}
|
||||
|
||||
printf("%s\n", zvol_name_part);
|
||||
free(zvol_name_part);
|
||||
error:
|
||||
close(fd);
|
||||
return (error);
|
||||
status = EXIT_SUCCESS;
|
||||
|
||||
fail:
|
||||
if (zvol_name_part)
|
||||
free(zvol_name_part);
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ typedef struct abd {
|
|||
struct abd_scatter {
|
||||
uint_t abd_offset;
|
||||
#if defined(__FreeBSD__) && defined(_KERNEL)
|
||||
uint_t abd_chunk_size;
|
||||
void *abd_chunks[1]; /* actually variable-length */
|
||||
#else
|
||||
uint_t abd_nents;
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ void abd_free_struct(abd_t *);
|
|||
*/
|
||||
|
||||
abd_t *abd_alloc_struct_impl(size_t);
|
||||
abd_t *abd_get_offset_scatter(abd_t *, abd_t *, size_t);
|
||||
abd_t *abd_get_offset_scatter(abd_t *, abd_t *, size_t, size_t);
|
||||
void abd_free_struct_impl(abd_t *);
|
||||
void abd_alloc_chunks(abd_t *, size_t);
|
||||
void abd_free_chunks(abd_t *);
|
||||
|
|
|
|||
|
|
@ -147,7 +147,9 @@ struct avl_tree {
|
|||
int (*avl_compar)(const void *, const void *);
|
||||
size_t avl_offset; /* offsetof(type, avl_link_t field) */
|
||||
ulong_t avl_numnodes; /* number of nodes in the tree */
|
||||
size_t avl_size; /* sizeof user type struct */
|
||||
#ifndef _KERNEL
|
||||
size_t avl_pad; /* For backwards ABI compatibility. */
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -322,12 +322,12 @@ typedef struct dmu_buf_impl {
|
|||
} dmu_buf_impl_t;
|
||||
|
||||
/* Note: the dbuf hash table is exposed only for the mdb module */
|
||||
#define DBUF_MUTEXES 8192
|
||||
#define DBUF_MUTEXES 2048
|
||||
#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
|
||||
typedef struct dbuf_hash_table {
|
||||
uint64_t hash_table_mask;
|
||||
dmu_buf_impl_t **hash_table;
|
||||
kmutex_t hash_mutexes[DBUF_MUTEXES];
|
||||
kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
|
||||
} dbuf_hash_table_t;
|
||||
|
||||
typedef void (*dbuf_prefetch_fn)(void *, boolean_t);
|
||||
|
|
|
|||
|
|
@ -141,9 +141,9 @@ typedef struct spa_config_lock {
|
|||
kmutex_t scl_lock;
|
||||
kthread_t *scl_writer;
|
||||
int scl_write_wanted;
|
||||
int scl_count;
|
||||
kcondvar_t scl_cv;
|
||||
zfs_refcount_t scl_count;
|
||||
} spa_config_lock_t;
|
||||
} ____cacheline_aligned spa_config_lock_t;
|
||||
|
||||
typedef struct spa_config_dirent {
|
||||
list_node_t scd_link;
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ extern void txg_register_callbacks(txg_handle_t *txghp, list_t *tx_callbacks);
|
|||
|
||||
extern void txg_delay(struct dsl_pool *dp, uint64_t txg, hrtime_t delta,
|
||||
hrtime_t resolution);
|
||||
extern void txg_kick(struct dsl_pool *dp);
|
||||
extern void txg_kick(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Wait until the given transaction group has finished syncing.
|
||||
|
|
|
|||
|
|
@ -875,7 +875,6 @@ avl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
|
|||
|
||||
ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
|
||||
ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
|
||||
ASSERT3U(tree1->avl_size, ==, tree2->avl_size);
|
||||
|
||||
temp_node = tree1->avl_root;
|
||||
temp_numnodes = tree1->avl_numnodes;
|
||||
|
|
@ -903,7 +902,6 @@ avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
|
|||
tree->avl_compar = compar;
|
||||
tree->avl_root = NULL;
|
||||
tree->avl_numnodes = 0;
|
||||
tree->avl_size = size;
|
||||
tree->avl_offset = offset;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -79,22 +79,29 @@ struct {
|
|||
} abd_sums;
|
||||
|
||||
/*
|
||||
* The size of the chunks ABD allocates. Because the sizes allocated from the
|
||||
* kmem_cache can't change, this tunable can only be modified at boot. Changing
|
||||
* it at runtime would cause ABD iteration to work incorrectly for ABDs which
|
||||
* were allocated with the old size, so a safeguard has been put in place which
|
||||
* will cause the machine to panic if you change it and try to access the data
|
||||
* within a scattered ABD.
|
||||
* zfs_abd_scatter_min_size is the minimum allocation size to use scatter
|
||||
* ABD's for. Smaller allocations will use linear ABD's which use
|
||||
* zio_[data_]buf_alloc().
|
||||
*
|
||||
* Scatter ABD's use at least one page each, so sub-page allocations waste
|
||||
* some space when allocated as scatter (e.g. 2KB scatter allocation wastes
|
||||
* half of each page). Using linear ABD's for small allocations means that
|
||||
* they will be put on slabs which contain many allocations.
|
||||
*
|
||||
* Linear ABDs for multi-page allocations are easier to use, and in some cases
|
||||
* it allows to avoid buffer copying. But allocation and especially free
|
||||
* of multi-page linear ABDs are expensive operations due to KVA mapping and
|
||||
* unmapping, and with time they cause KVA fragmentations.
|
||||
*/
|
||||
size_t zfs_abd_chunk_size = 4096;
|
||||
size_t zfs_abd_scatter_min_size = PAGE_SIZE + 1;
|
||||
|
||||
#if defined(_KERNEL)
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
|
||||
&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
|
||||
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN,
|
||||
&zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates");
|
||||
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_scatter_min_size, CTLFLAG_RWTUN,
|
||||
&zfs_abd_scatter_min_size, 0, "Minimum size of scatter allocations.");
|
||||
#endif
|
||||
|
||||
kmem_cache_t *abd_chunk_cache;
|
||||
|
|
@ -102,23 +109,16 @@ static kstat_t *abd_ksp;
|
|||
|
||||
/*
|
||||
* We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks are
|
||||
* just a single zero'd sized zfs_abd_chunk_size buffer. This
|
||||
* allows us to conserve memory by only using a single zero buffer
|
||||
* for the scatter chunks.
|
||||
* just a single zero'd page-sized buffer. This allows us to conserve
|
||||
* memory by only using a single zero buffer for the scatter chunks.
|
||||
*/
|
||||
abd_t *abd_zero_scatter = NULL;
|
||||
static char *abd_zero_buf = NULL;
|
||||
|
||||
static void
|
||||
abd_free_chunk(void *c)
|
||||
{
|
||||
kmem_cache_free(abd_chunk_cache, c);
|
||||
}
|
||||
|
||||
static uint_t
|
||||
abd_chunkcnt_for_bytes(size_t size)
|
||||
{
|
||||
return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size);
|
||||
return ((size + PAGE_MASK) >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static inline uint_t
|
||||
|
|
@ -132,7 +132,7 @@ abd_scatter_chunkcnt(abd_t *abd)
|
|||
boolean_t
|
||||
abd_size_alloc_linear(size_t size)
|
||||
{
|
||||
return (size <= zfs_abd_chunk_size ? B_TRUE : B_FALSE);
|
||||
return (size < zfs_abd_scatter_min_size ? B_TRUE : B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -140,7 +140,7 @@ abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
|
|||
{
|
||||
uint_t n = abd_scatter_chunkcnt(abd);
|
||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||
int waste = n * zfs_abd_chunk_size - abd->abd_size;
|
||||
int waste = (n << PAGE_SHIFT) - abd->abd_size;
|
||||
if (op == ABDSTAT_INCR) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
||||
|
|
@ -173,11 +173,11 @@ abd_verify_scatter(abd_t *abd)
|
|||
uint_t i, n;
|
||||
|
||||
/*
|
||||
* There is no scatter linear pages in FreeBSD so there is an
|
||||
* if an error if the ABD has been marked as a linear page.
|
||||
* There is no scatter linear pages in FreeBSD so there is
|
||||
* an error if the ABD has been marked as a linear page.
|
||||
*/
|
||||
ASSERT(!abd_is_linear_page(abd));
|
||||
ASSERT3U(ABD_SCATTER(abd).abd_offset, <, zfs_abd_chunk_size);
|
||||
ASSERT3U(ABD_SCATTER(abd).abd_offset, <, PAGE_SIZE);
|
||||
n = abd_scatter_chunkcnt(abd);
|
||||
for (i = 0; i < n; i++) {
|
||||
ASSERT3P(ABD_SCATTER(abd).abd_chunks[i], !=, NULL);
|
||||
|
|
@ -191,11 +191,9 @@ abd_alloc_chunks(abd_t *abd, size_t size)
|
|||
|
||||
n = abd_chunkcnt_for_bytes(size);
|
||||
for (i = 0; i < n; i++) {
|
||||
void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
|
||||
ASSERT3P(c, !=, NULL);
|
||||
ABD_SCATTER(abd).abd_chunks[i] = c;
|
||||
ABD_SCATTER(abd).abd_chunks[i] =
|
||||
kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
|
||||
}
|
||||
ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -205,7 +203,8 @@ abd_free_chunks(abd_t *abd)
|
|||
|
||||
n = abd_scatter_chunkcnt(abd);
|
||||
for (i = 0; i < n; i++) {
|
||||
abd_free_chunk(ABD_SCATTER(abd).abd_chunks[i]);
|
||||
kmem_cache_free(abd_chunk_cache,
|
||||
ABD_SCATTER(abd).abd_chunks[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -250,15 +249,13 @@ abd_alloc_zero_scatter(void)
|
|||
uint_t i, n;
|
||||
|
||||
n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
|
||||
abd_zero_buf = kmem_zalloc(zfs_abd_chunk_size, KM_SLEEP);
|
||||
abd_zero_buf = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
|
||||
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);
|
||||
|
||||
abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
|
||||
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
|
||||
|
||||
ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
|
||||
ABD_SCATTER(abd_zero_scatter).abd_chunk_size =
|
||||
zfs_abd_chunk_size;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
ABD_SCATTER(abd_zero_scatter).abd_chunks[i] =
|
||||
|
|
@ -266,18 +263,18 @@ abd_alloc_zero_scatter(void)
|
|||
}
|
||||
|
||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, zfs_abd_chunk_size);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
abd_free_zero_scatter(void)
|
||||
{
|
||||
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)zfs_abd_chunk_size);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGE_SIZE);
|
||||
|
||||
abd_free_struct(abd_zero_scatter);
|
||||
abd_zero_scatter = NULL;
|
||||
kmem_free(abd_zero_buf, zfs_abd_chunk_size);
|
||||
kmem_cache_free(abd_chunk_cache, abd_zero_buf);
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -305,7 +302,7 @@ abd_kstats_update(kstat_t *ksp, int rw)
|
|||
void
|
||||
abd_init(void)
|
||||
{
|
||||
abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0,
|
||||
abd_chunk_cache = kmem_cache_create("abd_chunk", PAGE_SIZE, 0,
|
||||
NULL, NULL, NULL, NULL, 0, KMC_NODEBUG);
|
||||
|
||||
wmsum_init(&abd_sums.abdstat_struct_size, 0);
|
||||
|
|
@ -374,14 +371,17 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata)
|
|||
}
|
||||
|
||||
abd_t *
|
||||
abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off)
|
||||
abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
|
||||
size_t size)
|
||||
{
|
||||
abd_verify(sabd);
|
||||
ASSERT3U(off, <=, sabd->abd_size);
|
||||
|
||||
size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
|
||||
uint_t chunkcnt = abd_scatter_chunkcnt(sabd) -
|
||||
(new_offset / zfs_abd_chunk_size);
|
||||
size_t chunkcnt = abd_chunkcnt_for_bytes(
|
||||
(new_offset & PAGE_MASK) + size);
|
||||
|
||||
ASSERT3U(chunkcnt, <=, abd_scatter_chunkcnt(sabd));
|
||||
|
||||
/*
|
||||
* If an abd struct is provided, it is only the minimum size. If we
|
||||
|
|
@ -394,7 +394,7 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off)
|
|||
}
|
||||
|
||||
if (abd == NULL)
|
||||
abd = abd_alloc_struct(chunkcnt * zfs_abd_chunk_size);
|
||||
abd = abd_alloc_struct(chunkcnt << PAGE_SHIFT);
|
||||
|
||||
/*
|
||||
* Even if this buf is filesystem metadata, we only track that
|
||||
|
|
@ -402,34 +402,16 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off)
|
|||
* this case. Therefore, we don't ever use ABD_FLAG_META here.
|
||||
*/
|
||||
|
||||
ABD_SCATTER(abd).abd_offset = new_offset % zfs_abd_chunk_size;
|
||||
ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
|
||||
ABD_SCATTER(abd).abd_offset = new_offset & PAGE_MASK;
|
||||
|
||||
/* Copy the scatterlist starting at the correct offset */
|
||||
(void) memcpy(&ABD_SCATTER(abd).abd_chunks,
|
||||
&ABD_SCATTER(sabd).abd_chunks[new_offset /
|
||||
zfs_abd_chunk_size],
|
||||
&ABD_SCATTER(sabd).abd_chunks[new_offset >> PAGE_SHIFT],
|
||||
chunkcnt * sizeof (void *));
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
abd_iter_scatter_chunk_offset(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT(!abd_is_linear(aiter->iter_abd));
|
||||
return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
|
||||
aiter->iter_pos) % zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
abd_iter_scatter_chunk_index(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT(!abd_is_linear(aiter->iter_abd));
|
||||
return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
|
||||
aiter->iter_pos) / zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the abd_iter.
|
||||
*/
|
||||
|
|
@ -480,29 +462,25 @@ void
|
|||
abd_iter_map(struct abd_iter *aiter)
|
||||
{
|
||||
void *paddr;
|
||||
size_t offset = 0;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
/* Panic if someone has changed zfs_abd_chunk_size */
|
||||
IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size ==
|
||||
ABD_SCATTER(aiter->iter_abd).abd_chunk_size);
|
||||
|
||||
/* There's nothing left to iterate over, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
if (abd_is_linear(aiter->iter_abd)) {
|
||||
offset = aiter->iter_pos;
|
||||
aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
|
||||
paddr = ABD_LINEAR_BUF(aiter->iter_abd);
|
||||
abd_t *abd = aiter->iter_abd;
|
||||
size_t offset = aiter->iter_pos;
|
||||
if (abd_is_linear(abd)) {
|
||||
aiter->iter_mapsize = abd->abd_size - offset;
|
||||
paddr = ABD_LINEAR_BUF(abd);
|
||||
} else {
|
||||
size_t index = abd_iter_scatter_chunk_index(aiter);
|
||||
offset = abd_iter_scatter_chunk_offset(aiter);
|
||||
aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset,
|
||||
aiter->iter_abd->abd_size - aiter->iter_pos);
|
||||
paddr = ABD_SCATTER(aiter->iter_abd).abd_chunks[index];
|
||||
offset += ABD_SCATTER(abd).abd_offset;
|
||||
paddr = ABD_SCATTER(abd).abd_chunks[offset >> PAGE_SHIFT];
|
||||
offset &= PAGE_MASK;
|
||||
aiter->iter_mapsize = MIN(PAGE_SIZE - offset,
|
||||
abd->abd_size - aiter->iter_pos);
|
||||
}
|
||||
aiter->iter_mapaddr = (char *)paddr + offset;
|
||||
}
|
||||
|
|
@ -514,12 +492,10 @@ abd_iter_map(struct abd_iter *aiter)
|
|||
void
|
||||
abd_iter_unmap(struct abd_iter *aiter)
|
||||
{
|
||||
/* There's nothing left to unmap, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, !=, NULL);
|
||||
ASSERT3U(aiter->iter_mapsize, >, 0);
|
||||
if (!abd_iter_at_end(aiter)) {
|
||||
ASSERT3P(aiter->iter_mapaddr, !=, NULL);
|
||||
ASSERT3U(aiter->iter_mapsize, >, 0);
|
||||
}
|
||||
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
|
|
|
|||
|
|
@ -835,7 +835,8 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata)
|
|||
}
|
||||
|
||||
abd_t *
|
||||
abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off)
|
||||
abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off,
|
||||
size_t size)
|
||||
{
|
||||
int i = 0;
|
||||
struct scatterlist *sg = NULL;
|
||||
|
|
|
|||
|
|
@ -531,7 +531,7 @@ abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size)
|
|||
}
|
||||
ASSERT3U(left, ==, 0);
|
||||
} else {
|
||||
abd = abd_get_offset_scatter(abd, sabd, off);
|
||||
abd = abd_get_offset_scatter(abd, sabd, off, size);
|
||||
}
|
||||
|
||||
ASSERT3P(abd, !=, NULL);
|
||||
|
|
|
|||
|
|
@ -740,29 +740,18 @@ taskq_t *arc_prune_taskq;
|
|||
* Hash table routines
|
||||
*/
|
||||
|
||||
#define HT_LOCK_ALIGN 64
|
||||
#define HT_LOCK_PAD (P2NPHASE(sizeof (kmutex_t), (HT_LOCK_ALIGN)))
|
||||
|
||||
struct ht_lock {
|
||||
kmutex_t ht_lock;
|
||||
#ifdef _KERNEL
|
||||
unsigned char pad[HT_LOCK_PAD];
|
||||
#endif
|
||||
};
|
||||
|
||||
#define BUF_LOCKS 8192
|
||||
#define BUF_LOCKS 2048
|
||||
typedef struct buf_hash_table {
|
||||
uint64_t ht_mask;
|
||||
arc_buf_hdr_t **ht_table;
|
||||
struct ht_lock ht_locks[BUF_LOCKS];
|
||||
kmutex_t ht_locks[BUF_LOCKS] ____cacheline_aligned;
|
||||
} buf_hash_table_t;
|
||||
|
||||
static buf_hash_table_t buf_hash_table;
|
||||
|
||||
#define BUF_HASH_INDEX(spa, dva, birth) \
|
||||
(buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
|
||||
#define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
|
||||
#define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
|
||||
#define BUF_HASH_LOCK(idx) (&buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
|
||||
#define HDR_LOCK(hdr) \
|
||||
(BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
|
||||
|
||||
|
|
@ -1111,7 +1100,7 @@ buf_fini(void)
|
|||
(buf_hash_table.ht_mask + 1) * sizeof (void *));
|
||||
#endif
|
||||
for (i = 0; i < BUF_LOCKS; i++)
|
||||
mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
|
||||
mutex_destroy(BUF_HASH_LOCK(i));
|
||||
kmem_cache_destroy(hdr_full_cache);
|
||||
kmem_cache_destroy(hdr_full_crypt_cache);
|
||||
kmem_cache_destroy(hdr_l2only_cache);
|
||||
|
|
@ -1276,10 +1265,8 @@ retry:
|
|||
for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
|
||||
*ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
|
||||
|
||||
for (i = 0; i < BUF_LOCKS; i++) {
|
||||
mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
|
||||
NULL, MUTEX_DEFAULT, NULL);
|
||||
}
|
||||
for (i = 0; i < BUF_LOCKS; i++)
|
||||
mutex_init(BUF_HASH_LOCK(i), NULL, MUTEX_DEFAULT, NULL);
|
||||
}
|
||||
|
||||
#define ARC_MINTIME (hz>>4) /* 62 ms */
|
||||
|
|
|
|||
|
|
@ -826,12 +826,12 @@ dbuf_init(void)
|
|||
int i;
|
||||
|
||||
/*
|
||||
* The hash table is big enough to fill all of physical memory
|
||||
* The hash table is big enough to fill one eighth of physical memory
|
||||
* with an average block size of zfs_arc_average_blocksize (default 8K).
|
||||
* By default, the table will take up
|
||||
* totalmem * sizeof(void*) / 8K (1MB per GB with 8-byte pointers).
|
||||
*/
|
||||
while (hsize * zfs_arc_average_blocksize < physmem * PAGESIZE)
|
||||
while (hsize * zfs_arc_average_blocksize < arc_all_memory() / 8)
|
||||
hsize <<= 1;
|
||||
|
||||
retry:
|
||||
|
|
@ -3055,8 +3055,8 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
|
|||
db->db_state = DB_EVICTING; /* not worth logging this state change */
|
||||
if ((odb = dbuf_hash_insert(db)) != NULL) {
|
||||
/* someone else inserted it first */
|
||||
kmem_cache_free(dbuf_kmem_cache, db);
|
||||
mutex_exit(&dn->dn_dbufs_mtx);
|
||||
kmem_cache_free(dbuf_kmem_cache, db);
|
||||
DBUF_STAT_BUMP(hash_insert_race);
|
||||
return (odb);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -159,6 +159,8 @@ static void
|
|||
dmu_zfetch_stream_fini(zstream_t *zs)
|
||||
{
|
||||
ASSERT(!list_link_active(&zs->zs_node));
|
||||
zfs_refcount_destroy(&zs->zs_callers);
|
||||
zfs_refcount_destroy(&zs->zs_refs);
|
||||
kmem_free(zs, sizeof (*zs));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -898,18 +898,26 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp)
|
|||
{
|
||||
uint64_t delay_min_bytes =
|
||||
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
|
||||
uint64_t dirty_min_bytes =
|
||||
zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
|
||||
uint64_t dirty;
|
||||
|
||||
mutex_enter(&dp->dp_lock);
|
||||
dirty = dp->dp_dirty_total;
|
||||
uint64_t dirty = dp->dp_dirty_total;
|
||||
mutex_exit(&dp->dp_lock);
|
||||
if (dirty > dirty_min_bytes)
|
||||
txg_kick(dp);
|
||||
|
||||
return (dirty > delay_min_bytes);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&dp->dp_lock));
|
||||
|
||||
uint64_t dirty_min_bytes =
|
||||
zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
|
||||
uint64_t dirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
|
||||
|
||||
return (dirty > dirty_min_bytes);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
|
||||
{
|
||||
|
|
@ -917,7 +925,12 @@ dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
|
|||
mutex_enter(&dp->dp_lock);
|
||||
dp->dp_dirty_pertxg[tx->tx_txg & TXG_MASK] += space;
|
||||
dsl_pool_dirty_delta(dp, space);
|
||||
boolean_t needsync = !dmu_tx_is_syncing(tx) &&
|
||||
dsl_pool_need_dirty_sync(dp, tx->tx_txg);
|
||||
mutex_exit(&dp->dp_lock);
|
||||
|
||||
if (needsync)
|
||||
txg_kick(dp, tx->tx_txg);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -444,9 +444,9 @@ spa_config_lock_init(spa_t *spa)
|
|||
spa_config_lock_t *scl = &spa->spa_config_lock[i];
|
||||
mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
|
||||
zfs_refcount_create_untracked(&scl->scl_count);
|
||||
scl->scl_writer = NULL;
|
||||
scl->scl_write_wanted = 0;
|
||||
scl->scl_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -457,9 +457,9 @@ spa_config_lock_destroy(spa_t *spa)
|
|||
spa_config_lock_t *scl = &spa->spa_config_lock[i];
|
||||
mutex_destroy(&scl->scl_lock);
|
||||
cv_destroy(&scl->scl_cv);
|
||||
zfs_refcount_destroy(&scl->scl_count);
|
||||
ASSERT(scl->scl_writer == NULL);
|
||||
ASSERT(scl->scl_write_wanted == 0);
|
||||
ASSERT(scl->scl_count == 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -480,7 +480,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
|
|||
}
|
||||
} else {
|
||||
ASSERT(scl->scl_writer != curthread);
|
||||
if (!zfs_refcount_is_zero(&scl->scl_count)) {
|
||||
if (scl->scl_count != 0) {
|
||||
mutex_exit(&scl->scl_lock);
|
||||
spa_config_exit(spa, locks & ((1 << i) - 1),
|
||||
tag);
|
||||
|
|
@ -488,7 +488,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
|
|||
}
|
||||
scl->scl_writer = curthread;
|
||||
}
|
||||
(void) zfs_refcount_add(&scl->scl_count, tag);
|
||||
scl->scl_count++;
|
||||
mutex_exit(&scl->scl_lock);
|
||||
}
|
||||
return (1);
|
||||
|
|
@ -514,14 +514,14 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
|
|||
}
|
||||
} else {
|
||||
ASSERT(scl->scl_writer != curthread);
|
||||
while (!zfs_refcount_is_zero(&scl->scl_count)) {
|
||||
while (scl->scl_count != 0) {
|
||||
scl->scl_write_wanted++;
|
||||
cv_wait(&scl->scl_cv, &scl->scl_lock);
|
||||
scl->scl_write_wanted--;
|
||||
}
|
||||
scl->scl_writer = curthread;
|
||||
}
|
||||
(void) zfs_refcount_add(&scl->scl_count, tag);
|
||||
scl->scl_count++;
|
||||
mutex_exit(&scl->scl_lock);
|
||||
}
|
||||
ASSERT3U(wlocks_held, <=, locks);
|
||||
|
|
@ -535,8 +535,8 @@ spa_config_exit(spa_t *spa, int locks, const void *tag)
|
|||
if (!(locks & (1 << i)))
|
||||
continue;
|
||||
mutex_enter(&scl->scl_lock);
|
||||
ASSERT(!zfs_refcount_is_zero(&scl->scl_count));
|
||||
if (zfs_refcount_remove(&scl->scl_count, tag) == 0) {
|
||||
ASSERT(scl->scl_count > 0);
|
||||
if (--scl->scl_count == 0) {
|
||||
ASSERT(scl->scl_writer == NULL ||
|
||||
scl->scl_writer == curthread);
|
||||
scl->scl_writer = NULL; /* OK in either case */
|
||||
|
|
@ -555,8 +555,7 @@ spa_config_held(spa_t *spa, int locks, krw_t rw)
|
|||
spa_config_lock_t *scl = &spa->spa_config_lock[i];
|
||||
if (!(locks & (1 << i)))
|
||||
continue;
|
||||
if ((rw == RW_READER &&
|
||||
!zfs_refcount_is_zero(&scl->scl_count)) ||
|
||||
if ((rw == RW_READER && scl->scl_count != 0) ||
|
||||
(rw == RW_WRITER && scl->scl_writer == curthread))
|
||||
locks_held |= 1 << i;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -498,14 +498,6 @@ txg_wait_callbacks(dsl_pool_t *dp)
|
|||
taskq_wait_outstanding(tx->tx_commit_cb_taskq, 0);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
txg_is_syncing(dsl_pool_t *dp)
|
||||
{
|
||||
tx_state_t *tx = &dp->dp_tx;
|
||||
ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
|
||||
return (tx->tx_syncing_txg != 0);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
txg_is_quiescing(dsl_pool_t *dp)
|
||||
{
|
||||
|
|
@ -539,8 +531,6 @@ txg_sync_thread(void *arg)
|
|||
clock_t timeout = zfs_txg_timeout * hz;
|
||||
clock_t timer;
|
||||
uint64_t txg;
|
||||
uint64_t dirty_min_bytes =
|
||||
zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
|
||||
|
||||
/*
|
||||
* We sync when we're scanning, there's someone waiting
|
||||
|
|
@ -551,8 +541,7 @@ txg_sync_thread(void *arg)
|
|||
while (!dsl_scan_active(dp->dp_scan) &&
|
||||
!tx->tx_exiting && timer > 0 &&
|
||||
tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
|
||||
!txg_has_quiesced_to_sync(dp) &&
|
||||
dp->dp_dirty_total < dirty_min_bytes) {
|
||||
!txg_has_quiesced_to_sync(dp)) {
|
||||
dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
|
||||
(u_longlong_t)tx->tx_synced_txg,
|
||||
(u_longlong_t)tx->tx_sync_txg_waiting, dp);
|
||||
|
|
@ -566,6 +555,11 @@ txg_sync_thread(void *arg)
|
|||
* prompting it to do so if necessary.
|
||||
*/
|
||||
while (!tx->tx_exiting && !txg_has_quiesced_to_sync(dp)) {
|
||||
if (txg_is_quiescing(dp)) {
|
||||
txg_thread_wait(tx, &cpr,
|
||||
&tx->tx_quiesce_done_cv, 0);
|
||||
continue;
|
||||
}
|
||||
if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1)
|
||||
tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1;
|
||||
cv_broadcast(&tx->tx_quiesce_more_cv);
|
||||
|
|
@ -791,24 +785,22 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg, boolean_t should_quiesce)
|
|||
}
|
||||
|
||||
/*
|
||||
* If there isn't a txg syncing or in the pipeline, push another txg through
|
||||
* the pipeline by quiescing the open txg.
|
||||
* Pass in the txg number that should be synced.
|
||||
*/
|
||||
void
|
||||
txg_kick(dsl_pool_t *dp)
|
||||
txg_kick(dsl_pool_t *dp, uint64_t txg)
|
||||
{
|
||||
tx_state_t *tx = &dp->dp_tx;
|
||||
|
||||
ASSERT(!dsl_pool_config_held(dp));
|
||||
|
||||
if (tx->tx_sync_txg_waiting >= txg)
|
||||
return;
|
||||
|
||||
mutex_enter(&tx->tx_sync_lock);
|
||||
if (!txg_is_syncing(dp) &&
|
||||
!txg_is_quiescing(dp) &&
|
||||
tx->tx_quiesce_txg_waiting <= tx->tx_open_txg &&
|
||||
tx->tx_sync_txg_waiting <= tx->tx_synced_txg &&
|
||||
tx->tx_quiesced_txg <= tx->tx_synced_txg) {
|
||||
tx->tx_quiesce_txg_waiting = tx->tx_open_txg + 1;
|
||||
cv_broadcast(&tx->tx_quiesce_more_cv);
|
||||
if (tx->tx_sync_txg_waiting < txg) {
|
||||
tx->tx_sync_txg_waiting = txg;
|
||||
cv_broadcast(&tx->tx_sync_more_cv);
|
||||
}
|
||||
mutex_exit(&tx->tx_sync_lock);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -912,9 +912,9 @@ vdev_queue_io(zio_t *zio)
|
|||
}
|
||||
|
||||
zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
|
||||
zio->io_timestamp = gethrtime();
|
||||
|
||||
mutex_enter(&vq->vq_lock);
|
||||
zio->io_timestamp = gethrtime();
|
||||
vdev_queue_io_add(vq, zio);
|
||||
nio = vdev_queue_io_to_issue(vq);
|
||||
mutex_exit(&vq->vq_lock);
|
||||
|
|
@ -936,14 +936,13 @@ vdev_queue_io_done(zio_t *zio)
|
|||
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
|
||||
zio_t *nio;
|
||||
|
||||
hrtime_t now = gethrtime();
|
||||
vq->vq_io_complete_ts = now;
|
||||
vq->vq_io_delta_ts = zio->io_delta = now - zio->io_timestamp;
|
||||
|
||||
mutex_enter(&vq->vq_lock);
|
||||
|
||||
vdev_queue_pending_remove(vq, zio);
|
||||
|
||||
zio->io_delta = gethrtime() - zio->io_timestamp;
|
||||
vq->vq_io_complete_ts = gethrtime();
|
||||
vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp;
|
||||
|
||||
while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
|
||||
mutex_exit(&vq->vq_lock);
|
||||
if (nio->io_done == vdev_queue_agg_io_done) {
|
||||
|
|
|
|||
|
|
@ -98,10 +98,11 @@ tests = ['fallocate_prealloc', 'fallocate_punch-hole']
|
|||
tags = ['functional', 'fallocate']
|
||||
|
||||
[tests/functional/fault:Linux]
|
||||
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
|
||||
'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_multiple',
|
||||
'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
|
||||
'decompress_fault', 'scrub_after_resilver', 'zpool_status_-s']
|
||||
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
|
||||
'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
|
||||
'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
|
||||
'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
|
||||
'zpool_status_-s']
|
||||
tags = ['functional', 'fault']
|
||||
|
||||
[tests/functional/features/large_dnode:Linux]
|
||||
|
|
|
|||
|
|
@ -323,6 +323,7 @@ if os.environ.get('CI') == 'true':
|
|||
'cli_root/zpool_split/zpool_split_wholedisk': ['SKIP', ci_reason],
|
||||
'fault/auto_offline_001_pos': ['SKIP', ci_reason],
|
||||
'fault/auto_online_001_pos': ['SKIP', ci_reason],
|
||||
'fault/auto_online_002_pos': ['SKIP', ci_reason],
|
||||
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
|
||||
'fault/auto_spare_ashift': ['SKIP', ci_reason],
|
||||
'fault/auto_spare_shared': ['SKIP', ci_reason],
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \
|
|||
cleanup.ksh \
|
||||
auto_offline_001_pos.ksh \
|
||||
auto_online_001_pos.ksh \
|
||||
auto_online_002_pos.ksh \
|
||||
auto_replace_001_pos.ksh \
|
||||
auto_spare_001_pos.ksh \
|
||||
auto_spare_002_pos.ksh \
|
||||
|
|
|
|||
|
|
@ -0,0 +1,94 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or http://www.opensolaris.org/os/licensing.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
|
||||
# Copyright (c) 2019 by Delphix. All rights reserved.
|
||||
# Portions Copyright 2021 iXsystems, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/fault/fault.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Testing Fault Management Agent ZED Logic - Automated Auto-Online Test.
|
||||
# Now with partitioned vdevs.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Partition a scsi_debug device for simulating removal
|
||||
# 2. Create a pool
|
||||
# 3. Offline disk
|
||||
# 4. ZED polls for an event change for online disk to be automatically
|
||||
# added back to the pool.
|
||||
#
|
||||
verify_runnable "both"
|
||||
|
||||
# Test teardown: destroy ${TESTPOOL} if it still exists, then call
# unload_scsi_debug (presumably removes the scsi_debug device loaded by
# this test -- helper is defined outside this file).
function cleanup
|
||||
{
|
||||
poolexists ${TESTPOOL} && destroy_pool ${TESTPOOL}
|
||||
unload_scsi_debug
|
||||
}
|
||||
|
||||
log_assert "Testing automated auto-online FMA test with partitioned vdev"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
load_scsi_debug ${SDSIZE} ${SDHOSTS} ${SDTGTS} ${SDLUNS} '512b'
|
||||
SDDEVICE=$(get_debug_device)
|
||||
zpool labelclear -f ${SDDEVICE}
|
||||
partition_disk ${SDSIZE} ${SDDEVICE} 1
|
||||
part=${SDDEVICE}1
|
||||
host=$(get_scsi_host ${SDDEVICE})
|
||||
|
||||
block_device_wait /dev/${part}
|
||||
log_must zpool create -f ${TESTPOOL} raidz1 ${part} ${DISKS}
|
||||
|
||||
# Add some data to the pool
|
||||
log_must mkfile ${FSIZE} /${TESTPOOL}/data
|
||||
|
||||
remove_disk ${SDDEVICE}
|
||||
check_state ${TESTPOOL} "" "degraded" || \
|
||||
log_fail "${TESTPOOL} is not degraded"
|
||||
|
||||
# Clear zpool events
|
||||
log_must zpool events -c
|
||||
|
||||
# Online disk
|
||||
insert_disk ${SDDEVICE} ${host}
|
||||
|
||||
log_note "Delay for ZED auto-online"
|
||||
typeset -i timeout=0
|
||||
until is_pool_resilvered ${TESTPOOL}; do
|
||||
if ((timeout++ == MAXTIMEOUT)); then
|
||||
log_fail "Timeout occurred"
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
log_note "Auto-online of ${SDDEVICE} is complete"
|
||||
|
||||
# Validate auto-online was successful
|
||||
sleep 1
|
||||
check_state ${TESTPOOL} "" "online" || \
|
||||
log_fail "${TESTPOOL} is not back online"
|
||||
|
||||
log_must zpool destroy ${TESTPOOL}
|
||||
|
||||
log_pass "Auto-online with partitioned vdev test successful"
|
||||
|
|
@ -1,6 +1,11 @@
|
|||
# Persistent links for zvol
|
||||
#
|
||||
# persistent disk links: /dev/zvol/dataset_name
|
||||
# also creates compatibility symlink of /dev/dataset_name
|
||||
#
|
||||
# NOTE: We used to also create an additional tree of zvol symlinks located at
|
||||
# /dev/dataset_name (i.e. without the 'zvol' path component) for
|
||||
# compatibility reasons. These are no longer created, and should
|
||||
# not be relied upon.
|
||||
#
|
||||
|
||||
KERNEL=="zd*" SUBSYSTEM=="block" ACTION=="add|change" PROGRAM="@udevdir@/zvol_id $tempnode" SYMLINK+="zvol/%c %c"
|
||||
KERNEL=="zd*", SUBSYSTEM=="block", ACTION=="add|change", PROGRAM=="@udevdir@/zvol_id $devnode", SYMLINK+="zvol/%c"
|
||||
|
|
|
|||
|
|
@ -734,7 +734,7 @@
|
|||
/* #undef ZFS_IS_GPL_COMPATIBLE */
|
||||
|
||||
/* Define the project alias string. */
|
||||
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g4694131a0"
|
||||
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gbdd11cbb9"
|
||||
|
||||
/* Define the project author. */
|
||||
#define ZFS_META_AUTHOR "OpenZFS"
|
||||
|
|
@ -764,7 +764,7 @@
|
|||
#define ZFS_META_NAME "zfs"
|
||||
|
||||
/* Define the project release. */
|
||||
#define ZFS_META_RELEASE "FreeBSD_g4694131a0"
|
||||
#define ZFS_META_RELEASE "FreeBSD_gbdd11cbb9"
|
||||
|
||||
/* Define the project version. */
|
||||
#define ZFS_META_VERSION "2.1.99"
|
||||
|
|
|
|||
Loading…
Reference in a new issue