Notable upstream pull request merges:
 #15197 3bd4df384 Improve ZFS objset sync parallelism
 #15455 020f6fd09 FreeBSD: Implement taskq_init_ent()
 #15476 3d86999c7 sa_lookup() ignores buffer size
 #15478 2a154b848 Fix accounting error for pending sync IO ops in
                  zpool iostat
 #15484 dc45a00ea Add kern.features.zfs
 #15486 e36ff84c3 Update the kstat dataset_name when renaming a zvol
 #15491 f4cd1bac7 Make abd_raidz_gen_iterate() pass an initialized
                  pointer to the callback
 #15495 58398cbd0 FreeBSD: Optimize large kstat outputs

Obtained from:	OpenZFS
OpenZFS commit:	9198de8f10
This commit is contained in:
Martin Matuska 2023-11-08 09:16:01 +01:00
commit 14c2e0a0c5
42 changed files with 755 additions and 177 deletions

View file

@ -233,8 +233,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
}
(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
update_vdev_config_dev_sysfs_path(vdev, path,
ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
&enc_sysfs_path);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted);

View file

@ -35,6 +35,7 @@
#include "zed_strings.h"
#include "agents/zfs_agents.h"
#include <libzutil.h>
#define MAXBUF 4096
@ -922,6 +923,25 @@ _zed_event_add_time_strings(uint64_t eid, zed_strings_t *zsp, int64_t etime[])
}
}
static void
_zed_event_update_enc_sysfs_path(nvlist_t *nvl)
{
const char *vdev_path;
if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH,
&vdev_path) != 0) {
return; /* some other kind of event, ignore it */
}
if (vdev_path == NULL) {
return;
}
update_vdev_config_dev_sysfs_path(nvl, vdev_path,
FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH);
}
/*
* Service the next zevent, blocking until one is available.
*/
@ -969,6 +989,17 @@ zed_event_service(struct zed_conf *zcp)
zed_log_msg(LOG_WARNING,
"Failed to lookup zevent class (eid=%llu)", eid);
} else {
/*
* Special case: If we can dynamically detect an enclosure sysfs
* path, then use that value rather than the one stored in the
* vd->vdev_enc_sysfs_path. There have been rare cases where
* vd->vdev_enc_sysfs_path becomes outdated. However, there
* will be other times when we can not dynamically detect the
* sysfs path (like if a disk disappears) and have to rely on
* the old value for things like turning on the fault LED.
*/
_zed_event_update_enc_sysfs_path(nvl);
/* let internal modules see this event first */
zfs_agent_post_event(class, NULL, nvl);

View file

@ -14,7 +14,6 @@ large_blocks
livelist
log_spacemap
lz4_compress
obsolete_counts
project_quota
resilver_defer
spacemap_histogram

View file

@ -33,10 +33,18 @@ for i in $scripts ; do
val=""
case $i in
enc)
val=$(ls "$VDEV_ENC_SYSFS_PATH/../../" 2>/dev/null)
if echo "$VDEV_ENC_SYSFS_PATH" | grep -q '/sys/bus/pci/slots' ; then
val="$VDEV_ENC_SYSFS_PATH"
else
val="$(ls """$VDEV_ENC_SYSFS_PATH/../../""" 2>/dev/null)"
fi
;;
slot)
val=$(cat "$VDEV_ENC_SYSFS_PATH/slot" 2>/dev/null)
if echo "$VDEV_ENC_SYSFS_PATH" | grep -q '/sys/bus/pci/slots' ; then
val="$(basename """$VDEV_ENC_SYSFS_PATH""")"
else
val="$(cat """$VDEV_ENC_SYSFS_PATH/slot""" 2>/dev/null)"
fi
;;
encdev)
val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)

View file

@ -372,6 +372,10 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary)
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
/* Lookup and add the enclosure sysfs path (if exists) */
update_vdev_config_dev_sysfs_path(vdev, path,
ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
if (strcmp(type, VDEV_TYPE_DISK) == 0)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
(uint64_t)wholedisk) == 0);

View file

@ -44,7 +44,7 @@ AM_INIT_AUTOMAKE([subdir-objects foreign])
# Remove default macros from config.h:
# PACKAGE, PACKAGE_{BUGREPORT,NAME,STRING,TARNAME,VERSION}, STDC_HEADERS, VERSION
AC_CONFIG_HEADERS([zfs_config.h], [
sed -nri~ -e '/^$/be' -e 'N;N;/#define (PACKAGE|VERSION|STDC_HEADERS)/d' -e ':e' -e 'p' zfs_config.h && rm zfs_config.h~ || exit])
$SED -nri~ -e '/^$/be' -e 'N;N;/#define (PACKAGE|VERSION|STDC_HEADERS)/d' -e ':e' -e 'p' zfs_config.h && rm zfs_config.h~ || exit])
LT_INIT
AC_PROG_INSTALL

View file

@ -208,6 +208,8 @@ int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func,
int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func,
void *data);
void update_vdevs_config_dev_sysfs_path(nvlist_t *config);
_LIBZUTIL_H void update_vdev_config_dev_sysfs_path(nvlist_t *nv,
const char *path, const char *key);
#ifdef __cplusplus
}
#endif

View file

@ -42,6 +42,7 @@ extern "C" {
typedef struct taskq {
struct taskqueue *tq_queue;
int tq_nthreads;
} taskq_t;
typedef uintptr_t taskqid_t;
@ -81,7 +82,6 @@ typedef struct taskq_ent {
#define TASKQID_INVALID ((taskqid_t)0)
#define taskq_init_ent(x)
extern taskq_t *system_taskq;
/* Global dynamic task queue for long delay */
extern taskq_t *system_delay_taskq;
@ -92,7 +92,10 @@ extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *,
extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
taskq_ent_t *);
extern int taskq_empty_ent(taskq_ent_t *);
extern void taskq_init_ent(taskq_ent_t *);
taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
taskq_t *taskq_create_synced(const char *, int, pri_t, int, int, uint_t,
kthread_t ***);
taskq_t *taskq_create_instance(const char *, int, int, pri_t, int, int, uint_t);
taskq_t *taskq_create_proc(const char *, int, pri_t, int, int,
struct proc *, uint_t);
@ -117,6 +120,7 @@ void taskq_resume(taskq_t *);
#endif /* _KERNEL */
#ifdef _STANDALONE
typedef void taskq_t;
typedef int taskq_ent_t;
#define taskq_init_ent(x)
#endif /* _STANDALONE */

View file

@ -150,6 +150,8 @@ extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
extern int taskq_empty_ent(taskq_ent_t *);
extern void taskq_init_ent(taskq_ent_t *);
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
extern taskq_t *taskq_create_synced(const char *, int, pri_t, int, int, uint_t,
kthread_t ***);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait_id(taskq_t *, taskqid_t);
extern void taskq_wait_outstanding(taskq_t *, taskqid_t);

View file

@ -71,6 +71,7 @@ typedef struct dataset_kstats {
int dataset_kstats_create(dataset_kstats_t *, objset_t *);
void dataset_kstats_destroy(dataset_kstats_t *);
void dataset_kstats_rename(dataset_kstats_t *dk, const char *);
void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);
void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t);

View file

@ -825,6 +825,11 @@ extern void spa_sync_allpools(void);
extern uint_t zfs_sync_pass_deferred_free;
/* spa sync taskqueues */
taskq_t *spa_sync_tq_create(spa_t *spa, const char *name);
void spa_sync_tq_destroy(spa_t *spa);
void spa_select_allocator(zio_t *zio);
/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;

View file

@ -188,6 +188,12 @@ typedef struct spa_taskqs {
taskq_t **stqs_taskq;
} spa_taskqs_t;
/* one for each thread in the spa sync taskq */
typedef struct spa_syncthread_info {
kthread_t *sti_thread;
taskq_t *sti_wr_iss_tq; /* assigned wr_iss taskq */
} spa_syncthread_info_t;
typedef enum spa_all_vdev_zap_action {
AVZ_ACTION_NONE = 0,
AVZ_ACTION_DESTROY, /* Destroy all per-vdev ZAPs and the AVZ. */
@ -265,6 +271,10 @@ struct spa {
int spa_alloc_count;
int spa_active_allocator; /* selectable allocator */
/* per-allocator sync thread taskqs */
taskq_t *spa_sync_tq;
spa_syncthread_info_t *spa_syncthreads;
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
nvlist_t *spa_label_features; /* Features for reading MOS */
@ -456,7 +466,7 @@ extern char *spa_config_path;
extern const char *zfs_deadman_failmode;
extern uint_t spa_slop_shift;
extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent, zio_t *zio);
extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags);
extern void spa_load_spares(spa_t *spa);

View file

@ -131,7 +131,10 @@ typedef const struct vdev_ops {
* Virtual device properties
*/
typedef union vdev_queue_class {
list_t vqc_list;
struct {
ulong_t vqc_list_numnodes;
list_t vqc_list;
};
avl_tree_t vqc_tree;
} vdev_queue_class_t;

View file

@ -496,6 +496,8 @@ extern taskq_t *system_taskq;
extern taskq_t *system_delay_taskq;
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
extern taskq_t *taskq_create_synced(const char *, int, pri_t, int, int, uint_t,
kthread_t ***);
#define taskq_create_proc(a, b, c, d, e, p, f) \
(taskq_create(a, b, c, d, e, f))
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \

View file

@ -223,6 +223,9 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
#define ZIO_FLAG_DELEGATED (1ULL << 30)
#define ZIO_ALLOCATOR_NONE (-1)
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
#define ZIO_FLAG_MUSTSUCCEED 0
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
@ -526,6 +529,9 @@ struct zio {
/* Taskq dispatching state */
taskq_ent_t io_tqent;
/* write issue taskq selection, based upon sync thread */
taskq_t *io_wr_iss_tq;
};
enum blk_verify_flag {

View file

@ -260,6 +260,7 @@
<elf-symbol name='tpool_suspended' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='tpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='update_vdev_config_dev_strs' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='update_vdev_config_dev_sysfs_path' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='use_color' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='vdev_expand_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='vdev_name_to_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -8332,6 +8333,12 @@
<parameter type-id='b59d7dce' name='buflen'/>
<return type-id='95e97e5e'/>
</function-decl>
<function-decl name='update_vdev_config_dev_sysfs_path' mangled-name='update_vdev_config_dev_sysfs_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='update_vdev_config_dev_sysfs_path'>
<parameter type-id='5ce45b60' name='nv'/>
<parameter type-id='80f4b756' name='path'/>
<parameter type-id='80f4b756' name='key'/>
<return type-id='48b5725f'/>
</function-decl>
<function-type size-in-bits='64' id='2ec2411e'>
<parameter type-id='eaa32e2f'/>
<parameter type-id='5ce45b60'/>

View file

@ -337,6 +337,36 @@ taskq_destroy(taskq_t *tq)
kmem_free(tq, sizeof (taskq_t));
}
/*
* Create a taskq with a specified number of pool threads. Allocate
* and return an array of nthreads kthread_t pointers, one for each
* thread in the pool. The array is not ordered and must be freed
* by the caller.
*/
taskq_t *
taskq_create_synced(const char *name, int nthreads, pri_t pri,
int minalloc, int maxalloc, uint_t flags, kthread_t ***ktpp)
{
taskq_t *tq;
kthread_t **kthreads = kmem_zalloc(sizeof (*kthreads) * nthreads,
KM_SLEEP);
(void) pri; (void) minalloc; (void) maxalloc;
flags &= ~(TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT | TASKQ_DC_BATCH);
tq = taskq_create(name, nthreads, minclsyspri, nthreads, INT_MAX,
flags | TASKQ_PREPOPULATE);
VERIFY(tq != NULL);
VERIFY(tq->tq_nthreads == nthreads);
for (int i = 0; i < nthreads; i++) {
kthreads[i] = tq->tq_threadlist[i];
}
*ktpp = kthreads;
return (tq);
}
int
taskq_member(taskq_t *tq, kthread_t *t)
{

View file

@ -249,6 +249,15 @@ zfs_dev_flush(int fd)
return (0);
}
void
update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
const char *key)
{
(void) nv;
(void) path;
(void) key;
}
void
update_vdevs_config_dev_sysfs_path(nvlist_t *config)
{

View file

@ -766,9 +766,12 @@ no_dev:
* Rescan the enclosure sysfs path for turning on enclosure LEDs and store it
* in the nvlist * (if applicable). Like:
* vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
*
* key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
*/
static void
update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path)
void
update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
const char *key)
{
char *upath, *spath;
@ -777,9 +780,9 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path)
spath = zfs_get_enclosure_sysfs_path(upath);
if (spath) {
nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath);
(void) nvlist_add_string(nv, key, spath);
} else {
nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
(void) nvlist_remove_all(nv, key);
}
free(upath);
@ -799,7 +802,8 @@ sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
return (1);
/* Rescan our enclosure sysfs path for this vdev */
update_vdev_config_dev_sysfs_path(nv, path);
update_vdev_config_dev_sysfs_path(nv, path,
ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
return (0);
}
@ -888,7 +892,8 @@ update_vdev_config_dev_strs(nvlist_t *nv)
(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
vds.vds_devphys);
}
update_vdev_config_dev_sysfs_path(nv, path);
update_vdev_config_dev_sysfs_path(nv, path,
ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
} else {
/* Clear out any stale entries. */
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);

View file

@ -496,6 +496,13 @@ If we have less than this amount of free space,
most ZPL operations (e.g. write, create) will return
.Sy ENOSPC .
.
.It Sy spa_num_allocators Ns = Ns Sy 4 Pq int
Determines the number of block alloctators to use per spa instance.
Capped by the number of actual CPUs in the system.
.Pp
Note that setting this value too high could result in performance
degredation and/or excess fragmentation.
.
.It Sy spa_upgrade_errlog_limit Ns = Ns Sy 0 Pq uint
Limits the number of on-disk error log entries that will be converted to the
new format when enabling the
@ -1974,13 +1981,6 @@ and may need to load new metaslabs to satisfy these allocations.
.It Sy zfs_sync_pass_rewrite Ns = Ns Sy 2 Pq uint
Rewrite new block pointers starting in this pass.
.
.It Sy zfs_sync_taskq_batch_pct Ns = Ns Sy 75 Ns % Pq int
This controls the number of threads used by
.Sy dp_sync_taskq .
The default value of
.Sy 75%
will create a maximum of one thread per CPU.
.
.It Sy zfs_trim_extent_bytes_max Ns = Ns Sy 134217728 Ns B Po 128 MiB Pc Pq uint
Maximum size of TRIM command.
Larger ranges will be split into chunks no larger than this value before
@ -2265,6 +2265,14 @@ If
.Sy 0 ,
generate a system-dependent value close to 6 threads per taskq.
.
.It Sy zio_taskq_wr_iss_ncpus Ns = Ns Sy 0 Pq uint
Determines the number of CPUs to run write issue taskqs.
.Pp
When 0 (the default), the value to use is computed internally
as the number of actual CPUs in the system divided by the
.Sy spa_num_allocators
value.
.
.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
Do not create zvol device nodes.
This may slightly improve startup time on

View file

@ -187,19 +187,18 @@ kstat_sysctl_dataset_string(SYSCTL_HANDLER_ARGS)
static int
kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
{
struct sbuf *sb;
struct sbuf sb;
kstat_t *ksp = arg1;
kstat_io_t *kip = ksp->ks_data;
int rc;
sb = sbuf_new_auto();
if (sb == NULL)
return (ENOMEM);
sbuf_new_for_sysctl(&sb, NULL, 0, req);
/* Update the aggsums before reading */
(void) ksp->ks_update(ksp, KSTAT_READ);
/* though wlentime & friends are signed, they will never be negative */
sbuf_printf(sb,
sbuf_printf(&sb,
"%-8llu %-8llu %-8u %-8u %-8llu %-8llu "
"%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n",
kip->nread, kip->nwritten,
@ -207,25 +206,21 @@ kstat_sysctl_io(SYSCTL_HANDLER_ARGS)
kip->wtime, kip->wlentime, kip->wlastupdate,
kip->rtime, kip->rlentime, kip->rlastupdate,
kip->wcnt, kip->rcnt);
rc = sbuf_finish(sb);
if (rc == 0)
rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
sbuf_delete(sb);
rc = sbuf_finish(&sb);
sbuf_delete(&sb);
return (rc);
}
static int
kstat_sysctl_raw(SYSCTL_HANDLER_ARGS)
{
struct sbuf *sb;
struct sbuf sb;
void *data;
kstat_t *ksp = arg1;
void *(*addr_op)(kstat_t *ksp, loff_t index);
int n, has_header, rc = 0;
sb = sbuf_new_auto();
if (sb == NULL)
return (ENOMEM);
sbuf_new_for_sysctl(&sb, NULL, PAGE_SIZE, req);
if (ksp->ks_raw_ops.addr)
addr_op = ksp->ks_raw_ops.addr;
@ -258,8 +253,10 @@ restart_headers:
if (has_header) {
if (rc == ENOMEM && !kstat_resize_raw(ksp))
goto restart_headers;
if (rc == 0)
sbuf_printf(sb, "\n%s", ksp->ks_raw_buf);
if (rc == 0) {
sbuf_cat(&sb, "\n");
sbuf_cat(&sb, ksp->ks_raw_buf);
}
}
while ((data = addr_op(ksp, n)) != NULL) {
@ -270,22 +267,19 @@ restart:
if (rc == ENOMEM && !kstat_resize_raw(ksp))
goto restart;
if (rc == 0)
sbuf_printf(sb, "%s", ksp->ks_raw_buf);
sbuf_cat(&sb, ksp->ks_raw_buf);
} else {
ASSERT3U(ksp->ks_ndata, ==, 1);
sbuf_hexdump(sb, ksp->ks_data,
sbuf_hexdump(&sb, ksp->ks_data,
ksp->ks_data_size, NULL, 0);
}
n++;
}
free(ksp->ks_raw_buf, M_TEMP);
mutex_exit(ksp->ks_lock);
sbuf_trim(sb);
rc = sbuf_finish(sb);
if (rc == 0)
rc = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
sbuf_delete(sb);
rc = sbuf_finish(&sb);
sbuf_delete(&sb);
return (rc);
}

View file

@ -220,6 +220,7 @@ taskq_create_impl(const char *name, int nthreads, pri_t pri,
nthreads = MAX((mp_ncpus * nthreads) / 100, 1);
tq = kmem_alloc(sizeof (*tq), KM_SLEEP);
tq->tq_nthreads = nthreads;
tq->tq_queue = taskqueue_create(name, M_WAITOK,
taskqueue_thread_enqueue, &tq->tq_queue);
taskqueue_set_callback(tq->tq_queue, TASKQUEUE_CALLBACK_TYPE_INIT,
@ -254,6 +255,87 @@ taskq_destroy(taskq_t *tq)
kmem_free(tq, sizeof (*tq));
}
static void taskq_sync_assign(void *arg);
typedef struct taskq_sync_arg {
kthread_t *tqa_thread;
kcondvar_t tqa_cv;
kmutex_t tqa_lock;
int tqa_ready;
} taskq_sync_arg_t;
static void
taskq_sync_assign(void *arg)
{
taskq_sync_arg_t *tqa = arg;
mutex_enter(&tqa->tqa_lock);
tqa->tqa_thread = curthread;
tqa->tqa_ready = 1;
cv_signal(&tqa->tqa_cv);
while (tqa->tqa_ready == 1)
cv_wait(&tqa->tqa_cv, &tqa->tqa_lock);
mutex_exit(&tqa->tqa_lock);
}
/*
* Create a taskq with a specified number of pool threads. Allocate
* and return an array of nthreads kthread_t pointers, one for each
* thread in the pool. The array is not ordered and must be freed
* by the caller.
*/
taskq_t *
taskq_create_synced(const char *name, int nthreads, pri_t pri,
int minalloc, int maxalloc, uint_t flags, kthread_t ***ktpp)
{
taskq_t *tq;
taskq_sync_arg_t *tqs = kmem_zalloc(sizeof (*tqs) * nthreads, KM_SLEEP);
kthread_t **kthreads = kmem_zalloc(sizeof (*kthreads) * nthreads,
KM_SLEEP);
flags &= ~(TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT | TASKQ_DC_BATCH);
tq = taskq_create(name, nthreads, minclsyspri, nthreads, INT_MAX,
flags | TASKQ_PREPOPULATE);
VERIFY(tq != NULL);
VERIFY(tq->tq_nthreads == nthreads);
/* spawn all syncthreads */
for (int i = 0; i < nthreads; i++) {
cv_init(&tqs[i].tqa_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&tqs[i].tqa_lock, NULL, MUTEX_DEFAULT, NULL);
(void) taskq_dispatch(tq, taskq_sync_assign,
&tqs[i], TQ_FRONT);
}
/* wait on all syncthreads to start */
for (int i = 0; i < nthreads; i++) {
mutex_enter(&tqs[i].tqa_lock);
while (tqs[i].tqa_ready == 0)
cv_wait(&tqs[i].tqa_cv, &tqs[i].tqa_lock);
mutex_exit(&tqs[i].tqa_lock);
}
/* let all syncthreads resume, finish */
for (int i = 0; i < nthreads; i++) {
mutex_enter(&tqs[i].tqa_lock);
tqs[i].tqa_ready = 2;
cv_broadcast(&tqs[i].tqa_cv);
mutex_exit(&tqs[i].tqa_lock);
}
taskq_wait(tq);
for (int i = 0; i < nthreads; i++) {
kthreads[i] = tqs[i].tqa_thread;
mutex_destroy(&tqs[i].tqa_lock);
cv_destroy(&tqs[i].tqa_cv);
}
kmem_free(tqs, sizeof (*tqs) * nthreads);
*ktpp = kthreads;
return (tq);
}
int
taskq_member(taskq_t *tq, kthread_t *thread)
{
@ -398,21 +480,33 @@ void
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags,
taskq_ent_t *task)
{
int prio;
/*
* If TQ_FRONT is given, we want higher priority for this task, so it
* can go at the front of the queue.
*/
prio = !!(flags & TQ_FRONT);
task->tqent_id = 0;
task->tqent_task.ta_priority = !!(flags & TQ_FRONT);
task->tqent_func = func;
task->tqent_arg = arg;
TASK_INIT(&task->tqent_task, prio, taskq_run_ent, task);
taskqueue_enqueue(tq->tq_queue, &task->tqent_task);
}
void
taskq_init_ent(taskq_ent_t *task)
{
TASK_INIT(&task->tqent_task, 0, taskq_run_ent, task);
task->tqent_func = NULL;
task->tqent_arg = NULL;
task->tqent_id = 0;
task->tqent_type = NORMAL_TASK;
task->tqent_rc = 0;
}
int
taskq_empty_ent(taskq_ent_t *task)
{
return (task->tqent_task.ta_pending == 0);
}
void
taskq_wait(taskq_t *tq)
{
@ -439,9 +533,3 @@ taskq_wait_outstanding(taskq_t *tq, taskqid_t id __unused)
{
taskqueue_drain_all(tq->tq_queue);
}
int
taskq_empty_ent(taskq_ent_t *t)
{
return (t->tqent_task.ta_pending == 0);
}

View file

@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <sys/spa_impl.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/uio.h>
@ -336,6 +337,8 @@ static moduledata_t zfs_mod = {
EVENTHANDLER_DEFINE(mountroot, spa_boot_init, NULL, 0);
#endif
FEATURE(zfs, "OpenZFS support");
DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_CLOCKS, SI_ORDER_ANY);
MODULE_VERSION(zfsctrl, 1);
#if __FreeBSD_version > 1300092

View file

@ -1319,6 +1319,7 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
}
}
strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
dataset_kstats_rename(&zv->zv_kstat, newname);
}
/*

View file

@ -1262,6 +1262,42 @@ taskq_destroy(taskq_t *tq)
}
EXPORT_SYMBOL(taskq_destroy);
/*
* Create a taskq with a specified number of pool threads. Allocate
* and return an array of nthreads kthread_t pointers, one for each
* thread in the pool. The array is not ordered and must be freed
* by the caller.
*/
taskq_t *
taskq_create_synced(const char *name, int nthreads, pri_t pri,
int minalloc, int maxalloc, uint_t flags, kthread_t ***ktpp)
{
taskq_t *tq;
taskq_thread_t *tqt;
int i = 0;
kthread_t **kthreads = kmem_zalloc(sizeof (*kthreads) * nthreads,
KM_SLEEP);
flags &= ~(TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT | TASKQ_DC_BATCH);
/* taskq_create spawns all the threads before returning */
tq = taskq_create(name, nthreads, minclsyspri, nthreads, INT_MAX,
flags | TASKQ_PREPOPULATE);
VERIFY(tq != NULL);
VERIFY(tq->tq_nthreads == nthreads);
list_for_each_entry(tqt, &tq->tq_thread_list, tqt_thread_list) {
kthreads[i] = tqt->tqt_thread;
i++;
}
ASSERT3S(i, ==, nthreads);
*ktpp = kthreads;
return (tq);
}
EXPORT_SYMBOL(taskq_create_synced);
static unsigned int spl_taskq_kick = 0;
/*

View file

@ -1488,7 +1488,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
* read-only flag, pretend it was set, as done for snapshots.
*/
if (!canwrite)
vfs->vfs_readonly = true;
vfs->vfs_readonly = B_TRUE;
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
if (error) {

View file

@ -1528,6 +1528,8 @@ zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
*/
set_disk_ro(zv->zv_zso->zvo_disk, !readonly);
set_disk_ro(zv->zv_zso->zvo_disk, readonly);
dataset_kstats_rename(&zv->zv_kstat, newname);
}
void

View file

@ -1025,7 +1025,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
size_t len, dlen;
struct abd_iter caiters[3];
struct abd_iter daiter;
void *caddrs[3];
void *caddrs[3], *daddr;
unsigned long flags __maybe_unused = 0;
abd_t *c_cabds[3];
abd_t *c_dabd = NULL;
@ -1057,10 +1057,13 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
if (dsize > 0) {
IMPLY(abd_is_gang(dabd), c_dabd != NULL);
abd_iter_map(&daiter);
daddr = daiter.iter_mapaddr;
len = MIN(daiter.iter_mapsize, len);
dlen = len;
} else
} else {
daddr = NULL;
dlen = 0;
}
/* must be progressive */
ASSERT3U(len, >, 0);
@ -1070,7 +1073,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
*/
ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
func_raidz_gen(caddrs, daiter.iter_mapaddr, len, dlen);
func_raidz_gen(caddrs, daddr, len, dlen);
for (i = parity-1; i >= 0; i--) {
abd_iter_unmap(&caiters[i]);

View file

@ -198,6 +198,18 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
zil_sums_fini(&dk->dk_zil_sums);
}
void
dataset_kstats_rename(dataset_kstats_t *dk, const char *name)
{
dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data;
char *ds_name;
ds_name = KSTAT_NAMED_STR_PTR(&dkv->dkv_ds_name);
ASSERT3S(ds_name, !=, NULL);
(void) strlcpy(ds_name, name,
KSTAT_NAMED_STR_BUFLEN(&dkv->dkv_ds_name));
}
void
dataset_kstats_update_write_kstats(dataset_kstats_t *dk,
int64_t nwritten)

View file

@ -4587,6 +4587,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
}
}
/*
* Syncs out a range of dirty records for indirect or leaf dbufs. May be
* called recursively from dbuf_sync_indirect().
*/
void
dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx)
{
@ -5005,7 +5009,10 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
}
/* Issue I/O to commit a dirty buffer to disk. */
/*
* Populate dr->dr_zio with a zio to commit a dirty buffer to disk.
* Caller is responsible for issuing the zio_[no]wait(dr->dr_zio).
*/
static void
dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
{

View file

@ -1639,28 +1639,90 @@ dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
kmem_free(bp, sizeof (*bp));
}
typedef struct sync_objset_arg {
zio_t *soa_zio;
objset_t *soa_os;
dmu_tx_t *soa_tx;
kmutex_t soa_mutex;
int soa_count;
taskq_ent_t soa_tq_ent;
} sync_objset_arg_t;
typedef struct sync_dnodes_arg {
multilist_t *sda_list;
int sda_sublist_idx;
multilist_t *sda_newlist;
dmu_tx_t *sda_tx;
multilist_t *sda_list;
int sda_sublist_idx;
multilist_t *sda_newlist;
sync_objset_arg_t *sda_soa;
} sync_dnodes_arg_t;
static void sync_meta_dnode_task(void *arg);
static void
sync_dnodes_task(void *arg)
{
sync_dnodes_arg_t *sda = arg;
sync_objset_arg_t *soa = sda->sda_soa;
objset_t *os = soa->soa_os;
multilist_sublist_t *ms =
multilist_sublist_lock(sda->sda_list, sda->sda_sublist_idx);
dmu_objset_sync_dnodes(ms, sda->sda_tx);
dmu_objset_sync_dnodes(ms, soa->soa_tx);
multilist_sublist_unlock(ms);
kmem_free(sda, sizeof (*sda));
mutex_enter(&soa->soa_mutex);
ASSERT(soa->soa_count != 0);
if (--soa->soa_count != 0) {
mutex_exit(&soa->soa_mutex);
return;
}
mutex_exit(&soa->soa_mutex);
taskq_dispatch_ent(dmu_objset_pool(os)->dp_sync_taskq,
sync_meta_dnode_task, soa, TQ_FRONT, &soa->soa_tq_ent);
}
/*
* Issue the zio_nowait() for all dirty record zios on the meta dnode,
* then trigger the callback for the zil_sync. This runs once for each
* objset, only after any/all sublists in the objset have been synced.
*/
static void
sync_meta_dnode_task(void *arg)
{
sync_objset_arg_t *soa = arg;
objset_t *os = soa->soa_os;
dmu_tx_t *tx = soa->soa_tx;
int txgoff = tx->tx_txg & TXG_MASK;
dbuf_dirty_record_t *dr;
ASSERT0(soa->soa_count);
list_t *list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
while ((dr = list_remove_head(list)) != NULL) {
ASSERT0(dr->dr_dbuf->db_level);
zio_nowait(dr->dr_zio);
}
/* Enable dnode backfill if enough objects have been freed. */
if (os->os_freed_dnodes >= dmu_rescan_dnode_threshold) {
os->os_rescan_dnodes = B_TRUE;
os->os_freed_dnodes = 0;
}
/*
* Free intent log blocks up to this tx.
*/
zil_sync(os->os_zil, tx);
os->os_phys->os_zil_header = os->os_zil_header;
zio_nowait(soa->soa_zio);
mutex_destroy(&soa->soa_mutex);
kmem_free(soa, sizeof (*soa));
}
/* called from dsl */
void
@ -1670,8 +1732,6 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
zbookmark_phys_t zb;
zio_prop_t zp;
zio_t *zio;
list_t *list;
dbuf_dirty_record_t *dr;
int num_sublists;
multilist_t *ml;
blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP);
@ -1758,39 +1818,49 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
offsetof(dnode_t, dn_dirty_link[txgoff]));
}
/*
* zio_nowait(zio) is done after any/all sublist and meta dnode
* zios have been nowaited, and the zil_sync() has been performed.
* The soa is freed at the end of sync_meta_dnode_task.
*/
sync_objset_arg_t *soa = kmem_alloc(sizeof (*soa), KM_SLEEP);
soa->soa_zio = zio;
soa->soa_os = os;
soa->soa_tx = tx;
taskq_init_ent(&soa->soa_tq_ent);
mutex_init(&soa->soa_mutex, NULL, MUTEX_DEFAULT, NULL);
ml = &os->os_dirty_dnodes[txgoff];
num_sublists = multilist_get_num_sublists(ml);
soa->soa_count = num_sublists = multilist_get_num_sublists(ml);
for (int i = 0; i < num_sublists; i++) {
if (multilist_sublist_is_empty_idx(ml, i))
continue;
sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP);
sda->sda_list = ml;
sda->sda_sublist_idx = i;
sda->sda_tx = tx;
(void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
sync_dnodes_task, sda, 0);
/* callback frees sda */
}
taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
while ((dr = list_remove_head(list)) != NULL) {
ASSERT0(dr->dr_dbuf->db_level);
zio_nowait(dr->dr_zio);
soa->soa_count--;
}
/* Enable dnode backfill if enough objects have been freed. */
if (os->os_freed_dnodes >= dmu_rescan_dnode_threshold) {
os->os_rescan_dnodes = B_TRUE;
os->os_freed_dnodes = 0;
if (soa->soa_count == 0) {
taskq_dispatch_ent(dmu_objset_pool(os)->dp_sync_taskq,
sync_meta_dnode_task, soa, TQ_FRONT, &soa->soa_tq_ent);
} else {
/*
* Sync sublists in parallel. The last to finish
* (i.e., when soa->soa_count reaches zero) must
* dispatch sync_meta_dnode_task.
*/
for (int i = 0; i < num_sublists; i++) {
if (multilist_sublist_is_empty_idx(ml, i))
continue;
sync_dnodes_arg_t *sda =
kmem_alloc(sizeof (*sda), KM_SLEEP);
sda->sda_list = ml;
sda->sda_sublist_idx = i;
sda->sda_soa = soa;
(void) taskq_dispatch(
dmu_objset_pool(os)->dp_sync_taskq,
sync_dnodes_task, sda, 0);
/* sync_dnodes_task frees sda */
}
}
/*
* Free intent log blocks up to this tx.
*/
zil_sync(os->os_zil, tx);
os->os_phys->os_zil_header = os->os_zil_header;
zio_nowait(zio);
}
boolean_t

View file

@ -627,6 +627,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
/*
* Write out the dnode's dirty buffers.
* Does not wait for zio completions.
*/
void
dnode_sync(dnode_t *dn, dmu_tx_t *tx)

View file

@ -2069,8 +2069,9 @@ dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
return (error);
}
/* Nonblocking dataset sync. Assumes dataset:objset is always 1:1 */
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *rio, dmu_tx_t *tx)
{
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(ds->ds_objset != NULL);
@ -2098,7 +2099,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0;
}
dmu_objset_sync(ds->ds_objset, zio, tx);
dmu_objset_sync(ds->ds_objset, rio, tx);
}
/*

View file

@ -140,11 +140,6 @@ uint_t zfs_delay_min_dirty_percent = 60;
*/
uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
/*
* This determines the number of threads used by the dp_sync_taskq.
*/
static int zfs_sync_taskq_batch_pct = 75;
/*
* These tunables determine the behavior of how zil_itxg_clean() is
* called via zil_clean() in the context of spa_sync(). When an itxg
@ -214,9 +209,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
txg_list_create(&dp->dp_early_sync_tasks, spa,
offsetof(dsl_sync_task_t, dst_node));
dp->dp_sync_taskq = taskq_create("dp_sync_taskq",
zfs_sync_taskq_batch_pct, minclsyspri, 1, INT_MAX,
TASKQ_THREADS_CPU_PCT);
dp->dp_sync_taskq = spa_sync_tq_create(spa, "dp_sync_taskq");
dp->dp_zil_clean_taskq = taskq_create("dp_zil_clean_taskq",
zfs_zil_clean_taskq_nthr_pct, minclsyspri,
@ -409,7 +402,7 @@ dsl_pool_close(dsl_pool_t *dp)
txg_list_destroy(&dp->dp_dirty_dirs);
taskq_destroy(dp->dp_zil_clean_taskq);
taskq_destroy(dp->dp_sync_taskq);
spa_sync_tq_destroy(dp->dp_spa);
/*
* We can't set retry to TRUE since we're explicitly specifying
@ -674,7 +667,7 @@ dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
void
dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
{
zio_t *zio;
zio_t *rio; /* root zio for all dirty dataset syncs */
dmu_tx_t *tx;
dsl_dir_t *dd;
dsl_dataset_t *ds;
@ -704,9 +697,10 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
}
/*
* Write out all dirty blocks of dirty datasets.
* Write out all dirty blocks of dirty datasets. Note, this could
* create a very large (+10k) zio tree.
*/
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
rio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
/*
* We must not sync any non-MOS datasets twice, because
@ -715,9 +709,9 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*/
ASSERT(!list_link_active(&ds->ds_synced_link));
list_insert_tail(&synced_datasets, ds);
dsl_dataset_sync(ds, zio, tx);
dsl_dataset_sync(ds, rio, tx);
}
VERIFY0(zio_wait(zio));
VERIFY0(zio_wait(rio));
/*
* Update the long range free counter after
@ -748,13 +742,13 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
* user accounting information (and we won't get confused
* about which blocks are part of the snapshot).
*/
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
rio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
objset_t *os = ds->ds_objset;
ASSERT(list_link_active(&ds->ds_synced_link));
dmu_buf_rele(ds->ds_dbuf, ds);
dsl_dataset_sync(ds, zio, tx);
dsl_dataset_sync(ds, rio, tx);
/*
* Release any key mappings created by calls to
@ -767,7 +761,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
}
}
VERIFY0(zio_wait(zio));
VERIFY0(zio_wait(rio));
/*
* Now that the datasets have been completely synced, we can
@ -1481,9 +1475,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_sync_percent, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, delay_scale, U64, ZMOD_RW,
"How quickly delay approaches infinity");
ZFS_MODULE_PARAM(zfs, zfs_, sync_taskq_batch_pct, INT, ZMOD_RW,
"Max percent of CPUs that are used to sync dirty data");
ZFS_MODULE_PARAM(zfs_zil, zfs_zil_, clean_taskq_nthr_pct, INT, ZMOD_RW,
"Max percent of CPUs that are used per dp_sync_taskq");

View file

@ -23,6 +23,7 @@
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2023 RackTop Systems, Inc.
*/
#include <sys/zfs_context.h>
@ -369,7 +370,7 @@ sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
if (bulk[i].sa_data) {
SA_COPY_DATA(bulk[i].sa_data_func,
bulk[i].sa_addr, bulk[i].sa_data,
bulk[i].sa_size);
MIN(bulk[i].sa_size, bulk[i].sa_length));
}
continue;

View file

@ -99,6 +99,7 @@
#include "zfs_prop.h"
#include "zfs_comutil.h"
#include <cityhash.h>
/*
* spa_thread() existed on Illumos as a parent thread for the various worker
@ -128,16 +129,16 @@ int zfs_ccw_retry_interval = 300;
typedef enum zti_modes {
ZTI_MODE_FIXED, /* value is # of threads (min 1) */
ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
ZTI_MODE_SCALE, /* Taskqs scale with CPUs. */
ZTI_MODE_SYNC, /* sync thread assigned */
ZTI_MODE_NULL, /* don't create a taskq */
ZTI_NMODES
} zti_modes_t;
#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
#define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 }
#define ZTI_SCALE { ZTI_MODE_SCALE, 0, 1 }
#define ZTI_SYNC { ZTI_MODE_SYNC, 0, 1 }
#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
#define ZTI_N(n) ZTI_P(n, 1)
@ -158,14 +159,14 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
* initializing a pool, we use this table to create an appropriately sized
* taskq. Some operations are low volume and therefore have a small, static
* number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
* macros. Other operations process a large amount of data; the ZTI_BATCH
* macros. Other operations process a large amount of data; the ZTI_SCALE
* macro causes us to create a taskq oriented for throughput. Some operations
* are so high frequency and short-lived that the taskq itself can become a
* point of lock contention. The ZTI_P(#, #) macro indicates that we need an
* additional degree of parallelism specified by the number of threads per-
* taskq and the number of taskqs; when dispatching an event in this case, the
* particular taskq is chosen at random. ZTI_SCALE is similar to ZTI_BATCH,
* but with number of taskqs also scaling with number of CPUs.
* particular taskq is chosen at random. ZTI_SCALE uses a number of taskqs
* that scales with the number of CPUs.
*
* The different taskq priorities are to handle the different contexts (issue
* and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
@ -175,7 +176,7 @@ static const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
{ ZTI_N(8), ZTI_NULL, ZTI_SCALE, ZTI_NULL }, /* READ */
{ ZTI_BATCH, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */
{ ZTI_SYNC, ZTI_N(5), ZTI_SCALE, ZTI_N(5) }, /* WRITE */
{ ZTI_SCALE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
@ -206,6 +207,8 @@ static const uint_t zio_taskq_basedc = 80; /* base duty cycle */
static const boolean_t spa_create_process = B_TRUE; /* no process => no sysdc */
#endif
static uint_t zio_taskq_wr_iss_ncpus = 0;
/*
* Report any spa_load_verify errors found, but do not fail spa_load.
* This is used by zdb to analyze non-idle pools.
@ -1054,21 +1057,34 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
uint_t count = ztip->zti_count;
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
uint_t cpus, flags = TASKQ_DYNAMIC;
#ifdef HAVE_SYSDC
boolean_t batch = B_FALSE;
#endif
switch (mode) {
case ZTI_MODE_FIXED:
ASSERT3U(value, >, 0);
break;
case ZTI_MODE_BATCH:
#ifdef HAVE_SYSDC
batch = B_TRUE;
#endif
case ZTI_MODE_SYNC:
/*
* Create one wr_iss taskq for every 'zio_taskq_wr_iss_ncpus',
* not to exceed the number of spa allocators.
*/
if (zio_taskq_wr_iss_ncpus == 0) {
count = MAX(boot_ncpus / spa->spa_alloc_count, 1);
} else {
count = MAX(1,
boot_ncpus / MAX(1, zio_taskq_wr_iss_ncpus));
}
count = MAX(count, (zio_taskq_batch_pct + 99) / 100);
count = MIN(count, spa->spa_alloc_count);
/*
* zio_taskq_batch_pct is unbounded and may exceed 100%, but no
* single taskq may have more threads than 100% of online cpus.
*/
value = (zio_taskq_batch_pct + count / 2) / count;
value = MIN(value, 100);
flags |= TASKQ_THREADS_CPU_PCT;
value = MIN(zio_taskq_batch_pct, 100);
break;
case ZTI_MODE_SCALE:
@ -1115,7 +1131,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
default:
panic("unrecognized mode for %s_%s taskq (%u:%u) in "
"spa_activate()",
"spa_taskqs_init()",
zio_type_name[t], zio_taskq_types[q], mode, value);
break;
}
@ -1137,9 +1153,6 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
#ifdef HAVE_SYSDC
if (zio_taskq_sysdc && spa->spa_proc != &p0) {
if (batch)
flags |= TASKQ_DC_BATCH;
(void) zio_taskq_basedc;
tq = taskq_create_sysdc(name, value, 50, INT_MAX,
spa->spa_proc, zio_taskq_basedc, flags);
@ -1200,12 +1213,11 @@ spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
/*
* Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
* Note that a type may have multiple discrete taskqs to avoid lock contention
* on the taskq itself. In that case we choose which taskq at random by using
* the low bits of gethrtime().
* on the taskq itself.
*/
void
spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent)
static taskq_t *
spa_taskq_dispatch_select(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
zio_t *zio)
{
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
taskq_t *tq;
@ -1213,12 +1225,27 @@ spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
ASSERT3P(tqs->stqs_taskq, !=, NULL);
ASSERT3U(tqs->stqs_count, !=, 0);
if ((t == ZIO_TYPE_WRITE) && (q == ZIO_TASKQ_ISSUE) &&
(zio != NULL) && (zio->io_wr_iss_tq != NULL)) {
/* dispatch to assigned write issue taskq */
tq = zio->io_wr_iss_tq;
return (tq);
}
if (tqs->stqs_count == 1) {
tq = tqs->stqs_taskq[0];
} else {
tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
}
return (tq);
}
void
spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent,
zio_t *zio)
{
taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, zio);
taskq_dispatch_ent(tq, func, arg, flags, ent);
}
@ -1229,20 +1256,8 @@ void
spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags)
{
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
taskq_t *tq;
taskqid_t id;
ASSERT3P(tqs->stqs_taskq, !=, NULL);
ASSERT3U(tqs->stqs_count, !=, 0);
if (tqs->stqs_count == 1) {
tq = tqs->stqs_taskq[0];
} else {
tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
}
id = taskq_dispatch(tq, func, arg, flags);
taskq_t *tq = spa_taskq_dispatch_select(spa, t, q, NULL);
taskqid_t id = taskq_dispatch(tq, func, arg, flags);
if (id)
taskq_wait_id(tq, id);
}
@ -9649,6 +9664,104 @@ spa_sync_allpools(void)
mutex_exit(&spa_namespace_lock);
}
taskq_t *
spa_sync_tq_create(spa_t *spa, const char *name)
{
kthread_t **kthreads;
ASSERT(spa->spa_sync_tq == NULL);
ASSERT3S(spa->spa_alloc_count, <=, boot_ncpus);
/*
* - do not allow more allocators than cpus.
* - there may be more cpus than allocators.
* - do not allow more sync taskq threads than allocators or cpus.
*/
int nthreads = spa->spa_alloc_count;
spa->spa_syncthreads = kmem_zalloc(sizeof (spa_syncthread_info_t) *
nthreads, KM_SLEEP);
spa->spa_sync_tq = taskq_create_synced(name, nthreads, minclsyspri,
nthreads, INT_MAX, TASKQ_PREPOPULATE, &kthreads);
VERIFY(spa->spa_sync_tq != NULL);
VERIFY(kthreads != NULL);
spa_taskqs_t *tqs =
&spa->spa_zio_taskq[ZIO_TYPE_WRITE][ZIO_TASKQ_ISSUE];
spa_syncthread_info_t *ti = spa->spa_syncthreads;
for (int i = 0, w = 0; i < nthreads; i++, w++, ti++) {
ti->sti_thread = kthreads[i];
if (w == tqs->stqs_count) {
w = 0;
}
ti->sti_wr_iss_tq = tqs->stqs_taskq[w];
}
kmem_free(kthreads, sizeof (*kthreads) * nthreads);
return (spa->spa_sync_tq);
}
void
spa_sync_tq_destroy(spa_t *spa)
{
ASSERT(spa->spa_sync_tq != NULL);
taskq_wait(spa->spa_sync_tq);
taskq_destroy(spa->spa_sync_tq);
kmem_free(spa->spa_syncthreads,
sizeof (spa_syncthread_info_t) * spa->spa_alloc_count);
spa->spa_sync_tq = NULL;
}
void
spa_select_allocator(zio_t *zio)
{
zbookmark_phys_t *bm = &zio->io_bookmark;
spa_t *spa = zio->io_spa;
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
/*
* A gang block (for example) may have inherited its parent's
* allocator, in which case there is nothing further to do here.
*/
if (ZIO_HAS_ALLOCATOR(zio))
return;
ASSERT(spa != NULL);
ASSERT(bm != NULL);
/*
* First try to use an allocator assigned to the syncthread, and set
* the corresponding write issue taskq for the allocator.
* Note, we must have an open pool to do this.
*/
if (spa->spa_sync_tq != NULL) {
spa_syncthread_info_t *ti = spa->spa_syncthreads;
for (int i = 0; i < spa->spa_alloc_count; i++, ti++) {
if (ti->sti_thread == curthread) {
zio->io_allocator = i;
zio->io_wr_iss_tq = ti->sti_wr_iss_tq;
return;
}
}
}
/*
* We want to try to use as many allocators as possible to help improve
* performance, but we also want logically adjacent IOs to be physically
* adjacent to improve sequential read performance. We chunk each object
* into 2^20 block regions, and then hash based on the objset, object,
* level, and region to accomplish both of these goals.
*/
uint64_t hv = cityhash4(bm->zb_objset, bm->zb_object, bm->zb_level,
bm->zb_blkid >> 20);
zio->io_allocator = (uint_t)hv % spa->spa_alloc_count;
zio->io_wr_iss_tq = NULL;
}
/*
* ==========================================================================
* Miscellaneous routines
@ -10242,3 +10355,6 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT,
"Whether extra ALLOC blkptrs were added to a livelist entry while it "
"was being condensed");
/* END CSTYLED */
ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_wr_iss_ncpus, UINT, ZMOD_RW,
"Number of CPUs to run write issue taskqs");

View file

@ -388,7 +388,11 @@ uint_t spa_asize_inflation = 24;
uint_t spa_slop_shift = 5;
static const uint64_t spa_min_slop = 128ULL * 1024 * 1024;
static const uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024;
static const int spa_allocators = 4;
/*
* Number of allocators to use, per spa instance
*/
static int spa_num_allocators = 4;
/*
* Spa active allocator.
@ -730,7 +734,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
if (altroot)
spa->spa_root = spa_strdup(altroot);
spa->spa_alloc_count = spa_allocators;
/* Do not allow more allocators than CPUs. */
spa->spa_alloc_count = MIN(MAX(spa_num_allocators, 1), boot_ncpus);
spa->spa_allocs = kmem_zalloc(spa->spa_alloc_count *
sizeof (spa_alloc_t), KM_SLEEP);
for (int i = 0; i < spa->spa_alloc_count; i++) {
@ -739,6 +745,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
avl_create(&spa->spa_allocs[i].spaa_tree, zio_bookmark_compare,
sizeof (zio_t), offsetof(zio_t, io_queue_node.a));
}
avl_create(&spa->spa_metaslabs_by_flushed, metaslab_sort_by_flushed,
sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node));
avl_create(&spa->spa_sm_logs_by_txg, spa_log_sm_sort_by_txg,
@ -3009,3 +3016,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, UINT, ZMOD_RW,
ZFS_MODULE_PARAM_CALL(zfs_spa, spa_, slop_shift, param_set_slop_shift,
param_get_uint, ZMOD_RW, "Reserved free space in pool");
ZFS_MODULE_PARAM(zfs, spa_, num_allocators, INT, ZMOD_RW,
"Number of allocators per spa, capped by ncpus");

View file

@ -273,8 +273,10 @@ vdev_queue_class_add(vdev_queue_t *vq, zio_t *zio)
{
zio_priority_t p = zio->io_priority;
vq->vq_cqueued |= 1U << p;
if (vdev_queue_class_fifo(p))
if (vdev_queue_class_fifo(p)) {
list_insert_tail(&vq->vq_class[p].vqc_list, zio);
vq->vq_class[p].vqc_list_numnodes++;
}
else
avl_add(&vq->vq_class[p].vqc_tree, zio);
}
@ -288,6 +290,7 @@ vdev_queue_class_remove(vdev_queue_t *vq, zio_t *zio)
list_t *list = &vq->vq_class[p].vqc_list;
list_remove(list, zio);
empty = list_is_empty(list);
vq->vq_class[p].vqc_list_numnodes--;
} else {
avl_tree_t *tree = &vq->vq_class[p].vqc_tree;
avl_remove(tree, zio);
@ -1069,7 +1072,7 @@ vdev_queue_class_length(vdev_t *vd, zio_priority_t p)
{
vdev_queue_t *vq = &vd->vdev_queue;
if (vdev_queue_class_fifo(p))
return (list_is_empty(&vq->vq_class[p].vqc_list) == 0);
return (vq->vq_class[p].vqc_list_numnodes);
else
return (avl_numnodes(&vq->vq_class[p].vqc_tree));
}

View file

@ -899,6 +899,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_orig_stage = zio->io_stage = stage;
zio->io_orig_pipeline = zio->io_pipeline = pipeline;
zio->io_pipeline_trace = ZIO_STAGE_OPEN;
zio->io_allocator = ZIO_ALLOCATOR_NONE;
zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY) ||
(pipeline & ZIO_STAGE_READY) == 0;
@ -2007,7 +2008,7 @@ zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline)
*/
ASSERT(taskq_empty_ent(&zio->io_tqent));
spa_taskq_dispatch_ent(spa, t, q, zio_execute, zio, flags,
&zio->io_tqent);
&zio->io_tqent, zio);
}
static boolean_t
@ -2032,8 +2033,8 @@ zio_taskq_member(zio_t *zio, zio_taskq_type_t q)
static zio_t *
zio_issue_async(zio_t *zio)
{
ASSERT((zio->io_type != ZIO_TYPE_WRITE) || ZIO_HAS_ALLOCATOR(zio));
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE);
return (NULL);
}
@ -2347,6 +2348,9 @@ zio_wait(zio_t *zio)
ASSERT0(zio->io_queued_timestamp);
zio->io_queued_timestamp = gethrtime();
if (zio->io_type == ZIO_TYPE_WRITE) {
spa_select_allocator(zio);
}
__zio_execute(zio);
mutex_enter(&zio->io_lock);
@ -2399,6 +2403,9 @@ zio_nowait(zio_t *zio)
ASSERT0(zio->io_queued_timestamp);
zio->io_queued_timestamp = gethrtime();
if (zio->io_type == ZIO_TYPE_WRITE) {
spa_select_allocator(zio);
}
__zio_execute(zio);
}
@ -2863,6 +2870,13 @@ zio_gang_issue(zio_t *zio)
return (zio);
}
static void
zio_gang_inherit_allocator(zio_t *pio, zio_t *cio)
{
cio->io_allocator = pio->io_allocator;
cio->io_wr_iss_tq = pio->io_wr_iss_tq;
}
static void
zio_write_gang_member_ready(zio_t *zio)
{
@ -2934,6 +2948,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
gbh_copies = MIN(2, spa_max_replication(spa));
}
ASSERT(ZIO_HAS_ALLOCATOR(pio));
int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
@ -2997,6 +3012,8 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
zio_write_gang_done, NULL, pio->io_priority,
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
zio_gang_inherit_allocator(pio, zio);
/*
* Create and nowait the gang children.
*/
@ -3027,6 +3044,8 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
zio_write_gang_done, &gn->gn_child[g], pio->io_priority,
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
zio_gang_inherit_allocator(zio, cio);
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(has_data);
@ -3539,6 +3558,7 @@ zio_io_to_allocate(spa_t *spa, int allocator)
return (NULL);
ASSERT(IO_IS_ALLOCATING(zio));
ASSERT(ZIO_HAS_ALLOCATOR(zio));
/*
* Try to place a reservation for this zio. If we're unable to
@ -3575,21 +3595,12 @@ zio_dva_throttle(zio_t *zio)
}
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
ASSERT(ZIO_HAS_ALLOCATOR(zio));
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
ASSERT3U(zio->io_queued_timestamp, >, 0);
ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE);
zbookmark_phys_t *bm = &zio->io_bookmark;
/*
* We want to try to use as many allocators as possible to help improve
* performance, but we also want logically adjacent IOs to be physically
* adjacent to improve sequential read performance. We chunk each object
* into 2^20 block regions, and then hash based on the objset, object,
* level, and region to accomplish both of these goals.
*/
int allocator = (uint_t)cityhash4(bm->zb_objset, bm->zb_object,
bm->zb_level, bm->zb_blkid >> 20) % spa->spa_alloc_count;
zio->io_allocator = allocator;
int allocator = zio->io_allocator;
zio->io_metaslab_class = mc;
mutex_enter(&spa->spa_allocs[allocator].spaa_lock);
avl_add(&spa->spa_allocs[allocator].spaa_tree, zio);
@ -3663,6 +3674,7 @@ zio_dva_allocate(zio_t *zio)
* sync write performance. If a log allocation fails, we will fall
* back to spa_sync() which is abysmal for performance.
*/
ASSERT(ZIO_HAS_ALLOCATOR(zio));
error = metaslab_alloc(spa, mc, zio->io_size, bp,
zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
&zio->io_alloc_list, zio, zio->io_allocator);
@ -4515,6 +4527,7 @@ zio_ready(zio_t *zio)
ASSERT(IO_IS_ALLOCATING(zio));
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(zio->io_metaslab_class != NULL);
ASSERT(ZIO_HAS_ALLOCATOR(zio));
/*
* We were unable to allocate anything, unreserve and
@ -4601,6 +4614,7 @@ zio_dva_throttle_done(zio_t *zio)
}
ASSERT(IO_IS_ALLOCATING(pio));
ASSERT(ZIO_HAS_ALLOCATOR(pio));
ASSERT3P(zio, !=, zio->io_logical);
ASSERT(zio->io_logical != NULL);
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
@ -4663,6 +4677,7 @@ zio_done(zio_t *zio)
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(zio->io_bp != NULL);
ASSERT(ZIO_HAS_ALLOCATOR(zio));
metaslab_group_alloc_verify(zio->io_spa, zio->io_bp, zio,
zio->io_allocator);
@ -4928,7 +4943,7 @@ zio_done(zio_t *zio)
ASSERT(taskq_empty_ent(&zio->io_tqent));
spa_taskq_dispatch_ent(zio->io_spa,
ZIO_TYPE_CLAIM, ZIO_TASKQ_ISSUE,
zio_reexecute, zio, 0, &zio->io_tqent);
zio_reexecute, zio, 0, &zio->io_tqent, NULL);
}
return (NULL);
}

View file

@ -24,6 +24,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildArch: noarch
Requires: dkms >= 2.2.0.3
Requires(pre): dkms >= 2.2.0.3
Requires(post): dkms >= 2.2.0.3
Requires(preun): dkms >= 2.2.0.3
Requires: gcc, make, perl, diffutils
@ -68,9 +69,92 @@ fi
%defattr(-,root,root)
/usr/src/%{module}-%{version}
%pre
echo "Running pre installation script: $0. Parameters: $*"
# We don't want any other versions lingering around in dkms.
# Tests with 'dnf' showed that in case of reinstall, or upgrade
# the preun scriptlet removed the version we are trying to install.
# Because of this, find all zfs dkms sources in /var/lib/dkms and
# remove them, if we find a matching version in dkms.
dkms_root=/var/lib/dkms
if [ -d ${dkms_root}/%{module} ]; then
cd ${dkms_root}/%{module}
for x in [[:digit:]]*; do
[ -d "$x" ] || continue
otherver="$x"
opath="${dkms_root}/%{module}/${otherver}"
if [ "$otherver" != %{version} ]; then
# This is a workaround for a broken 'dkms status', we caused in a previous version.
# One day it might be not needed anymore, but it does not hurt to keep it.
if dkms status -m %{module} -v "$otherver" 2>&1 | grep "${opath}/source/dkms.conf does not exist"
then
echo "ERROR: dkms status is broken!" >&2
if [ -L "${opath}/source" -a ! -d "${opath}/source" ]
then
echo "Trying to fix it by removing the symlink: ${opath}/source" >&2
echo "You should manually remove ${opath}" >&2
rm -f "${opath}/source" || echo "Removal failed!" >&2
fi
fi
if [ `dkms status -m %{module} -v "$otherver" | grep -c %{module}` -gt 0 ]; then
echo "Removing old %{module} dkms modules version $otherver from all kernels."
dkms remove -m %{module} -v "$otherver" --all ||:
fi
fi
done
fi
# Uninstall this version of zfs dkms modules before installation of the package.
if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
echo "Removing %{module} dkms modules version %{version} from all kernels."
dkms remove -m %{module} -v %{version} --all ||:
fi
%post
echo "Running post installation script: $0. Parameters: $*"
# Add the module to dkms, as reccommended in the dkms man page.
# This is generally rpm specfic.
# But this also may help, if we have a broken 'dkms status'.
# Because, if the sources are available and only the symlink pointing
# to them is missing, this will resolve the situation
echo "Adding %{module} dkms modules version %{version} to dkms."
dkms add -m %{module} -v %{version} %{!?not_rpm:--rpm_safe_upgrade} ||:
# After installing the package, dkms install this zfs version for the current kernel.
# Force the overwriting of old modules to avoid diff warnings in dkms status.
# Or in case of a downgrade to overwrite newer versions.
# Or if some other backed up versions have been restored before.
echo "Installing %{module} dkms modules version %{version} for the current kernel."
dkms install --force -m %{module} -v %{version} ||:
%preun
dkms remove -m %{module} -v %{version} --all
dkms_root="/var/lib/dkms/%{module}/%{version}"
echo "Running pre uninstall script: $0. Parameters: $*"
# In case of upgrade we do nothing. See above comment in pre hook.
if [ "$1" = "1" -o "$1" = "upgrade" ] ; then
echo "This is an upgrade. Skipping pre uninstall action."
exit 0
fi
%posttrans
/usr/lib/dkms/common.postinst %{module} %{version}
# Check if we uninstall the package. In that case remove the dkms modules.
# '0' is the value for the first parameter for rpm packages.
# 'remove' or 'purge' are the possible names for deb packages.
if [ "$1" = "0" -o "$1" = "remove" -o "$1" = "purge" ] ; then
if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
echo "Removing %{module} dkms modules version %{version} from all kernels."
dkms remove -m %{module} -v %{version} --all %{!?not_rpm:--rpm_safe_upgrade} && exit 0
fi
# If removing the modules failed, it might be because of the broken 'dkms status'.
if dkms status -m %{module} -v %{version} 2>&1 | grep "${dkms_root}/source/dkms.conf does not exist"
then
echo "ERROR: dkms status is broken!" >&2
echo "You should manually remove ${dkms_root}" >&2
echo "WARNING: installed modules in /lib/modules/`uname -r`/extra could not be removed automatically!" >&2
fi
else
echo "Script parameter $1 did not match any removal condition."
fi
exit 0

View file

@ -1110,7 +1110,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.2.99-184-FreeBSD_g41e55b476"
#define ZFS_META_ALIAS "zfs-2.2.99-197-FreeBSD_g9198de8f1"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -1140,7 +1140,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "184-FreeBSD_g41e55b476"
#define ZFS_META_RELEASE "197-FreeBSD_g9198de8f1"
/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"

View file

@ -1 +1 @@
#define ZFS_META_GITREV "zfs-2.2.99-184-g41e55b476"
#define ZFS_META_GITREV "zfs-2.2.99-197-g9198de8f1"