From dd120f9e9d9250602a74e2963dfbd0bc4418dbbc Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Wed, 20 Nov 2013 11:02:18 +0000 Subject: [PATCH 1/9] 4248 dtrace(1M) should never create DOF with empty probes section 4249 Only probes from the first DTrace object file will be included illumos/illumos-gate@54a20ab41aadcb81c53e72fc65886e964e9add59 --- lib/libdtrace/common/dt_dof.c | 22 +++++++++++++++++----- lib/libdtrace/common/dt_error.c | 4 +++- lib/libdtrace/common/dt_impl.h | 5 +++-- uts/common/dtrace/dtrace.c | 6 +++--- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/lib/libdtrace/common/dt_dof.c b/lib/libdtrace/common/dt_dof.c index 04c4c89cdbd..c1f5dc827e2 100644 --- a/lib/libdtrace/common/dt_dof.c +++ b/lib/libdtrace/common/dt_dof.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ #include @@ -482,7 +483,7 @@ dof_add_probe(dt_idhash_t *dhp, dt_ident_t *idp, void *data) return (0); } -static void +static int dof_add_provider(dt_dof_t *ddo, const dt_provider_t *pvp) { dtrace_hdl_t *dtp = ddo->ddo_hdl; @@ -493,8 +494,12 @@ dof_add_provider(dt_dof_t *ddo, const dt_provider_t *pvp) size_t sz; id_t i; - if (pvp->pv_flags & DT_PROVIDER_IMPL) - return; /* ignore providers that are exported by dtrace(7D) */ + if (pvp->pv_flags & DT_PROVIDER_IMPL) { + /* + * ignore providers that are exported by dtrace(7D) + */ + return (0); + } nxr = dt_popcb(pvp->pv_xrefs, pvp->pv_xrmax); dofs = alloca(sizeof (dof_secidx_t) * (nxr + 1)); @@ -521,6 +526,9 @@ dof_add_provider(dt_dof_t *ddo, const dt_provider_t *pvp) (void) dt_idhash_iter(pvp->pv_probes, dof_add_probe, ddo); + if (dt_buf_len(&ddo->ddo_probes) == 0) + return (dt_set_errno(dtp, EDT_NOPROBES)); + dofpv.dofpv_probes = dof_add_lsect(ddo, NULL, DOF_SECT_PROBES, sizeof (uint64_t), 0, sizeof (dof_probe_t), dt_buf_len(&ddo->ddo_probes)); @@ -575,6 +583,8 @@ dof_add_provider(dt_dof_t *ddo, const dt_provider_t *pvp) sizeof (dof_secidx_t), 0, sizeof (dof_secidx_t), sizeof (dof_secidx_t) * (nxr + 1)); } + + return (0); } static int @@ -818,8 +828,10 @@ dtrace_dof_create(dtrace_hdl_t *dtp, dtrace_prog_t *pgp, uint_t flags) */ if (flags & DTRACE_D_PROBES) { for (pvp = dt_list_next(&dtp->dt_provlist); - pvp != NULL; pvp = dt_list_next(pvp)) - dof_add_provider(ddo, pvp); + pvp != NULL; pvp = dt_list_next(pvp)) { + if (dof_add_provider(ddo, pvp) != 0) + return (NULL); + } } /* diff --git a/lib/libdtrace/common/dt_error.c b/lib/libdtrace/common/dt_error.c index 9c1cbd73bce..f7ad28ebae6 100644 --- a/lib/libdtrace/common/dt_error.c +++ b/lib/libdtrace/common/dt_error.c @@ -26,6 +26,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ #include @@ -108,7 +109,8 @@ static const struct { { EDT_BADSTACKPC, "Invalid stack program counter size" }, { EDT_BADAGGVAR, "Invalid aggregation variable identifier" }, { EDT_OVERSION, "Client requested deprecated version of library" }, - { EDT_ENABLING_ERR, "Failed to enable probe" } + { EDT_ENABLING_ERR, "Failed to enable probe" }, + { EDT_NOPROBES, "No probe sites found for declared provider" } }; static const int _dt_nerr = sizeof (_dt_errlist) / sizeof (_dt_errlist[0]); diff --git a/lib/libdtrace/common/dt_impl.h b/lib/libdtrace/common/dt_impl.h index fa4c66540d4..fc275e6805e 100644 --- a/lib/libdtrace/common/dt_impl.h +++ b/lib/libdtrace/common/dt_impl.h @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2011, Joyent, Inc. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ @@ -512,7 +512,8 @@ enum { EDT_BADSTACKPC, /* invalid stack program counter size */ EDT_BADAGGVAR, /* invalid aggregation variable identifier */ EDT_OVERSION, /* client is requesting deprecated version */ - EDT_ENABLING_ERR /* failed to enable probe */ + EDT_ENABLING_ERR, /* failed to enable probe */ + EDT_NOPROBES /* no probes sites for declared provider */ }; /* diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c index 4bee43839fd..c3b3311e0b8 100644 --- a/uts/common/dtrace/dtrace.c +++ b/uts/common/dtrace/dtrace.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, Joyent, Inc. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ @@ -13941,8 +13941,8 @@ dtrace_helper_provider_add(dof_helper_t *dofhp, int gen) * Check to make sure this isn't a duplicate. */ for (i = 0; i < help->dthps_nprovs; i++) { - if (dofhp->dofhp_addr == - help->dthps_provs[i]->dthp_prov.dofhp_addr) + if (dofhp->dofhp_dof == + help->dthps_provs[i]->dthp_prov.dofhp_dof) return (EALREADY); } From 8ad7d49ec5f43f95ca152a89fde1f2f2cbd6fa34 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Wed, 20 Nov 2013 11:03:11 +0000 Subject: [PATCH 2/9] 4236 Internet Packet Disturber 4237 net_* and hook_* man pages are wrong illumos/illumos-gate@fe77cc0407fb667ddc04e1a8f2e203bb7b9c80e1 --- uts/common/Makefile.files | 2 ++ 1 file changed, 2 insertions(+) diff --git a/uts/common/Makefile.files b/uts/common/Makefile.files index 075432f012f..e49ffe089bd 100644 --- a/uts/common/Makefile.files +++ b/uts/common/Makefile.files @@ -1751,6 +1751,8 @@ IPF_OBJS += ip_fil_solaris.o fil.o solaris.o ip_state.o ip_frag.o ip_nat.o \ ip_proxy.o ip_auth.o ip_pool.o ip_htable.o ip_lookup.o \ ip_log.o misc.o ip_compat.o ip_nat6.o drand48.o +IPD_OBJS += ipd.o + IBD_OBJS += ibd.o ibd_cm.o EIBNX_OBJS += enx_main.o enx_hdlrs.o enx_ibt.o enx_log.o enx_fip.o \ From 7df1fb6a6ef5a86c4e83eba2442e91b6b14c7b17 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Wed, 20 Nov 2013 11:04:14 +0000 Subject: [PATCH 3/9] 4251 libdtrace leaks open file handles 4250 libdtrace should use "F" specifier on fopen() illumos/illumos-gate@93ed8d0d4b068b95d0bb50d57bb854df462a8485 --- lib/libdtrace/common/dt_cc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/libdtrace/common/dt_cc.c b/lib/libdtrace/common/dt_cc.c index 9661ad9d0b4..f4b0509b4a8 100644 --- a/lib/libdtrace/common/dt_cc.c +++ b/lib/libdtrace/common/dt_cc.c @@ -2149,7 +2149,7 @@ dt_load_libs_dir(dtrace_hdl_t *dtp, const char *path) (void) snprintf(fname, sizeof (fname), "%s/%s", path, dp->d_name); - if ((fp = fopen(fname, "r")) == NULL) { + if ((fp = fopen(fname, "rF")) == NULL) { dt_dprintf("skipping library %s: %s\n", fname, strerror(errno)); continue; @@ -2171,12 +2171,15 @@ dt_load_libs_dir(dtrace_hdl_t *dtp, const char *path) dt_dprintf("skipping library %s, already processed " "library with the same name: %s", dp->d_name, dld->dtld_library); + (void) fclose(fp); continue; } dtp->dt_filetag = fname; - if (dt_lib_depend_add(dtp, &dtp->dt_lib_dep, fname) != 0) + if (dt_lib_depend_add(dtp, &dtp->dt_lib_dep, fname) != 0) { + (void) fclose(fp); return (-1); /* preserve dt_errno */ + } rv = dt_compile(dtp, DT_CTX_DPROG, DTRACE_PROBESPEC_NAME, NULL, @@ -2184,8 +2187,10 @@ dt_load_libs_dir(dtrace_hdl_t *dtp, const char *path) if (rv != NULL && dtp->dt_errno && (dtp->dt_errno != EDT_COMPILER || - dtp->dt_errtag != dt_errtag(D_PRAGMA_DEPEND))) + dtp->dt_errtag != dt_errtag(D_PRAGMA_DEPEND))) { + (void) fclose(fp); return (-1); /* preserve dt_errno */ + } if (dtp->dt_errno) dt_dprintf("error parsing library %s: %s\n", From 12ad6bef9bb91a33e42c894b8e23300e26828efa Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Wed, 20 Nov 2013 11:05:07 +0000 Subject: [PATCH 4/9] 4161 deadlock between zfs_read() and zfs_putpage() illumos/illumos-gate@b3d32f0ceb59362ba287dcfd6de471e98bfc7fa9 --- lib/libzpool/common/kernel.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/libzpool/common/kernel.c b/lib/libzpool/common/kernel.c index 4dd614f7c1b..bf7042b15db 100644 --- a/lib/libzpool/common/kernel.c +++ b/lib/libzpool/common/kernel.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ #include @@ -221,10 +222,10 @@ rw_enter(krwlock_t *rwlp, krw_t rw) ASSERT(rwlp->rw_owner != (void *)-1UL); ASSERT(rwlp->rw_owner != curthread); - if (rw == RW_READER) - VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); - else + if (rw == RW_WRITER) VERIFY(rw_wrlock(&rwlp->rw_lock) == 0); + else + VERIFY(rw_rdlock(&rwlp->rw_lock) == 0); rwlp->rw_owner = curthread; } @@ -247,10 +248,10 @@ rw_tryenter(krwlock_t *rwlp, krw_t rw) ASSERT(rwlp->initialized == B_TRUE); ASSERT(rwlp->rw_owner != (void *)-1UL); - if (rw == RW_READER) - rv = rw_tryrdlock(&rwlp->rw_lock); - else + if (rw == RW_WRITER) rv = rw_trywrlock(&rwlp->rw_lock); + else + rv = rw_tryrdlock(&rwlp->rw_lock); if (rv == 0) { rwlp->rw_owner = curthread; From 491ececfb91da7049751346d03bcbf10291b62d0 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Wed, 20 Nov 2013 11:06:02 +0000 Subject: [PATCH 5/9] 3928 `tail -f ...` doesn't notice file truncation 3929 `man tail` doesn't mentioned "-F" option 3930 'tail -F ...' not resetting the offset of file rotation properly 3968 want FILE_TRUNC event for PORT_SOURCE_FILE illumos/illumos-gate@72102e7461c97dc268d21d9dd8f02da45f174acd --- uts/common/fs/vnode.c | 10 ++++++++++ uts/common/fs/zfs/zfs_vnops.c | 6 ++++++ uts/common/sys/vnode.h | 5 ++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/uts/common/fs/vnode.c b/uts/common/fs/vnode.c index 382369c7fc7..d67d1642868 100644 --- a/uts/common/fs/vnode.c +++ b/uts/common/fs/vnode.c @@ -21,6 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -2582,6 +2583,15 @@ vnevent_mountedover(vnode_t *vp, caller_context_t *ct) (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct); } +void +vnevent_truncate(vnode_t *vp, caller_context_t *ct) +{ + if (vp == NULL || vp->v_femhead == NULL) { + return; + } + (void) VOP_VNEVENT(vp, VE_TRUNCATE, NULL, NULL, ct); +} + /* * Vnode accessors. */ diff --git a/uts/common/fs/zfs/zfs_vnops.c b/uts/common/fs/zfs/zfs_vnops.c index 6fa1d837008..78e2a8cce5a 100644 --- a/uts/common/fs/zfs/zfs_vnops.c +++ b/uts/common/fs/zfs/zfs_vnops.c @@ -2756,6 +2756,9 @@ top: ZFS_EXIT(zfsvfs); return (err); } + + if (vap->va_size == 0) + vnevent_truncate(ZTOV(zp), ct); } if (mask & (AT_ATIME|AT_MTIME) || @@ -4770,6 +4773,9 @@ zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, error = zfs_freesp(zp, off, len, flag, TRUE); + if (error == 0 && off == 0 && len == 0) + vnevent_truncate(ZTOV(zp), ct); + ZFS_EXIT(zfsvfs); return (error); } diff --git a/uts/common/sys/vnode.h b/uts/common/sys/vnode.h index d29152346e2..af9516fe52f 100644 --- a/uts/common/sys/vnode.h +++ b/uts/common/sys/vnode.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -733,7 +734,8 @@ typedef enum vnevent { VE_CREATE = 5, /* Create with vnode's name which exists */ VE_LINK = 6, /* Link with vnode's name as source */ VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */ - VE_MOUNTEDOVER = 8 /* File or Filesystem got mounted over vnode */ + VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */ + VE_TRUNCATE = 9 /* Truncate */ } vnevent_t; /* @@ -1290,6 +1292,7 @@ void vnevent_create(vnode_t *, caller_context_t *); void vnevent_link(vnode_t *, caller_context_t *); void vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct); void vnevent_mountedover(vnode_t *, caller_context_t *); +void vnevent_truncate(vnode_t *, caller_context_t *); int vnevent_support(vnode_t *, caller_context_t *); /* Vnode specific data */ From 4f662c9721dc32d7be27a1f4bc153a775d33b889 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Wed, 20 Nov 2013 11:06:57 +0000 Subject: [PATCH 6/9] 2583 Add -p (parsable) option to zfs list illumos/illumos-gate@43d68d68c1ce08fb35026bebfb141af422e7082e --- cmd/zfs/zfs_iter.c | 5 ++- cmd/zfs/zfs_iter.h | 3 ++ cmd/zfs/zfs_main.c | 71 +++++++++++++++--------------- lib/libzfs/common/libzfs.h | 5 ++- lib/libzfs/common/libzfs_dataset.c | 13 +++--- man/man1m/zfs.1m | 34 +++++++------- 6 files changed, 68 insertions(+), 63 deletions(-) diff --git a/cmd/zfs/zfs_iter.c b/cmd/zfs/zfs_iter.c index e2ab90eaf14..7599ff2bb5e 100644 --- a/cmd/zfs/zfs_iter.c +++ b/cmd/zfs/zfs_iter.c @@ -18,8 +18,10 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ #include @@ -107,7 +109,8 @@ zfs_callback(zfs_handle_t *zhp, void *data) cb->cb_props_table); if (zfs_expand_proplist(zhp, cb->cb_proplist, - (cb->cb_flags & ZFS_ITER_RECVD_PROPS)) + (cb->cb_flags & ZFS_ITER_RECVD_PROPS), + (cb->cb_flags & ZFS_ITER_LITERAL_PROPS)) != 0) { free(node); return (-1); diff --git a/cmd/zfs/zfs_iter.h b/cmd/zfs/zfs_iter.h index 8c6b9fdef54..d896d7cf57a 100644 --- a/cmd/zfs/zfs_iter.h +++ b/cmd/zfs/zfs_iter.h @@ -18,9 +18,11 @@ * * CDDL HEADER END */ + /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ #ifndef ZFS_ITER_H @@ -43,6 +45,7 @@ typedef struct zfs_sort_column { #define ZFS_ITER_PROP_LISTSNAPS (1 << 2) #define ZFS_ITER_DEPTH_LIMIT (1 << 3) #define ZFS_ITER_RECVD_PROPS (1 << 4) +#define ZFS_ITER_LITERAL_PROPS (1 << 5) int zfs_for_each(int, char **, int options, zfs_type_t, zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *); diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 68c42eda521..14eb55ecb9f 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -21,11 +21,11 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2012 Milan Jurik. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ #include @@ -232,9 +232,8 @@ get_usage(zfs_help_t idx) return (gettext("\tupgrade [-v]\n" "\tupgrade [-r] [-V version] <-a | filesystem ...>\n")); case HELP_LIST: - return (gettext("\tlist [-rH][-d max] " - "[-o property[,...]] [-t type[,...]] [-s property] ...\n" - "\t [-S property] ... " + return (gettext("\tlist [-Hp] [-r|-d max] [-o property[,...]] " + "[-s property]...\n\t [-S property]... [-t type[,...]] " "[filesystem|volume|snapshot] ...\n")); case HELP_MOUNT: return (gettext("\tmount\n" @@ -291,12 +290,12 @@ get_usage(zfs_help_t idx) "\n")); case HELP_USERSPACE: return (gettext("\tuserspace [-Hinp] [-o field[,...]] " - "[-s field] ...\n\t[-S field] ... " - "[-t type[,...]] \n")); + "[-s field]...\n\t [-S field]... [-t type[,...]] " + "\n")); case HELP_GROUPSPACE: return (gettext("\tgroupspace [-Hinp] [-o field[,...]] " - "[-s field] ...\n\t[-S field] ... " - "[-t type[,...]] \n")); + "[-s field]...\n\t [-S field]... [-t type[,...]] " + "\n")); case HELP_HOLD: return (gettext("\thold [-r] ...\n")); case HELP_HOLDS: @@ -2104,7 +2103,7 @@ zfs_do_upgrade(int argc, char **argv) * -i Translate SID to POSIX ID. * -n Print numeric ID instead of user/group name. * -o Control which fields to display. - * -p Use exact (parseable) numeric output. + * -p Use exact (parsable) numeric output. * -s Specify sort columns, descending order. * -S Specify sort columns, ascending order. * -t Control which object types to display. @@ -2782,24 +2781,25 @@ zfs_do_userspace(int argc, char **argv) } /* - * list [-r][-d max] [-H] [-o property[,property]...] [-t type[,type]...] - * [-s property [-s property]...] [-S property [-S property]...] - * ... + * list [-Hp][-r|-d max] [-o property[,...]] [-s property] ... [-S property] ... + * [-t type[,...]] [filesystem|volume|snapshot] ... * - * -r Recurse over all children + * -H Scripted mode; elide headers and separate columns by tabs. + * -p Display values in parsable (literal) format. + * -r Recurse over all children. * -d Limit recursion by depth. - * -H Scripted mode; elide headers and separate columns by tabs * -o Control which fields to display. - * -t Control which object types to display. * -s Specify sort columns, descending order. * -S Specify sort columns, ascending order. + * -t Control which object types to display. * - * When given no arguments, lists all filesystems in the system. + * When given no arguments, list all filesystems in the system. * Otherwise, list the specified datasets, optionally recursing down them if * '-r' is specified. */ typedef struct list_cbdata { boolean_t cb_first; + boolean_t cb_literal; boolean_t cb_scripted; zprop_list_t *cb_proplist; } list_cbdata_t; @@ -2808,8 +2808,9 @@ typedef struct list_cbdata { * Given a list of columns to display, output appropriate headers for each one. */ static void -print_header(zprop_list_t *pl) +print_header(list_cbdata_t *cb) { + zprop_list_t *pl = cb->cb_proplist; char headerbuf[ZFS_MAXPROPLEN]; const char *header; int i; @@ -2850,19 +2851,19 @@ print_header(zprop_list_t *pl) * to the described layout. */ static void -print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted) +print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) { + zprop_list_t *pl = cb->cb_proplist; boolean_t first = B_TRUE; char property[ZFS_MAXPROPLEN]; nvlist_t *userprops = zfs_get_user_props(zhp); nvlist_t *propval; char *propstr; boolean_t right_justify; - int width; for (; pl != NULL; pl = pl->pl_next) { if (!first) { - if (scripted) + if (cb->cb_scripted) (void) printf("\t"); else (void) printf(" "); @@ -2872,22 +2873,22 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted) if (pl->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, pl->pl_prop, property, - sizeof (property), NULL, NULL, 0, B_FALSE) != 0) + sizeof (property), NULL, NULL, 0, + cb->cb_literal) != 0) propstr = "-"; else propstr = property; - right_justify = zfs_prop_align_right(pl->pl_prop); } else if (zfs_prop_userquota(pl->pl_user_prop)) { if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, - property, sizeof (property), B_FALSE) != 0) + property, sizeof (property), cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = B_TRUE; } else if (zfs_prop_written(pl->pl_user_prop)) { if (zfs_prop_get_written(zhp, pl->pl_user_prop, - property, sizeof (property), B_FALSE) != 0) + property, sizeof (property), cb->cb_literal) != 0) propstr = "-"; else propstr = property; @@ -2902,19 +2903,17 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted) right_justify = B_FALSE; } - width = pl->pl_width; - /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. */ - if (scripted || (pl->pl_next == NULL && !right_justify)) + if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) (void) printf("%s", propstr); else if (right_justify) - (void) printf("%*s", width, propstr); + (void) printf("%*s", pl->pl_width, propstr); else - (void) printf("%-*s", width, propstr); + (void) printf("%-*s", pl->pl_width, propstr); } (void) printf("\n"); @@ -2930,11 +2929,11 @@ list_callback(zfs_handle_t *zhp, void *data) if (cbp->cb_first) { if (!cbp->cb_scripted) - print_header(cbp->cb_proplist); + print_header(cbp); cbp->cb_first = B_FALSE; } - print_dataset(zhp, cbp->cb_proplist, cbp->cb_scripted); + print_dataset(zhp, cbp); return (0); } @@ -2943,7 +2942,6 @@ static int zfs_do_list(int argc, char **argv) { int c; - boolean_t scripted = B_FALSE; static char default_fields[] = "name,used,available,referenced,mountpoint"; int types = ZFS_TYPE_DATASET; @@ -2957,11 +2955,15 @@ zfs_do_list(int argc, char **argv) int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ - while ((c = getopt(argc, argv, ":d:o:rt:Hs:S:")) != -1) { + while ((c = getopt(argc, argv, "HS:d:o:prs:t:")) != -1) { switch (c) { case 'o': fields = optarg; break; + case 'p': + cb.cb_literal = B_TRUE; + flags |= ZFS_ITER_LITERAL_PROPS; + break; case 'd': limit = parse_depth(optarg, &flags); break; @@ -2969,7 +2971,7 @@ zfs_do_list(int argc, char **argv) flags |= ZFS_ITER_RECURSE; break; case 'H': - scripted = B_TRUE; + cb.cb_scripted = B_TRUE; break; case 's': if (zfs_add_sort_column(&sortcol, optarg, @@ -3051,7 +3053,6 @@ zfs_do_list(int argc, char **argv) != 0) usage(B_FALSE); - cb.cb_scripted = scripted; cb.cb_first = B_TRUE; ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, diff --git a/lib/libzfs/common/libzfs.h b/lib/libzfs/common/libzfs.h index cb84b62c34e..3bf7e5c2a01 100644 --- a/lib/libzfs/common/libzfs.h +++ b/lib/libzfs/common/libzfs.h @@ -21,10 +21,10 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ #ifndef _LIBZFS_H @@ -458,7 +458,8 @@ typedef struct zprop_list { boolean_t pl_fixed; } zprop_list_t; -extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t); +extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t, + boolean_t); extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); #define ZFS_MOUNTPOINT_NONE "none" diff --git a/lib/libzfs/common/libzfs_dataset.c b/lib/libzfs/common/libzfs_dataset.c index e9359bc2c65..2eb4190b3a4 100644 --- a/lib/libzfs/common/libzfs_dataset.c +++ b/lib/libzfs/common/libzfs_dataset.c @@ -23,9 +23,9 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. - * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ #include @@ -3798,7 +3798,8 @@ zfs_get_recvd_props(zfs_handle_t *zhp) * of the RECEIVED column. */ int -zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received) +zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received, + boolean_t literal) { libzfs_handle_t *hdl = zhp->zfs_hdl; zprop_list_t *entry; @@ -3860,18 +3861,18 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received) * Now go through and check the width of any non-fixed columns */ for (entry = *plp; entry != NULL; entry = entry->pl_next) { - if (entry->pl_fixed) + if (entry->pl_fixed && !literal) continue; if (entry->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, entry->pl_prop, - buf, sizeof (buf), NULL, NULL, 0, B_FALSE) == 0) { + buf, sizeof (buf), NULL, NULL, 0, literal) == 0) { if (strlen(buf) > entry->pl_width) entry->pl_width = strlen(buf); } if (received && zfs_prop_get_recvd(zhp, zfs_prop_to_name(entry->pl_prop), - buf, sizeof (buf), B_FALSE) == 0) + buf, sizeof (buf), literal) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } else { @@ -3884,7 +3885,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received) } if (received && zfs_prop_get_recvd(zhp, entry->pl_user_prop, - buf, sizeof (buf), B_FALSE) == 0) + buf, sizeof (buf), literal) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } diff --git a/man/man1m/zfs.1m b/man/man1m/zfs.1m index 9ab3ef473dd..2abb0d13a98 100644 --- a/man/man1m/zfs.1m +++ b/man/man1m/zfs.1m @@ -27,7 +27,7 @@ .\" Copyright 2013 Nexenta Systems, Inc. All Rights Reserved. .\" Copyright (c) 2013, Joyent, Inc. All rights reserved. .\" -.TH ZFS 1M "Jan 26, 2013" +.TH ZFS 1M "Oct 16, 2013" .SH NAME zfs \- configures ZFS file systems .SH SYNOPSIS @@ -95,7 +95,7 @@ zfs \- configures ZFS file systems .LP .nf -\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-H\fR][\fB-o\fR \fIproperty\fR[,\fIproperty\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]...] +\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIproperty\fR[,\fIproperty\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]...] [\fB-s\fR \fIproperty\fR]... [\fB-S\fR \fIproperty\fR]... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR]... .fi @@ -248,7 +248,6 @@ zfs \- configures ZFS file systems \fBzfs\fR \fBdiff\fR [\fB-FHt\fR] \fIsnapshot\fR \fIsnapshot|filesystem\fR .SH DESCRIPTION -.sp .LP The \fBzfs\fR command configures \fBZFS\fR datasets within a \fBZFS\fR storage pool, as described in \fBzpool\fR(1M). A dataset is identified by a unique path @@ -306,7 +305,6 @@ specified as \fIfilesystem@name\fR or \fIvolume@name\fR. .RE .SS "ZFS File System Hierarchy" -.sp .LP A \fBZFS\fR storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the \fBZFS\fR file system @@ -320,7 +318,6 @@ characteristics, however, are managed by the \fBzpool\fR(1M) command. .LP See \fBzpool\fR(1M) for more information on creating and administering pools. .SS "Snapshots" -.sp .LP A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the @@ -337,7 +334,6 @@ in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the \fB\&.zfs\fR directory can be controlled by the \fBsnapdir\fR property. .SS "Clones" -.sp .LP A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly @@ -357,7 +353,6 @@ The clone parent-child dependency relationship can be reversed by using the clone of the specified file system, which makes it possible to destroy the file system that the clone was created from. .SS "Mount Points" -.sp .LP Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system is likely to be numerous. To cope with this, \fBZFS\fR @@ -390,7 +385,6 @@ is set to \fBlegacy\fR, \fBZFS\fR makes no attempt to manage the file system, and the administrator is responsible for mounting and unmounting the file system. .SS "Zones" -.sp .LP A \fBZFS\fR file system can be added to a non-global zone by using the \fBzonecfg\fR \fBadd fs\fR subcommand. A \fBZFS\fR file system that is added to @@ -428,7 +422,6 @@ The global administrator can forcibly clear the \fBzoned\fR property, though this should be done with extreme care. The global administrator should verify that all the mount points are acceptable before clearing the property. .SS "Native Properties" -.sp .LP Properties are divided into two types, native properties and user-defined (or "user") properties. Native properties either export internal statistics or @@ -1536,7 +1529,6 @@ The \fBcasesensitivity\fR, \fBnormalization\fR, and \fButf8only\fR properties are also new permissions that can be assigned to non-privileged users by using the \fBZFS\fR delegated administration feature. .SS "Temporary Mount Point Properties" -.sp .LP When a file system is mounted, either through \fBmount\fR(1M) for legacy mounts or the \fBzfs mount\fR command for normal file systems, its mount options are @@ -1565,7 +1557,6 @@ are reported as "temporary" by the \fBzfs get\fR command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings. .SS "User Properties" -.sp .LP In addition to the standard native properties, \fBZFS\fR supports arbitrary user properties. User properties have no effect on \fBZFS\fR behavior, but @@ -1598,7 +1589,6 @@ to clear a user property . If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 1024 characters. .SS "ZFS Volumes as Swap or Dump Devices" -.sp .LP During an initial installation a swap device and dump device are created on \fBZFS\fR volumes in the \fBZFS\fR root pool. By default, the swap area size is @@ -1614,7 +1604,6 @@ installed or upgraded, use the \fBswap\fR(1M) and \fBdumpadm\fR(1M) commands. If you need to change the size of your swap area or dump device, see the \fISolaris ZFS Administration Guide\fR. .SH SUBCOMMANDS -.sp .LP All subcommands that modify state are logged persistently to the pool in their original form. @@ -2117,7 +2106,7 @@ only dataset that can be renamed recursively. .sp .ne 2 .na -\fB\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-H\fR] [\fB-o\fR +\fB\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-Hp\fR] [\fB-o\fR \fIproperty\fR[,\fIproperty\fR]...] [ \fB-t\fR \fItype\fR[,\fItype\fR]...] [ \fB-s\fR \fIproperty\fR ]... [ \fB-S\fR \fIproperty\fR ]... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR]...\fR @@ -2141,6 +2130,16 @@ Used for scripting mode. Do not print headers and separate fields by a single tab instead of arbitrary white space. .RE +.sp +.ne 2 +.na +\fB\fB-p\fR\fR +.ad +.sp .6 +.RS 4n +Display numbers in parsable (exact) values. +.RE + .sp .ne 2 .na @@ -2380,7 +2379,7 @@ is all sources. .ad .sp .6 .RS 4n -Display numbers in parseable (exact) values. +Display numbers in parsable (exact) values. .RE .RE @@ -3392,7 +3391,7 @@ F Regular file .ad .sp .6 .RS 4n -Give more parseable tab-separated output, without header lines and without arrows. +Give more parsable tab-separated output, without header lines and without arrows. .RE .sp .ne 2 @@ -3953,7 +3952,6 @@ M F /tank/test/modified .sp .SH EXIT STATUS -.sp .LP The following exit values are returned: .sp @@ -3987,7 +3985,6 @@ Invalid command line options were specified. .RE .SH ATTRIBUTES -.sp .LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp @@ -4003,7 +4000,6 @@ Interface Stability Committed .TE .SH SEE ALSO -.sp .LP \fBssh\fR(1), \fBiscsitadm\fR(1M), \fBmount\fR(1M), \fBshare\fR(1M), \fBsharemgr\fR(1M), \fBunshare\fR(1M), \fBzonecfg\fR(1M), \fBzpool\fR(1M), From 0ec97988142dfa9b61795254e0cbb3c8942124d6 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Wed, 20 Nov 2013 11:08:07 +0000 Subject: [PATCH 7/9] 4128 disks in zpools never go away when pulled illumos/illumos-gate@39cddb10a31c1c2e66aed69e6871d09caa4c8147 --- uts/common/fs/zfs/sys/vdev_disk.h | 3 + uts/common/fs/zfs/vdev_disk.c | 245 +++++++++++++++++++++++++++--- 2 files changed, 230 insertions(+), 18 deletions(-) diff --git a/uts/common/fs/zfs/sys/vdev_disk.h b/uts/common/fs/zfs/sys/vdev_disk.h index 9055d0ed656..61e2f273f0a 100644 --- a/uts/common/fs/zfs/sys/vdev_disk.h +++ b/uts/common/fs/zfs/sys/vdev_disk.h @@ -22,6 +22,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright (c) 2013 Joyent, Inc. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_VDEV_DISK_H @@ -44,6 +45,8 @@ typedef struct vdev_disk { ddi_devid_t vd_devid; char *vd_minor; ldi_handle_t vd_lh; + list_t vd_ldi_cbs; + boolean_t vd_ldi_offline; } vdev_disk_t; #endif diff --git a/uts/common/fs/zfs/vdev_disk.c b/uts/common/fs/zfs/vdev_disk.c index d9e9357658b..5cc9d9f226b 100644 --- a/uts/common/fs/zfs/vdev_disk.c +++ b/uts/common/fs/zfs/vdev_disk.c @@ -22,7 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. - * Copyright 2013 Joyent, Inc. All rights reserved. + * Copyright (c) 2013 Joyent, Inc. All rights reserved. */ #include @@ -42,6 +42,146 @@ extern ldi_ident_t zfs_li; +static void vdev_disk_close(vdev_t *); + +typedef struct vdev_disk_ldi_cb { + list_node_t lcb_next; + ldi_callback_id_t lcb_id; +} vdev_disk_ldi_cb_t; + +static void +vdev_disk_alloc(vdev_t *vd) +{ + vdev_disk_t *dvd; + + dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); + /* + * Create the LDI event callback list. + */ + list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t), + offsetof(vdev_disk_ldi_cb_t, lcb_next)); +} + +static void +vdev_disk_free(vdev_t *vd) +{ + vdev_disk_t *dvd = vd->vdev_tsd; + vdev_disk_ldi_cb_t *lcb; + + if (dvd == NULL) + return; + + /* + * We have already closed the LDI handle. Clean up the LDI event + * callbacks and free vd->vdev_tsd. + */ + while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) { + list_remove(&dvd->vd_ldi_cbs, lcb); + (void) ldi_ev_remove_callbacks(lcb->lcb_id); + kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t)); + } + list_destroy(&dvd->vd_ldi_cbs); + kmem_free(dvd, sizeof (vdev_disk_t)); + vd->vdev_tsd = NULL; +} + +/* ARGSUSED */ +static int +vdev_disk_off_notify(ldi_handle_t lh, ldi_ev_cookie_t ecookie, void *arg, + void *ev_data) +{ + vdev_t *vd = (vdev_t *)arg; + vdev_disk_t *dvd = vd->vdev_tsd; + + /* + * Ignore events other than offline. + */ + if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0) + return (LDI_EV_SUCCESS); + + /* + * All LDI handles must be closed for the state change to succeed, so + * call on vdev_disk_close() to do this. + * + * We inform vdev_disk_close that it is being called from offline + * notify context so it will defer cleanup of LDI event callbacks and + * freeing of vd->vdev_tsd to the offline finalize or a reopen. + */ + dvd->vd_ldi_offline = B_TRUE; + vdev_disk_close(vd); + + /* + * Now that the device is closed, request that the spa_async_thread + * mark the device as REMOVED and notify FMA of the removal. + */ + zfs_post_remove(vd->vdev_spa, vd); + vd->vdev_remove_wanted = B_TRUE; + spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE); + + return (LDI_EV_SUCCESS); +} + +/* ARGSUSED */ +static void +vdev_disk_off_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie, + int ldi_result, void *arg, void *ev_data) +{ + vdev_t *vd = (vdev_t *)arg; + + /* + * Ignore events other than offline. + */ + if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0) + return; + + /* + * We have already closed the LDI handle in notify. + * Clean up the LDI event callbacks and free vd->vdev_tsd. + */ + vdev_disk_free(vd); + + /* + * Request that the vdev be reopened if the offline state change was + * unsuccessful. + */ + if (ldi_result != LDI_EV_SUCCESS) { + vd->vdev_probe_wanted = B_TRUE; + spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE); + } +} + +static ldi_ev_callback_t vdev_disk_off_callb = { + .cb_vers = LDI_EV_CB_VERS, + .cb_notify = vdev_disk_off_notify, + .cb_finalize = vdev_disk_off_finalize +}; + +/* ARGSUSED */ +static void +vdev_disk_dgrd_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie, + int ldi_result, void *arg, void *ev_data) +{ + vdev_t *vd = (vdev_t *)arg; + + /* + * Ignore events other than degrade. + */ + if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_DEGRADE) != 0) + return; + + /* + * Degrade events always succeed. Mark the vdev as degraded. + * This status is purely informative for the user. + */ + (void) vdev_degrade(vd->vdev_spa, vd->vdev_guid, 0); +} + +static ldi_ev_callback_t vdev_disk_dgrd_callb = { + .cb_vers = LDI_EV_CB_VERS, + .cb_notify = NULL, + .cb_finalize = vdev_disk_dgrd_finalize +}; + static void vdev_disk_hold(vdev_t *vd) { @@ -146,7 +286,9 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) { spa_t *spa = vd->vdev_spa; - vdev_disk_t *dvd; + vdev_disk_t *dvd = vd->vdev_tsd; + ldi_ev_cookie_t ecookie; + vdev_disk_ldi_cb_t *lcb; union { struct dk_minfo_ext ude; struct dk_minfo ud; @@ -172,13 +314,25 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, * Reopen the device if it's not currently open. Otherwise, * just update the physical size of the device. */ - if (vd->vdev_tsd != NULL) { - ASSERT(vd->vdev_reopening); - dvd = vd->vdev_tsd; - goto skip_open; + if (dvd != NULL) { + if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) { + /* + * If we are opening a device in its offline notify + * context, the LDI handle was just closed. Clean + * up the LDI event callbacks and free vd->vdev_tsd. + */ + vdev_disk_free(vd); + } else { + ASSERT(vd->vdev_reopening); + goto skip_open; + } } - dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); + /* + * Create vd->vdev_tsd. + */ + vdev_disk_alloc(vd); + dvd = vd->vdev_tsd; /* * When opening a disk device, we want to preserve the user's original @@ -211,23 +365,28 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, if (vd->vdev_wholedisk == -1ULL) { size_t len = strlen(vd->vdev_path) + 3; char *buf = kmem_alloc(len, KM_SLEEP); - ldi_handle_t lh; (void) snprintf(buf, len, "%ss0", vd->vdev_path); - if (ldi_open_by_name(buf, spa_mode(spa), kcred, - &lh, zfs_li) == 0) { + error = ldi_open_by_name(buf, spa_mode(spa), kcred, + &dvd->vd_lh, zfs_li); + if (error == 0) { spa_strfree(vd->vdev_path); vd->vdev_path = buf; vd->vdev_wholedisk = 1ULL; - (void) ldi_close(lh, spa_mode(spa), kcred); } else { kmem_free(buf, len); } } - error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, - &dvd->vd_lh, zfs_li); + /* + * If we have not yet opened the device, try to open it by the + * specified path. + */ + if (error != 0) { + error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), + kcred, &dvd->vd_lh, zfs_li); + } /* * Compare the devid to the stored value. @@ -334,6 +493,27 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, kmem_free(physpath, MAXPATHLEN); } + /* + * Register callbacks for the LDI offline event. + */ + if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) == + LDI_EV_SUCCESS) { + lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); + list_insert_tail(&dvd->vd_ldi_cbs, lcb); + (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, + &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id); + } + + /* + * Register callbacks for the LDI degrade event. + */ + if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) == + LDI_EV_SUCCESS) { + lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); + list_insert_tail(&dvd->vd_ldi_cbs, lcb); + (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, + &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id); + } skip_open: /* * Determine the actual size of the device. @@ -412,18 +592,31 @@ vdev_disk_close(vdev_t *vd) if (vd->vdev_reopening || dvd == NULL) return; - if (dvd->vd_minor != NULL) + if (dvd->vd_minor != NULL) { ddi_devid_str_free(dvd->vd_minor); + dvd->vd_minor = NULL; + } - if (dvd->vd_devid != NULL) + if (dvd->vd_devid != NULL) { ddi_devid_free(dvd->vd_devid); + dvd->vd_devid = NULL; + } - if (dvd->vd_lh != NULL) + if (dvd->vd_lh != NULL) { (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred); + dvd->vd_lh = NULL; + } vd->vdev_delayed_close = B_FALSE; - kmem_free(dvd, sizeof (vdev_disk_t)); - vd->vdev_tsd = NULL; + /* + * If we closed the LDI handle due to an offline notify from LDI, + * don't free vd->vdev_tsd or unregister the callbacks here; + * the offline finalize callback or a reopen will take care of it. + */ + if (dvd->vd_ldi_offline) + return; + + vdev_disk_free(vd); } int @@ -432,6 +625,13 @@ vdev_disk_physio(vdev_t *vd, caddr_t data, { vdev_disk_t *dvd = vd->vdev_tsd; + /* + * If the vdev is closed, it's likely in the REMOVED or FAULTED state. + * Nothing to be done here but return failure. + */ + if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) + return (EIO); + ASSERT(vd->vdev_ops == &vdev_disk_ops); /* @@ -527,6 +727,15 @@ vdev_disk_io_start(zio_t *zio) buf_t *bp; int error; + /* + * If the vdev is closed, it's likely in the REMOVED or FAULTED state. + * Nothing to be done here but return failure. + */ + if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) { + zio->io_error = ENXIO; + return (ZIO_PIPELINE_CONTINUE); + } + if (zio->io_type == ZIO_TYPE_IOCTL) { /* XXPOLICY */ if (!vdev_readable(vd)) { From 5bef7b63cc3cfb8de7074aa903f2e8646262ff2a Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Wed, 20 Nov 2013 11:09:12 +0000 Subject: [PATCH 8/9] 4322 ZFS deadlock on dp_config_rwlock illumos/illumos-gate@c50d56f667f119d78fa3d94d6bef2c298ba556f6 --- uts/common/fs/zfs/dsl_userhold.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/uts/common/fs/zfs/dsl_userhold.c b/uts/common/fs/zfs/dsl_userhold.c index a948e10b12b..7f2c26f766e 100644 --- a/uts/common/fs/zfs/dsl_userhold.c +++ b/uts/common/fs/zfs/dsl_userhold.c @@ -564,21 +564,23 @@ dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist, ddura.ddura_holdfunc = dsl_dataset_hold_obj_string; pool = spa_name(tmpdp->dp_spa); #ifdef _KERNEL - dsl_pool_config_enter(tmpdp, FTAG); for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; pair = nvlist_next_nvpair(holds, pair)) { dsl_dataset_t *ds; + dsl_pool_config_enter(tmpdp, FTAG); error = dsl_dataset_hold_obj_string(tmpdp, nvpair_name(pair), FTAG, &ds); if (error == 0) { char name[MAXNAMELEN]; dsl_dataset_name(ds, name); + dsl_pool_config_exit(tmpdp, FTAG); dsl_dataset_rele(ds, FTAG); (void) zfs_unmount_snap(name); + } else { + dsl_pool_config_exit(tmpdp, FTAG); } } - dsl_pool_config_exit(tmpdp, FTAG); #endif } else { /* Non-temporary holds are specified by name. */ From 69dd9f346af22c14f505f4c07efeb81f932482d9 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 26 Nov 2013 20:09:43 +0000 Subject: [PATCH 9/9] 4347 ZPL can use dmu_tx_assign(TXG_WAIT) illumos/illumos-gate@e722410c49fe67cbf0f639cbcc288bd6cbcf7dd1 --- uts/common/fs/zfs/zfs_dir.c | 8 +------ uts/common/fs/zfs/zfs_vnops.c | 40 ++++++++++++++--------------------- uts/common/fs/zfs/zfs_znode.c | 23 +++----------------- 3 files changed, 20 insertions(+), 51 deletions(-) diff --git a/uts/common/fs/zfs/zfs_dir.c b/uts/common/fs/zfs/zfs_dir.c index 439420ba41d..cb3b4aca15c 100644 --- a/uts/common/fs/zfs/zfs_dir.c +++ b/uts/common/fs/zfs/zfs_dir.c @@ -937,7 +937,6 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr) return (SET_ERROR(EDQUOT)); } -top: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE); @@ -946,13 +945,8 @@ top: fuid_dirtied = zfsvfs->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); - error = dmu_tx_assign(tx, TXG_NOWAIT); + error = dmu_tx_assign(tx, TXG_WAIT); if (error) { - if (error == ERESTART) { - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); return (error); diff --git a/uts/common/fs/zfs/zfs_vnops.c b/uts/common/fs/zfs/zfs_vnops.c index 78e2a8cce5a..6b5c6f852cc 100644 --- a/uts/common/fs/zfs/zfs_vnops.c +++ b/uts/common/fs/zfs/zfs_vnops.c @@ -111,11 +111,18 @@ * (3) All range locks must be grabbed before calling dmu_tx_assign(), * as they can span dmu_tx_assign() calls. * - * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). - * This is critical because we don't want to block while holding locks. - * Note, in particular, that if a lock is sometimes acquired before - * the tx assigns, and sometimes after (e.g. z_lock), then failing to - * use a non-blocking assign can deadlock the system. The scenario: + * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to + * dmu_tx_assign(). This is critical because we don't want to block + * while holding locks. + * + * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This + * reduces lock contention and CPU usage when we must wait (note that if + * throughput is constrained by the storage, nearly every transaction + * must wait). + * + * Note, in particular, that if a lock is sometimes acquired before + * the tx assigns, and sometimes after (e.g. z_lock), then failing + * to use a non-blocking assign can deadlock the system. The scenario: * * Thread A has grabbed a lock before calling dmu_tx_assign(). * Thread B is in an already-assigned tx, and blocks for this lock. @@ -728,7 +735,6 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) while (n > 0) { abuf = NULL; woff = uio->uio_loffset; -again: if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { if (abuf != NULL) @@ -780,13 +786,8 @@ again: dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_NOWAIT); + error = dmu_tx_assign(tx, TXG_WAIT); if (error) { - if (error == ERESTART) { - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto again; - } dmu_tx_abort(tx); if (abuf != NULL) dmu_return_arcbuf(abuf); @@ -3043,12 +3044,9 @@ top: zfs_sa_upgrade_txholds(tx, zp); - err = dmu_tx_assign(tx, TXG_NOWAIT); - if (err) { - if (err == ERESTART) - dmu_tx_wait(tx); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) goto out; - } count = 0; /* @@ -4140,19 +4138,13 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, err = SET_ERROR(EDQUOT); goto out; } -top: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_write(tx, zp->z_id, off, len); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); - err = dmu_tx_assign(tx, TXG_NOWAIT); + err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { - if (err == ERESTART) { - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } dmu_tx_abort(tx); goto out; } diff --git a/uts/common/fs/zfs/zfs_znode.c b/uts/common/fs/zfs/zfs_znode.c index f9433b6b447..3a3eee52bec 100644 --- a/uts/common/fs/zfs/zfs_znode.c +++ b/uts/common/fs/zfs/zfs_znode.c @@ -1469,7 +1469,6 @@ zfs_extend(znode_t *zp, uint64_t end) zfs_range_unlock(rl); return (0); } -top: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); @@ -1489,13 +1488,8 @@ top: newblksz = 0; } - error = dmu_tx_assign(tx, TXG_NOWAIT); + error = dmu_tx_assign(tx, TXG_WAIT); if (error) { - if (error == ERESTART) { - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } dmu_tx_abort(tx); zfs_range_unlock(rl); return (error); @@ -1592,17 +1586,11 @@ zfs_trunc(znode_t *zp, uint64_t end) zfs_range_unlock(rl); return (error); } -top: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_NOWAIT); + error = dmu_tx_assign(tx, TXG_WAIT); if (error) { - if (error == ERESTART) { - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } dmu_tx_abort(tx); zfs_range_unlock(rl); return (error); @@ -1709,13 +1697,8 @@ log: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_NOWAIT); + error = dmu_tx_assign(tx, TXG_WAIT); if (error) { - if (error == ERESTART) { - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto log; - } dmu_tx_abort(tx); return (error); }