diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c index e8c305a9be9..b7254261cfb 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c @@ -29,6 +29,9 @@ #include #include +#include +#include +#include #include #include #include @@ -52,7 +55,7 @@ typedef struct mirror_map { int *mm_preferred; int mm_preferred_cnt; int mm_children; - boolean_t mm_replacing; + boolean_t mm_resilvering; boolean_t mm_root; mirror_child_t mm_child[]; } mirror_map_t; @@ -119,13 +122,13 @@ vdev_mirror_map_size(int children) } static inline mirror_map_t * -vdev_mirror_map_alloc(int children, boolean_t replacing, boolean_t root) +vdev_mirror_map_alloc(int children, boolean_t resilvering, boolean_t root) { mirror_map_t *mm; mm = kmem_zalloc(vdev_mirror_map_size(children), KM_SLEEP); mm->mm_children = children; - mm->mm_replacing = replacing; + mm->mm_resilvering = resilvering; mm->mm_root = root; mm->mm_preferred = (int *)((uintptr_t)mm + offsetof(mirror_map_t, mm_child[children])); @@ -217,9 +220,39 @@ vdev_mirror_map_init(zio_t *zio) mc->mc_offset = DVA_GET_OFFSET(&dva[c]); } } else { - mm = vdev_mirror_map_alloc(vd->vdev_children, - (vd->vdev_ops == &vdev_replacing_ops || - vd->vdev_ops == &vdev_spare_ops), B_FALSE); + /* + * If we are resilvering, then we should handle scrub reads + * differently; we shouldn't issue them to the resilvering + * device because it might not have those blocks. + * + * We are resilvering iff: + * 1) We are a replacing vdev (ie our name is "replacing-1" or + * "spare-1" or something like that), and + * 2) The pool is currently being resilvered. + * + * We cannot simply check vd->vdev_resilver_txg, because it's + * not set in this path. + * + * Nor can we just check our vdev_ops; there are cases (such as + * when a user types "zpool replace pool odev spare_dev" and + * spare_dev is in the spare list, or when a spare device is + * automatically used to replace a DEGRADED device) when + * resilvering is complete but both the original vdev and the + * spare vdev remain in the pool. That behavior is intentional. + * It helps implement the policy that a spare should be + * automatically removed from the pool after the user replaces + * the device that originally failed. + * + * If a spa load is in progress, then spa_dsl_pool may be + * uninitialized. But we shouldn't be resilvering during a spa + * load anyway. + */ + boolean_t replacing = (vd->vdev_ops == &vdev_replacing_ops || + vd->vdev_ops == &vdev_spare_ops) && + spa_load_state(vd->vdev_spa) == SPA_LOAD_NONE && + dsl_scan_resilvering(vd->vdev_spa->spa_dsl_pool); + mm = vdev_mirror_map_alloc(vd->vdev_children, replacing, + B_FALSE); for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; mc->mc_vd = vd->vdev_child[c]; @@ -448,7 +481,7 @@ vdev_mirror_io_start(zio_t *zio) mm = vdev_mirror_map_init(zio); if (zio->io_type == ZIO_TYPE_READ) { - if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing && + if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_resilvering && mm->mm_children > 1) { /* * For scrubbing reads we need to allocate a read @@ -589,7 +622,7 @@ vdev_mirror_io_done(zio_t *zio) if (good_copies && spa_writeable(zio->io_spa) && (unexpected_errors || (zio->io_flags & ZIO_FLAG_RESILVER) || - ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) { + ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_resilvering))) { /* * Use the good data we have in hand to repair damaged children. */