5220 L2ARC does not support devices that do not provide 512B access

illumos/illumos-gate@403a8da73c
403a8da73c

https://www.illumos.org/issues/5220
  There are disk devices that have logical sector size larger than 512B, for
  example 4KB. That is, their physical sector size is larger than 512B and they
  do not provide emulation for 512B sector sizes. For such devices both a data
  offset and a data size must be properly aligned. L2ARC should arrange that
  because it uses physical I/O.
  zio_vdev_io_start() performs a necessary transformation if io_size is not
  aligned to vdev_ashift, but that is done only for logical I/O. Something
  similar should be done in L2ARC code.
      * a temporary write buffer should be allocated if the original buffer is
        not going to be compressed and its size is not aligned
      * size of a temporary compression buffer should be ashift aligned
      * for the reads, if a size of a target buffer is not sufficiently large and
        it is not aligned then a temporary read buffer should be allocated

Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Author: Andriy Gapon <avg@FreeBSD.org>
This commit is contained in:
Andriy Gapon 2017-06-14 16:44:10 +00:00
parent 4b16e7e931
commit b06c8f09e9

View file

@ -1075,6 +1075,7 @@ typedef struct l2arc_read_callback {
blkptr_t l2rcb_bp; /* original blkptr */
zbookmark_phys_t l2rcb_zb; /* original bookmark */
int l2rcb_flags; /* original flags */
abd_t *l2rcb_abd; /* temporary buffer */
} l2arc_read_callback_t;
typedef struct l2arc_write_callback {
@ -5048,6 +5049,8 @@ top:
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
!(l2arc_noprefetch && HDR_PREFETCH(hdr))) {
l2arc_read_callback_t *cb;
abd_t *abd;
uint64_t asize;
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
@ -5059,8 +5062,17 @@ top:
cb->l2rcb_zb = *zb;
cb->l2rcb_flags = zio_flags;
asize = vdev_psize_to_asize(vd, size);
if (asize != size) {
abd = abd_alloc_for_io(asize,
HDR_ISTYPE_METADATA(hdr));
cb->l2rcb_abd = abd;
} else {
abd = hdr->b_l1hdr.b_pabd;
}
ASSERT(addr >= VDEV_LABEL_START_SIZE &&
addr + lsize < vd->vdev_psize -
addr + asize <= vd->vdev_psize -
VDEV_LABEL_END_SIZE);
/*
@ -5072,7 +5084,7 @@ top:
ASSERT3U(HDR_GET_COMPRESS(hdr), !=,
ZIO_COMPRESS_EMPTY);
rzio = zio_read_phys(pio, vd, addr,
size, hdr->b_l1hdr.b_pabd,
asize, abd,
ZIO_CHECKSUM_OFF,
l2arc_read_done, cb, priority,
zio_flags | ZIO_FLAG_DONT_CACHE |
@ -6566,6 +6578,33 @@ l2arc_read_done(zio_t *zio)
mutex_enter(hash_lock);
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
/*
* If the data was read into a temporary buffer,
* move it and free the buffer.
*/
if (cb->l2rcb_abd != NULL) {
ASSERT3U(arc_hdr_size(hdr), <, zio->io_size);
if (zio->io_error == 0) {
abd_copy(hdr->b_l1hdr.b_pabd, cb->l2rcb_abd,
arc_hdr_size(hdr));
}
/*
* The following must be done regardless of whether
* there was an error:
* - free the temporary buffer
* - point zio to the real ARC buffer
* - set zio size accordingly
* These are required because zio is either re-used for
* an I/O of the block in the case of the error
* or the zio is passed to arc_read_done() and it
* needs real data.
*/
abd_free(cb->l2rcb_abd);
zio->io_size = zio->io_orig_size = arc_hdr_size(hdr);
zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
}
ASSERT3P(zio->io_abd, !=, NULL);
/*
@ -6903,23 +6942,34 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
* Normally the L2ARC can use the hdr's data, but if
* we're sharing data between the hdr and one of its
* bufs, L2ARC needs its own copy of the data so that
* the ZIO below can't race with the buf consumer. To
* ensure that this copy will be available for the
* the ZIO below can't race with the buf consumer.
* Another case where we need to create a copy of the
* data is when the buffer size is not device-aligned
* and we need to pad the block to make it such.
* That also keeps the clock hand suitably aligned.
*
* To ensure that the copy will be available for the
* lifetime of the ZIO and be cleaned up afterwards, we
* add it to the l2arc_free_on_write queue.
*/
uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
size);
abd_t *to_write;
if (!HDR_SHARED_DATA(hdr)) {
if (!HDR_SHARED_DATA(hdr) && size == asize) {
to_write = hdr->b_l1hdr.b_pabd;
} else {
to_write = abd_alloc_for_io(size,
to_write = abd_alloc_for_io(asize,
HDR_ISTYPE_METADATA(hdr));
abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
if (asize != size) {
abd_zero_off(to_write, size,
asize - size);
}
l2arc_free_abd_on_write(to_write, size,
arc_buf_type(hdr));
}
wzio = zio_write_phys(pio, dev->l2ad_vdev,
hdr->b_l2hdr.b_daddr, size, to_write,
hdr->b_l2hdr.b_daddr, asize, to_write,
ZIO_CHECKSUM_OFF, NULL, hdr,
ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_CANFAIL, B_FALSE);
@ -6929,11 +6979,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
zio_t *, wzio);
write_asize += size;
/*
* Keep the clock hand suitably device-aligned.
*/
uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
size);
write_psize += asize;
dev->l2ad_hand += asize;