From ffd50bca7e4d51151ed32dbde7d7a4c6d06fcd12 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Sun, 1 May 2016 21:06:59 +0000 Subject: [PATCH] bcache read ahead may attempt to read past end of disk The new bcache code does not know the size of the disk, and therefore may attempt to read past the end of the disk while trying to fill its read-ahead cache. This is usually not an issue, it fails gracefully on all of my machines, but some BIOSes seem to retry the reads for up to 30 seconds each, resulting in a long stall during boot Submitted by: Toomas Soome Reviewed by: jhb, np Differential Revision: https://reviews.freebsd.org/D6109 --- sys/boot/common/bcache.c | 47 ++++++++++++++++++++++++++------ sys/boot/efi/libefi/efipart.c | 10 ++++++- sys/boot/i386/libi386/bioscd.c | 29 ++++++++++++++++++-- sys/boot/i386/libi386/biosdisk.c | 12 ++++++++ 4 files changed, 86 insertions(+), 12 deletions(-) diff --git a/sys/boot/common/bcache.c b/sys/boot/common/bcache.c index e5cf75baf69..4bb9082bea2 100644 --- a/sys/boot/common/bcache.c +++ b/sys/boot/common/bcache.c @@ -272,20 +272,43 @@ read_strategy(void *devdata, int rw, daddr_t blk, size_t offset, for (i = 0; i < p_size; i++) { bcache_invalidate(bc, p_blk + i); } + r_size = 0; + /* + * with read-ahead, it may happen we are attempting to read past + * disk end, as bcache has no information about disk size. + * in such case we should get partial read if some blocks can be + * read or error, if no blocks can be read. + * in either case we should return the data in bcache and only + * return error if there is no data. + */ result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0, p_size * bcache_blksize, p_buf, &r_size); - if (result) - goto done; - r_size /= bcache_blksize; for (i = 0; i < r_size; i++) bcache_insert(bc, p_blk + i); - bcache_rablks += ra; - bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf, - size); + /* update ra statistics */ + if (r_size != 0) { + if (r_size < p_size) + bcache_rablks += (p_size - r_size); + else + bcache_rablks += ra; + } + + /* check how much data can we copy */ + for (i = 0; i < nblk; i++) { + if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) + break; + } + + size = i * bcache_blksize; + if (size != 0) { + bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, + buf, size); + result = 0; + } done: if ((result == 0) && (rsize != NULL)) @@ -349,8 +372,16 @@ bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset, ret = read_strategy(devdata, rw, blk, offset, csize, buf+total, &isize); - if (ret != 0) - return (ret); + + /* + * we may have error from read ahead, if we have read some data + * return partial read. + */ + if (ret != 0 || isize == 0) { + if (total != 0) + ret = 0; + break; + } blk += (offset+isize) / bcache_blksize; offset = 0; total += isize; diff --git a/sys/boot/efi/libefi/efipart.c b/sys/boot/efi/libefi/efipart.c index 2cf009a958a..da420bf9d08 100644 --- a/sys/boot/efi/libefi/efipart.c +++ b/sys/boot/efi/libefi/efipart.c @@ -321,6 +321,15 @@ efipart_realstrategy(void *devdata, int rw, daddr_t blk, size_t offset, if (size == 0 || (size % 512) != 0) return (EIO); + off = blk * 512; + /* make sure we don't read past disk end */ + if ((off + size) / blkio->Media->BlockSize - 1 > + blkio->Media->LastBlock) { + size = blkio->Media->LastBlock + 1 - + off / blkio->Media->BlockSize; + size = size * blkio->Media->BlockSize; + } + if (rsize != NULL) *rsize = size; @@ -335,7 +344,6 @@ efipart_realstrategy(void *devdata, int rw, daddr_t blk, size_t offset, return (ENOMEM); error = 0; - off = blk * 512; blk = off / blkio->Media->BlockSize; blkoff = off % blkio->Media->BlockSize; blksz = blkio->Media->BlockSize - blkoff; diff --git a/sys/boot/i386/libi386/bioscd.c b/sys/boot/i386/libi386/bioscd.c index c387513e7ca..6d1d1e140eb 100644 --- a/sys/boot/i386/libi386/bioscd.c +++ b/sys/boot/i386/libi386/bioscd.c @@ -271,14 +271,25 @@ bc_realstrategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, if (rsize) *rsize = 0; - if (blks && bc_read(unit, dblk, blks, buf)) { + if ((blks = bc_read(unit, dblk, blks, buf)) < 0) { DEBUG("read error"); return (EIO); + } else { + if (size / BIOSCD_SECSIZE > blks) { + if (rsize) + *rsize = blks * BIOSCD_SECSIZE; + return (0); + } } #ifdef BD_SUPPORT_FRAGS DEBUG("frag read %d from %lld+%d to %p", fragsize, dblk, blks, buf + (blks * BIOSCD_SECSIZE)); - if (fragsize && bc_read(unit, dblk + blks, 1, fragbuf)) { + if (fragsize && bc_read(unit, dblk + blks, 1, fragbuf) != 1) { + if (blks) { + if (rsize) + *rsize = blks * BIOSCD_SECSIZE; + return (0); + } DEBUG("frag read error"); return(EIO); } @@ -292,6 +303,7 @@ bc_realstrategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, /* Max number of sectors to bounce-buffer at a time. */ #define CD_BOUNCEBUF 8 +/* return negative value for an error, otherwise blocks read */ static int bc_read(int unit, daddr_t dblk, int blks, caddr_t dest) { @@ -368,6 +380,8 @@ bc_read(int unit, daddr_t dblk, int blks, caddr_t dest) result = V86_CY(v86.efl); if (result == 0) break; + /* fall back to 1 sector read */ + x = 1; } #ifdef DISK_DEBUG @@ -376,6 +390,11 @@ bc_read(int unit, daddr_t dblk, int blks, caddr_t dest) DEBUG("%d sectors from %lld to %p (0x%x) %s", x, dblk, p, VTOP(p), result ? "failed" : "ok"); DEBUG("unit %d status 0x%x", unit, error); + + /* still an error? break off */ + if (result != 0) + break; + if (bbuf != NULL) bcopy(bbuf, p, x * BIOSCD_SECSIZE); p += (x * BIOSCD_SECSIZE); @@ -384,7 +403,11 @@ bc_read(int unit, daddr_t dblk, int blks, caddr_t dest) } /* hexdump(dest, (blks * BIOSCD_SECSIZE)); */ - return(0); + + if (blks - resid == 0) + return (-1); /* read failed */ + + return (blks - resid); } /* diff --git a/sys/boot/i386/libi386/biosdisk.c b/sys/boot/i386/libi386/biosdisk.c index 4dfb6f87425..e5b78355173 100644 --- a/sys/boot/i386/libi386/biosdisk.c +++ b/sys/boot/i386/libi386/biosdisk.c @@ -508,6 +508,18 @@ bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, if (rsize) *rsize = 0; + if (dblk >= BD(dev).bd_sectors) { + DEBUG("IO past disk end %llu", (unsigned long long)dblk); + return (EIO); + } + + if (dblk + blks > BD(dev).bd_sectors) { + /* perform partial read */ + blks = BD(dev).bd_sectors - dblk; + size = blks * BD(dev).bd_sectorsize; + DEBUG("short read %d", blks); + } + switch(rw){ case F_READ: DEBUG("read %d from %lld to %p", blks, dblk, buf);