mirror of
https://github.com/opnsense/src.git
synced 2026-03-14 06:35:31 -04:00
Change the bio and buffer structures to have daddr64_t bio_pblkno, b_blkno, and b_lblkno fields, which allows access to disks larger than a terabyte in size. This change also requires that the VOP_BMAP vnode operation accept and return daddr64_t blocks. This delta should not affect system operation in any way. It merely sets up the necessary interfaces to allow the development of disk drivers that work with these larger disk block addresses. It also allows for the development of UFS2, which will use 64-bit block addresses.
949 lines
22 KiB
C
949 lines
22 KiB
C
/*
|
|
* ----------------------------------------------------------------------------
|
|
* "THE BEER-WARE LICENSE" (Revision 42):
|
|
* <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
|
|
* can do whatever you want with this stuff. If we meet some day, and you think
|
|
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
|
|
* ----------------------------------------------------------------------------
|
|
*
|
|
* $FreeBSD$
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* The following functions are based on the vn(4) driver: mdstart_swap(),
|
|
* mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
|
|
* and as such under the following copyright:
|
|
*
|
|
* Copyright (c) 1988 University of Utah.
|
|
* Copyright (c) 1990, 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to Berkeley by
|
|
* the Systems Programming Group of the University of Utah Computer
|
|
* Science Department.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the University of
|
|
* California, Berkeley and its contributors.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* from: Utah Hdr: vn.c 1.13 94/04/02
|
|
*
|
|
* from: @(#)vn.c 8.6 (Berkeley) 4/1/94
|
|
* From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
|
|
*/
|
|
|
|
#include "opt_md.h"
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/bio.h>
|
|
#include <sys/conf.h>
|
|
#include <sys/devicestat.h>
|
|
#include <sys/disk.h>
|
|
#include <sys/fcntl.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/linker.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mdioctl.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/namei.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/queue.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/vnode.h>
|
|
|
|
#include <machine/atomic.h>
|
|
|
|
#include <vm/vm.h>
|
|
#include <vm/vm_object.h>
|
|
#include <vm/vm_page.h>
|
|
#include <vm/vm_pager.h>
|
|
#include <vm/vm_zone.h>
|
|
#include <vm/swap_pager.h>
|
|
|
|
#define MD_MODVER 1
|
|
|
|
#ifndef MD_NSECT
|
|
#define MD_NSECT (10000 * 2)
|
|
#endif
|
|
|
|
static MALLOC_DEFINE(M_MD, "MD disk", "Memory Disk");
|
|
static MALLOC_DEFINE(M_MDSECT, "MD sectors", "Memory Disk Sectors");
|
|
|
|
static int md_debug;
|
|
SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "");
|
|
|
|
#if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
|
|
/* Image gets put here: */
|
|
static u_char mfs_root[MD_ROOT_SIZE*1024] = "MFS Filesystem goes here";
|
|
static u_char end_mfs_root[] __unused = "MFS Filesystem had better STOP here";
|
|
#endif
|
|
|
|
static int mdrootready;
|
|
static int mdunits;
|
|
static dev_t status_dev = 0;
|
|
|
|
|
|
#define CDEV_MAJOR 95
|
|
|
|
static d_strategy_t mdstrategy;
|
|
static d_open_t mdopen;
|
|
static d_close_t mdclose;
|
|
static d_ioctl_t mdioctl, mdctlioctl;
|
|
|
|
static struct cdevsw md_cdevsw = {
|
|
/* open */ mdopen,
|
|
/* close */ mdclose,
|
|
/* read */ physread,
|
|
/* write */ physwrite,
|
|
/* ioctl */ mdioctl,
|
|
/* poll */ nopoll,
|
|
/* mmap */ nommap,
|
|
/* strategy */ mdstrategy,
|
|
/* name */ MD_NAME,
|
|
/* maj */ CDEV_MAJOR,
|
|
/* dump */ nodump,
|
|
/* psize */ nopsize,
|
|
/* flags */ D_DISK | D_CANFREE | D_MEMDISK,
|
|
};
|
|
|
|
static struct cdevsw mdctl_cdevsw = {
|
|
/* open */ nullopen,
|
|
/* close */ nullclose,
|
|
/* read */ noread,
|
|
/* write */ nowrite,
|
|
/* ioctl */ mdctlioctl,
|
|
/* poll */ nopoll,
|
|
/* mmap */ nommap,
|
|
/* strategy */ nostrategy,
|
|
/* name */ MD_NAME,
|
|
/* maj */ CDEV_MAJOR
|
|
};
|
|
|
|
static struct cdevsw mddisk_cdevsw;
|
|
|
|
static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(&md_softc_list);
|
|
|
|
struct md_s {
|
|
int unit;
|
|
LIST_ENTRY(md_s) list;
|
|
struct devstat stats;
|
|
struct bio_queue_head bio_queue;
|
|
struct disk disk;
|
|
dev_t dev;
|
|
int busy;
|
|
enum md_types type;
|
|
unsigned nsect;
|
|
unsigned opencount;
|
|
unsigned secsize;
|
|
unsigned flags;
|
|
|
|
/* MD_MALLOC related fields */
|
|
u_char **secp;
|
|
|
|
/* MD_PRELOAD related fields */
|
|
u_char *pl_ptr;
|
|
unsigned pl_len;
|
|
|
|
/* MD_VNODE related fields */
|
|
struct vnode *vnode;
|
|
struct ucred *cred;
|
|
|
|
/* MD_SWAP related fields */
|
|
vm_object_t object;
|
|
};
|
|
|
|
static int
|
|
mdopen(dev_t dev, int flag, int fmt, struct thread *td)
|
|
{
|
|
struct md_s *sc;
|
|
struct disklabel *dl;
|
|
|
|
if (md_debug)
|
|
printf("mdopen(%s %x %x %p)\n",
|
|
devtoname(dev), flag, fmt, td->td_proc);
|
|
|
|
sc = dev->si_drv1;
|
|
|
|
dl = &sc->disk.d_label;
|
|
bzero(dl, sizeof(*dl));
|
|
dl->d_secsize = sc->secsize;
|
|
dl->d_nsectors = sc->nsect > 63 ? 63 : sc->nsect;
|
|
dl->d_ntracks = 1;
|
|
dl->d_secpercyl = dl->d_nsectors * dl->d_ntracks;
|
|
dl->d_secperunit = sc->nsect;
|
|
dl->d_ncylinders = dl->d_secperunit / dl->d_secpercyl;
|
|
sc->opencount++;
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
mdclose(dev_t dev, int flags, int fmt, struct thread *td)
|
|
{
|
|
struct md_s *sc = dev->si_drv1;
|
|
|
|
sc->opencount--;
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
mdioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
|
|
{
|
|
|
|
if (md_debug)
|
|
printf("mdioctl(%s %lx %p %x %p)\n",
|
|
devtoname(dev), cmd, addr, flags, td);
|
|
|
|
return (ENOIOCTL);
|
|
}
|
|
|
|
static int
|
|
mdstart_malloc(struct md_s *sc, struct bio *bp)
|
|
{
|
|
int i;
|
|
devstat_trans_flags dop;
|
|
u_char *secp, **secpp, *dst;
|
|
unsigned secno, nsec, secval, uc;
|
|
|
|
if (bp->bio_cmd == BIO_DELETE)
|
|
dop = DEVSTAT_NO_DATA;
|
|
else if (bp->bio_cmd == BIO_READ)
|
|
dop = DEVSTAT_READ;
|
|
else
|
|
dop = DEVSTAT_WRITE;
|
|
|
|
nsec = bp->bio_bcount / sc->secsize;
|
|
secno = bp->bio_pblkno;
|
|
dst = bp->bio_data;
|
|
while (nsec--) {
|
|
secpp = &sc->secp[secno];
|
|
if ((uintptr_t)*secpp > 255) {
|
|
secp = *secpp;
|
|
secval = 0;
|
|
} else {
|
|
secp = NULL;
|
|
secval = (uintptr_t) *secpp;
|
|
}
|
|
|
|
if (md_debug > 2)
|
|
printf("%x %p %p %d\n",
|
|
bp->bio_flags, secpp, secp, secval);
|
|
|
|
if (bp->bio_cmd == BIO_DELETE) {
|
|
if (!(sc->flags & MD_RESERVE) && secp != NULL) {
|
|
FREE(secp, M_MDSECT);
|
|
*secpp = 0;
|
|
}
|
|
} else if (bp->bio_cmd == BIO_READ) {
|
|
if (secp != NULL) {
|
|
bcopy(secp, dst, sc->secsize);
|
|
} else if (secval) {
|
|
for (i = 0; i < sc->secsize; i++)
|
|
dst[i] = secval;
|
|
} else {
|
|
bzero(dst, sc->secsize);
|
|
}
|
|
} else {
|
|
if (sc->flags & MD_COMPRESS) {
|
|
uc = dst[0];
|
|
for (i = 1; i < sc->secsize; i++)
|
|
if (dst[i] != uc)
|
|
break;
|
|
} else {
|
|
i = 0;
|
|
uc = 0;
|
|
}
|
|
if (i == sc->secsize) {
|
|
if (secp)
|
|
FREE(secp, M_MDSECT);
|
|
*secpp = (u_char *)(uintptr_t)uc;
|
|
} else {
|
|
if (secp == NULL)
|
|
MALLOC(secp, u_char *, sc->secsize, M_MDSECT, M_WAITOK);
|
|
bcopy(dst, secp, sc->secsize);
|
|
*secpp = secp;
|
|
}
|
|
}
|
|
secno++;
|
|
dst += sc->secsize;
|
|
}
|
|
bp->bio_resid = 0;
|
|
return (0);
|
|
}
|
|
|
|
|
|
static int
|
|
mdstart_preload(struct md_s *sc, struct bio *bp)
|
|
{
|
|
devstat_trans_flags dop;
|
|
|
|
if (bp->bio_cmd == BIO_DELETE) {
|
|
dop = DEVSTAT_NO_DATA;
|
|
} else if (bp->bio_cmd == BIO_READ) {
|
|
dop = DEVSTAT_READ;
|
|
bcopy(sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_data, bp->bio_bcount);
|
|
} else {
|
|
dop = DEVSTAT_WRITE;
|
|
bcopy(bp->bio_data, sc->pl_ptr + (bp->bio_pblkno << DEV_BSHIFT), bp->bio_bcount);
|
|
}
|
|
bp->bio_resid = 0;
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
mdstart_vnode(struct md_s *sc, struct bio *bp)
|
|
{
|
|
int error;
|
|
struct uio auio;
|
|
struct iovec aiov;
|
|
struct mount *mp;
|
|
|
|
/*
|
|
* VNODE I/O
|
|
*
|
|
* If an error occurs, we set BIO_ERROR but we do not set
|
|
* B_INVAL because (for a write anyway), the buffer is
|
|
* still valid.
|
|
*/
|
|
|
|
bzero(&auio, sizeof(auio));
|
|
|
|
aiov.iov_base = bp->bio_data;
|
|
aiov.iov_len = bp->bio_bcount;
|
|
auio.uio_iov = &aiov;
|
|
auio.uio_iovcnt = 1;
|
|
auio.uio_offset = (vm_ooffset_t)bp->bio_pblkno * sc->secsize;
|
|
auio.uio_segflg = UIO_SYSSPACE;
|
|
if(bp->bio_cmd == BIO_READ)
|
|
auio.uio_rw = UIO_READ;
|
|
else
|
|
auio.uio_rw = UIO_WRITE;
|
|
auio.uio_resid = bp->bio_bcount;
|
|
auio.uio_td = curthread;
|
|
/*
|
|
* When reading set IO_DIRECT to try to avoid double-caching
|
|
* the data. When writing IO_DIRECT is not optimal, but we
|
|
* must set IO_NOWDRAIN to avoid a wdrain deadlock.
|
|
*/
|
|
if (bp->bio_cmd == BIO_READ) {
|
|
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
|
|
error = VOP_READ(sc->vnode, &auio, IO_DIRECT, sc->cred);
|
|
} else {
|
|
(void) vn_start_write(sc->vnode, &mp, V_WAIT);
|
|
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
|
|
error = VOP_WRITE(sc->vnode, &auio, IO_NOWDRAIN, sc->cred);
|
|
vn_finished_write(mp);
|
|
}
|
|
VOP_UNLOCK(sc->vnode, 0, curthread);
|
|
bp->bio_resid = auio.uio_resid;
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
mdstart_swap(struct md_s *sc, struct bio *bp)
|
|
{
|
|
|
|
if ((bp->bio_cmd == BIO_DELETE) && (sc->flags & MD_RESERVE))
|
|
biodone(bp);
|
|
else
|
|
vm_pager_strategy(sc->object, bp);
|
|
return (-1);
|
|
}
|
|
|
|
static void
|
|
mdstrategy(struct bio *bp)
|
|
{
|
|
struct md_s *sc;
|
|
int error;
|
|
|
|
if (md_debug > 1)
|
|
printf("mdstrategy(%p) %s %x, %lld, %ld, %p)\n",
|
|
bp, devtoname(bp->bio_dev), bp->bio_flags, bp->bio_blkno,
|
|
bp->bio_bcount / DEV_BSIZE, bp->bio_data);
|
|
|
|
sc = bp->bio_dev->si_drv1;
|
|
|
|
/* XXX: LOCK(sc->lock) */
|
|
bioqdisksort(&sc->bio_queue, bp);
|
|
/* XXX: UNLOCK(sc->lock) */
|
|
|
|
if (atomic_cmpset_int(&sc->busy, 0, 1) == 0)
|
|
return;
|
|
|
|
for (;;) {
|
|
/* XXX: LOCK(unique unit numbers) */
|
|
bp = bioq_first(&sc->bio_queue);
|
|
if (bp)
|
|
bioq_remove(&sc->bio_queue, bp);
|
|
/* XXX: UNLOCK(unique unit numbers) */
|
|
if (!bp)
|
|
break;
|
|
|
|
|
|
switch (sc->type) {
|
|
case MD_MALLOC:
|
|
devstat_start_transaction(&sc->stats);
|
|
error = mdstart_malloc(sc, bp);
|
|
break;
|
|
case MD_PRELOAD:
|
|
devstat_start_transaction(&sc->stats);
|
|
error = mdstart_preload(sc, bp);
|
|
break;
|
|
case MD_VNODE:
|
|
devstat_start_transaction(&sc->stats);
|
|
error = mdstart_vnode(sc, bp);
|
|
break;
|
|
case MD_SWAP:
|
|
error = mdstart_swap(sc, bp);
|
|
break;
|
|
default:
|
|
panic("Impossible md(type)");
|
|
break;
|
|
}
|
|
|
|
if (error != -1)
|
|
biofinish(bp, &sc->stats, error);
|
|
}
|
|
sc->busy = 0;
|
|
}
|
|
|
|
static struct md_s *
|
|
mdfind(int unit)
|
|
{
|
|
struct md_s *sc;
|
|
|
|
/* XXX: LOCK(unique unit numbers) */
|
|
LIST_FOREACH(sc, &md_softc_list, list) {
|
|
if (sc->unit == unit)
|
|
break;
|
|
}
|
|
/* XXX: UNLOCK(unique unit numbers) */
|
|
return (sc);
|
|
}
|
|
|
|
static struct md_s *
|
|
mdnew(int unit)
|
|
{
|
|
struct md_s *sc;
|
|
int max = -1;
|
|
|
|
/* XXX: LOCK(unique unit numbers) */
|
|
LIST_FOREACH(sc, &md_softc_list, list) {
|
|
if (sc->unit == unit) {
|
|
/* XXX: UNLOCK(unique unit numbers) */
|
|
return (NULL);
|
|
}
|
|
if (sc->unit > max)
|
|
max = sc->unit;
|
|
}
|
|
if (unit == -1)
|
|
unit = max + 1;
|
|
if (unit > DKMAXUNIT)
|
|
return (NULL);
|
|
MALLOC(sc, struct md_s *, sizeof(*sc), M_MD, M_WAITOK | M_ZERO);
|
|
sc->unit = unit;
|
|
LIST_INSERT_HEAD(&md_softc_list, sc, list);
|
|
/* XXX: UNLOCK(unique unit numbers) */
|
|
return (sc);
|
|
}
|
|
|
|
static void
|
|
mdinit(struct md_s *sc)
|
|
{
|
|
|
|
bioq_init(&sc->bio_queue);
|
|
devstat_add_entry(&sc->stats, MD_NAME, sc->unit, sc->secsize,
|
|
DEVSTAT_NO_ORDERED_TAGS,
|
|
DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
|
|
DEVSTAT_PRIORITY_OTHER);
|
|
sc->dev = disk_create(sc->unit, &sc->disk, 0, &md_cdevsw, &mddisk_cdevsw);
|
|
sc->dev->si_drv1 = sc;
|
|
}
|
|
|
|
/*
|
|
* XXX: we should check that the range they feed us is mapped.
|
|
* XXX: we should implement read-only.
|
|
*/
|
|
|
|
static int
|
|
mdcreate_preload(struct md_ioctl *mdio)
|
|
{
|
|
struct md_s *sc;
|
|
|
|
if (mdio->md_size == 0)
|
|
return (EINVAL);
|
|
if (mdio->md_options & ~(MD_AUTOUNIT))
|
|
return (EINVAL);
|
|
if (mdio->md_options & MD_AUTOUNIT) {
|
|
sc = mdnew(-1);
|
|
if (sc == NULL)
|
|
return (ENOMEM);
|
|
mdio->md_unit = sc->unit;
|
|
} else {
|
|
sc = mdnew(mdio->md_unit);
|
|
if (sc == NULL)
|
|
return (EBUSY);
|
|
}
|
|
sc->type = MD_PRELOAD;
|
|
sc->secsize = DEV_BSIZE;
|
|
sc->nsect = mdio->md_size;
|
|
sc->flags = mdio->md_options & MD_FORCE;
|
|
/* Cast to pointer size, then to pointer to avoid warning */
|
|
sc->pl_ptr = (u_char *)(uintptr_t)mdio->md_base;
|
|
sc->pl_len = (mdio->md_size << DEV_BSHIFT);
|
|
mdinit(sc);
|
|
return (0);
|
|
}
|
|
|
|
|
|
static int
|
|
mdcreate_malloc(struct md_ioctl *mdio)
|
|
{
|
|
struct md_s *sc;
|
|
unsigned u;
|
|
|
|
if (mdio->md_size == 0)
|
|
return (EINVAL);
|
|
if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
|
|
return (EINVAL);
|
|
/* Compression doesn't make sense if we have reserved space */
|
|
if (mdio->md_options & MD_RESERVE)
|
|
mdio->md_options &= ~MD_COMPRESS;
|
|
if (mdio->md_options & MD_AUTOUNIT) {
|
|
sc = mdnew(-1);
|
|
if (sc == NULL)
|
|
return (ENOMEM);
|
|
mdio->md_unit = sc->unit;
|
|
} else {
|
|
sc = mdnew(mdio->md_unit);
|
|
if (sc == NULL)
|
|
return (EBUSY);
|
|
}
|
|
sc->type = MD_MALLOC;
|
|
sc->secsize = DEV_BSIZE;
|
|
sc->nsect = mdio->md_size;
|
|
sc->flags = mdio->md_options & (MD_COMPRESS | MD_FORCE);
|
|
MALLOC(sc->secp, u_char **, sc->nsect * sizeof(u_char *), M_MD, M_WAITOK | M_ZERO);
|
|
if (mdio->md_options & MD_RESERVE) {
|
|
for (u = 0; u < sc->nsect; u++)
|
|
MALLOC(sc->secp[u], u_char *, DEV_BSIZE, M_MDSECT, M_WAITOK | M_ZERO);
|
|
}
|
|
printf("%s%d: Malloc disk\n", MD_NAME, sc->unit);
|
|
mdinit(sc);
|
|
return (0);
|
|
}
|
|
|
|
|
|
static int
|
|
mdsetcred(struct md_s *sc, struct ucred *cred)
|
|
{
|
|
char *tmpbuf;
|
|
int error = 0;
|
|
|
|
/*
|
|
* Set credits in our softc
|
|
*/
|
|
|
|
if (sc->cred)
|
|
crfree(sc->cred);
|
|
sc->cred = crhold(cred);
|
|
|
|
/*
|
|
* Horrible kludge to establish credentials for NFS XXX.
|
|
*/
|
|
|
|
if (sc->vnode) {
|
|
struct uio auio;
|
|
struct iovec aiov;
|
|
|
|
tmpbuf = malloc(sc->secsize, M_TEMP, M_WAITOK);
|
|
bzero(&auio, sizeof(auio));
|
|
|
|
aiov.iov_base = tmpbuf;
|
|
aiov.iov_len = sc->secsize;
|
|
auio.uio_iov = &aiov;
|
|
auio.uio_iovcnt = 1;
|
|
auio.uio_offset = 0;
|
|
auio.uio_rw = UIO_READ;
|
|
auio.uio_segflg = UIO_SYSSPACE;
|
|
auio.uio_resid = aiov.iov_len;
|
|
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
|
|
error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
|
|
VOP_UNLOCK(sc->vnode, 0, curthread);
|
|
free(tmpbuf, M_TEMP);
|
|
}
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
mdcreate_vnode(struct md_ioctl *mdio, struct thread *td)
|
|
{
|
|
struct md_s *sc;
|
|
struct vattr vattr;
|
|
struct nameidata nd;
|
|
int error, flags;
|
|
|
|
if (mdio->md_options & MD_AUTOUNIT) {
|
|
sc = mdnew(-1);
|
|
mdio->md_unit = sc->unit;
|
|
} else {
|
|
sc = mdnew(mdio->md_unit);
|
|
}
|
|
if (sc == NULL)
|
|
return (EBUSY);
|
|
|
|
sc->type = MD_VNODE;
|
|
sc->flags = mdio->md_options & MD_FORCE;
|
|
|
|
flags = FREAD|FWRITE;
|
|
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, td);
|
|
error = vn_open(&nd, &flags, 0);
|
|
if (error) {
|
|
if (error != EACCES && error != EPERM && error != EROFS)
|
|
return (error);
|
|
flags &= ~FWRITE;
|
|
sc->flags |= MD_READONLY;
|
|
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, mdio->md_file, td);
|
|
error = vn_open(&nd, &flags, 0);
|
|
if (error)
|
|
return (error);
|
|
}
|
|
NDFREE(&nd, NDF_ONLY_PNBUF);
|
|
if (nd.ni_vp->v_type != VREG ||
|
|
(error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred, td))) {
|
|
VOP_UNLOCK(nd.ni_vp, 0, td);
|
|
(void) vn_close(nd.ni_vp, flags, td->td_ucred, td);
|
|
return (error ? error : EINVAL);
|
|
}
|
|
VOP_UNLOCK(nd.ni_vp, 0, td);
|
|
sc->secsize = DEV_BSIZE;
|
|
sc->vnode = nd.ni_vp;
|
|
|
|
/*
|
|
* If the size is specified, override the file attributes.
|
|
*/
|
|
if (mdio->md_size)
|
|
sc->nsect = mdio->md_size;
|
|
else
|
|
sc->nsect = vattr.va_size / sc->secsize; /* XXX: round up ? */
|
|
if (sc->nsect == 0) {
|
|
(void) vn_close(nd.ni_vp, flags, td->td_ucred, td);
|
|
return (EINVAL);
|
|
}
|
|
error = mdsetcred(sc, td->td_ucred);
|
|
if (error) {
|
|
(void) vn_close(nd.ni_vp, flags, td->td_ucred, td);
|
|
return (error);
|
|
}
|
|
mdinit(sc);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
mddestroy(struct md_s *sc, struct thread *td)
|
|
{
|
|
unsigned u;
|
|
|
|
GIANT_REQUIRED;
|
|
|
|
if (sc->dev != NULL) {
|
|
devstat_remove_entry(&sc->stats);
|
|
disk_destroy(sc->dev);
|
|
}
|
|
if (sc->vnode != NULL)
|
|
(void)vn_close(sc->vnode, sc->flags & MD_READONLY ?
|
|
FREAD : (FREAD|FWRITE), sc->cred, td);
|
|
if (sc->cred != NULL)
|
|
crfree(sc->cred);
|
|
if (sc->object != NULL) {
|
|
vm_pager_deallocate(sc->object);
|
|
}
|
|
if (sc->secp != NULL) {
|
|
for (u = 0; u < sc->nsect; u++)
|
|
if ((uintptr_t)sc->secp[u] > 255)
|
|
FREE(sc->secp[u], M_MDSECT);
|
|
FREE(sc->secp, M_MD);
|
|
}
|
|
|
|
/* XXX: LOCK(unique unit numbers) */
|
|
LIST_REMOVE(sc, list);
|
|
/* XXX: UNLOCK(unique unit numbers) */
|
|
FREE(sc, M_MD);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
mdcreate_swap(struct md_ioctl *mdio, struct thread *td)
|
|
{
|
|
int error;
|
|
struct md_s *sc;
|
|
|
|
GIANT_REQUIRED;
|
|
|
|
if (mdio->md_options & MD_AUTOUNIT) {
|
|
sc = mdnew(-1);
|
|
mdio->md_unit = sc->unit;
|
|
} else {
|
|
sc = mdnew(mdio->md_unit);
|
|
}
|
|
if (sc == NULL)
|
|
return (EBUSY);
|
|
|
|
sc->type = MD_SWAP;
|
|
|
|
/*
|
|
* Range check. Disallow negative sizes or any size less then the
|
|
* size of a page. Then round to a page.
|
|
*/
|
|
|
|
if (mdio->md_size == 0) {
|
|
mddestroy(sc, td);
|
|
return (EDOM);
|
|
}
|
|
|
|
/*
|
|
* Allocate an OBJT_SWAP object.
|
|
*
|
|
* sc_secsize is PAGE_SIZE'd
|
|
*
|
|
* mdio->size is in DEV_BSIZE'd chunks.
|
|
* Note the truncation.
|
|
*/
|
|
|
|
sc->secsize = PAGE_SIZE;
|
|
sc->nsect = mdio->md_size / (PAGE_SIZE / DEV_BSIZE);
|
|
sc->object = vm_pager_allocate(OBJT_SWAP, NULL, sc->secsize * (vm_offset_t)sc->nsect, VM_PROT_DEFAULT, 0);
|
|
sc->flags = mdio->md_options & MD_FORCE;
|
|
if (mdio->md_options & MD_RESERVE) {
|
|
if (swap_pager_reserve(sc->object, 0, sc->nsect) < 0) {
|
|
vm_pager_deallocate(sc->object);
|
|
sc->object = NULL;
|
|
mddestroy(sc, td);
|
|
return (EDOM);
|
|
}
|
|
}
|
|
error = mdsetcred(sc, td->td_ucred);
|
|
if (error)
|
|
mddestroy(sc, td);
|
|
else
|
|
mdinit(sc);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
mddetach(int unit, struct thread *td)
|
|
{
|
|
struct md_s *sc;
|
|
|
|
sc = mdfind(unit);
|
|
if (sc == NULL)
|
|
return (ENOENT);
|
|
if (sc->opencount != 0 && !(sc->flags & MD_FORCE))
|
|
return (EBUSY);
|
|
switch(sc->type) {
|
|
case MD_VNODE:
|
|
case MD_SWAP:
|
|
case MD_MALLOC:
|
|
case MD_PRELOAD:
|
|
return (mddestroy(sc, td));
|
|
default:
|
|
return (EOPNOTSUPP);
|
|
}
|
|
}
|
|
|
|
static int
|
|
mdctlioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
|
|
{
|
|
struct md_ioctl *mdio;
|
|
struct md_s *sc;
|
|
|
|
if (md_debug)
|
|
printf("mdctlioctl(%s %lx %p %x %p)\n",
|
|
devtoname(dev), cmd, addr, flags, td);
|
|
|
|
/*
|
|
* We assert the version number in the individual ioctl
|
|
* handlers instead of out here because (a) it is possible we
|
|
* may add another ioctl in the future which doesn't read an
|
|
* mdio, and (b) the correct return value for an unknown ioctl
|
|
* is ENOIOCTL, not EINVAL.
|
|
*/
|
|
mdio = (struct md_ioctl *)addr;
|
|
switch (cmd) {
|
|
case MDIOCATTACH:
|
|
if (mdio->md_version != MDIOVERSION)
|
|
return (EINVAL);
|
|
switch (mdio->md_type) {
|
|
case MD_MALLOC:
|
|
return (mdcreate_malloc(mdio));
|
|
case MD_PRELOAD:
|
|
return (mdcreate_preload(mdio));
|
|
case MD_VNODE:
|
|
return (mdcreate_vnode(mdio, td));
|
|
case MD_SWAP:
|
|
return (mdcreate_swap(mdio, td));
|
|
default:
|
|
return (EINVAL);
|
|
}
|
|
case MDIOCDETACH:
|
|
if (mdio->md_version != MDIOVERSION)
|
|
return (EINVAL);
|
|
if (mdio->md_file != NULL || mdio->md_size != 0 ||
|
|
mdio->md_options != 0)
|
|
return (EINVAL);
|
|
return (mddetach(mdio->md_unit, td));
|
|
case MDIOCQUERY:
|
|
if (mdio->md_version != MDIOVERSION)
|
|
return (EINVAL);
|
|
sc = mdfind(mdio->md_unit);
|
|
if (sc == NULL)
|
|
return (ENOENT);
|
|
mdio->md_type = sc->type;
|
|
mdio->md_options = sc->flags;
|
|
switch (sc->type) {
|
|
case MD_MALLOC:
|
|
mdio->md_size = sc->nsect;
|
|
break;
|
|
case MD_PRELOAD:
|
|
mdio->md_size = sc->nsect;
|
|
(u_char *)(uintptr_t)mdio->md_base = sc->pl_ptr;
|
|
break;
|
|
case MD_SWAP:
|
|
mdio->md_size = sc->nsect * (PAGE_SIZE / DEV_BSIZE);
|
|
break;
|
|
case MD_VNODE:
|
|
mdio->md_size = sc->nsect;
|
|
/* XXX fill this in */
|
|
mdio->md_file = NULL;
|
|
break;
|
|
}
|
|
return (0);
|
|
default:
|
|
return (ENOIOCTL);
|
|
};
|
|
return (ENOIOCTL);
|
|
}
|
|
|
|
static void
|
|
md_preloaded(u_char *image, unsigned length)
|
|
{
|
|
struct md_s *sc;
|
|
|
|
sc = mdnew(-1);
|
|
if (sc == NULL)
|
|
return;
|
|
sc->type = MD_PRELOAD;
|
|
sc->secsize = DEV_BSIZE;
|
|
sc->nsect = length / DEV_BSIZE;
|
|
sc->pl_ptr = image;
|
|
sc->pl_len = length;
|
|
if (sc->unit == 0)
|
|
mdrootready = 1;
|
|
mdinit(sc);
|
|
}
|
|
|
|
static void
|
|
md_drvinit(void *unused)
|
|
{
|
|
|
|
caddr_t mod;
|
|
caddr_t c;
|
|
u_char *ptr, *name, *type;
|
|
unsigned len;
|
|
|
|
#ifdef MD_ROOT_SIZE
|
|
md_preloaded(mfs_root, MD_ROOT_SIZE*1024);
|
|
#endif
|
|
mod = NULL;
|
|
while ((mod = preload_search_next_name(mod)) != NULL) {
|
|
name = (char *)preload_search_info(mod, MODINFO_NAME);
|
|
type = (char *)preload_search_info(mod, MODINFO_TYPE);
|
|
if (name == NULL)
|
|
continue;
|
|
if (type == NULL)
|
|
continue;
|
|
if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
|
|
continue;
|
|
c = preload_search_info(mod, MODINFO_ADDR);
|
|
ptr = *(u_char **)c;
|
|
c = preload_search_info(mod, MODINFO_SIZE);
|
|
len = *(unsigned *)c;
|
|
printf("%s%d: Preloaded image <%s> %d bytes at %p\n",
|
|
MD_NAME, mdunits, name, len, ptr);
|
|
md_preloaded(ptr, len);
|
|
}
|
|
status_dev = make_dev(&mdctl_cdevsw, 0xffff00ff, UID_ROOT, GID_WHEEL,
|
|
0600, MDCTL_NAME);
|
|
}
|
|
|
|
static int
|
|
md_modevent(module_t mod, int type, void *data)
|
|
{
|
|
int error;
|
|
struct md_s *sc;
|
|
|
|
switch (type) {
|
|
case MOD_LOAD:
|
|
md_drvinit(NULL);
|
|
break;
|
|
case MOD_UNLOAD:
|
|
LIST_FOREACH(sc, &md_softc_list, list) {
|
|
error = mddetach(sc->unit, curthread);
|
|
if (error != 0)
|
|
return (error);
|
|
}
|
|
if (status_dev)
|
|
destroy_dev(status_dev);
|
|
status_dev = 0;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static moduledata_t md_mod = {
|
|
MD_NAME,
|
|
md_modevent,
|
|
NULL
|
|
};
|
|
DECLARE_MODULE(md, md_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+CDEV_MAJOR);
|
|
MODULE_VERSION(md, MD_MODVER);
|
|
|
|
|
|
#ifdef MD_ROOT
/*
 * Run via SYSINIT at SI_SUB_MOUNT_ROOT: when unit 0 was created from a
 * preloaded image (mdrootready set), put it first in rootdevnames[].
 */
static void
md_takeroot(void *junk)
{

	if (!mdrootready)
		return;
	rootdevnames[0] = "ufs:/dev/md0c";
}

SYSINIT(md_root, SI_SUB_MOUNT_ROOT, SI_ORDER_FIRST, md_takeroot, NULL);
#endif
|