- Merge soft-updates journaling from projects/suj/head into head. This

brings in support for an optional intent log which eliminates the need
   for background fsck on unclean shutdown.

Sponsored by:   iXsystems, Yahoo!, and Juniper.
With help from: McKusick and Peter Holm
This commit is contained in:
Jeff Roberson 2010-04-24 07:05:35 +00:00
parent 07b9cc2f46
commit 113db2dddb
40 changed files with 13165 additions and 2139 deletions

View file

@ -3,7 +3,7 @@
LIB= ufs
SHLIBDIR?= /lib
SRCS= block.c cgroup.c inode.c sblock.c type.c
SRCS= block.c cgroup.c inode.c sblock.c type.c ffs_subr.c ffs_tables.c
INCS= libufs.h
MAN= bread.3 cgread.3 libufs.3 sbread.3 ufs_disk_close.3
@ -16,8 +16,11 @@ MLINKS+= ufs_disk_close.3 ufs_disk_fillout.3
MLINKS+= ufs_disk_close.3 ufs_disk_fillout_blank.3
MLINKS+= ufs_disk_close.3 ufs_disk_write.3
WARNS?= 3
.PATH: ${.CURDIR}/../../sys/ufs/ffs
WARNS?= 2
DEBUG_FLAGS = -g
CFLAGS+= -D_LIBUFS
.if defined(LIBUFS_DEBUG)
CFLAGS+= -D_LIBUFS_DEBUGGING

View file

@ -40,11 +40,143 @@ __FBSDID("$FreeBSD$");
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libufs.h>
/*
 * Allocate one full block out of the cylinder group currently held in
 * disk->d_cg.
 *
 * The block map is scanned linearly for the first completely free block.
 * On success the block is cleared in the map, the cluster accounting and
 * the free-block counters (cg, per-cg summary and filesystem totals) are
 * adjusted in memory, and fs_fmod is set.  No write call is issued here.
 *
 * Returns the filesystem-relative fragment address of the block, or 0 if
 * the cylinder group has no free block.
 */
ufs2_daddr_t
cgballoc(struct uufsd *disk)
{
	struct fs *fs = &disk->d_fs;
	struct cg *cgp = &disk->d_cg;
	u_int8_t *freemap = cg_blksfree(cgp);
	long nblks, bno;

	/* Number of full blocks described by this group's map. */
	nblks = fs->fs_fpg / fs->fs_frag;
	for (bno = 0; bno < nblks; bno++) {
		if (ffs_isblock(fs, freemap, bno))
			break;
	}
	if (bno >= nblks)
		return (0);

	/* Take the block out of the free map and the cluster summary. */
	ffs_clrblock(fs, freemap, (long)bno);
	ffs_clusteracct(fs, cgp, bno, -1);

	/* One fewer free block at every accounting level. */
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_fmod = 1;

	return (cgbase(fs, cgp->cg_cgx) + blkstofrags(fs, bno));
}
int
cgbfree(struct uufsd *disk, ufs2_daddr_t bno, long size)
{
u_int8_t *blksfree;
struct fs *fs;
struct cg *cgp;
ufs1_daddr_t fragno, cgbno;
int i, cg, blk, frags, bbase;
fs = &disk->d_fs;
cg = dtog(fs, bno);
if (cgread1(disk, cg) != 1)
return (-1);
cgp = &disk->d_cg;
cgbno = dtogd(fs, bno);
blksfree = cg_blksfree(cgp);
if (size == fs->fs_bsize) {
fragno = fragstoblks(fs, cgbno);
ffs_setblock(fs, blksfree, fragno);
ffs_clusteracct(fs, cgp, fragno, 1);
cgp->cg_cs.cs_nbfree++;
fs->fs_cstotal.cs_nbfree++;
fs->fs_cs(fs, cg).cs_nbfree++;
} else {
bbase = cgbno - fragnum(fs, cgbno);
/*
* decrement the counts associated with the old frags
*/
blk = blkmap(fs, blksfree, bbase);
ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
/*
* deallocate the fragment
*/
frags = numfrags(fs, size);
for (i = 0; i < frags; i++)
setbit(blksfree, cgbno + i);
cgp->cg_cs.cs_nffree += i;
fs->fs_cstotal.cs_nffree += i;
fs->fs_cs(fs, cg).cs_nffree += i;
/*
* add back in counts associated with the new frags
*/
blk = blkmap(fs, blksfree, bbase);
ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
/*
* if a complete block has been reassembled, account for it
*/
fragno = fragstoblks(fs, bbase);
if (ffs_isblock(fs, blksfree, fragno)) {
cgp->cg_cs.cs_nffree -= fs->fs_frag;
fs->fs_cstotal.cs_nffree -= fs->fs_frag;
fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
ffs_clusteracct(fs, cgp, fragno, 1);
cgp->cg_cs.cs_nbfree++;
fs->fs_cstotal.cs_nbfree++;
fs->fs_cs(fs, cg).cs_nbfree++;
}
}
return cgwrite(disk);
}
/*
 * Allocate an inode from the cylinder group currently held in disk->d_cg.
 *
 * The inode-used bitmap is scanned for the first clear bit.  On UFS2, if
 * the chosen inode lies beyond the inode blocks initialized so far, one
 * more block of inodes is zeroed, given random generation numbers and
 * written to disk before the inode is handed out.  The used bit is then
 * set and the free-inode counters (cg, per-cg summary and filesystem
 * totals) are decremented in memory; fs_fmod is set.
 *
 * Returns the filesystem-wide inode number, or 0 if the group has no free
 * inode or the new inode block could not be written.
 */
ino_t
cgialloc(struct uufsd *disk)
{
	struct ufs2_dinode *dp2;
	u_int8_t *inosused;
	struct cg *cgp;
	struct fs *fs;
	ino_t ino;
	int i;

	fs = &disk->d_fs;
	cgp = &disk->d_cg;
	inosused = cg_inosused(cgp);
	/*
	 * isclr() tests a single bit, so the index is an inode number and
	 * the scan must cover all fs_ipg inodes of the group (not
	 * fs_ipg / NBBY, which would only look at the first eighth).
	 */
	for (ino = 0; ino < fs->fs_ipg; ino++)
		if (isclr(inosused, ino))
			goto gotit;
	return (0);
gotit:
	if (fs->fs_magic == FS_UFS2_MAGIC &&
	    ino + INOPB(fs) > cgp->cg_initediblk &&
	    cgp->cg_initediblk < cgp->cg_niblk) {
		char block[MAXBSIZE];

		bzero(block, (int)fs->fs_bsize);
		dp2 = (struct ufs2_dinode *)&block;
		for (i = 0; i < INOPB(fs); i++) {
			dp2->di_gen = arc4random() / 2 + 1;
			dp2++;
		}
		/*
		 * bwrite() addresses the device in d_bsize units, so the
		 * fragment address from ino_to_fsba() has to go through
		 * fsbtodb() like every other bread()/bwrite() caller.  It
		 * returns a positive byte count on success, so only a
		 * non-positive result is a failure.
		 */
		if (bwrite(disk, fsbtodb(fs, ino_to_fsba(fs,
		    cgp->cg_cgx * fs->fs_ipg + cgp->cg_initediblk)),
		    block, fs->fs_bsize) <= 0)
			return (0);
		cgp->cg_initediblk += INOPB(fs);
	}

	setbit(inosused, ino);
	cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nifree--;
	fs->fs_fmod = 1;

	return (ino + (cgp->cg_cgx * fs->fs_ipg));
}
int
cgread(struct uufsd *disk)
{
@ -55,14 +187,12 @@ int
cgread1(struct uufsd *disk, int c)
{
struct fs *fs;
off_t ccg;
fs = &disk->d_fs;
if ((unsigned)c >= fs->fs_ncg) {
return (0);
}
ccg = fsbtodb(fs, cgtod(fs, c)) * disk->d_bsize;
if (bread(disk, fsbtodb(fs, cgtod(fs, c)), disk->d_cgunion.d_buf,
fs->fs_bsize) == -1) {
ERROR(disk, "unable to read cylinder group");
@ -72,6 +202,12 @@ cgread1(struct uufsd *disk, int c)
return (1);
}
/*
 * Write back the cylinder group buffer using the group index recorded in
 * disk->d_lcg.  Returns whatever cgwrite1() returns.
 */
int
cgwrite(struct uufsd *disk)
{
	const int cgidx = disk->d_lcg;

	return (cgwrite1(disk, cgidx));
}
int
cgwrite1(struct uufsd *disk, int c)
{

View file

@ -93,3 +93,19 @@ gotit: switch (disk->d_ufs) {
ERROR(disk, "unknown UFS filesystem type");
return (-1);
}
int
putino(struct uufsd *disk)
{
struct fs *fs;
fs = &disk->d_fs;
if (disk->d_inoblock == NULL) {
ERROR(disk, "No inode block allocated");
return (-1);
}
if (bwrite(disk, fsbtodb(fs, ino_to_fsba(&disk->d_fs, disk->d_inomin)),
disk->d_inoblock, disk->d_fs.fs_bsize) <= 0)
return (-1);
return (0);
}

View file

@ -71,6 +71,7 @@ struct uufsd {
int d_fd; /* raw device file descriptor */
long d_bsize; /* device bsize */
ufs2_daddr_t d_sblock; /* superblock location */
struct csum *d_sbcsum; /* Superblock summary info */
caddr_t d_inoblock; /* inode block */
ino_t d_inomin; /* low inode */
ino_t d_inomax; /* high inode */
@ -109,14 +110,19 @@ int berase(struct uufsd *, ufs2_daddr_t, ufs2_daddr_t);
/*
* cgroup.c
*/
ufs2_daddr_t cgballoc(struct uufsd *);
int cgbfree(struct uufsd *, ufs2_daddr_t, long);
ino_t cgialloc(struct uufsd *);
int cgread(struct uufsd *);
int cgread1(struct uufsd *, int);
int cgwrite(struct uufsd *);
int cgwrite1(struct uufsd *, int);
/*
* inode.c
*/
int getino(struct uufsd *, void **, ino_t, int *);
int putino(struct uufsd *);
/*
* sblock.c
@ -132,6 +138,16 @@ int ufs_disk_fillout(struct uufsd *, const char *);
int ufs_disk_fillout_blank(struct uufsd *, const char *);
int ufs_disk_write(struct uufsd *);
/*
* ffs_subr.c
*/
void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t);
void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int);
void ffs_fragacct(struct fs *, int, int32_t [], int);
int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t);
int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t);
void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
__END_DECLS
#endif /* __LIBUFS_H__ */

View file

@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <libufs.h>
@ -49,8 +50,11 @@ static int superblocks[] = SBLOCKSEARCH;
int
sbread(struct uufsd *disk)
{
uint8_t block[MAXBSIZE];
struct fs *fs;
int sb, superblock;
int i, size, blks;
uint8_t *space;
ERROR(disk, NULL);
@ -86,6 +90,34 @@ sbread(struct uufsd *disk)
}
disk->d_bsize = fs->fs_fsize / fsbtodb(fs, 1);
disk->d_sblock = superblock / disk->d_bsize;
/*
* Read in the superblock summary information.
*/
size = fs->fs_cssize;
blks = howmany(size, fs->fs_fsize);
size += fs->fs_ncg * sizeof(int32_t);
space = malloc(size);
if (space == NULL) {
ERROR(disk, "failed to allocate space for summary information");
return (-1);
}
fs->fs_csp = (struct csum *)space;
for (i = 0; i < blks; i += fs->fs_frag) {
size = fs->fs_bsize;
if (i + fs->fs_frag > blks)
size = (blks - i) * fs->fs_fsize;
if (bread(disk, fsbtodb(fs, fs->fs_csaddr + i), block, size)
== -1) {
ERROR(disk, "Failed to read sb summary information");
free(fs->fs_csp);
return (-1);
}
bcopy(block, space, size);
space += size;
}
fs->fs_maxcluster = (uint32_t *)space;
disk->d_sbcsum = fs->fs_csp;
return (0);
}
@ -93,6 +125,8 @@ int
sbwrite(struct uufsd *disk, int all)
{
struct fs *fs;
int blks, size;
uint8_t *space;
unsigned i;
ERROR(disk, NULL);
@ -107,6 +141,22 @@ sbwrite(struct uufsd *disk, int all)
ERROR(disk, "failed to write superblock");
return (-1);
}
/*
* Write superblock summary information.
*/
blks = howmany(fs->fs_cssize, fs->fs_fsize);
space = (uint8_t *)disk->d_sbcsum;
for (i = 0; i < blks; i += fs->fs_frag) {
size = fs->fs_bsize;
if (i + fs->fs_frag > blks)
size = (blks - i) * fs->fs_fsize;
if (bwrite(disk, fsbtodb(fs, fs->fs_csaddr + i), space, size)
== -1) {
ERROR(disk, "Failed to write sb summary information");
return (-1);
}
space += size;
}
if (all) {
for (i = 0; i < fs->fs_ncg; i++)
if (bwrite(disk, fsbtodb(fs, cgsblock(fs, i)),

View file

@ -66,6 +66,10 @@ ufs_disk_close(struct uufsd *disk)
free((char *)(uintptr_t)disk->d_name);
disk->d_name = NULL;
}
if (disk->d_sbcsum != NULL) {
free(disk->d_sbcsum);
disk->d_sbcsum = NULL;
}
return (0);
}
@ -156,6 +160,7 @@ again: if ((ret = stat(name, &st)) < 0) {
disk->d_mine = 0;
disk->d_ufs = 0;
disk->d_error = NULL;
disk->d_sbcsum = NULL;
if (oname != name) {
name = strdup(name);

View file

@ -238,7 +238,7 @@ dumpfs(const char *name)
if (fsflags & FS_UNCLEAN)
printf("unclean ");
if (fsflags & FS_DOSOFTDEP)
printf("soft-updates ");
printf("soft-updates%s ", (fsflags & FS_SUJ) ? "+journal" : "");
if (fsflags & FS_NEEDSFSCK)
printf("needs fsck run ");
if (fsflags & FS_INDEXDIRS)
@ -255,7 +255,7 @@ dumpfs(const char *name)
printf("nfsv4acls ");
fsflags &= ~(FS_UNCLEAN | FS_DOSOFTDEP | FS_NEEDSFSCK | FS_INDEXDIRS |
FS_ACLS | FS_MULTILABEL | FS_GJOURNAL | FS_FLAGS_UPDATED |
FS_NFS4ACLS);
FS_NFS4ACLS | FS_SUJ);
if (fsflags != 0)
printf("unknown flags (%#x)", fsflags);
putchar('\n');

View file

@ -7,8 +7,7 @@ LINKS+= ${BINDIR}/fsck_ffs ${BINDIR}/fsck_4.2bsd
MAN= fsck_ffs.8
MLINKS= fsck_ffs.8 fsck_ufs.8 fsck_ffs.8 fsck_4.2bsd.8
SRCS= dir.c ea.c fsutil.c inode.c main.c pass1.c pass1b.c pass2.c pass3.c \
pass4.c pass5.c setup.c utilities.c ffs_subr.c ffs_tables.c gjournal.c \
getmntopts.c
pass4.c pass5.c setup.c suj.c utilities.c gjournal.c getmntopts.c
DPADD= ${LIBUFS}
LDADD= -lufs
WARNS?= 2

View file

@ -347,10 +347,6 @@ void direrror(ino_t ino, const char *errmesg);
int dirscan(struct inodesc *);
int dofix(struct inodesc *, const char *msg);
int eascan(struct inodesc *, struct ufs2_dinode *dp);
void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t);
void ffs_fragacct(struct fs *, int, int32_t [], int);
int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t);
void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
void fileerror(ino_t cwd, ino_t ino, const char *errmesg);
int findino(struct inodesc *);
int findname(struct inodesc *);
@ -392,3 +388,4 @@ void sblock_init(void);
void setinodebuf(ino_t);
int setup(char *dev);
void gjournal_check(const char *filesys);
int suj_check(const char *filesys);

View file

@ -95,27 +95,6 @@ struct ufs2_dinode ufs2_zino;
static void putcgs(void);
/*
 * Write the cached block of inodes back to disk at the block that holds
 * 'inode'.  The caller must have an inode block cached and 'inode' must
 * lie within [d_inomin, d_inomax]; both are asserted.
 * Returns -1 if bwrite() reports -1, otherwise 0.
 */
static int
putino(struct uufsd *disk, ino_t inode)
{
	struct fs *fs = &disk->d_fs;
	caddr_t inoblock = disk->d_inoblock;
	ssize_t ret;

	assert(inoblock != NULL);
	assert(inode >= disk->d_inomin && inode <= disk->d_inomax);
	ret = bwrite(disk, fsbtodb(fs, ino_to_fsba(fs, inode)), inoblock,
	    fs->fs_bsize);
	if (ret == -1)
		return (-1);
	return (0);
}
/*
* Return cylinder group from the cache or load it if it is not in the
* cache yet.
@ -242,13 +221,11 @@ cancelcgs(void)
#endif
/*
* Open the given provider, load statistics.
* Open the given provider, load superblock.
*/
static void
getdisk(void)
opendisk(void)
{
int i;
if (disk != NULL)
return;
disk = malloc(sizeof(*disk));
@ -259,24 +236,6 @@ getdisk(void)
disk->d_error);
}
fs = &disk->d_fs;
fs->fs_csp = malloc((size_t)fs->fs_cssize);
if (fs->fs_csp == NULL)
err(1, "malloc(%zu)", (size_t)fs->fs_cssize);
bzero(fs->fs_csp, (size_t)fs->fs_cssize);
for (i = 0; i < fs->fs_cssize; i += fs->fs_bsize) {
if (bread(disk, fsbtodb(fs, fs->fs_csaddr + numfrags(fs, i)),
(void *)(((char *)fs->fs_csp) + i),
(size_t)(fs->fs_cssize - i < fs->fs_bsize ? fs->fs_cssize - i : fs->fs_bsize)) == -1) {
err(1, "bread: %s", disk->d_error);
}
}
if (fs->fs_contigsumsize > 0) {
fs->fs_maxcluster = malloc(fs->fs_ncg * sizeof(int32_t));
if (fs->fs_maxcluster == NULL)
err(1, "malloc(%zu)", fs->fs_ncg * sizeof(int32_t));
for (i = 0; i < fs->fs_ncg; i++)
fs->fs_maxcluster[i] = fs->fs_contigsumsize;
}
}
/*
@ -286,11 +245,6 @@ static void
closedisk(void)
{
free(fs->fs_csp);
if (fs->fs_contigsumsize > 0) {
free(fs->fs_maxcluster);
fs->fs_maxcluster = NULL;
}
fs->fs_clean = 1;
if (sbwrite(disk, 0) == -1)
err(1, "sbwrite(%s)", devnam);
@ -301,227 +255,6 @@ closedisk(void)
fs = NULL;
}
/*
* Write the statistics back, call closedisk().
*/
static void
putdisk(void)
{
int i;
assert(disk != NULL && fs != NULL);
for (i = 0; i < fs->fs_cssize; i += fs->fs_bsize) {
if (bwrite(disk, fsbtodb(fs, fs->fs_csaddr + numfrags(fs, i)),
(void *)(((char *)fs->fs_csp) + i),
(size_t)(fs->fs_cssize - i < fs->fs_bsize ? fs->fs_cssize - i : fs->fs_bsize)) == -1) {
err(1, "bwrite: %s", disk->d_error);
}
}
closedisk();
}
#if 0
/*
 * Free memory, close the disk, but don't write anything back.
 *
 * Compiled out by the surrounding #if 0.  Releases the summary buffer
 * (and fs_maxcluster when fs_contigsumsize is positive), closes the disk
 * with ufs_disk_close() and clears the global disk/fs pointers.
 */
static void
canceldisk(void)
{
int i;
assert(disk != NULL && fs != NULL);
free(fs->fs_csp);
/* fs_maxcluster is only freed when cluster summaries are in use. */
if (fs->fs_contigsumsize > 0)
free(fs->fs_maxcluster);
if (ufs_disk_close(disk) == -1)
err(1, "ufs_disk_close(%s)", devnam);
free(disk);
disk = NULL;
fs = NULL;
}
#endif
/*
 * Return non-zero if every fragment bit of block 'h' is set in the map
 * 'cp'.  The byte index and bit mask depend on the global fs->fs_frag
 * (8, 4, 2 or 1 fragments per block); any other value trips the assert
 * and yields 0.
 */
static int
isblock(unsigned char *cp, ufs1_daddr_t h)
{
	unsigned char mask;
	int idx;

	switch ((int)fs->fs_frag) {
	case 8:
		idx = h;
		mask = 0xff;
		break;
	case 4:
		idx = h >> 1;
		mask = 0x0f << ((h & 0x1) << 2);
		break;
	case 2:
		idx = h >> 2;
		mask = 0x03 << ((h & 0x3) << 1);
		break;
	case 1:
		idx = h >> 3;
		mask = 0x01 << (h & 0x7);
		break;
	default:
		assert(!"isblock: invalid number of fragments");
		return (0);
	}
	return ((cp[idx] & mask) == mask);
}
/*
 * Put a block into the map: set every fragment bit of block 'h' in 'cp'.
 * The byte index and bit mask depend on the global fs->fs_frag (8, 4, 2
 * or 1); any other value trips the assert and leaves the map untouched.
 */
static void
setblock(unsigned char *cp, ufs1_daddr_t h)
{
	int idx, mask;

	switch ((int)fs->fs_frag) {
	case 8:
		idx = h;
		mask = 0xff;
		break;
	case 4:
		idx = h >> 1;
		mask = 0x0f << ((h & 0x1) << 2);
		break;
	case 2:
		idx = h >> 2;
		mask = 0x03 << ((h & 0x3) << 1);
		break;
	case 1:
		idx = h >> 3;
		mask = 0x01 << (h & 0x7);
		break;
	default:
		assert(!"setblock: invalid number of fragments");
		return;
	}
	cp[idx] |= mask;
}
/*
 * Return non-zero if none of the fragment bits of block 'h' are set in
 * the map 'cp'.  The byte index and bit mask depend on the global
 * fs->fs_frag (8, 4, 2 or 1); any other value trips the assert and
 * yields 0.
 */
static int
isfreeblock(u_char *cp, ufs1_daddr_t h)
{
	int idx, mask;

	switch ((int)fs->fs_frag) {
	case 8:
		idx = h;
		mask = 0xff;
		break;
	case 4:
		idx = h >> 1;
		mask = 0x0f << ((h & 0x1) << 2);
		break;
	case 2:
		idx = h >> 2;
		mask = 0x03 << ((h & 0x3) << 1);
		break;
	case 1:
		idx = h >> 3;
		mask = 0x01 << (h & 0x7);
		break;
	default:
		assert(!"isfreeblock: invalid number of fragments");
		return (0);
	}
	return ((cp[idx] & mask) == 0);
}
/*
 * Update the frsum fields to reflect addition or deletion
 * of some frags.
 *
 * fragmap  - bit map of the fragments of one block
 * fraglist - per-size counters; fraglist[siz] is adjusted by cnt for
 *            every run of size 'siz' matched in fragmap
 * cnt      - amount added to the counter for each match (callers in this
 *            file pass -1 before and 1 after changing the map)
 *
 * Uses the global 'fs' for the fragments-per-block value and the
 * fragtbl/around/inside lookup tables, which are defined elsewhere.
 */
void
fragacct(int fragmap, int32_t fraglist[], int cnt)
{
int inblk;
int field, subfield;
int siz, pos;
/* Table entry for this map; its bits select which sizes to look for. */
inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1;
/* Shift the map by one so the match patterns can cover bit 0's edge. */
fragmap <<= 1;
for (siz = 1; siz < fs->fs_frag; siz++) {
/* Skip sizes the table entry does not flag for this map. */
if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0)
continue;
field = around[siz];
subfield = inside[siz];
/* Slide the pattern across the map looking for runs of 'siz'. */
for (pos = siz; pos <= fs->fs_frag; pos++) {
if ((fragmap & field) == subfield) {
fraglist[siz] += cnt;
/* Jump past the run that was just counted. */
pos += siz;
field <<= siz;
subfield <<= siz;
}
field <<= 1;
subfield <<= 1;
}
}
}
/*
 * Update the cluster map and cluster summary of cylinder group 'cgp'
 * after block 'blkno' has been freed.  Does nothing when the global
 * fs->fs_contigsumsize is not positive.  On return
 * fs->fs_maxcluster[cgp->cg_cgx] holds the largest cluster size that has
 * a non-zero summary count (0 if none).
 */
static void
clusteracct(struct cg *cgp, ufs1_daddr_t blkno)
{
int32_t *sump;
int32_t *lp;
u_char *freemapp, *mapp;
int i, start, end, forw, back, map, bit;
if (fs->fs_contigsumsize <= 0)
return;
freemapp = cg_clustersfree(cgp);
sump = cg_clustersum(cgp);
/*
 * Set the bit for this block in the cluster map (this version only
 * handles a block being freed).
 */
setbit(freemapp, blkno);
/*
 * Find the size of the cluster going forward.
 */
start = blkno + 1;
end = start + fs->fs_contigsumsize;
if (end >= cgp->cg_nclusterblks)
end = cgp->cg_nclusterblks;
mapp = &freemapp[start / NBBY];
map = *mapp++;
bit = 1 << (start % NBBY);
for (i = start; i < end; i++) {
if ((map & bit) == 0)
break;
/* Advance within the byte, or load the next byte at a boundary. */
if ((i & (NBBY - 1)) != (NBBY - 1)) {
bit <<= 1;
} else {
map = *mapp++;
bit = 1;
}
}
forw = i - start;
/*
 * Find the size of the cluster going backward.
 * NOTE(review): when blkno is 0, start is -1 and the shift count
 * (start % NBBY) below is negative -- confirm callers never hit this
 * or guard it.
 */
start = blkno - 1;
end = start - fs->fs_contigsumsize;
if (end < 0)
end = -1;
mapp = &freemapp[start / NBBY];
map = *mapp--;
bit = 1 << (start % NBBY);
for (i = start; i > end; i--) {
if ((map & bit) == 0)
break;
/* Step down within the byte, or load the previous byte. */
if ((i & (NBBY - 1)) != 0) {
bit >>= 1;
} else {
map = *mapp--;
bit = 1 << (NBBY - 1);
}
}
back = start - i;
/*
 * Account for old cluster and the possibly new forward and
 * back clusters.
 */
i = back + forw + 1;
/* Cluster sizes above fs_contigsumsize are counted in the top slot. */
if (i > fs->fs_contigsumsize)
i = fs->fs_contigsumsize;
sump[i]++;
if (back > 0)
sump[back]--;
if (forw > 0)
sump[forw]--;
/*
 * Update cluster summary information.
 */
lp = &sump[fs->fs_contigsumsize];
for (i = fs->fs_contigsumsize; i > 0; i--)
if (*lp-- > 0)
break;
fs->fs_maxcluster[cgp->cg_cgx] = i;
}
static void
blkfree(ufs2_daddr_t bno, long size)
{
@ -539,10 +272,10 @@ blkfree(ufs2_daddr_t bno, long size)
blksfree = cg_blksfree(cgp);
if (size == fs->fs_bsize) {
fragno = fragstoblks(fs, cgbno);
if (!isfreeblock(blksfree, fragno))
if (!ffs_isfreeblock(fs, blksfree, fragno))
assert(!"blkfree: freeing free block");
setblock(blksfree, fragno);
clusteracct(cgp, fragno);
ffs_setblock(fs, blksfree, fragno);
ffs_clusteracct(fs, cgp, fragno, 1);
cgp->cg_cs.cs_nbfree++;
fs->fs_cstotal.cs_nbfree++;
fs->fs_cs(fs, cg).cs_nbfree++;
@ -552,7 +285,7 @@ blkfree(ufs2_daddr_t bno, long size)
* decrement the counts associated with the old frags
*/
blk = blkmap(fs, blksfree, bbase);
fragacct(blk, cgp->cg_frsum, -1);
ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
/*
* deallocate the fragment
*/
@ -569,16 +302,16 @@ blkfree(ufs2_daddr_t bno, long size)
* add back in counts associated with the new frags
*/
blk = blkmap(fs, blksfree, bbase);
fragacct(blk, cgp->cg_frsum, 1);
ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
/*
* if a complete block has been reassembled, account for it
*/
fragno = fragstoblks(fs, bbase);
if (isblock(blksfree, fragno)) {
if (ffs_isblock(fs, blksfree, fragno)) {
cgp->cg_cs.cs_nffree -= fs->fs_frag;
fs->fs_cstotal.cs_nffree -= fs->fs_frag;
fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
clusteracct(cgp, fragno);
ffs_clusteracct(fs, cgp, fragno, 1);
cgp->cg_cs.cs_nbfree++;
fs->fs_cstotal.cs_nbfree++;
fs->fs_cs(fs, cg).cs_nbfree++;
@ -599,7 +332,7 @@ freeindir(ufs2_daddr_t blk, int level)
if (bread(disk, fsbtodb(fs, blk), (void *)&sblks, (size_t)fs->fs_bsize) == -1)
err(1, "bread: %s", disk->d_error);
blks = (ufs2_daddr_t *)&sblks;
for (i = 0; i < howmany(fs->fs_bsize, sizeof(ufs2_daddr_t)); i++) {
for (i = 0; i < NINDIR(fs); i++) {
if (blks[i] == 0)
break;
if (level == 0)
@ -671,7 +404,7 @@ gjournal_check(const char *filesys)
int cg, mode;
devnam = filesys;
getdisk();
opendisk();
/* Are there any unreferenced inodes in this file system? */
if (fs->fs_unrefs == 0) {
//printf("No unreferenced inodes.\n");
@ -747,7 +480,7 @@ gjournal_check(const char *filesys)
/* Zero-fill the inode. */
*dino = ufs2_zino;
/* Write the inode back. */
if (putino(disk, ino) == -1)
if (putino(disk) == -1)
err(1, "putino(cg=%d ino=%d)", cg, ino);
if (cgp->cg_unrefs == 0) {
//printf("No more unreferenced inodes in cg=%d.\n", cg);
@ -772,5 +505,5 @@ gjournal_check(const char *filesys)
/* Write back modified cylinder groups. */
putcgs();
/* Write back updated statistics and super-block. */
putdisk();
closedisk();
}

View file

@ -242,8 +242,9 @@ checkfilesys(char *filesys)
if ((fsreadfd = open(filesys, O_RDONLY)) < 0 || readsb(0) == 0)
exit(3); /* Cannot read superblock */
close(fsreadfd);
if (sblock.fs_flags & FS_NEEDSFSCK)
exit(4); /* Earlier background failed */
/* Earlier background failed or journaled */
if (sblock.fs_flags & (FS_NEEDSFSCK | FS_SUJ))
exit(4);
if ((sblock.fs_flags & FS_DOSOFTDEP) == 0)
exit(5); /* Not running soft updates */
size = MIBSIZE;
@ -299,7 +300,7 @@ checkfilesys(char *filesys)
pfatal("MOUNTED READ-ONLY, CANNOT RUN IN BACKGROUND\n");
} else if ((fsreadfd = open(filesys, O_RDONLY)) >= 0) {
if (readsb(0) != 0) {
if (sblock.fs_flags & FS_NEEDSFSCK) {
if (sblock.fs_flags & (FS_NEEDSFSCK | FS_SUJ)) {
bkgrdflag = 0;
pfatal("UNEXPECTED INCONSISTENCY, %s\n",
"CANNOT RUN IN BACKGROUND\n");
@ -384,6 +385,26 @@ checkfilesys(char *filesys)
sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize);
return (0);
}
/*
* Determine if we can and should do journal recovery.
*/
if ((sblock.fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == FS_SUJ) {
if (preen || reply("USE JOURNAL?")) {
if (suj_check(filesys) == 0) {
if (chkdoreload(mntp) == 0)
exit(0);
exit(4);
}
/* suj_check failed, fall through. */
}
printf("** Skipping journal, falling through to full fsck\n");
/*
* Write the superblock so we don't try to recover the
* journal on another pass.
*/
sblock.fs_mtime = time(NULL);
sbdirty();
}
/*
* Cleared if any questions answered no. Used to decide if

View file

@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <libufs.h>
#include "fsck.h"

4699
sbin/fsck_ffs/suj.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -396,7 +396,8 @@ const char *typename[] = {
"unregistered #13",
"whiteout",
};
int diroff;
int slot;
int
@ -404,9 +405,10 @@ scannames(struct inodesc *idesc)
{
struct direct *dirp = idesc->id_dirp;
printf("slot %d ino %d reclen %d: %s, `%.*s'\n",
slot++, dirp->d_ino, dirp->d_reclen, typename[dirp->d_type],
dirp->d_namlen, dirp->d_name);
printf("slot %d off %d ino %d reclen %d: %s, `%.*s'\n",
slot++, diroff, dirp->d_ino, dirp->d_reclen,
typename[dirp->d_type], dirp->d_namlen, dirp->d_name);
diroff += dirp->d_reclen;
return (KEEPON);
}
@ -416,6 +418,7 @@ CMDFUNCSTART(ls)
checkactivedir(); /* let it go on anyway */
slot = 0;
diroff = 0;
idesc.id_number = curinum;
idesc.id_func = scannames;
idesc.id_type = DATA;

View file

@ -52,7 +52,7 @@ static const char rcsid[] =
#include "fsck.h"
static int charsperline(void);
static int printindir(ufs2_daddr_t blk, int level, char *bufp);
static void printindir(ufs2_daddr_t blk, int level, char *bufp);
static void printblocks(ino_t inum, union dinode *dp);
char **
@ -226,7 +226,7 @@ charsperline(void)
/*
* Recursively print a list of indirect blocks.
*/
static int
static void
printindir(ufs2_daddr_t blk, int level, char *bufp)
{
struct bufarea buf, *bp;
@ -234,6 +234,9 @@ printindir(ufs2_daddr_t blk, int level, char *bufp)
int i, j, cpl, charssofar;
ufs2_daddr_t blkno;
if (blk == 0)
return;
printf("%jd (%d) =>\n", (intmax_t)blk, level);
if (level == 0) {
/* for the final indirect level, don't use the cache */
bp = &buf;
@ -251,11 +254,8 @@ printindir(ufs2_daddr_t blk, int level, char *bufp)
blkno = bp->b_un.b_indir1[i];
else
blkno = bp->b_un.b_indir2[i];
if (blkno == 0) {
if (level == 0)
putchar('\n');
return 0;
}
if (blkno == 0)
continue;
j = sprintf(tempbuf, "%jd", (intmax_t)blkno);
if (level == 0) {
charssofar += j;
@ -270,13 +270,14 @@ printindir(ufs2_daddr_t blk, int level, char *bufp)
charssofar += 2;
} else {
printf(" =>\n");
if (printindir(blkno, level - 1, bufp) == 0)
return 0;
printindir(blkno, level - 1, bufp);
printf("\n");
charssofar = 0;
}
}
if (level == 0)
putchar('\n');
return 1;
return;
}
@ -309,7 +310,7 @@ printblocks(ino_t inum, union dinode *dp)
}
}
putchar('\n');
if (DIP(dp, di_ib[0]) == 0)
if (ndb == 0)
return;
bufp = malloc((unsigned int)sblock.fs_bsize);
@ -317,8 +318,7 @@ printblocks(ino_t inum, union dinode *dp)
errx(EEXIT, "cannot allocate indirect block buffer");
printf("Indirect blocks:\n");
for (i = 0; i < NIADDR; i++)
if (printindir(DIP(dp, di_ib[i]), i, bufp) == 0)
break;
printindir(DIP(dp, di_ib[i]), i, bufp);
free(bufp);
}

View file

@ -28,7 +28,7 @@
.\" @(#)tunefs.8 8.2 (Berkeley) 12/11/93
.\" $FreeBSD$
.\"
.Dd October 21, 2009
.Dd March 6, 2010
.Dt TUNEFS 8
.Os
.Sh NAME
@ -40,6 +40,7 @@
.Op Fl a Cm enable | disable
.Op Fl e Ar maxbpg
.Op Fl f Ar avgfilesize
.Op Fl j Cm enable | disable
.Op Fl J Cm enable | disable
.Op Fl L Ar volname
.Op Fl l Cm enable | disable
@ -49,6 +50,7 @@
.Op Fl o Cm space | time
.Op Fl p
.Op Fl s Ar avgfpdir
.Op Fl S Ar size
.Ar special | filesystem
.Sh DESCRIPTION
The
@ -89,6 +91,8 @@ For file systems with exclusively large files,
this parameter should be set higher.
.It Fl f Ar avgfilesize
Specify the expected average file size.
.It Fl j Cm enable | disable
Turn on/off soft updates journaling.
.It Fl J Cm enable | disable
Turn on/off gjournal flag.
.It Fl L Ar volname
@ -136,6 +140,9 @@ obtained from the
utility.
.It Fl s Ar avgfpdir
Specify the expected number of files per directory.
.It Fl S Ar size
Specify the softdep journal size in bytes.
The minimum is 4M.
.El
.Pp
At least one of the above flags is required.

View file

@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>
#include <ufs/ufs/dir.h>
#include <ctype.h>
#include <err.h>
@ -61,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <paths.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
@ -72,16 +74,20 @@ struct uufsd disk;
void usage(void);
void printfs(void);
int journal_alloc(int64_t size);
void journal_clear(void);
void sbdirty(void);
int
main(int argc, char *argv[])
{
char *avalue, *Jvalue, *Lvalue, *lvalue, *Nvalue, *nvalue;
char *avalue, *jvalue, *Jvalue, *Lvalue, *lvalue, *Nvalue, *nvalue;
const char *special, *on;
const char *name;
int active;
int Aflag, aflag, eflag, evalue, fflag, fvalue, Jflag, Lflag, lflag;
int mflag, mvalue, Nflag, nflag, oflag, ovalue, pflag, sflag, svalue;
int Aflag, aflag, eflag, evalue, fflag, fvalue, jflag, Jflag, Lflag;
int lflag, mflag, mvalue, Nflag, nflag, oflag, ovalue, pflag, sflag;
int svalue, Sflag, Svalue;
int ch, found_arg, i;
const char *chg[2];
struct ufs_args args;
@ -89,13 +95,13 @@ main(int argc, char *argv[])
if (argc < 3)
usage();
Aflag = aflag = eflag = fflag = Jflag = Lflag = lflag = mflag = 0;
Nflag = nflag = oflag = pflag = sflag = 0;
avalue = Jvalue = Lvalue = lvalue = Nvalue = nvalue = NULL;
evalue = fvalue = mvalue = ovalue = svalue = 0;
Aflag = aflag = eflag = fflag = jflag = Jflag = Lflag = lflag = 0;
mflag = Nflag = nflag = oflag = pflag = sflag = 0;
avalue = jvalue = Jvalue = Lvalue = lvalue = Nvalue = nvalue = NULL;
evalue = fvalue = mvalue = ovalue = svalue = Svalue = 0;
active = 0;
found_arg = 0; /* At least one arg is required. */
while ((ch = getopt(argc, argv, "Aa:e:f:J:L:l:m:N:n:o:ps:")) != -1)
while ((ch = getopt(argc, argv, "Aa:e:f:j:J:L:l:m:N:n:o:ps:S:")) != -1)
switch (ch) {
case 'A':
@ -135,6 +141,18 @@ main(int argc, char *argv[])
fflag = 1;
break;
case 'j':
found_arg = 1;
name = "softdep journaled file system";
jvalue = optarg;
if (strcmp(jvalue, "enable") &&
strcmp(jvalue, "disable")) {
errx(10, "bad %s (options are %s)",
name, "`enable' or `disable'");
}
jflag = 1;
break;
case 'J':
found_arg = 1;
name = "gjournaled file system";
@ -240,6 +258,16 @@ main(int argc, char *argv[])
sflag = 1;
break;
case 'S':
found_arg = 1;
name = "Softdep Journal Size";
Svalue = atoi(optarg);
if (Svalue < SUJ_MIN)
errx(10, "%s must be >= %d (was %s)",
name, SUJ_MIN, optarg);
Sflag = 1;
break;
default:
usage();
}
@ -310,6 +338,33 @@ main(int argc, char *argv[])
sblock.fs_avgfilesize = fvalue;
}
}
if (jflag) {
name = "soft updates journaling";
if (strcmp(jvalue, "enable") == 0) {
if ((sblock.fs_flags & (FS_DOSOFTDEP | FS_SUJ)) ==
(FS_DOSOFTDEP | FS_SUJ)) {
warnx("%s remains unchanged as enabled", name);
} else if (sblock.fs_clean == 0) {
warnx("%s cannot be enabled until fsck is run",
name);
} else if (journal_alloc(Svalue) != 0) {
warnx("%s can not be enabled", name);
} else {
sblock.fs_flags |= FS_DOSOFTDEP | FS_SUJ;
warnx("%s set", name);
}
} else if (strcmp(jvalue, "disable") == 0) {
if ((~sblock.fs_flags & FS_SUJ) == FS_SUJ) {
warnx("%s remains unchanged as disabled", name);
} else {
journal_clear();
sblock.fs_flags &= ~(FS_DOSOFTDEP | FS_SUJ);
sblock.fs_sujfree = 0;
warnx("%s cleared, "
"remove .sujournal to reclaim space", name);
}
}
}
if (Jflag) {
name = "gjournal";
if (strcmp(Jvalue, "enable") == 0) {
@ -455,6 +510,500 @@ err:
err(12, "%s", special);
}
void
sbdirty(void)
{
disk.d_fs.fs_flags |= FS_UNCLEAN | FS_NEEDSFSCK;
disk.d_fs.fs_clean = 0;
}
int blocks;
static char clrbuf[MAXBSIZE];
/*
 * Allocate one block for the journal and initialize it on disk from
 * clrbuf.
 *
 * Blocks are taken from the cylinder group currently loaded in disk.d_cg.
 * When that group is exhausted it is written back and the following
 * groups are read until a suitable one is found.  On the first sweep
 * (contig != 0) only groups with more than blocks / 8 free blocks are
 * accepted, to limit fragmentation; if that sweep finds nothing, a second
 * sweep from group 0 accepts any group with a free block.  The sweep
 * state is static, so it persists across calls.
 *
 * Returns the fragment address of the new block, or -1 on failure.
 */
static ufs2_daddr_t
journal_balloc(void)
{
	ufs2_daddr_t blk;
	struct cg *cgp;
	int valid;
	static int contig = 1;

	cgp = &disk.d_cg;
	for (;;) {
		blk = cgballoc(&disk);
		if (blk > 0)
			break;
		/*
		 * If we failed to allocate a block from this cg, move to
		 * the next.
		 */
		if (cgwrite(&disk) < 0) {
			warn("Failed to write updated cg");
			return (-1);
		}
		while ((valid = cgread(&disk)) == 1) {
			/*
			 * Try to minimize fragmentation by requiring a minimum
			 * number of blocks present.
			 */
			if (cgp->cg_cs.cs_nbfree > blocks / 8)
				break;
			if (contig == 0 && cgp->cg_cs.cs_nbfree)
				break;
		}
		/* A usable cylinder group is loaded: retry the allocation. */
		if (valid == 1)
			continue;
		/*
		 * cgread() reports a read error with a negative value.  Do
		 * not retry on a cg buffer that was never loaded; that could
		 * also loop forever.
		 */
		if (valid < 0) {
			warn("Failed to read cg");
			return (-1);
		}
		/*
		 * Try once through looking only for large contiguous regions
		 * and again taking any space we can find.
		 */
		if (contig) {
			contig = 0;
			disk.d_ccg = 0;
			warnx("Journal file fragmented.");
			continue;
		}
		warnx("Failed to find sufficient free blocks for the journal");
		return (-1);
	}
	/* Overwrite the new block with clrbuf so no stale data remains. */
	if (bwrite(&disk, fsbtodb(&sblock, blk), clrbuf,
	    sblock.fs_bsize) <= 0) {
		warn("Failed to initialize new block");
		return (-1);
	}
	return (blk);
}
/*
 * Search one directory block for an entry named SUJ_FILE.
 *
 * 'blk' is the fragment address of the block and 'bytes' the number of
 * bytes of directory data to read and scan.  Returns the entry's inode
 * number if found, 0 if not found, and (ino_t)-1 if the block cannot be
 * read.
 */
static ino_t
dir_search(ufs2_daddr_t blk, int bytes)
{
	char block[MAXBSIZE];
	struct direct *dp;
	int off;

	if (bread(&disk, fsbtodb(&sblock, blk), block, bytes) <= 0) {
		warn("Failed to read dir block");
		return (-1);
	}
	off = 0;
	while (off < bytes) {
		dp = (struct direct *)&block[off];
		/* A zero record length would never advance; stop scanning. */
		if (dp->d_reclen == 0)
			break;
		/* In-use entry whose name matches SUJ_FILE exactly? */
		if (dp->d_ino != 0 &&
		    dp->d_namlen == strlen(SUJ_FILE) &&
		    bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) == 0)
			return (dp->d_ino);
		off += dp->d_reclen;
	}
	return (0);
}
/*
 * Search in the ROOTINO for the SUJ_FILE.  If it exists we can not enable
 * journaling.
 *
 * Only the direct blocks of the root directory are searched; a root
 * directory that extends into indirect blocks is rejected.
 *
 * Returns the inode number of SUJ_FILE if an entry is found, 0 if there
 * is none, and (ino_t)-1 on failure (root inode unreadable, directory too
 * large, or a directory block read error reported by dir_search()).
 */
static ino_t
journal_findfile(void)
{
	struct ufs1_dinode *dp1;
	struct ufs2_dinode *dp2;
	ino_t ino;
	int mode;
	void *ip;
	int i;

	if (getino(&disk, &ip, ROOTINO, &mode) != 0) {
		warn("Failed to get root inode");
		return (-1);
	}
	/* Same buffer viewed as either on-disk inode format. */
	dp2 = ip;
	dp1 = ip;
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) {
			warnx("ROOTINO extends beyond direct blocks.");
			return (-1);
		}
		for (i = 0; i < NDADDR; i++) {
			if (dp1->di_db[i] == 0)
				break;
			if ((ino = dir_search(dp1->di_db[i],
			    sblksize(&sblock, (off_t)dp1->di_size, i))) != 0)
				return (ino);
		}
	} else {
		/*
		 * UFS2 inode: the size must be read through dp2.  Reading it
		 * through the UFS1 layout would check an unrelated field.
		 */
		if ((off_t)dp2->di_size >= lblktosize(&sblock, NDADDR)) {
			warnx("ROOTINO extends beyond direct blocks.");
			return (-1);
		}
		for (i = 0; i < NDADDR; i++) {
			if (dp2->di_db[i] == 0)
				break;
			if ((ino = dir_search(dp2->di_db[i],
			    sblksize(&sblock, (off_t)dp2->di_size, i))) != 0)
				return (ino);
		}
	}

	return (0);
}
/*
 * Add a directory entry for the journal inode 'ino', named SUJ_FILE, to
 * directory block 'blk' at byte offset 'off'.
 *
 * The full block is read, everything from 'off' to the end is zeroed,
 * the journal entry is placed at 'off' with a record length of
 * DIRBLKSIZ, and each following DIRBLKSIZ chunk is set up as one empty
 * entry.  The block is then written back.
 *
 * Returns 0 on success, -1 if the block cannot be read or written.
 */
static int
dir_insert(ufs2_daddr_t blk, off_t off, ino_t ino)
{
	struct direct *dp;
	char block[MAXBSIZE];

	if (bread(&disk, fsbtodb(&sblock, blk), block, sblock.fs_bsize) <= 0) {
		warn("Failed to read dir block");
		return (-1);
	}

	/* Everything past the existing contents starts out zeroed. */
	bzero(&block[off], sblock.fs_bsize - off);

	/* The journal entry occupies the first DIRBLKSIZ chunk. */
	dp = (struct direct *)&block[off];
	dp->d_ino = ino;
	dp->d_reclen = DIRBLKSIZ;
	dp->d_type = DT_REG;
	dp->d_namlen = strlen(SUJ_FILE);
	bcopy(SUJ_FILE, &dp->d_name, strlen(SUJ_FILE));

	/* Every remaining chunk becomes a single empty entry. */
	off += DIRBLKSIZ;
	while (off < sblock.fs_bsize) {
		dp = (struct direct *)&block[off];
		dp->d_ino = 0;
		dp->d_reclen = DIRBLKSIZ;
		dp->d_type = DT_UNKNOWN;
		off += DIRBLKSIZ;
	}

	if (bwrite(&disk, fsbtodb(&sblock, blk), block, sblock.fs_bsize) <= 0) {
		warn("Failed to write dir block");
		return (-1);
	}
	return (0);
}
/*
 * Extend a directory by copying the first 'size' bytes of block 'blk'
 * into the new block 'nblk', then inserting the journal entry for 'ino'
 * into 'nblk' at offset 'size' via dir_insert().
 *
 * Returns -1 if the copy fails, otherwise the result of dir_insert().
 */
static int
dir_extend(ufs2_daddr_t blk, ufs2_daddr_t nblk, off_t size, ino_t ino)
{
	char copybuf[MAXBSIZE];

	/* Pull the existing directory data out of the old location... */
	if (bread(&disk, fsbtodb(&sblock, blk), copybuf, size) <= 0) {
		warn("Failed to read dir block");
		return (-1);
	}
	/* ...and place it at the start of the new block. */
	if (bwrite(&disk, fsbtodb(&sblock, nblk), copybuf, size) <= 0) {
		warn("Failed to write dir block");
		return (-1);
	}

	return (dir_insert(nblk, size, ino));
}
/*
 * Insert the journal file entry for inode 'ino' into the ROOTINO
 * directory.  The directory is always grown by (or into) a freshly
 * allocated full block.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
journal_insertfile(ino_t ino)
{
	struct ufs1_dinode *dp1;
	struct ufs2_dinode *dp2;
	void *ip;
	ufs2_daddr_t newblk;
	ufs2_daddr_t oldblk;
	ufs_lbn_t lbn;
	int size;
	int mode;
	int off;
	int rv;

	if (getino(&disk, &ip, ROOTINO, &mode) != 0) {
		warn("Failed to get root inode");
		sbdirty();
		return (-1);
	}
	/* Same buffer viewed as either on-disk inode format. */
	dp1 = ip;
	dp2 = ip;
	oldblk = 0;
	size = 0;
	newblk = journal_balloc();
	if (newblk <= 0)
		return (-1);
	/*
	 * For simplicity's sake we always extend the ROOTINO into a new
	 * directory block rather than searching for space and inserting
	 * into an existing block.  However, if the root directory ends in
	 * a partial block (frags), its contents are moved into the new
	 * full block and the old frags are freed afterwards.
	 */
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		lbn = lblkno(&sblock, dp1->di_size);
		off = blkoff(&sblock, dp1->di_size);
		oldblk = dp1->di_db[lbn];
		size = sblksize(&sblock, (off_t)dp1->di_size, lbn);
	} else {
		lbn = lblkno(&sblock, dp2->di_size);
		off = blkoff(&sblock, dp2->di_size);
		oldblk = dp2->di_db[lbn];
		size = sblksize(&sblock, (off_t)dp2->di_size, lbn);
	}
	if (off == 0) {
		/* Size is block aligned: nothing to copy or free. */
		oldblk = 0;
		rv = dir_insert(newblk, 0, ino);
	} else {
		/* Carry the partial last block over into the new block. */
		rv = dir_extend(oldblk, newblk, off, ino);
	}
	if (rv == -1)
		return (-1);

	/* Point the root inode at the new block and grow it to match. */
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		dp1->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
		dp1->di_db[lbn] = newblk;
		dp1->di_size = lblktosize(&sblock, lbn+1);
	} else {
		dp2->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
		dp2->di_db[lbn] = newblk;
		dp2->di_size = lblktosize(&sblock, lbn+1);
	}
	if (putino(&disk) < 0) {
		warn("Failed to write root inode");
		return (-1);
	}
	if (cgwrite(&disk) < 0) {
		warn("Failed to write updated cg");
		sbdirty();
		return (-1);
	}
	/* Release the old frags only after the new block is in place. */
	if (oldblk != 0 && cgbfree(&disk, oldblk, size) < 0) {
		warn("Failed to write cg");
		return (-1);
	}

	return (0);
}
/*
 * Fill the indirect block 'blk' with newly allocated block pointers and
 * write it to disk.  At 'level' 0 every pointer accounts for one data
 * block and decrements '*resid'; at higher levels every pointer is itself
 * an indirect block that is filled recursively.  Filling stops when the
 * block is full or '*resid' reaches zero.
 *
 * Returns the number of blocks allocated below 'blk' ('blk' itself is not
 * counted), or -1 on an allocation or write failure.
 */
static int
indir_fill(ufs2_daddr_t blk, int level, int *resid)
{
	char indirbuf[MAXBSIZE];
	ufs1_daddr_t *ptrs1;
	ufs2_daddr_t *ptrs2;
	ufs2_daddr_t child;
	int total;
	int sub;
	int slot;

	bzero(indirbuf, sizeof(indirbuf));
	/* Both views alias the same buffer; fs_magic selects the one used. */
	ptrs1 = (ufs1_daddr_t *)indirbuf;
	ptrs2 = (void *)ptrs1;
	total = 0;
	slot = 0;
	while (slot < NINDIR(&sblock) && *resid != 0) {
		child = journal_balloc();
		if (child <= 0)
			return (-1);
		total++;
		if (sblock.fs_magic == FS_UFS1_MAGIC)
			ptrs1[slot] = child;
		else
			ptrs2[slot] = child;
		if (level == 0) {
			/* A data block: one less to allocate. */
			(*resid)--;
		} else {
			sub = indir_fill(child, level - 1, resid);
			if (sub <= 0)
				return (-1);
			total += sub;
		}
		slot++;
	}
	if (bwrite(&disk, fsbtodb(&sblock, blk), indirbuf,
	    sblock.fs_bsize) <= 0) {
		warn("Failed to write indirect");
		return (-1);
	}
	return (total);
}
/*
 * Clear the flag bits on the journal inode so the journal file can be
 * removed.  Failures are reported with a warning; nothing is returned.
 */
void
journal_clear(void)
{
	struct ufs1_dinode *dp1;
	struct ufs2_dinode *dp2;
	ino_t ino;
	int mode;
	void *ip;

	ino = journal_findfile();
	if (ino == (ino_t)-1 || ino == 0) {
		warnx("Journal file does not exist");
		return;
	}
	/* ino_t is not an int; convert explicitly to match the format. */
	printf("Clearing journal flags from inode %lu\n", (unsigned long)ino);
	if (getino(&disk, &ip, ino, &mode) != 0) {
		warn("Failed to get journal inode");
		return;
	}
	/* Same buffer viewed as either on-disk inode format. */
	dp2 = ip;
	dp1 = ip;
	if (sblock.fs_magic == FS_UFS1_MAGIC)
		dp1->di_flags = 0;
	else
		dp2->di_flags = 0;
	if (putino(&disk) < 0) {
		warn("Failed to write journal inode");
		return;
	}
}
/*
 * Create the journal file: allocate an inode and 'size' bytes worth of
 * full blocks for it, then link it into the root directory.  A 'size' of
 * zero asks for a default derived from the filesystem size.  The size is
 * rounded up to a whole number of filesystem blocks so that the size
 * recorded in the inode matches the blocks actually allocated.
 *
 * Returns 0 on success and -1 on failure (including the case where the
 * journal file already exists).
 */
int
journal_alloc(int64_t size)
{
	struct ufs1_dinode *dp1;
	struct ufs2_dinode *dp2;
	ufs2_daddr_t blk;
	void *ip;
	struct cg *cgp;
	int resid;
	ino_t ino;
	int blks;
	int mode;
	int i;

	cgp = &disk.d_cg;
	/*
	 * If the journal file exists we can't allocate it.
	 */
	ino = journal_findfile();
	if (ino == (ino_t)-1)
		return (-1);
	if (ino > 0) {
		warnx("Journal file %s already exists, please remove.",
		    SUJ_FILE);
		return (-1);
	}
	/*
	 * If the user didn't supply a size pick one based on the filesystem
	 * size constrained with hardcoded MIN and MAX values.  We opt for
	 * 1/1024th of the filesystem up to MAX but not exceeding one CG and
	 * not less than the MIN.
	 */
	if (size == 0) {
		size = (sblock.fs_size * sblock.fs_bsize) / 1024;
		size = MIN(SUJ_MAX, size);
		if (size / sblock.fs_fsize > sblock.fs_fpg)
			size = sblock.fs_fpg * sblock.fs_fsize;
		size = MAX(SUJ_MIN, size);
	}
	/*
	 * Only whole blocks are allocated below, so make the recorded file
	 * size a multiple of the block size as well.
	 */
	size = ((size + sblock.fs_bsize - 1) / sblock.fs_bsize) *
	    sblock.fs_bsize;
	resid = blocks = size / sblock.fs_bsize;
	if (sblock.fs_cstotal.cs_nbfree < blocks) {
		/* No system call failed here, so errno is not meaningful. */
		warnx("Insufficient free space for %jd byte journal", size);
		return (-1);
	}
	/*
	 * Walk the cylinder groups until one can supply an inode for the
	 * journal file; the blocks come from journal_balloc().
	 */
	while (cgread(&disk) == 1) {
		if (cgp->cg_cs.cs_nifree == 0)
			continue;
		ino = cgialloc(&disk);
		if (ino <= 0)
			break;
		printf("Using inode %lu in cg %d for %jd byte journal\n",
		    (unsigned long)ino, cgp->cg_cgx, size);
		if (getino(&disk, &ip, ino, &mode) != 0) {
			warn("Failed to get allocated inode");
			sbdirty();
			goto out;
		}
		/*
		 * We leave fields unrelated to the number of allocated
		 * blocks and size uninitialized.  This causes legacy
		 * fsck implementations to clear the inode.
		 */
		dp2 = ip;
		dp1 = ip;
		if (sblock.fs_magic == FS_UFS1_MAGIC) {
			bzero(dp1, sizeof(*dp1));
			dp1->di_size = size;
			dp1->di_mode = IFREG | IREAD;
			dp1->di_nlink = 1;
			dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
		} else {
			bzero(dp2, sizeof(*dp2));
			dp2->di_size = size;
			dp2->di_mode = IFREG | IREAD;
			dp2->di_nlink = 1;
			dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
		}
		/* Direct blocks first. */
		for (i = 0; i < NDADDR && resid; i++, resid--) {
			blk = journal_balloc();
			if (blk <= 0)
				goto out;
			if (sblock.fs_magic == FS_UFS1_MAGIC) {
				dp1->di_db[i] = blk;
				dp1->di_blocks++;
			} else {
				dp2->di_db[i] = blk;
				dp2->di_blocks++;
			}
		}
		/* Whatever is left goes through the indirect blocks. */
		for (i = 0; i < NIADDR && resid; i++) {
			blk = journal_balloc();
			if (blk <= 0)
				goto out;
			/* +1 accounts for the indirect block itself. */
			blks = indir_fill(blk, i, &resid) + 1;
			if (blks <= 0) {
				sbdirty();
				goto out;
			}
			if (sblock.fs_magic == FS_UFS1_MAGIC) {
				dp1->di_ib[i] = blk;
				dp1->di_blocks += blks;
			} else {
				dp2->di_ib[i] = blk;
				dp2->di_blocks += blks;
			}
		}
		/* di_blocks was counted in fs blocks; convert the unit. */
		if (sblock.fs_magic == FS_UFS1_MAGIC)
			dp1->di_blocks *= sblock.fs_bsize / disk.d_bsize;
		else
			dp2->di_blocks *= sblock.fs_bsize / disk.d_bsize;
		if (putino(&disk) < 0) {
			warn("Failed to write inode");
			sbdirty();
			return (-1);
		}
		if (cgwrite(&disk) < 0) {
			warn("Failed to write updated cg");
			sbdirty();
			return (-1);
		}
		if (journal_insertfile(ino) < 0) {
			sbdirty();
			return (-1);
		}
		sblock.fs_sujfree = 0;
		return (0);
	}
	warnx("Insufficient free space for the journal.");
out:
	return (-1);
}
void
usage(void)
{
@ -477,6 +1026,8 @@ printfs(void)
(sblock.fs_flags & FS_MULTILABEL)? "enabled" : "disabled");
warnx("soft updates: (-n) %s",
(sblock.fs_flags & FS_DOSOFTDEP)? "enabled" : "disabled");
warnx("soft update journaling: (-j) %s",
(sblock.fs_flags & FS_SUJ)? "enabled" : "disabled");
warnx("gjournal: (-J) %s",
(sblock.fs_flags & FS_GJOURNAL)? "enabled" : "disabled");
warnx("maximum blocks per file in a cylinder group: (-e) %d",

View file

@ -215,6 +215,14 @@ SYSCTL_LONG(_vfs, OID_AUTO, notbufdflashes, CTLFLAG_RD, &notbufdflashes, 0,
*/
static int bd_request;
/*
* Request for the buf daemon to write more buffers than is indicated by
* lodirtybuf. This may be necessary to push out excess dependencies or
* defragment the address space where a simple count of the number of dirty
* buffers is insufficient to characterize the demand for flushing them.
*/
static int bd_speedupreq;
/*
* This lock synchronizes access to bd_request.
*/
@ -467,12 +475,20 @@ bd_wakeup(int dirtybuflevel)
* bd_speedup - speedup the buffer cache flushing code
*/
static __inline
void
bd_speedup(void)
{
int needwake;
bd_wakeup(1);
mtx_lock(&bdlock);
needwake = 0;
if (bd_speedupreq == 0 || bd_request == 0)
needwake = 1;
bd_speedupreq = 1;
bd_request = 1;
if (needwake)
wakeup(&bd_request);
mtx_unlock(&bdlock);
}
/*
@ -2120,6 +2136,7 @@ buf_do_flush(struct vnode *vp)
static void
buf_daemon()
{
int lodirtysave;
/*
* This process needs to be suspended prior to shutdown sync.
@ -2137,7 +2154,11 @@ buf_daemon()
mtx_unlock(&bdlock);
kproc_suspend_check(bufdaemonproc);
lodirtysave = lodirtybuffers;
if (bd_speedupreq) {
lodirtybuffers = numdirtybuffers / 2;
bd_speedupreq = 0;
}
/*
* Do the flush. Limit the amount of in-transit I/O we
* allow to build up, otherwise we would completely saturate
@ -2149,6 +2170,7 @@ buf_daemon()
break;
uio_yield();
}
lodirtybuffers = lodirtysave;
/*
* Only clear bd_request if we have reached our low water

View file

@ -2815,6 +2815,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
MNT_FLAG(MNT_FORCE);
MNT_FLAG(MNT_SNAPSHOT);
MNT_FLAG(MNT_BYFSID);
MNT_FLAG(MNT_SOFTDEP);
#undef MNT_FLAG
if (flags != 0) {
if (buf[0] != '\0')

View file

@ -215,7 +215,7 @@ struct buf {
#define B_DIRTY 0x00200000 /* Needs writing later (in EXT2FS). */
#define B_RELBUF 0x00400000 /* Release VMIO buffer. */
#define B_00800000 0x00800000 /* Available flag. */
#define B_01000000 0x01000000 /* Available flag. */
#define B_NOCOPY 0x01000000 /* Don't copy-on-write this buf. */
#define B_NEEDSGIANT 0x02000000 /* Buffer's vnode needs giant. */
#define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */
#define B_MANAGED 0x08000000 /* Managed by FS. */
@ -493,6 +493,7 @@ int bufwait(struct buf *);
int bufwrite(struct buf *);
void bufdone(struct buf *);
void bufdone_finish(struct buf *);
void bd_speedup(void);
int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
struct ucred *, long, int, struct buf **);

View file

@ -275,7 +275,8 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \
MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \
MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \
MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | MNT_NFS4ACLS)
MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \
MNT_NFS4ACLS)
/* Mask of flags that can be updated. */
#define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \
@ -324,6 +325,7 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
#define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */
#define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */
#define MNTK_SHARED_WRITES 0x00000080 /* Allow shared locking for writes */
#define MNTK_SUJ 0x00000100 /* Softdep journaling enabled */
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
#define MNTK_SUSPEND 0x08000000 /* request write suspension */

View file

@ -94,24 +94,24 @@ __FBSDID("$FreeBSD$");
#include <ufs/ffs/ffs_extern.h>
typedef ufs2_daddr_t allocfcn_t(struct inode *ip, u_int cg, ufs2_daddr_t bpref,
int size);
int size, int rsize);
static ufs2_daddr_t ffs_alloccg(struct inode *, u_int, ufs2_daddr_t, int);
static ufs2_daddr_t ffs_alloccg(struct inode *, u_int, ufs2_daddr_t, int, int);
static ufs2_daddr_t
ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t);
ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t, int);
#ifdef INVARIANTS
static int ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int);
static void ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *,
ufs1_daddr_t, int);
static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int,
int);
static ino_t ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, u_int, ufs2_daddr_t,
int, int);
static void ffs_fserr(struct fs *, ino_t, char *);
static ufs2_daddr_t ffs_hashalloc
(struct inode *, u_int, ufs2_daddr_t, int, allocfcn_t *);
static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_int, ufs2_daddr_t, int);
(struct inode *, u_int, ufs2_daddr_t, int, int, allocfcn_t *);
static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_int, ufs2_daddr_t, int,
int);
static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
static int ffs_reallocblks_ufs2(struct vop_reallocblks_args *);
@ -188,7 +188,7 @@ retry:
cg = ino_to_cg(fs, ip->i_number);
else
cg = dtog(fs, bpref);
bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
bno = ffs_hashalloc(ip, cg, bpref, size, size, ffs_alloccg);
if (bno > 0) {
delta = btodb(size);
if (ip->i_flag & IN_SPACECOUNTED) {
@ -387,16 +387,12 @@ retry:
panic("ffs_realloccg: bad optim");
/* NOTREACHED */
}
bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
bno = ffs_hashalloc(ip, cg, bpref, request, nsize, ffs_alloccg);
if (bno > 0) {
bp->b_blkno = fsbtodb(fs, bno);
if (!DOINGSOFTDEP(vp))
ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize,
ip->i_number);
if (nsize < request)
ffs_blkfree(ump, fs, ip->i_devvp,
bno + numfrags(fs, nsize),
(long)(request - nsize), ip->i_number);
ip->i_number, NULL);
delta = btodb(nsize - osize);
if (ip->i_flag & IN_SPACECOUNTED) {
UFS_LOCK(ump);
@ -487,6 +483,14 @@ ffs_reallocblks(ap)
if (doreallocblks == 0)
return (ENOSPC);
/*
* We can't wait in softdep prealloc as it may fsync and recurse
* here. Instead we simply fail to reallocate blocks if this
* rare condition arises.
*/
if (DOINGSOFTDEP(ap->a_vp))
if (softdep_prealloc(ap->a_vp, MNT_NOWAIT) != 0)
return (ENOSPC);
if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1)
return (ffs_reallocblks_ufs1(ap));
return (ffs_reallocblks_ufs2(ap));
@ -587,7 +591,7 @@ ffs_reallocblks_ufs1(ap)
* Search the block map looking for an allocation of the desired size.
*/
if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
len, ffs_clusteralloc)) == 0) {
len, len, ffs_clusteralloc)) == 0) {
UFS_UNLOCK(ump);
goto fail;
}
@ -673,7 +677,7 @@ ffs_reallocblks_ufs1(ap)
if (!DOINGSOFTDEP(vp))
ffs_blkfree(ump, fs, ip->i_devvp,
dbtofsb(fs, buflist->bs_children[i]->b_blkno),
fs->fs_bsize, ip->i_number);
fs->fs_bsize, ip->i_number, NULL);
buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
if (!ffs_checkblk(ip,
@ -795,7 +799,7 @@ ffs_reallocblks_ufs2(ap)
* Search the block map looking for an allocation of the desired size.
*/
if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
len, ffs_clusteralloc)) == 0) {
len, len, ffs_clusteralloc)) == 0) {
UFS_UNLOCK(ump);
goto fail;
}
@ -881,7 +885,7 @@ ffs_reallocblks_ufs2(ap)
if (!DOINGSOFTDEP(vp))
ffs_blkfree(ump, fs, ip->i_devvp,
dbtofsb(fs, buflist->bs_children[i]->b_blkno),
fs->fs_bsize, ip->i_number);
fs->fs_bsize, ip->i_number, NULL);
buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef INVARIANTS
if (!ffs_checkblk(ip,
@ -969,7 +973,7 @@ ffs_valloc(pvp, mode, cred, vpp)
if (fs->fs_contigdirs[cg] > 0)
fs->fs_contigdirs[cg]--;
}
ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode,
ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0,
(allocfcn_t *)ffs_nodealloccg);
if (ino == 0)
goto noinodes;
@ -1278,11 +1282,12 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap)
*/
/*VARARGS5*/
static ufs2_daddr_t
ffs_hashalloc(ip, cg, pref, size, allocator)
ffs_hashalloc(ip, cg, pref, size, rsize, allocator)
struct inode *ip;
u_int cg;
ufs2_daddr_t pref;
int size; /* size for data blocks, mode for inodes */
int size; /* Search size for data blocks, mode for inodes */
int rsize; /* Real allocated size. */
allocfcn_t *allocator;
{
struct fs *fs;
@ -1298,7 +1303,7 @@ ffs_hashalloc(ip, cg, pref, size, allocator)
/*
* 1: preferred cylinder group
*/
result = (*allocator)(ip, cg, pref, size);
result = (*allocator)(ip, cg, pref, size, rsize);
if (result)
return (result);
/*
@ -1308,7 +1313,7 @@ ffs_hashalloc(ip, cg, pref, size, allocator)
cg += i;
if (cg >= fs->fs_ncg)
cg -= fs->fs_ncg;
result = (*allocator)(ip, cg, 0, size);
result = (*allocator)(ip, cg, 0, size, rsize);
if (result)
return (result);
}
@ -1319,7 +1324,7 @@ ffs_hashalloc(ip, cg, pref, size, allocator)
*/
cg = (icg + 2) % fs->fs_ncg;
for (i = 2; i < fs->fs_ncg; i++) {
result = (*allocator)(ip, cg, 0, size);
result = (*allocator)(ip, cg, 0, size, rsize);
if (result)
return (result);
cg++;
@ -1401,7 +1406,8 @@ ffs_fragextend(ip, cg, bprev, osize, nsize)
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
if (DOINGSOFTDEP(ITOV(ip)))
softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev);
softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev,
frags, numfrags(fs, osize));
bdwrite(bp);
return (bprev);
@ -1419,11 +1425,12 @@ fail:
* and if it is, allocate it.
*/
static ufs2_daddr_t
ffs_alloccg(ip, cg, bpref, size)
ffs_alloccg(ip, cg, bpref, size, rsize)
struct inode *ip;
u_int cg;
ufs2_daddr_t bpref;
int size;
int rsize;
{
struct fs *fs;
struct cg *cgp;
@ -1451,7 +1458,7 @@ ffs_alloccg(ip, cg, bpref, size)
cgp->cg_old_time = cgp->cg_time = time_second;
if (size == fs->fs_bsize) {
UFS_LOCK(ump);
blkno = ffs_alloccgblk(ip, bp, bpref);
blkno = ffs_alloccgblk(ip, bp, bpref, rsize);
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
bdwrite(bp);
@ -1475,21 +1482,14 @@ ffs_alloccg(ip, cg, bpref, size)
if (cgp->cg_cs.cs_nbfree == 0)
goto fail;
UFS_LOCK(ump);
blkno = ffs_alloccgblk(ip, bp, bpref);
bno = dtogd(fs, blkno);
for (i = frags; i < fs->fs_frag; i++)
setbit(blksfree, bno + i);
i = fs->fs_frag - frags;
cgp->cg_cs.cs_nffree += i;
fs->fs_cstotal.cs_nffree += i;
fs->fs_cs(fs, cg).cs_nffree += i;
fs->fs_fmod = 1;
cgp->cg_frsum[i]++;
blkno = ffs_alloccgblk(ip, bp, bpref, rsize);
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
bdwrite(bp);
return (blkno);
}
KASSERT(size == rsize,
("ffs_alloccg: size(%d) != rsize(%d)", size, rsize));
bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
if (bno < 0)
goto fail;
@ -1507,7 +1507,7 @@ ffs_alloccg(ip, cg, bpref, size)
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
if (DOINGSOFTDEP(ITOV(ip)))
softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, frags, 0);
bdwrite(bp);
return (blkno);
@ -1529,10 +1529,11 @@ fail:
* blocks may be fragmented by the routine that allocates them.
*/
static ufs2_daddr_t
ffs_alloccgblk(ip, bp, bpref)
ffs_alloccgblk(ip, bp, bpref, size)
struct inode *ip;
struct buf *bp;
ufs2_daddr_t bpref;
int size;
{
struct fs *fs;
struct cg *cgp;
@ -1540,6 +1541,7 @@ ffs_alloccgblk(ip, bp, bpref)
ufs1_daddr_t bno;
ufs2_daddr_t blkno;
u_int8_t *blksfree;
int i;
fs = ip->i_fs;
ump = ip->i_ump;
@ -1567,16 +1569,32 @@ ffs_alloccgblk(ip, bp, bpref)
gotit:
blkno = fragstoblks(fs, bno);
ffs_clrblock(fs, blksfree, (long)blkno);
ffs_clusteracct(ump, fs, cgp, blkno, -1);
ffs_clusteracct(fs, cgp, blkno, -1);
cgp->cg_cs.cs_nbfree--;
fs->fs_cstotal.cs_nbfree--;
fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
fs->fs_fmod = 1;
blkno = cgbase(fs, cgp->cg_cgx) + bno;
/*
* If the caller didn't want the whole block free the frags here.
*/
size = numfrags(fs, size);
if (size != fs->fs_frag) {
bno = dtogd(fs, blkno);
for (i = size; i < fs->fs_frag; i++)
setbit(blksfree, bno + i);
i = fs->fs_frag - size;
cgp->cg_cs.cs_nffree += i;
fs->fs_cstotal.cs_nffree += i;
fs->fs_cs(fs, cgp->cg_cgx).cs_nffree += i;
fs->fs_fmod = 1;
cgp->cg_frsum[i]++;
}
/* XXX Fixme. */
UFS_UNLOCK(ump);
if (DOINGSOFTDEP(ITOV(ip)))
softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno);
softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno,
size, 0);
UFS_LOCK(ump);
return (blkno);
}
@ -1589,11 +1607,12 @@ gotit:
* take the first one that we find following bpref.
*/
static ufs2_daddr_t
ffs_clusteralloc(ip, cg, bpref, len)
ffs_clusteralloc(ip, cg, bpref, len, unused)
struct inode *ip;
u_int cg;
ufs2_daddr_t bpref;
int len;
int unused;
{
struct fs *fs;
struct cg *cgp;
@ -1689,7 +1708,7 @@ ffs_clusteralloc(ip, cg, bpref, len)
len = blkstofrags(fs, len);
UFS_LOCK(ump);
for (i = 0; i < len; i += fs->fs_frag)
if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
if (ffs_alloccgblk(ip, bp, bno + i, fs->fs_bsize) != bno + i)
panic("ffs_clusteralloc: lost block");
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
@ -1713,11 +1732,12 @@ fail:
* inode in the specified cylinder group.
*/
static ufs2_daddr_t
ffs_nodealloccg(ip, cg, ipref, mode)
ffs_nodealloccg(ip, cg, ipref, mode, unused)
struct inode *ip;
u_int cg;
ufs2_daddr_t ipref;
int mode;
int unused;
{
struct fs *fs;
struct cg *cgp;
@ -1819,28 +1839,6 @@ gotit:
return ((ino_t)(cg * fs->fs_ipg + ipref));
}
/*
 * Test whether every fragment bit belonging to block 'h' is clear in the
 * map 'cp'.  The number of fragments per block (fs->fs_frag) decides how
 * many blocks share one map byte.  Returns non-zero when all of the
 * block's bits are zero; panics on an unsupported fs_frag value.
 */
static int
ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
{
	int frags;
	int shift;

	frags = (int)fs->fs_frag;
	if (frags == 8) {
		/* One block per byte. */
		return (cp[h] == 0);
	}
	if (frags == 4) {
		/* Two blocks per byte, four bits each. */
		shift = (h & 0x1) << 2;
		return ((cp[h >> 1] & (0x0f << shift)) == 0);
	}
	if (frags == 2) {
		/* Four blocks per byte, two bits each. */
		shift = (h & 0x3) << 1;
		return ((cp[h >> 2] & (0x03 << shift)) == 0);
	}
	if (frags == 1) {
		/* Eight blocks per byte, one bit each. */
		shift = h & 0x7;
		return ((cp[h >> 3] & (0x01 << shift)) == 0);
	}
	panic("ffs_isfreeblock");
	return (0);
}
/*
* Free a block or fragment.
*
@ -1849,14 +1847,16 @@ ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
* block reassembly is checked.
*/
void
ffs_blkfree(ump, fs, devvp, bno, size, inum)
ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd)
struct ufsmount *ump;
struct fs *fs;
struct vnode *devvp;
ufs2_daddr_t bno;
long size;
ino_t inum;
struct workhead *dephd;
{
struct mount *mp;
struct cg *cgp;
struct buf *bp;
ufs1_daddr_t fragno, cgbno;
@ -1923,7 +1923,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum)
panic("ffs_blkfree: freeing free block");
}
ffs_setblock(fs, blksfree, fragno);
ffs_clusteracct(ump, fs, cgp, fragno, 1);
ffs_clusteracct(fs, cgp, fragno, 1);
cgp->cg_cs.cs_nbfree++;
fs->fs_cstotal.cs_nbfree++;
fs->fs_cs(fs, cg).cs_nbfree++;
@ -1963,7 +1963,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum)
cgp->cg_cs.cs_nffree -= fs->fs_frag;
fs->fs_cstotal.cs_nffree -= fs->fs_frag;
fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
ffs_clusteracct(ump, fs, cgp, fragno, 1);
ffs_clusteracct(fs, cgp, fragno, 1);
cgp->cg_cs.cs_nbfree++;
fs->fs_cstotal.cs_nbfree++;
fs->fs_cs(fs, cg).cs_nbfree++;
@ -1972,6 +1972,10 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum)
fs->fs_fmod = 1;
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
mp = UFSTOVFS(ump);
if (mp->mnt_flag & MNT_SOFTDEP && devvp->v_type != VREG)
softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
numfrags(fs, size), dephd);
bdwrite(bp);
}
@ -2042,7 +2046,8 @@ ffs_vfree(pvp, ino, mode)
return (0);
}
ip = VTOI(pvp);
return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode));
return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode,
NULL));
}
/*
@ -2050,12 +2055,13 @@ ffs_vfree(pvp, ino, mode)
* The specified inode is placed back in the free map.
*/
int
ffs_freefile(ump, fs, devvp, ino, mode)
ffs_freefile(ump, fs, devvp, ino, mode, wkhd)
struct ufsmount *ump;
struct fs *fs;
struct vnode *devvp;
ino_t ino;
int mode;
struct workhead *wkhd;
{
struct cg *cgp;
struct buf *bp;
@ -2112,6 +2118,9 @@ ffs_freefile(ump, fs, devvp, ino, mode)
fs->fs_fmod = 1;
ACTIVECLEAR(fs, cg);
UFS_UNLOCK(ump);
if (UFSTOVFS(ump)->mnt_flag & MNT_SOFTDEP && devvp->v_type != VREG)
softdep_setup_inofree(UFSTOVFS(ump), bp,
ino + cg * fs->fs_ipg, wkhd);
bdwrite(bp);
return (0);
}
@ -2225,101 +2234,6 @@ ffs_mapsearch(fs, cgp, bpref, allocsiz)
return (-1);
}
/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 *
 * The bit for 'blkno' in the cylinder group's cluster free map is set or
 * cleared, the runs of set bits directly after and before it are
 * measured (each limited to fs_contigsumsize blocks), the per-length
 * counters in the cluster summary array are adjusted, and finally
 * fs_maxcluster[] for this cylinder group is recomputed.
 *
 * Does nothing when fs_contigsumsize is not positive.  The UFS mutex of
 * 'ump' must be held by the caller (asserted below).
 */
void
ffs_clusteracct(ump, fs, cgp, blkno, cnt)
struct ufsmount *ump;
struct fs *fs;
struct cg *cgp;
ufs1_daddr_t blkno;
int cnt;
{
int32_t *sump;
int32_t *lp;
u_char *freemapp, *mapp;
int i, start, end, forw, back, map, bit;
mtx_assert(UFS_MTX(ump), MA_OWNED);
/* No cluster summary is kept when fs_contigsumsize is not positive. */
if (fs->fs_contigsumsize <= 0)
return;
freemapp = cg_clustersfree(cgp);
sump = cg_clustersum(cgp);
/*
 * Allocate or clear the actual block.
 */
if (cnt > 0)
setbit(freemapp, blkno);
else
clrbit(freemapp, blkno);
/*
 * Find the size of the cluster going forward.
 */
start = blkno + 1;
end = start + fs->fs_contigsumsize;
/* Do not scan past the last block tracked by the map. */
if (end >= cgp->cg_nclusterblks)
end = cgp->cg_nclusterblks;
mapp = &freemapp[start / NBBY];
map = *mapp++;
bit = 1 << (start % NBBY);
for (i = start; i < end; i++) {
/* A clear bit ends the run. */
if ((map & bit) == 0)
break;
/* Move to the next bit, loading the next map byte at a byte boundary. */
if ((i & (NBBY - 1)) != (NBBY - 1)) {
bit <<= 1;
} else {
map = *mapp++;
bit = 1;
}
}
/* Number of consecutive set bits found after blkno. */
forw = i - start;
/*
 * Find the size of the cluster going backward.
 */
start = blkno - 1;
end = start - fs->fs_contigsumsize;
/* Do not scan below bit 0. */
if (end < 0)
end = -1;
mapp = &freemapp[start / NBBY];
map = *mapp--;
bit = 1 << (start % NBBY);
for (i = start; i > end; i--) {
/* A clear bit ends the run. */
if ((map & bit) == 0)
break;
/* Move to the previous bit, loading the previous map byte at a byte boundary. */
if ((i & (NBBY - 1)) != 0) {
bit >>= 1;
} else {
map = *mapp--;
bit = 1 << (NBBY - 1);
}
}
/* Number of consecutive set bits found before blkno. */
back = start - i;
/*
 * Account for old cluster and the possibly new forward and
 * back clusters.
 */
i = back + forw + 1;
/* Runs longer than fs_contigsumsize share the last counter. */
if (i > fs->fs_contigsumsize)
i = fs->fs_contigsumsize;
sump[i] += cnt;
if (back > 0)
sump[back] -= cnt;
if (forw > 0)
sump[forw] -= cnt;
/*
 * Update cluster summary information.
 */
/* Largest index whose counter is positive, or 0 if there is none. */
lp = &sump[fs->fs_contigsumsize];
for (i = fs->fs_contigsumsize; i > 0; i--)
if (*lp-- > 0)
break;
fs->fs_maxcluster[cgp->cg_cgx] = i;
}
/*
* Fserr prints the name of a filesystem with an error diagnostic.
*
@ -2540,7 +2454,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
#endif /* DEBUG */
while (cmd.size > 0) {
if ((error = ffs_freefile(ump, fs, ump->um_devvp,
cmd.value, filetype)))
cmd.value, filetype, NULL)))
break;
cmd.size -= 1;
cmd.value += 1;
@ -2568,7 +2482,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
if (blksize > blkcnt)
blksize = blkcnt;
ffs_blkfree(ump, fs, ump->um_devvp, blkno,
blksize * fs->fs_fsize, ROOTINO);
blksize * fs->fs_fsize, ROOTINO, NULL);
blkno += blksize;
blkcnt -= blksize;
blksize = fs->fs_frag;

View file

@ -120,6 +120,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
if (lbn < 0)
return (EFBIG);
if (DOINGSOFTDEP(vp))
softdep_prealloc(vp, MNT_WAIT);
/*
* If the next write will extend the file into a new block,
* and the file is currently composed of a fragment
@ -418,6 +420,8 @@ fail:
* slow, running out of disk space is not expected to be a common
* occurence. The error return from fsync is ignored as we already
* have an error to return to the user.
*
* XXX Still have to journal the free below
*/
(void) ffs_syncvnode(vp, MNT_WAIT);
for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
@ -473,7 +477,7 @@ fail:
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
ip->i_number);
ip->i_number, NULL);
}
return (error);
}
@ -515,6 +519,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
if (lbn < 0)
return (EFBIG);
if (DOINGSOFTDEP(vp))
softdep_prealloc(vp, MNT_WAIT);
/*
* Check for allocating external data.
*/
@ -930,6 +937,8 @@ fail:
* slow, running out of disk space is not expected to be a common
* occurence. The error return from fsync is ignored as we already
* have an error to return to the user.
*
* XXX Still have to journal the free below
*/
(void) ffs_syncvnode(vp, MNT_WAIT);
for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
@ -985,7 +994,7 @@ fail:
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
ip->i_number);
ip->i_number, NULL);
}
return (error);
}

View file

@ -47,6 +47,7 @@ struct ucred;
struct vnode;
struct vop_fsync_args;
struct vop_reallocblks_args;
struct workhead;
int ffs_alloc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, int, int,
struct ucred *, ufs2_daddr_t *);
@ -56,20 +57,23 @@ int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size,
struct ucred *a_cred, int a_flags, struct buf **a_bpp);
int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **);
void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *,
ufs2_daddr_t, long, ino_t);
ufs2_daddr_t, long, ino_t, struct workhead *);
ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *);
ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *);
int ffs_checkfreefile(struct fs *, struct vnode *, ino_t);
void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t);
void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int);
void ffs_bdflush(struct bufobj *, struct buf *);
int ffs_copyonwrite(struct vnode *, struct buf *);
int ffs_flushfiles(struct mount *, int, struct thread *);
void ffs_fragacct(struct fs *, int, int32_t [], int);
int ffs_freefile(struct ufsmount *, struct fs *, struct vnode *, ino_t,
int);
int, struct workhead *);
int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t);
int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t);
void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t);
int ffs_mountroot(void);
void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
int ffs_reallocblks(struct vop_reallocblks_args *);
int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
ufs2_daddr_t, int, int, int, struct ucred *, struct buf **);
@ -103,12 +107,14 @@ extern struct vop_vector ffs_fifoops2;
int softdep_check_suspend(struct mount *, struct vnode *,
int, int, int, int);
int softdep_complete_trunc(struct vnode *, void *);
void softdep_get_depcounts(struct mount *, int *, int *);
void softdep_initialize(void);
void softdep_uninitialize(void);
int softdep_mount(struct vnode *, struct mount *, struct fs *,
struct ucred *);
void softdep_move_dependencies(struct buf *, struct buf *);
void softdep_unmount(struct mount *);
int softdep_move_dependencies(struct buf *, struct buf *);
int softdep_flushworklist(struct mount *, int *, struct thread *);
int softdep_flushfiles(struct mount *, int, struct thread *);
void softdep_update_inodeblock(struct inode *, struct buf *, int);
@ -117,7 +123,8 @@ void softdep_freefile(struct vnode *, ino_t, int);
int softdep_request_cleanup(struct fs *, struct vnode *);
void softdep_setup_freeblocks(struct inode *, off_t, int);
void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t);
void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t);
void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t,
int, int);
void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t,
ufs2_daddr_t, long, long, struct buf *);
void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t,
@ -126,11 +133,20 @@ void softdep_setup_allocindir_meta(struct buf *, struct inode *,
struct buf *, int, ufs2_daddr_t);
void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t,
struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *);
void softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int,
struct workhead *);
void softdep_setup_inofree(struct mount *, struct buf *, ino_t,
struct workhead *);
void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *);
void *softdep_setup_trunc(struct vnode *vp, off_t length, int flags);
void softdep_fsync_mountdev(struct vnode *);
int softdep_sync_metadata(struct vnode *);
int softdep_process_worklist(struct mount *, int);
int softdep_fsync(struct vnode *);
int softdep_waitidle(struct mount *);
int softdep_prealloc(struct vnode *, int);
int softdep_journal_lookup(struct mount *, struct vnode **);
int ffs_rdonly(struct inode *);

View file

@ -92,15 +92,6 @@ ffs_update(vp, waitfor)
fs = ip->i_fs;
if (fs->fs_ronly)
return (0);
/*
* Ensure that uid and gid are correct. This is a temporary
* fix until fsck has been changed to do the update.
*/
if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */
fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */
ip->i_din1->di_ouid = ip->i_uid; /* XXX */
ip->i_din1->di_ogid = ip->i_gid; /* XXX */
} /* XXX */
error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
(int)fs->fs_bsize, NOCRED, &bp);
if (error) {
@ -160,6 +151,7 @@ ffs_truncate(vp, length, flags, cred, td)
ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
ufs2_daddr_t count, blocksreleased = 0, datablocks;
void *cookie;
struct bufobj *bo;
struct fs *fs;
struct buf *bp;
@ -173,11 +165,14 @@ ffs_truncate(vp, length, flags, cred, td)
fs = ip->i_fs;
ump = ip->i_ump;
bo = &vp->v_bufobj;
cookie = NULL;
ASSERT_VOP_LOCKED(vp, "ffs_truncate");
if (length < 0)
return (EINVAL);
if (length > fs->fs_maxfilesize)
return (EFBIG);
/*
* Historically clients did not have to specify which data
* they were truncating. So, if not specified, we assume
@ -192,6 +187,7 @@ ffs_truncate(vp, length, flags, cred, td)
* (e.g., the file is being unlinked), then pick it off with
* soft updates below.
*/
allerror = 0;
needextclean = 0;
softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp);
extblocks = 0;
@ -212,6 +208,8 @@ ffs_truncate(vp, length, flags, cred, td)
panic("ffs_truncate: partial trunc of extdata");
if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
return (error);
if (DOINGSUJ(vp))
cookie = softdep_setup_trunc(vp, length, flags);
osize = ip->i_din2->di_extsize;
ip->i_din2->di_blocks -= extblocks;
#ifdef QUOTA
@ -227,19 +225,19 @@ ffs_truncate(vp, length, flags, cred, td)
}
ip->i_flag |= IN_CHANGE;
if ((error = ffs_update(vp, 1)))
return (error);
goto out;
for (i = 0; i < NXADDR; i++) {
if (oldblks[i] == 0)
continue;
ffs_blkfree(ump, fs, ip->i_devvp, oldblks[i],
sblksize(fs, osize, i), ip->i_number);
sblksize(fs, osize, i), ip->i_number, NULL);
}
}
}
if ((flags & IO_NORMAL) == 0)
return (0);
if (length > fs->fs_maxfilesize)
return (EFBIG);
if ((flags & IO_NORMAL) == 0) {
error = 0;
goto out;
}
if (vp->v_type == VLNK &&
(ip->i_size < vp->v_mount->mnt_maxsymlinklen ||
datablocks == 0)) {
@ -253,24 +251,52 @@ ffs_truncate(vp, length, flags, cred, td)
ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (needextclean)
softdep_setup_freeblocks(ip, length, IO_EXT);
return (ffs_update(vp, 1));
error = ffs_update(vp, 1);
goto out;
}
if (ip->i_size == length) {
ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (needextclean)
softdep_setup_freeblocks(ip, length, IO_EXT);
return (ffs_update(vp, 0));
error = ffs_update(vp, 0);
goto out;
}
if (fs->fs_ronly)
panic("ffs_truncate: read-only filesystem");
#ifdef QUOTA
error = getinoquota(ip);
if (error)
return (error);
goto out;
#endif
if ((ip->i_flags & SF_SNAPSHOT) != 0)
ffs_snapremove(vp);
vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
osize = ip->i_size;
/*
* Lengthen the size of the file. We must ensure that the
* last byte of the file is allocated. Since the smallest
* value of osize is 0, length will be at least 1.
*/
if (osize < length) {
vnode_pager_setsize(vp, length);
flags |= BA_CLRBUF;
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error) {
vnode_pager_setsize(vp, osize);
goto out;
}
ip->i_size = length;
DIP_SET(ip, i_size, length);
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
error = ffs_update(vp, 1);
goto out;
}
if (DOINGSOFTDEP(vp)) {
if (length > 0 || softdepslowdown) {
/*
@ -283,11 +309,18 @@ ffs_truncate(vp, length, flags, cred, td)
* so that it will have no data structures left.
*/
if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
return (error);
goto out;
UFS_LOCK(ump);
if (ip->i_flag & IN_SPACECOUNTED)
fs->fs_pendingblocks -= datablocks;
UFS_UNLOCK(ump);
/*
* We have to journal the truncation before we change
* any blocks so we don't leave the file partially
* truncated.
*/
if (DOINGSUJ(vp) && cookie == NULL)
cookie = softdep_setup_trunc(vp, length, flags);
} else {
#ifdef QUOTA
(void) chkdq(ip, -datablocks, NOCRED, 0);
@ -301,34 +334,10 @@ ffs_truncate(vp, length, flags, cred, td)
OFF_TO_IDX(lblktosize(fs, -extblocks)));
vnode_pager_setsize(vp, 0);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
return (ffs_update(vp, 0));
error = ffs_update(vp, 0);
goto out;
}
}
osize = ip->i_size;
/*
* Lengthen the size of the file. We must ensure that the
* last byte of the file is allocated. Since the smallest
* value of osize is 0, length will be at least 1.
*/
if (osize < length) {
vnode_pager_setsize(vp, length);
flags |= BA_CLRBUF;
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error) {
vnode_pager_setsize(vp, osize);
return (error);
}
ip->i_size = length;
DIP_SET(ip, i_size, length);
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
return (ffs_update(vp, 1));
}
/*
* Shorten the size of the file. If the file is not being
* truncated to a block boundary, the contents of the
@ -345,9 +354,8 @@ ffs_truncate(vp, length, flags, cred, td)
lbn = lblkno(fs, length);
flags |= BA_CLRBUF;
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error) {
return (error);
}
if (error)
goto out;
/*
* When we are doing soft updates and the UFS_BALLOC
* above fills in a direct block hole with a full sized
@ -359,7 +367,7 @@ ffs_truncate(vp, length, flags, cred, td)
if (DOINGSOFTDEP(vp) && lbn < NDADDR &&
fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
(error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
return (error);
goto out;
ip->i_size = length;
DIP_SET(ip, i_size, length);
size = blksize(fs, ip, lbn);
@ -405,7 +413,13 @@ ffs_truncate(vp, length, flags, cred, td)
DIP_SET(ip, i_db[i], 0);
}
ip->i_flag |= IN_CHANGE | IN_UPDATE;
allerror = ffs_update(vp, 1);
/*
* When doing softupdate journaling we must preserve the size along
* with the old pointers until they are freed or we might not
* know how many fragments remain.
*/
if (!DOINGSUJ(vp))
allerror = ffs_update(vp, 1);
/*
* Having written the new inode to disk, save its new configuration
@ -445,7 +459,7 @@ ffs_truncate(vp, length, flags, cred, td)
if (lastiblock[level] < 0) {
DIP_SET(ip, i_ib[level], 0);
ffs_blkfree(ump, fs, ip->i_devvp, bn,
fs->fs_bsize, ip->i_number);
fs->fs_bsize, ip->i_number, NULL);
blocksreleased += nblocks;
}
}
@ -464,7 +478,8 @@ ffs_truncate(vp, length, flags, cred, td)
continue;
DIP_SET(ip, i_db[i], 0);
bsize = blksize(fs, ip, i);
ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number);
ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number,
NULL);
blocksreleased += btodb(bsize);
}
if (lastblock < 0)
@ -496,7 +511,7 @@ ffs_truncate(vp, length, flags, cred, td)
*/
bn += numfrags(fs, newspace);
ffs_blkfree(ump, fs, ip->i_devvp, bn,
oldspace - newspace, ip->i_number);
oldspace - newspace, ip->i_number, NULL);
blocksreleased += btodb(oldspace - newspace);
}
}
@ -528,7 +543,14 @@ done:
#ifdef QUOTA
(void) chkdq(ip, -blocksreleased, NOCRED, 0);
#endif
return (allerror);
error = allerror;
out:
if (cookie) {
allerror = softdep_complete_trunc(vp, cookie);
if (allerror != 0 && error == 0)
error = allerror;
}
return (error);
}
/*
@ -638,7 +660,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
blocksreleased += blkcount;
}
ffs_blkfree(ip->i_ump, fs, ip->i_devvp, nb, fs->fs_bsize,
ip->i_number);
ip->i_number, NULL);
blocksreleased += nblocks;
}

View file

@ -142,7 +142,7 @@ MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF);
static int cgaccount(int, struct vnode *, struct buf *, int);
static int expunge_ufs1(struct vnode *, struct inode *, struct fs *,
int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
ufs_lbn_t, int), int);
ufs_lbn_t, int), int, int);
static int indiracct_ufs1(struct vnode *, struct vnode *, int,
ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *,
@ -155,7 +155,7 @@ static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
struct fs *, ufs_lbn_t, int);
static int expunge_ufs2(struct vnode *, struct inode *, struct fs *,
int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
ufs_lbn_t, int), int);
ufs_lbn_t, int), int, int);
static int indiracct_ufs2(struct vnode *, struct vnode *, int,
ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *,
int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *,
@ -582,7 +582,8 @@ loop:
len = fragroundup(fs, blkoff(fs, xp->i_size));
if (len != 0 && len < fs->fs_bsize) {
ffs_blkfree(ump, copy_fs, vp,
DIP(xp, i_db[loc]), len, xp->i_number);
DIP(xp, i_db[loc]), len, xp->i_number,
NULL);
blkno = DIP(xp, i_db[loc]);
DIP_SET(xp, i_db[loc], 0);
}
@ -590,15 +591,15 @@ loop:
snaplistsize += 1;
if (xp->i_ump->um_fstype == UFS1)
error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
BLK_NOCOPY);
BLK_NOCOPY, 1);
else
error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
BLK_NOCOPY);
BLK_NOCOPY, 1);
if (blkno)
DIP_SET(xp, i_db[loc], blkno);
if (!error)
error = ffs_freefile(ump, copy_fs, vp, xp->i_number,
xp->i_mode);
xp->i_mode, NULL);
VOP_UNLOCK(xvp, 0);
vdrop(xvp);
if (error) {
@ -611,6 +612,26 @@ loop:
MNT_ILOCK(mp);
}
MNT_IUNLOCK(mp);
/*
* Erase the journal file from the snapshot.
*/
if (fs->fs_flags & FS_SUJ) {
error = softdep_journal_lookup(mp, &xvp);
if (error) {
free(copy_fs->fs_csp, M_UFSMNT);
bawrite(sbp);
sbp = NULL;
goto out1;
}
xp = VTOI(xvp);
if (xp->i_ump->um_fstype == UFS1)
error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1,
BLK_NOCOPY, 0);
else
error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2,
BLK_NOCOPY, 0);
vput(xvp);
}
/*
* Acquire a lock on the snapdata structure, creating it if necessary.
*/
@ -691,16 +712,16 @@ out1:
break;
if (xp->i_ump->um_fstype == UFS1)
error = expunge_ufs1(vp, xp, fs, snapacct_ufs1,
BLK_SNAP);
BLK_SNAP, 0);
else
error = expunge_ufs2(vp, xp, fs, snapacct_ufs2,
BLK_SNAP);
BLK_SNAP, 0);
if (error == 0 && xp->i_effnlink == 0) {
error = ffs_freefile(ump,
copy_fs,
vp,
xp->i_number,
xp->i_mode);
xp->i_mode, NULL);
}
if (error) {
fs->fs_snapinum[snaploc] = 0;
@ -719,9 +740,11 @@ out1:
* the list of allocated blocks in i_snapblklist.
*/
if (ip->i_ump->um_fstype == UFS1)
error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP);
error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1,
BLK_SNAP, 0);
else
error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP);
error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2,
BLK_SNAP, 0);
if (error) {
fs->fs_snapinum[snaploc] = 0;
free(snapblklist, M_UFSMNT);
@ -954,13 +977,14 @@ cgaccount(cg, vp, nbp, passno)
* is reproduced once each for UFS1 and UFS2.
*/
static int
expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
struct vnode *snapvp;
struct inode *cancelip;
struct fs *fs;
int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *,
struct fs *, ufs_lbn_t, int);
int expungetype;
int clearmode;
{
int i, error, indiroff;
ufs_lbn_t lbn, rlbn;
@ -1005,7 +1029,7 @@ expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype)
*/
dip = (struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, cancelip->i_number);
if (expungetype == BLK_NOCOPY || cancelip->i_effnlink == 0)
if (clearmode || cancelip->i_effnlink == 0)
dip->di_mode = 0;
dip->di_size = 0;
dip->di_blocks = 0;
@ -1220,7 +1244,7 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
*ip->i_snapblklist++ = lblkno;
if (blkno == BLK_SNAP)
blkno = blkstofrags(fs, lblkno);
ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum);
ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL);
}
return (0);
}
@ -1234,13 +1258,14 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
* is reproduced once each for UFS1 and UFS2.
*/
static int
expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
struct vnode *snapvp;
struct inode *cancelip;
struct fs *fs;
int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
struct fs *, ufs_lbn_t, int);
int expungetype;
int clearmode;
{
int i, error, indiroff;
ufs_lbn_t lbn, rlbn;
@ -1285,7 +1310,7 @@ expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype)
*/
dip = (struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, cancelip->i_number);
if (expungetype == BLK_NOCOPY)
if (clearmode || cancelip->i_effnlink == 0)
dip->di_mode = 0;
dip->di_size = 0;
dip->di_blocks = 0;
@ -1500,7 +1525,7 @@ mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype)
*ip->i_snapblklist++ = lblkno;
if (blkno == BLK_SNAP)
blkno = blkstofrags(fs, lblkno);
ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum);
ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL);
}
return (0);
}
@ -1657,6 +1682,13 @@ ffs_snapremove(vp)
ip->i_flags &= ~SF_SNAPSHOT;
DIP_SET(ip, i_flags, ip->i_flags);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
/*
* The dirtied indirects must be written out before
* softdep_setup_freeblocks() is called. Otherwise indir_trunc()
* may find indirect pointers using the magic BLK_* values.
*/
if (DOINGSOFTDEP(vp))
ffs_syncvnode(vp, MNT_WAIT);
#ifdef QUOTA
/*
* Reenable disk quotas for ex-snapshot file.

File diff suppressed because it is too large Load diff

View file

@ -37,7 +37,6 @@ __FBSDID("$FreeBSD$");
#ifndef _KERNEL
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>
#include "fsck.h"
#else
#include <sys/systm.h>
#include <sys/lock.h>
@ -223,7 +222,38 @@ ffs_isblock(fs, cp, h)
mask = 0x01 << (h & 0x7);
return ((cp[h >> 3] & mask) == mask);
default:
#ifdef _KERNEL
panic("ffs_isblock");
#endif
break;
}
return (0);
}
/*
 * Test whether every fragment bit belonging to block h is clear in the
 * map cp.
 *
 * The number of bits per block is fs->fs_frag (8, 4, 2 or 1), so a map
 * byte holds 1, 2, 4 or 8 blocks respectively.  Returns 1 when all of the
 * block's bits are zero and 0 otherwise.  An unsupported fs_frag value
 * panics in the kernel and yields 0 elsewhere.
 *
 * NOTE(review): the historical one-line description was "check if a block
 * is free"; what the code actually tests is "all bits clear" -- confirm
 * the intended meaning against the callers and the map being passed.
 */
int
ffs_isfreeblock(fs, cp, h)
	struct fs *fs;
	u_char *cp;
	ufs1_daddr_t h;
{
	int nfrag, mask;

	nfrag = (int)fs->fs_frag;
	if (nfrag == 8)
		return (cp[h] == 0);
	if (nfrag == 4) {
		/* Two blocks per byte: pick the low or high nibble. */
		mask = 0x0f << ((h & 0x1) << 2);
		return ((cp[h >> 1] & mask) == 0);
	}
	if (nfrag == 2) {
		/* Four blocks per byte: pick one bit pair. */
		mask = 0x03 << ((h & 0x3) << 1);
		return ((cp[h >> 2] & mask) == 0);
	}
	if (nfrag == 1) {
		/* Eight blocks per byte: pick a single bit. */
		mask = 0x01 << (h & 0x7);
		return ((cp[h >> 3] & mask) == 0);
	}
#ifdef _KERNEL
	panic("ffs_isfreeblock");
#endif
	return (0);
}
@ -252,7 +282,10 @@ ffs_clrblock(fs, cp, h)
cp[h >> 3] &= ~(0x01 << (h & 0x7));
return;
default:
#ifdef _KERNEL
panic("ffs_clrblock");
#endif
break;
}
}
@ -281,6 +314,101 @@ ffs_setblock(fs, cp, h)
cp[h >> 3] |= (0x01 << (h & 0x7));
return;
default:
#ifdef _KERNEL
panic("ffs_setblock");
#endif
break;
}
}
/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 *
 * The bit for blkno in the cylinder group's cluster free map is set
 * (cnt > 0) or cleared, the runs of set bits immediately after and
 * before blkno are measured (each capped at fs_contigsumsize), and the
 * cluster summary counts are adjusted for the run that was joined or
 * split.  Finally fs_maxcluster for this cylinder group is set to the
 * largest summary index that still has a positive count.
 *
 * Nothing is done when fs_contigsumsize is not positive.
 */
void
ffs_clusteracct(fs, cgp, blkno, cnt)
	struct fs *fs;
	struct cg *cgp;
	ufs1_daddr_t blkno;
	int cnt;
{
	int32_t *sump;
	int32_t *lp;
	u_char *freemapp;
	int i, start, end, forw, back;

	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp);
	sump = cg_clustersum(cgp);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 *
	 * Each bit is tested by direct indexing, so only map bytes that
	 * hold a bit in [start, end) are ever read.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= cgp->cg_nclusterblks)
		end = cgp->cg_nclusterblks;
	for (i = start; i < end; i++)
		if ((freemapp[i / NBBY] & (1 << (i % NBBY))) == 0)
			break;
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
	 *
	 * When blkno is 0, start is -1 and the loop body never runs, so
	 * neither a negative shift count nor a negative map index is ever
	 * evaluated; back is then 0.
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	for (i = start; i > end; i--)
		if ((freemapp[i / NBBY] & (1 << (i % NBBY))) == 0)
			break;
	back = start - i;
	/*
	 * Account for old cluster and the possibly new forward and
	 * back clusters.
	 */
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	sump[i] += cnt;
	if (back > 0)
		sump[back] -= cnt;
	if (forw > 0)
		sump[forw] -= cnt;
	/*
	 * Update cluster summary information.
	 */
	lp = &sump[fs->fs_contigsumsize];
	for (i = fs->fs_contigsumsize; i > 0; i--)
		if (*lp-- > 0)
			break;
	fs->fs_maxcluster[cgp->cg_cgx] = i;
}

View file

@ -79,7 +79,6 @@ static int ffs_reload(struct mount *, struct thread *);
static int ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void ffs_oldfscompat_read(struct fs *, struct ufsmount *,
ufs2_daddr_t);
static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
static void ffs_ifree(struct ufsmount *ump, struct inode *ip);
static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
@ -299,7 +298,8 @@ ffs_mount(struct mount *mp)
if (fs->fs_clean == 0) {
fs->fs_flags |= FS_UNCLEAN;
if ((mp->mnt_flag & MNT_FORCE) ||
((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
((fs->fs_flags &
(FS_SUJ | FS_NEEDSFSCK)) == 0 &&
(fs->fs_flags & FS_DOSOFTDEP))) {
printf("WARNING: %s was not %s\n",
fs->fs_fsmnt, "properly dismounted");
@ -307,6 +307,9 @@ ffs_mount(struct mount *mp)
printf(
"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
fs->fs_fsmnt);
if (fs->fs_flags & FS_SUJ)
printf(
"WARNING: Forced mount will invalidated journal contents\n");
return (EPERM);
}
}
@ -330,17 +333,18 @@ ffs_mount(struct mount *mp)
MNT_ILOCK(mp);
mp->mnt_flag &= ~MNT_RDONLY;
MNT_IUNLOCK(mp);
fs->fs_clean = 0;
if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
vn_finished_write(mp);
return (error);
}
fs->fs_mtime = time_second;
/* check to see if we need to start softdep */
if ((fs->fs_flags & FS_DOSOFTDEP) &&
(error = softdep_mount(devvp, mp, fs, td->td_ucred))){
vn_finished_write(mp);
return (error);
}
fs->fs_clean = 0;
if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
vn_finished_write(mp);
return (error);
}
if (fs->fs_snapinum[0] != 0)
ffs_snapshot_mount(mp);
vn_finished_write(mp);
@ -705,7 +709,7 @@ ffs_mountfs(devvp, mp, td)
if (fs->fs_clean == 0) {
fs->fs_flags |= FS_UNCLEAN;
if (ronly || (mp->mnt_flag & MNT_FORCE) ||
((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
(fs->fs_flags & FS_DOSOFTDEP))) {
printf(
"WARNING: %s was not properly dismounted\n",
@ -714,6 +718,9 @@ ffs_mountfs(devvp, mp, td)
printf(
"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
fs->fs_fsmnt);
if (fs->fs_flags & FS_SUJ)
printf(
"WARNING: Forced mount will invalidated journal contents\n");
error = EPERM;
goto out;
}
@ -896,6 +903,7 @@ ffs_mountfs(devvp, mp, td)
*/
bzero(fs->fs_fsmnt, MAXMNTLEN);
strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
mp->mnt_stat.f_iosize = fs->fs_bsize;
if( mp->mnt_flag & MNT_ROOTFS) {
/*
@ -907,6 +915,7 @@ ffs_mountfs(devvp, mp, td)
}
if (ronly == 0) {
fs->fs_mtime = time_second;
if ((fs->fs_flags & FS_DOSOFTDEP) &&
(error = softdep_mount(devvp, mp, fs, cred)) != 0) {
free(fs->fs_csp, M_UFSMNT);
@ -937,7 +946,6 @@ ffs_mountfs(devvp, mp, td)
* This would all happen while the filesystem was busy/not
* available, so would effectively be "atomic".
*/
mp->mnt_stat.f_iosize = fs->fs_bsize;
(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
@ -1037,7 +1045,7 @@ ffs_oldfscompat_read(fs, ump, sblockloc)
* XXX - Parts get retired eventually.
* Unfortunately new bits get added.
*/
static void
void
ffs_oldfscompat_write(fs, ump)
struct fs *fs;
struct ufsmount *ump;
@ -1132,6 +1140,7 @@ ffs_unmount(mp, mntflags)
fs->fs_pendinginodes = 0;
}
UFS_UNLOCK(ump);
softdep_unmount(mp);
if (fs->fs_ronly == 0) {
fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
error = ffs_sbupdate(ump, MNT_WAIT, 0);
@ -1573,16 +1582,6 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
DIP_SET(ip, i_gen, ip->i_gen);
}
}
/*
* Ensure that uid and gid are correct. This is a temporary
* fix until fsck has been changed to do the update.
*/
if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */
fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */
ip->i_uid = ip->i_din1->di_ouid; /* XXX */
ip->i_gid = ip->i_din1->di_ogid; /* XXX */
} /* XXX */
#ifdef MAC
if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
/*
@ -1726,6 +1725,8 @@ ffs_sbupdate(mp, waitfor, suspended)
}
fs->fs_fmod = 0;
fs->fs_time = time_second;
if (fs->fs_flags & FS_DOSOFTDEP)
softdep_setup_sbupdate(mp, (struct fs *)bp->b_data, bp);
bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
if (suspended)
@ -1867,9 +1868,6 @@ ffs_bufwrite(struct buf *bp)
}
BO_UNLOCK(bp->b_bufobj);
/* Mark the buffer clean */
bundirty(bp);
/*
* If this buffer is marked for background writing and we
* do not have to wait for it, make a copy and write the
@ -1910,9 +1908,16 @@ ffs_bufwrite(struct buf *bp)
newbp->b_flags &= ~B_INVAL;
#ifdef SOFTUPDATES
/* move over the dependencies */
if (!LIST_EMPTY(&bp->b_dep))
softdep_move_dependencies(bp, newbp);
/*
* Move over the dependencies. If there are rollbacks,
* leave the parent buffer dirtied as it will need to
* be written again.
*/
if (LIST_EMPTY(&bp->b_dep) ||
softdep_move_dependencies(bp, newbp) == 0)
bundirty(bp);
#else
bundirty(bp);
#endif
/*
@ -1925,7 +1930,10 @@ ffs_bufwrite(struct buf *bp)
*/
bqrelse(bp);
bp = newbp;
}
} else
/* Mark the buffer clean */
bundirty(bp);
/* Let the normal bufwrite do the rest for us */
normal_write:
@ -1939,6 +1947,7 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
struct vnode *vp;
int error;
struct buf *tbp;
int nocopy;
vp = bo->__bo_vnode;
if (bp->b_iocmd == BIO_WRITE) {
@ -1946,8 +1955,9 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
(bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
panic("ffs_geom_strategy: bad I/O");
bp->b_flags &= ~B_VALIDSUSPWRT;
if ((vp->v_vflag & VV_COPYONWRITE) &&
nocopy = bp->b_flags & B_NOCOPY;
bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
vp->v_rdev->si_snapdata != NULL) {
if ((bp->b_flags & B_CLUSTER) != 0) {
runningbufwakeup(bp);

View file

@ -225,6 +225,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
wait = (waitfor == MNT_WAIT);
lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
bo = &vp->v_bufobj;
ip->i_flag &= ~IN_NEEDSYNC;
/*
* Flush all dirty buffers associated with a vnode.

View file

@ -340,7 +340,9 @@ struct fs {
u_int32_t fs_avgfilesize; /* expected average file size */
u_int32_t fs_avgfpdir; /* expected # of files per directory */
int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */
int32_t fs_sparecon32[26]; /* reserved for future constants */
ufs_time_t fs_mtime; /* Last mount or fsck time. */
int32_t fs_sujfree; /* SUJ free list */
int32_t fs_sparecon32[23]; /* reserved for future constants */
int32_t fs_flags; /* see FS_ flags below */
int32_t fs_contigsumsize; /* size of cluster summary array */
int32_t fs_maxsymlinklen; /* max length of an internal symlink */
@ -408,12 +410,13 @@ CTASSERT(sizeof(struct fs) == 1376);
#define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */
#define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */
#define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */
#define FS_INDEXDIRS 0x0008 /* kernel supports indexed directories */
#define FS_SUJ 0x0008 /* Filesystem using softupdate journal */
#define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */
#define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */
#define FS_GJOURNAL 0x0040 /* gjournaled file system */
#define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */
#define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */
#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */
/*
* Macros to access bits in the fs_active array.
@ -603,7 +606,31 @@ struct cg {
? (fs)->fs_bsize \
: (fragroundup(fs, blkoff(fs, (size)))))
/*
 * Indirect lbns are aligned on NDADDR addresses where single indirects
 * are the negated address of the lowest lbn reachable, double indirects
 * are this lbn - 1 and triple indirects are this lbn - 2.  This yields
 * an unusual bit order to determine level.
 *
 * Returns 0 for any non-negative lbn.  For a negative lbn the low two
 * bits select the result: 0 -> 0, 3 -> 1, 2 -> 2, and 1 -> -1 (no level
 * is assigned to that pattern).
 */
static inline int
lbn_level(ufs_lbn_t lbn)
{
	/* Indexed by (lbn & 0x3) of a negative lbn. */
	static const int level_by_lowbits[4] = { 0, -1, 2, 1 };

	if (lbn >= 0)
		return 0;
	return (level_by_lowbits[lbn & 0x3]);
}
/*
* Number of inodes in a secondary storage block/fragment.
*/
@ -615,6 +642,108 @@ struct cg {
*/
#define NINDIR(fs) ((fs)->fs_nindir)
/*
 * Softdep journal record format.
 *
 * The JOP_* values name the kinds of journal record; presumably they are
 * what is stored in each record's leading 'op' field -- confirm against
 * the journal writer.  JREC_SIZE is the fixed size, in bytes, of every
 * record and of the segment header.
 */
#define JOP_ADDREF 1 /* Add a reference to an inode. */
#define JOP_REMREF 2 /* Remove a reference from an inode. */
#define JOP_NEWBLK 3 /* Allocate a block. */
#define JOP_FREEBLK 4 /* Free a block or a tree of blocks. */
#define JOP_MVREF 5 /* Move a reference from one offset to another. */
#define JOP_TRUNC 6 /* Partial truncation record. */
#define JREC_SIZE 32 /* Record and segment header size. */
#define SUJ_MIN (4 * 1024 * 1024) /* Minimum journal size */
#define SUJ_MAX (32 * 1024 * 1024) /* Maximum journal size */
#define SUJ_FILE ".sujournal" /* Journal file name */
/*
 * Segment record header.  There is at most one for each disk block in
 * the journal.  The segment header is followed by an array of records.
 * fsck depends on the first element in each record being 'op' and the
 * second being 'ino'.  Segments may span multiple disk blocks but the
 * header is present on each.
 */
struct jsegrec {
uint64_t jsr_seq; /* Our sequence number */
uint64_t jsr_oldest; /* Oldest valid sequence number */
uint16_t jsr_cnt; /* Count of valid records */
uint16_t jsr_blocks; /* Count of DEV_BSIZE blocks. */
uint32_t jsr_crc; /* 32bit crc of the valid space */
ufs_time_t jsr_time; /* timestamp for mount instance */
};
/*
 * Reference record.  Records a single link count modification.
 * The first two members occupy the 'op' and 'ino' positions that fsck
 * relies on.
 */
struct jrefrec {
uint32_t jr_op; /* Operation; presumably JOP_ADDREF or JOP_REMREF -- confirm */
ino_t jr_ino; /* Inode the record is about */
ino_t jr_parent; /* presumably the directory holding the link -- confirm */
uint16_t jr_nlink; /* presumably the link count tied to the change -- confirm */
uint16_t jr_mode; /* presumably the file mode of jr_ino -- confirm */
off_t jr_diroff; /* presumably the entry's offset in the directory -- confirm */
uint64_t jr_unused; /* Not used; named as filler */
};
/*
 * Move record.  Records a reference moving within a directory block.  The
 * nlink is unchanged but we must search both locations.
 * The first two members occupy the 'op' and 'ino' positions that fsck
 * relies on.
 */
struct jmvrec {
uint32_t jm_op; /* Operation; presumably JOP_MVREF -- confirm */
ino_t jm_ino; /* Inode the record is about */
ino_t jm_parent; /* presumably the directory holding the entry -- confirm */
uint16_t jm_unused; /* Not used; named as filler */
off_t jm_oldoff; /* presumably the offset the reference moved from -- confirm */
off_t jm_newoff; /* presumably the offset the reference moved to -- confirm */
};
/*
 * Block record.  A set of frags or tree of blocks starting at an indirect
 * are freed, or a set of frags are allocated.
 * The first two members occupy the 'op' and 'ino' positions that fsck
 * relies on; note that the inode number is a fixed 32-bit field here.
 */
struct jblkrec {
uint32_t jb_op; /* Operation; presumably JOP_NEWBLK or JOP_FREEBLK -- confirm */
uint32_t jb_ino; /* Inode the record is about */
ufs2_daddr_t jb_blkno; /* presumably the disk address of the first frag -- confirm */
ufs_lbn_t jb_lbn; /* presumably the logical block within the file -- confirm */
uint16_t jb_frags; /* presumably the number of frags covered -- confirm */
uint16_t jb_oldfrags; /* presumably the prior frag count when extending -- confirm */
uint32_t jb_unused; /* Not used; named as filler */
};
/*
 * Truncation record.  Records a partial truncation so that it may be
 * completed later.
 * The first two members occupy the 'op' and 'ino' positions that fsck
 * relies on.
 */
struct jtrncrec {
uint32_t jt_op; /* Operation; presumably JOP_TRUNC -- confirm */
uint32_t jt_ino; /* Inode the record is about */
off_t jt_size; /* presumably the file size being truncated to -- confirm */
uint32_t jt_extsize; /* presumably the external attribute size -- confirm */
uint32_t jt_pad[3]; /* Padding */
};
/*
 * Overlay of the segment header and every journal record type, so that a
 * single journal slot can be viewed as any of them.
 */
union jrec {
struct jsegrec rec_jsegrec; /* segment header view */
struct jrefrec rec_jrefrec; /* reference record view */
struct jmvrec rec_jmvrec; /* move record view */
struct jblkrec rec_jblkrec; /* block record view */
struct jtrncrec rec_jtrncrec; /* truncation record view */
};
/*
 * Where CTASSERT is available, check at compile time that the segment
 * header, each record type, and the union of them are all exactly
 * JREC_SIZE bytes.
 */
#ifdef CTASSERT
CTASSERT(sizeof(struct jsegrec) == JREC_SIZE);
CTASSERT(sizeof(struct jrefrec) == JREC_SIZE);
CTASSERT(sizeof(struct jmvrec) == JREC_SIZE);
CTASSERT(sizeof(struct jblkrec) == JREC_SIZE);
CTASSERT(sizeof(struct jtrncrec) == JREC_SIZE);
CTASSERT(sizeof(union jrec) == JREC_SIZE);
#endif
extern int inside[], around[];
extern u_char *fragtbl[];

View file

@ -94,22 +94,29 @@
* The ONWORKLIST flag shows whether the structure is currently linked
* onto a worklist.
*/
#define ATTACHED 0x0001
#define UNDONE 0x0002
#define COMPLETE 0x0004
#define DEPCOMPLETE 0x0008
#define MKDIR_PARENT 0x0010 /* diradd & mkdir only */
#define MKDIR_BODY 0x0020 /* diradd & mkdir only */
#define RMDIR 0x0040 /* dirrem only */
#define DIRCHG 0x0080 /* diradd & dirrem only */
#define GOINGAWAY 0x0100 /* indirdep only */
#define IOSTARTED 0x0200 /* inodedep & pagedep only */
#define SPACECOUNTED 0x0400 /* inodedep only */
#define NEWBLOCK 0x0800 /* pagedep only */
#define INPROGRESS 0x1000 /* dirrem, freeblks, freefrag, freefile only */
#define UFS1FMT 0x2000 /* indirdep only */
#define EXTDATA 0x4000 /* allocdirect only */
#define ONWORKLIST 0x8000
#define ATTACHED 0x000001
#define UNDONE 0x000002
#define COMPLETE 0x000004
#define DEPCOMPLETE 0x000008
#define MKDIR_PARENT 0x000010 /* diradd, mkdir, jaddref, jsegdep only */
#define MKDIR_BODY 0x000020 /* diradd, mkdir, jaddref only */
#define RMDIR 0x000040 /* dirrem only */
#define DIRCHG 0x000080 /* diradd, dirrem only */
#define GOINGAWAY 0x000100 /* indirdep, jremref only */
#define IOSTARTED 0x000200 /* inodedep, pagedep, bmsafemap only */
#define SPACECOUNTED 0x000400 /* inodedep only */
#define NEWBLOCK 0x000800 /* pagedep, jaddref only */
#define INPROGRESS 0x001000 /* dirrem, freeblks, freefrag, freefile only */
#define UFS1FMT 0x002000 /* indirdep only */
#define EXTDATA 0x004000 /* allocdirect only */
#define ONWORKLIST 0x008000
#define IOWAITING 0x010000 /* Thread is waiting for IO to complete. */
#define ONDEPLIST 0x020000 /* Structure is on a dependency list. */
#define UNLINKED 0x040000 /* inodedep has been unlinked. */
#define UNLINKNEXT 0x080000 /* inodedep has valid di_freelink */
#define UNLINKPREV 0x100000 /* inodedep is pointed at in the unlink list */
#define UNLINKONLIST 0x200000 /* inodedep is in the unlinked list on disk */
#define UNLINKLINKS (UNLINKNEXT | UNLINKPREV)
#define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE)
@ -135,25 +142,38 @@
* and the macros below changed to use it.
*/
struct worklist {
struct mount *wk_mp; /* Mount we live in */
LIST_ENTRY(worklist) wk_list; /* list of work requests */
unsigned short wk_type; /* type of request */
unsigned short wk_state; /* state flags */
struct mount *wk_mp; /* Mount we live in */
unsigned int wk_type:8, /* type of request */
wk_state:24; /* state flags */
};
#define WK_DATA(wk) ((void *)(wk))
#define WK_PAGEDEP(wk) ((struct pagedep *)(wk))
#define WK_INODEDEP(wk) ((struct inodedep *)(wk))
#define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk))
#define WK_NEWBLK(wk) ((struct newblk *)(wk))
#define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk))
#define WK_INDIRDEP(wk) ((struct indirdep *)(wk))
#define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk))
#define WK_FREEFRAG(wk) ((struct freefrag *)(wk))
#define WK_FREEBLKS(wk) ((struct freeblks *)(wk))
#define WK_FREEWORK(wk) ((struct freework *)(wk))
#define WK_FREEFILE(wk) ((struct freefile *)(wk))
#define WK_DIRADD(wk) ((struct diradd *)(wk))
#define WK_MKDIR(wk) ((struct mkdir *)(wk))
#define WK_DIRREM(wk) ((struct dirrem *)(wk))
#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk))
#define WK_JADDREF(wk) ((struct jaddref *)(wk))
#define WK_JREMREF(wk) ((struct jremref *)(wk))
#define WK_JMVREF(wk) ((struct jmvref *)(wk))
#define WK_JSEGDEP(wk) ((struct jsegdep *)(wk))
#define WK_JSEG(wk) ((struct jseg *)(wk))
#define WK_JNEWBLK(wk) ((struct jnewblk *)(wk))
#define WK_JFREEBLK(wk) ((struct jfreeblk *)(wk))
#define WK_FREEDEP(wk) ((struct freedep *)(wk))
#define WK_JFREEFRAG(wk) ((struct jfreefrag *)(wk))
#define WK_SBDEP(wk) ((struct sbdep *)(wk)) /* argument parenthesized so expressions are cast whole */
#define WK_JTRUNC(wk) ((struct jtrunc *)(wk))
/*
* Various types of lists
@ -165,6 +185,15 @@ LIST_HEAD(inodedephd, inodedep);
LIST_HEAD(allocindirhd, allocindir);
LIST_HEAD(allocdirecthd, allocdirect);
TAILQ_HEAD(allocdirectlst, allocdirect);
LIST_HEAD(indirdephd, indirdep);
LIST_HEAD(jaddrefhd, jaddref);
LIST_HEAD(jremrefhd, jremref);
LIST_HEAD(jmvrefhd, jmvref);
LIST_HEAD(jnewblkhd, jnewblk);
LIST_HEAD(jfreeblkhd, jfreeblk);
LIST_HEAD(freeworkhd, freework);
TAILQ_HEAD(jseglst, jseg);
TAILQ_HEAD(inoreflst, inoref);
/*
* The "pagedep" structure tracks the various dependencies related to
@ -192,9 +221,11 @@ struct pagedep {
LIST_ENTRY(pagedep) pd_hash; /* hashed lookup */
ino_t pd_ino; /* associated file */
ufs_lbn_t pd_lbn; /* block within file */
struct newdirblk *pd_newdirblk; /* associated newdirblk if NEWBLOCK */
struct dirremhd pd_dirremhd; /* dirrem's waiting for page */
struct diraddhd pd_diraddhd[DAHASHSZ]; /* diradd dir entry updates */
struct diraddhd pd_pendinghd; /* directory entries awaiting write */
struct jmvrefhd pd_jmvrefhd; /* Dependent journal writes. */
};
/*
@ -248,13 +279,18 @@ struct inodedep {
struct worklist id_list; /* buffer holding inode block */
# define id_state id_list.wk_state /* inode dependency state */
LIST_ENTRY(inodedep) id_hash; /* hashed lookup */
TAILQ_ENTRY(inodedep) id_unlinked; /* Unlinked but ref'd inodes */
struct fs *id_fs; /* associated filesystem */
ino_t id_ino; /* dependent inode */
nlink_t id_nlinkdelta; /* saved effective link count */
nlink_t id_savednlink; /* Link saved during rollback */
LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */
struct buf *id_buf; /* related bmsafemap (if pending) */
struct bmsafemap *id_bmsafemap; /* related bmsafemap (if pending) */
struct diradd *id_mkdiradd; /* diradd for a mkdir. */
struct inoreflst id_inoreflst; /* Inode reference adjustments. */
long id_savedextsize; /* ext size saved during rollback */
off_t id_savedsize; /* file size saved during rollback */
struct dirremhd id_dirremhd; /* Removals pending. */
struct workhead id_pendinghd; /* entries awaiting directory write */
struct workhead id_bufwait; /* operations after inode written */
struct workhead id_inowait; /* operations waiting inode update */
@ -270,23 +306,6 @@ struct inodedep {
#define id_savedino1 id_un.idu_savedino1
#define id_savedino2 id_un.idu_savedino2
/*
* A "newblk" structure is attached to a bmsafemap structure when a block
* or fragment is allocated from a cylinder group. Its state is set to
* DEPCOMPLETE when its cylinder group map is written. It is consumed by
* an associated allocdirect or allocindir allocation which will attach
* themselves to the bmsafemap structure if the newblk's DEPCOMPLETE flag
* is not set (i.e., its cylinder group map has not been written).
*/
struct newblk {
LIST_ENTRY(newblk) nb_hash; /* hashed lookup */
struct fs *nb_fs; /* associated filesystem */
int nb_state; /* state of bitmap dependency */
ufs2_daddr_t nb_newblkno; /* allocated block number */
LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblk's */
struct bmsafemap *nb_bmsafemap; /* associated bmsafemap */
};
/*
* A "bmsafemap" structure maintains a list of dependency structures
* that depend on the update of a particular cylinder group map.
@ -299,11 +318,41 @@ struct newblk {
*/
struct bmsafemap {
struct worklist sm_list; /* cylgrp buffer */
# define sm_state sm_list.wk_state /* shorthand for the worklist state word */
int sm_cg; /* presumably the cylinder group number -- confirm */
LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. */
struct buf *sm_buf; /* associated buffer */
/*
 * Each dependency type below has a pair of list heads: the "hd" list
 * and a "wr" list described as holding the deps that are being written.
 */
struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */
struct allocdirecthd sm_allocdirectwr; /* writing allocdirect deps */
struct allocindirhd sm_allocindirhd; /* allocindir deps */
struct allocindirhd sm_allocindirwr; /* writing allocindir deps */
struct inodedephd sm_inodedephd; /* inodedep deps */
struct inodedephd sm_inodedepwr; /* writing inodedep deps */
struct newblkhd sm_newblkhd; /* newblk deps */
struct newblkhd sm_newblkwr; /* writing newblk deps */
struct jaddrefhd sm_jaddrefhd; /* Pending inode allocations. */
struct jnewblkhd sm_jnewblkhd; /* Pending block allocations. */
};
/*
* A "newblk" structure is attached to a bmsafemap structure when a block
* or fragment is allocated from a cylinder group. Its state is set to
* DEPCOMPLETE when its cylinder group map is written. It is converted to
* an allocdirect or allocindir allocation once the allocator calls the
* appropriate setup function.
*/
struct newblk {
/*
 * Worklist linkage and state. allocdirect (ad_block) and allocindir
 * (ai_block) embed a newblk and alias this state word as ad_state and
 * ai_state respectively.
 */
struct worklist nb_list;
# define nb_state nb_list.wk_state
LIST_ENTRY(newblk) nb_hash; /* hashed lookup */
LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblks */
struct jnewblk *nb_jnewblk; /* New block journal entry. */
struct bmsafemap *nb_bmsafemap;/* cylgrp dep (if pending) */
struct freefrag *nb_freefrag; /* fragment to be freed (if any) */
struct indirdephd nb_indirdeps; /* Children indirect blocks. */
struct workhead nb_newdirblk; /* dir block to notify when written */
struct workhead nb_jwork; /* Journal work pending. */
ufs2_daddr_t nb_newblkno; /* new value of block pointer */
};
/*
@ -334,20 +383,18 @@ struct bmsafemap {
* and inodedep->id_pendinghd lists.
*/
struct allocdirect {
struct worklist ad_list; /* buffer holding block */
# define ad_state ad_list.wk_state /* block pointer state */
struct newblk ad_block; /* Common block logic */
# define ad_state ad_block.nb_list.wk_state /* block pointer state */
TAILQ_ENTRY(allocdirect) ad_next; /* inodedep's list of allocdirect's */
ufs_lbn_t ad_lbn; /* block within file */
ufs2_daddr_t ad_newblkno; /* new value of block pointer */
ufs2_daddr_t ad_oldblkno; /* old value of block pointer */
long ad_newsize; /* size of new block */
long ad_oldsize; /* size of old block */
LIST_ENTRY(allocdirect) ad_deps; /* bmsafemap's list of allocdirect's */
struct buf *ad_buf; /* cylgrp buffer (if pending) */
struct inodedep *ad_inodedep; /* associated inodedep */
struct freefrag *ad_freefrag; /* fragment to be freed (if any) */
struct workhead ad_newdirblk; /* dir block to notify when written */
ufs2_daddr_t ad_oldblkno; /* old value of block pointer */
int ad_offset; /* Pointer offset in parent. */
long ad_newsize; /* size of new block */
long ad_oldsize; /* size of old block */
};
#define ad_newblkno ad_block.nb_newblkno
#define ad_freefrag ad_block.nb_freefrag
#define ad_newdirblk ad_block.nb_newdirblk
/*
* A single "indirdep" structure manages all allocation dependencies for
@ -369,10 +416,14 @@ struct allocdirect {
struct indirdep {
struct worklist ir_list; /* buffer holding indirect block */
# define ir_state ir_list.wk_state /* indirect block pointer state */
caddr_t ir_saveddata; /* buffer cache contents */
LIST_ENTRY(indirdep) ir_next; /* alloc{direct,indir} list */
caddr_t ir_saveddata; /* buffer cache contents */
struct buf *ir_savebp; /* buffer holding safe copy */
struct allocindirhd ir_completehd; /* waiting for indirdep complete */
struct allocindirhd ir_writehd; /* Waiting for the pointer write. */
struct allocindirhd ir_donehd; /* done waiting to update safecopy */
struct allocindirhd ir_deplisthd; /* allocindir deps for this block */
struct workhead ir_jwork; /* Journal work pending. */
};
/*
@ -389,16 +440,25 @@ struct indirdep {
* can then be freed as it is no longer applicable.
*/
struct allocindir {
struct worklist ai_list; /* buffer holding indirect block */
# define ai_state ai_list.wk_state /* indirect block pointer state */
struct newblk ai_block; /* Common block area */
# define ai_state ai_block.nb_list.wk_state /* indirect pointer state */
LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */
int ai_offset; /* pointer offset in indirect block */
ufs2_daddr_t ai_newblkno; /* new block pointer value */
ufs2_daddr_t ai_oldblkno; /* old block pointer value */
struct freefrag *ai_freefrag; /* block to be freed when complete */
struct indirdep *ai_indirdep; /* address of associated indirdep */
LIST_ENTRY(allocindir) ai_deps; /* bmsafemap's list of allocindir's */
struct buf *ai_buf; /* cylgrp buffer (if pending) */
ufs2_daddr_t ai_oldblkno; /* old value of block pointer */
int ai_offset; /* Pointer offset in parent. */
};
#define ai_newblkno ai_block.nb_newblkno
#define ai_freefrag ai_block.nb_freefrag
#define ai_newdirblk ai_block.nb_newdirblk
/*
* The allblk union is used to size the newblk structure on allocation so
* that it may be any one of three types.
*/
union allblk {
struct allocindir ab_allocindir;
struct allocdirect ab_allocdirect;
struct newblk ab_newblk;
};
/*
@ -406,14 +466,13 @@ struct allocindir {
* allocated fragment is replaced with a larger fragment, rather than extended.
* The "freefrag" structure is constructed and attached when the replacement
* block is first allocated. It is processed after the inode claiming the
* bigger block that replaces it has been written to disk. Note that the
* ff_state field is is used to store the uid, so may lose data. However,
* the uid is used only in printing an error message, so is not critical.
* Keeping it in a short keeps the data structure down to 32 bytes.
* bigger block that replaces it has been written to disk.
*/
struct freefrag {
struct worklist ff_list; /* id_inowait or delayed worklist */
# define ff_state ff_list.wk_state /* owning user; should be uid_t */
# define ff_state ff_list.wk_state
struct jfreefrag *ff_jfreefrag; /* Associated journal entry. */
struct workhead ff_jwork; /* Journal work pending. */
ufs2_daddr_t ff_blkno; /* fragment physical block number */
long ff_fragsize; /* size of fragment being deleted */
ino_t ff_inum; /* owning inode number */
@ -423,20 +482,57 @@ struct freefrag {
* A "freeblks" structure is attached to an "inodedep" when the
* corresponding file's length is reduced to zero. It records all
* the information needed to free the blocks of a file after its
* zero'ed inode has been written to disk.
* zero'ed inode has been written to disk. The actual work is done
* by child freework structures which are responsible for individual
* inode pointers while freeblks is responsible for retiring the
* entire operation when it is complete and holding common members.
*/
struct freeblks {
struct worklist fb_list; /* id_inowait or delayed worklist */
# define fb_state fb_list.wk_state /* inode and dirty block state */
struct jfreeblkhd fb_jfreeblkhd; /* Journal entries pending */
struct workhead fb_freeworkhd; /* Work items pending */
struct workhead fb_jwork; /* Journal work pending */
ino_t fb_previousinum; /* inode of previous owner of blocks */
uid_t fb_uid; /* uid of previous owner of blocks */
struct vnode *fb_devvp; /* filesystem device vnode */
long fb_oldextsize; /* previous ext data size */
off_t fb_oldsize; /* previous file size */
ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */
ufs2_daddr_t fb_dblks[NDADDR]; /* direct blk ptrs to deallocate */
ufs2_daddr_t fb_iblks[NIADDR]; /* indirect blk ptrs to deallocate */
ufs2_daddr_t fb_eblks[NXADDR]; /* indirect blk ptrs to deallocate */
int fb_ref; /* Children outstanding. */
};
/*
* A "freework" structure handles the release of a tree of blocks or a single
* block. Each indirect block in a tree is allocated its own freework
* structure so that the indirect block may be freed only when all of its
* children are freed. In this way we enforce the rule that an allocated
* block must have a valid path to a root that is journaled. Each child
* block acquires a reference and when the ref hits zero the parent ref
* is decremented. If there is no parent the freeblks ref is decremented.
*/
struct freework {
struct worklist fw_list; /* Worklist linkage and state. */
# define fw_state fw_list.wk_state
LIST_ENTRY(freework) fw_next; /* Queue for freeblks. */
struct freeblks *fw_freeblks; /* Root of operation. */
struct freework *fw_parent; /* Parent indirect. */
ufs2_daddr_t fw_blkno; /* Our block #. */
ufs_lbn_t fw_lbn; /* Original lbn before free. */
int fw_frags; /* Number of frags. */
int fw_ref; /* Number of children out. */
int fw_off; /* Current working position. */
struct workhead fw_jwork; /* Journal work pending. */
};
/*
* A "freedep" structure is allocated to track the completion of a bitmap
* write for a freework. One freedep may cover many freed blocks so long
* as they reside in the same cylinder group. When the cg is written
* the freedep decrements the ref on the freework which may permit it
* to be freed as well.
*/
struct freedep {
struct worklist fd_list; /* Worklist linkage; presumably queued on the cg buffer -- confirm */
struct freework *fd_freework; /* Parent freework. */
};
/*
@ -450,6 +546,7 @@ struct freefile {
mode_t fx_mode; /* mode of inode */
ino_t fx_oldinum; /* inum of the unlinked file */
struct vnode *fx_devvp; /* filesystem device vnode */
struct workhead fx_jwork; /* journal work pending. */
};
/*
@ -482,12 +579,11 @@ struct freefile {
* than zero.
*
* The overlaying of da_pagedep and da_previous is done to keep the
* structure down to 32 bytes in size on a 32-bit machine. If a
* da_previous entry is present, the pointer to its pagedep is available
* in the associated dirrem entry. If the DIRCHG flag is set, the
* da_previous entry is valid; if not set the da_pagedep entry is valid.
* The DIRCHG flag never changes; it is set when the structure is created
* if appropriate and is never cleared.
* structure down. If a da_previous entry is present, the pointer to its
* pagedep is available in the associated dirrem entry. If the DIRCHG flag
* is set, the da_previous entry is valid; if not set the da_pagedep entry
* is valid. The DIRCHG flag never changes; it is set when the structure
* is created if appropriate and is never cleared.
*/
struct diradd {
struct worklist da_list; /* id_inowait or id_pendinghd list */
@ -499,6 +595,7 @@ struct diradd {
struct dirrem *dau_previous; /* entry being replaced in dir change */
struct pagedep *dau_pagedep; /* pagedep dependency for addition */
} da_un;
struct workhead da_jwork; /* Journal work awaiting completion. */
};
#define da_previous da_un.dau_previous
#define da_pagedep da_un.dau_pagedep
@ -525,12 +622,13 @@ struct diradd {
* mkdir structures that reference it. The deletion would be faster if the
* diradd structure were simply augmented to have two pointers that referenced
* the associated mkdir's. However, this would increase the size of the diradd
* structure from 32 to 64-bits to speed a very infrequent operation.
* structure to speed a very infrequent operation.
*/
struct mkdir {
struct worklist md_list; /* id_inowait or buffer holding dir */
# define md_state md_list.wk_state /* type: MKDIR_PARENT or MKDIR_BODY */
struct diradd *md_diradd; /* associated diradd */
struct jaddref *md_jaddref; /* dependent jaddref. */
struct buf *md_buf; /* MKDIR_BODY: buffer holding dir */
LIST_ENTRY(mkdir) md_mkdirs; /* list of all mkdirs */
};
@ -542,20 +640,19 @@ LIST_HEAD(mkdirlist, mkdir) mkdirlisthd;
* list of the pagedep for the directory page that contains the entry.
* It is processed after the directory page with the deleted entry has
* been written to disk.
*
* The overlaying of dm_pagedep and dm_dirinum is done to keep the
* structure down to 32 bytes in size on a 32-bit machine. It works
* because they are never used concurrently.
*/
struct dirrem {
struct worklist dm_list; /* delayed worklist */
# define dm_state dm_list.wk_state /* state of the old directory entry */
LIST_ENTRY(dirrem) dm_next; /* pagedep's list of dirrem's */
LIST_ENTRY(dirrem) dm_inonext; /* inodedep's list of dirrem's */
struct jremrefhd dm_jremrefhd; /* Pending remove reference deps. */
ino_t dm_oldinum; /* inum of the removed dir entry */
union {
struct pagedep *dmu_pagedep; /* pagedep dependency for remove */
ino_t dmu_dirinum; /* parent inode number (for rmdir) */
} dm_un;
struct workhead dm_jwork; /* Journal work awaiting completion. */
};
#define dm_pagedep dm_un.dmu_pagedep
#define dm_dirinum dm_un.dmu_dirinum
@ -577,9 +674,200 @@ struct dirrem {
* blocks using a similar scheme with the allocindir structures. Rather
* than adding this level of complexity, we simply write those newly
* allocated indirect blocks synchronously as such allocations are rare.
* In the case of a new directory the . and .. links are tracked with
* a mkdir rather than a pagedep. In this case we track the mkdir
* so it can be released when it is written. A workhead is used
* to simplify canceling a mkdir that is removed by a subsequent dirrem.
*/
struct newdirblk {
struct worklist db_list; /* id_inowait or pg_newdirblk */
# define db_state db_list.wk_state /* unused */
struct pagedep *db_pagedep; /* associated pagedep */
struct workhead db_mkdir; /* mkdir tracked for a new directory's . and .. links */
};
/*
* The inoref structure holds the elements common to jaddref and jremref
* so they may easily be queued in-order on the inodedep.
*/
struct inoref {
struct worklist if_list; /* Worklist linkage and state. */
# define if_state if_list.wk_state
TAILQ_ENTRY(inoref) if_deps; /* Links for inodedep. */
struct jsegdep *if_jsegdep; /* Journal segment reference (see struct jsegdep). */
off_t if_diroff; /* Directory offset. */
ino_t if_ino; /* Inode number. */
ino_t if_parent; /* Parent inode number. */
nlink_t if_nlink; /* nlink before addition. */
uint16_t if_mode; /* File mode, needed for IFMT. */
};
/*
* A "jaddref" structure tracks a new reference (link count) on an inode
* and prevents the link count increase and bitmap allocation until a
* journal entry can be written. Once the journal entry is written,
* the inode is put on the pendinghd of the bmsafemap and a diradd or
* mkdir entry is placed on the bufwait list of the inode. The DEPCOMPLETE
* flag is used to indicate that all of the required information for writing
* the journal entry is present. MKDIR_BODY and MKDIR_PARENT are used to
* differentiate . and .. links from regular file names. NEWBLOCK indicates
* a bitmap is still pending. If a new reference is canceled by a delete
* prior to writing the journal the jaddref write is canceled and the
* structure persists to prevent any disk-visible changes until it is
* ultimately released when the file is freed or the link is dropped again.
*/
struct jaddref {
struct inoref ja_ref; /* Fields common to jaddref and jremref (see struct inoref). */
# define ja_list ja_ref.if_list /* Journal pending or jseg entries. */
# define ja_state ja_ref.if_list.wk_state
LIST_ENTRY(jaddref) ja_bmdeps; /* Links for bmsafemap. */
union {
struct diradd *jau_diradd; /* Pending diradd. */
struct mkdir *jau_mkdir; /* MKDIR_{PARENT,BODY} */
} ja_un;
};
/* Shorthand for the ja_un union members and for fields of the embedded inoref. */
#define ja_diradd ja_un.jau_diradd
#define ja_mkdir ja_un.jau_mkdir
#define ja_diroff ja_ref.if_diroff
#define ja_ino ja_ref.if_ino
#define ja_parent ja_ref.if_parent
#define ja_mode ja_ref.if_mode
/*
* A "jremref" structure tracks a removed reference (unlink) on an
* inode and prevents the directory remove from proceeding until the
* journal entry is written. Once the journal has been written the remove
* may proceed as normal.
*/
struct jremref {
struct inoref jr_ref; /* Fields common to jaddref and jremref (see struct inoref). */
# define jr_list jr_ref.if_list /* Journal pending or jseg entries. */
# define jr_state jr_ref.if_list.wk_state
LIST_ENTRY(jremref) jr_deps; /* Links for pagedep. */
struct dirrem *jr_dirrem; /* Back pointer to dirrem. */
};
/*
 * A "jmvref" structure carries a pagedep pointer, a parent and child
 * inode number, and an old and a new offset.
 * NOTE(review): judging by the field names and by pagedep's pd_jmvrefhd
 * list ("Dependent journal writes"), this appears to journal a directory
 * entry moving from jm_oldoff to jm_newoff within its directory; confirm
 * against the softdep implementation before relying on this description.
 */
struct jmvref {
struct worklist jm_list; /* Worklist linkage. */
LIST_ENTRY(jmvref) jm_deps; /* presumably links on pagedep's pd_jmvrefhd -- confirm */
struct pagedep *jm_pagedep; /* Associated pagedep. */
ino_t jm_parent; /* presumably the containing directory's inode number -- confirm */
ino_t jm_ino; /* presumably the inode the entry names -- confirm */
off_t jm_oldoff; /* presumably the entry's previous directory offset -- confirm */
off_t jm_newoff; /* presumably the entry's new directory offset -- confirm */
};
/*
* A "jnewblk" structure tracks a newly allocated block or fragment and
* prevents the direct or indirect block pointer as well as the cg bitmap
* from being written until it is logged. After it is logged the jsegdep
* is attached to the allocdirect or allocindir until the operation is
* completed or reverted. If the operation is reverted prior to the journal
* write the jnewblk structure is maintained to prevent the bitmaps from
* reaching the disk. Ultimately the jnewblk structure will be passed
* to the free routine as the in memory cg is modified back to the free
* state at which time it can be released.
*/
struct jnewblk {
struct worklist jn_list; /* Worklist linkage and state. */
# define jn_state jn_list.wk_state
struct jsegdep *jn_jsegdep; /* Journal segment reference (see struct jsegdep). */
LIST_ENTRY(jnewblk) jn_deps; /* All jnewblks on bmsafemap */
struct newblk *jn_newblk; /* Associated newblk (newblk points back via nb_jnewblk). */
ino_t jn_ino; /* presumably the inode receiving the block -- confirm */
ufs_lbn_t jn_lbn; /* presumably the logical block number in that inode -- confirm */
ufs2_daddr_t jn_blkno; /* presumably the allocated disk block address -- confirm */
int jn_oldfrags; /* presumably the fragment count before the allocation -- confirm */
int jn_frags; /* presumably the fragment count after the allocation -- confirm */
};
/*
* A "jfreeblk" structure tracks the journal write for freeing a block
* or tree of blocks. The block pointer must not be cleared in the inode
* or indirect prior to the jfreeblk being written.
*/
struct jfreeblk {
struct worklist jf_list; /* Worklist linkage and state. */
# define jf_state jf_list.wk_state
struct jsegdep *jf_jsegdep; /* Journal segment reference (see struct jsegdep). */
struct freeblks *jf_freeblks; /* Associated freeblks operation. */
LIST_ENTRY(jfreeblk) jf_deps; /* presumably links on freeblks' fb_jfreeblkhd -- confirm */
ino_t jf_ino; /* presumably the inode that owned the block -- confirm */
ufs_lbn_t jf_lbn; /* presumably the logical block number being freed -- confirm */
ufs2_daddr_t jf_blkno; /* presumably the disk block address being freed -- confirm */
int jf_frags; /* presumably the number of fragments being freed -- confirm */
};
/*
* A "jfreefrag" tracks the freeing of a single block when a fragment is
* extended or an indirect page is replaced. It is not part of a larger
* freeblks operation.
*/
struct jfreefrag {
struct worklist fr_list; /* Worklist linkage and state. */
# define fr_state fr_list.wk_state
struct jsegdep *fr_jsegdep; /* Journal segment reference (see struct jsegdep). */
struct freefrag *fr_freefrag; /* Associated freefrag (freefrag points back via ff_jfreefrag). */
ino_t fr_ino; /* presumably the inode that owned the fragment -- confirm */
ufs_lbn_t fr_lbn; /* presumably the logical block number of the fragment -- confirm */
ufs2_daddr_t fr_blkno; /* presumably the disk address of the fragment -- confirm */
int fr_frags; /* presumably the number of fragments freed -- confirm */
};
/*
* A "jtrunc" journals the intent to truncate an inode to a non-zero
* value. This is done synchronously prior to the synchronous partial
* truncation process. The jsegdep is not released until the truncation
* is complete and the truncated inode is fsync'd.
*/
struct jtrunc {
struct worklist jt_list; /* Worklist linkage. */
struct jsegdep *jt_jsegdep; /* Held until the truncated inode is fsync'd (see above). */
ino_t jt_ino; /* presumably the inode being truncated -- confirm */
off_t jt_size; /* presumably the new (non-zero) file size -- confirm */
/*
 * NOTE(review): this is an int, whereas the other ext-size fields in
 * this file (id_savedextsize, fb_oldextsize) are long.
 */
int jt_extsize;
};
/*
* A "jsegdep" structure tracks a single reference to a written journal
* segment so the journal space can be reclaimed when all dependencies
* have been written.
*/
struct jsegdep {
struct worklist jd_list; /* Worklist linkage and state. */
# define jd_state jd_list.wk_state
struct jseg *jd_seg; /* Journal segment this dependency references. */
};
/*
* A "jseg" structure contains all of the journal records written in a
* single disk write. jaddref and jremref structures are linked into
* js_entries so they may be completed when the write completes. The
* js_deps array contains as many entries as there are ref counts to
* reduce the number of allocations required per journal write to one.
*
* NOTE(review): no js_deps member is declared in the structure below;
* confirm whether the array is allocated separately or the sentence
* above is out of date.
*/
struct jseg {
struct worklist js_list; /* b_deps link for journal */
# define js_state js_list.wk_state
struct workhead js_entries; /* Entries awaiting write */
TAILQ_ENTRY(jseg) js_next; /* presumably links segments on the jblocks list -- confirm */
struct jblocks *js_jblocks; /* Back pointer to block/seg list */
struct buf *js_buf; /* Buffer while unwritten */
uint64_t js_seq; /* presumably the segment's sequence number -- confirm */
int js_size; /* Allocated size in bytes */
int js_cnt; /* Total items allocated */
int js_refs; /* Count of items pending completion */
};
/*
* A 'sbdep' structure tracks the head of the free inode list and
* superblock writes. This makes sure the superblock is always pointing at
* the first possible unlinked inode for the suj recovery process. If a
* block write completes and we discover a new head is available the buf
* is dirtied and the dep is kept.
*/
struct sbdep {
struct worklist sb_list; /* b_dep linkage */
struct fs *sb_fs; /* Filesystem pointer within buf. */
struct ufsmount *sb_ump; /* Associated UFS mount structure. */
};

View file

@ -146,7 +146,8 @@ struct ufs2_dinode {
ufs2_daddr_t di_db[NDADDR]; /* 112: Direct disk blocks. */
ufs2_daddr_t di_ib[NIADDR]; /* 208: Indirect disk blocks. */
u_int64_t di_modrev; /* 232: i_modrev for NFSv4 */
int64_t di_spare[2]; /* 240: Reserved; currently unused */
ino_t di_freelink; /* 240: SUJ: Next unlinked inode. */
uint32_t di_spare[3]; /* 244: Reserved; currently unused */
};
/*
@ -167,9 +168,7 @@ struct ufs2_dinode {
struct ufs1_dinode {
u_int16_t di_mode; /* 0: IFMT, permissions; see below. */
int16_t di_nlink; /* 2: File link count. */
union {
u_int16_t oldids[2]; /* 4: Ffs: old user and group ids. */
} di_u;
ino_t di_freelink; /* 4: SUJ: Next unlinked inode. */
u_int64_t di_size; /* 8: File byte count. */
int32_t di_atime; /* 16: Last access time. */
int32_t di_atimensec; /* 20: Last access time. */
@ -186,7 +185,5 @@ struct ufs1_dinode {
u_int32_t di_gid; /* 116: File group. */
u_int64_t di_modrev; /* 120: i_modrev for NFSv4 */
};
#define di_ogid di_u.oldids[1]
#define di_ouid di_u.oldids[0]
#endif /* _UFS_UFS_DINODE_H_ */

View file

@ -120,7 +120,7 @@ struct inode {
#define IN_CHANGE 0x0002 /* Inode change time update request. */
#define IN_UPDATE 0x0004 /* Modification time update request. */
#define IN_MODIFIED 0x0008 /* Inode has been modified. */
#define IN_RENAME 0x0010 /* Inode is being renamed. */
#define IN_NEEDSYNC 0x0010 /* Inode requires fsync. */
#define IN_LAZYMOD 0x0040 /* Modified, but don't write yet. */
#define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */
#define IN_LAZYACCESS 0x0100 /* Process IN_ACCESS after the
@ -175,6 +175,7 @@ struct indir {
/* Determine if soft dependencies are being done */
#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP)
#define DOINGASYNC(vp) ((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC)
#define DOINGSUJ(vp) ((vp)->v_mount->mnt_kern_flag & MNTK_SUJ)
/* This overlays the fid structure (see mount.h). */
struct ufid {

View file

@ -68,8 +68,6 @@ __FBSDID("$FreeBSD$");
static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables");
static SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
static int ufs_mindirhashsize = DIRBLKSIZ * 5;
SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW,
&ufs_mindirhashsize,

View file

@ -57,7 +57,7 @@ int ufs_bmap(struct vop_bmap_args *);
int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *,
struct buf *, int *, int *);
int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **);
int ufs_checkpath(ino_t, struct inode *, struct ucred *);
int ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *);
void ufs_dirbad(struct inode *, doff_t, char *);
int ufs_dirbadentry(struct vnode *, struct direct *, int);
int ufs_dirempty(struct inode *, ino_t, struct ucred *);
@ -66,9 +66,11 @@ int ufs_extwrite(struct vop_write_args *);
void ufs_makedirentry(struct inode *, struct componentname *,
struct direct *);
int ufs_direnter(struct vnode *, struct vnode *, struct direct *,
struct componentname *, struct buf *);
struct componentname *, struct buf *, int);
int ufs_dirremove(struct vnode *, struct inode *, int, int);
int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int);
int ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *,
ino_t *);
int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *);
int ufs_inactive(struct vop_inactive_args *);
int ufs_init(struct vfsconf *);
@ -81,19 +83,33 @@ vfs_root_t ufs_root;
int ufs_uninit(struct vfsconf *);
int ufs_vinit(struct mount *, struct vop_vector *, struct vnode **);
#include <sys/sysctl.h>
SYSCTL_DECL(_vfs_ufs);
/*
* Soft update function prototypes.
*/
int softdep_setup_directory_add(struct buf *, struct inode *, off_t,
ino_t, struct buf *, int);
void softdep_change_directoryentry_offset(struct inode *, caddr_t,
caddr_t, caddr_t, int);
void softdep_change_directoryentry_offset(struct buf *, struct inode *,
caddr_t, caddr_t, caddr_t, int);
void softdep_setup_remove(struct buf *,struct inode *, struct inode *, int);
void softdep_setup_directory_change(struct buf *, struct inode *,
struct inode *, ino_t, int);
void softdep_change_linkcnt(struct inode *);
void softdep_releasefile(struct inode *);
int softdep_slowdown(struct vnode *);
void softdep_setup_create(struct inode *, struct inode *);
void softdep_setup_dotdot_link(struct inode *, struct inode *);
void softdep_setup_link(struct inode *, struct inode *);
void softdep_setup_mkdir(struct inode *, struct inode *);
void softdep_setup_rmdir(struct inode *, struct inode *);
void softdep_setup_unlink(struct inode *, struct inode *);
void softdep_revert_create(struct inode *, struct inode *);
void softdep_revert_dotdot_link(struct inode *, struct inode *);
void softdep_revert_link(struct inode *, struct inode *);
void softdep_revert_mkdir(struct inode *, struct inode *);
void softdep_revert_rmdir(struct inode *, struct inode *);
/*
* Flags to low-level allocation routines. The low 16-bits are reserved

View file

@ -77,9 +77,6 @@ SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, "");
/* true if old FS format...*/
#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0)
static int ufs_lookup_(struct vnode *, struct vnode **, struct componentname *,
ino_t *);
static int
ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred,
struct thread *td)
@ -189,11 +186,11 @@ ufs_lookup(ap)
} */ *ap;
{
return (ufs_lookup_(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
}
static int
ufs_lookup_(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
int
ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
ino_t *dd_ino)
{
struct inode *dp; /* inode for directory being searched */
@ -524,6 +521,8 @@ notfound:
return (ENOENT);
found:
if (dd_ino != NULL)
*dd_ino = ino;
if (numdirpasses == 2)
nchstats.ncs_pass2++;
/*
@ -546,11 +545,6 @@ found:
if ((flags & ISLASTCN) && nameiop == LOOKUP)
dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1);
if (dd_ino != NULL) {
*dd_ino = ino;
return (0);
}
/*
* If deleting, and at end of pathname, return
* parameters which can be used to remove file.
@ -558,17 +552,6 @@ found:
if (nameiop == DELETE && (flags & ISLASTCN)) {
if (flags & LOCKPARENT)
ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
if ((error = VFS_VGET(vdp->v_mount, ino,
LK_EXCLUSIVE, &tdp)) != 0)
return (error);
error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread);
if (error) {
vput(tdp);
return (error);
}
/*
* Return pointer to current entry in dp->i_offset,
* and distance past previous entry (if there
@ -585,6 +568,16 @@ found:
dp->i_count = 0;
else
dp->i_count = dp->i_offset - prevoff;
if (dd_ino != NULL)
return (0);
if ((error = VFS_VGET(vdp->v_mount, ino,
LK_EXCLUSIVE, &tdp)) != 0)
return (error);
error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread);
if (error) {
vput(tdp);
return (error);
}
if (dp->i_number == ino) {
VREF(vdp);
*vpp = vdp;
@ -616,6 +609,8 @@ found:
dp->i_offset = i_offset;
if (dp->i_number == ino)
return (EISDIR);
if (dd_ino != NULL)
return (0);
if ((error = VFS_VGET(vdp->v_mount, ino,
LK_EXCLUSIVE, &tdp)) != 0)
return (error);
@ -650,6 +645,8 @@ found:
cnp->cn_flags |= SAVENAME;
return (0);
}
if (dd_ino != NULL)
return (0);
/*
* Step through the translation in the name. We do not `vput' the
@ -681,7 +678,7 @@ found:
* to the inode we looked up before vdp lock was
* dropped.
*/
error = ufs_lookup_(pdp, NULL, cnp, &ino1);
error = ufs_lookup_ino(pdp, NULL, cnp, &ino1);
if (error) {
vput(tdp);
return (error);
@ -833,12 +830,13 @@ ufs_makedirentry(ip, cnp, newdirp)
* soft dependency code).
*/
int
ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename)
struct vnode *dvp;
struct vnode *tvp;
struct direct *dirp;
struct componentname *cnp;
struct buf *newdirbp;
int isrename;
{
struct ucred *cr;
struct thread *td;
@ -911,22 +909,28 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
blkoff += DIRBLKSIZ;
}
if (softdep_setup_directory_add(bp, dp, dp->i_offset,
dirp->d_ino, newdirbp, 1) == 0) {
bdwrite(bp);
dirp->d_ino, newdirbp, 1))
dp->i_flag |= IN_NEEDSYNC;
if (newdirbp)
bdwrite(newdirbp);
bdwrite(bp);
if ((dp->i_flag & IN_NEEDSYNC) == 0)
return (UFS_UPDATE(dvp, 0));
}
/* We have just allocated a directory block in an
* indirect block. Rather than tracking when it gets
* claimed by the inode, we simply do a VOP_FSYNC
* now to ensure that it is there (in case the user
* does a future fsync). Note that we have to unlock
* the inode for the entry that we just entered, as
* the VOP_FSYNC may need to lock other inodes which
* can lead to deadlock if we also hold a lock on
* the newly entered node.
/*
* We have just allocated a directory block in an
* indirect block. We must prevent holes in the
* directory created if directory entries are
* written out of order. To accomplish this we
* fsync when we extend a directory into indirects.
* During rename it's not safe to drop the tvp lock
* so sync must be delayed until it is.
*
* This synchronous step could be removed if fsck and
* the kernel were taught to fill in sparse
* directories rather than panic.
*/
if ((error = bwrite(bp)))
return (error);
if (isrename)
return (0);
if (tvp != NULL)
VOP_UNLOCK(tvp, 0);
error = VOP_FSYNC(dvp, MNT_WAIT, td);
@ -1015,7 +1019,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
dp->i_offset + ((char *)ep - dirbuf));
#endif
if (DOINGSOFTDEP(dvp))
softdep_change_directoryentry_offset(dp, dirbuf,
softdep_change_directoryentry_offset(bp, dp, dirbuf,
(caddr_t)nep, (caddr_t)ep, dsize);
else
bcopy((caddr_t)nep, (caddr_t)ep, dsize);
@ -1067,6 +1071,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
(void) softdep_setup_directory_add(bp, dp,
dp->i_offset + (caddr_t)ep - dirbuf,
dirp->d_ino, newdirbp, 0);
if (newdirbp != NULL)
bdwrite(newdirbp);
bdwrite(bp);
} else {
if (DOINGASYNC(dvp)) {
@ -1084,7 +1090,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
* lock other inodes which can lead to deadlock if we also hold a
* lock on the newly entered node.
*/
if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) {
if (isrename == 0 && error == 0 &&
dp->i_endoff && dp->i_endoff < dp->i_size) {
if (tvp != NULL)
VOP_UNLOCK(tvp, 0);
#ifdef UFS_DIRHASH
@ -1125,6 +1132,19 @@ ufs_dirremove(dvp, ip, flags, isrmdir)
dp = VTOI(dvp);
/*
* Adjust the link count early so softdep can block if necessary.
*/
if (ip) {
ip->i_effnlink--;
if (DOINGSOFTDEP(dvp)) {
softdep_setup_unlink(dp, ip);
} else {
ip->i_nlink--;
DIP_SET(ip, i_nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
}
}
if (flags & DOWHITEOUT) {
/*
* Whiteout entry: set d_ino to WINO.
@ -1154,6 +1174,9 @@ ufs_dirremove(dvp, ip, flags, isrmdir)
if (dp->i_dirhash != NULL)
ufsdirhash_remove(dp, rep, dp->i_offset);
#endif
if (ip && rep->d_ino != ip->i_number)
panic("ufs_dirremove: ip %d does not match dirent ino %d\n",
ip->i_number, rep->d_ino);
if (dp->i_count == 0) {
/*
* First entry in block: set d_ino to zero.
@ -1172,31 +1195,20 @@ ufs_dirremove(dvp, ip, flags, isrmdir)
dp->i_offset & ~(DIRBLKSIZ - 1));
#endif
out:
error = 0;
if (DOINGSOFTDEP(dvp)) {
if (ip) {
ip->i_effnlink--;
softdep_change_linkcnt(ip);
if (ip)
softdep_setup_remove(bp, dp, ip, isrmdir);
}
if (softdep_slowdown(dvp)) {
if (softdep_slowdown(dvp))
error = bwrite(bp);
} else {
else
bdwrite(bp);
error = 0;
}
} else {
if (ip) {
ip->i_effnlink--;
ip->i_nlink--;
DIP_SET(ip, i_nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
}
if (flags & DOWHITEOUT)
error = bwrite(bp);
else if (DOINGASYNC(dvp) && dp->i_count != 0) {
else if (DOINGASYNC(dvp) && dp->i_count != 0)
bdwrite(bp);
error = 0;
} else
else
error = bwrite(bp);
}
dp->i_flag |= IN_CHANGE | IN_UPDATE;
@ -1229,6 +1241,19 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir)
struct vnode *vdp = ITOV(dp);
int error;
/*
* Drop the link before we lock the buf so softdep can block if
* necessary.
*/
oip->i_effnlink--;
if (DOINGSOFTDEP(vdp)) {
softdep_setup_unlink(dp, oip);
} else {
oip->i_nlink--;
DIP_SET(oip, i_nlink, oip->i_nlink);
oip->i_flag |= IN_CHANGE;
}
error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp);
if (error)
return (error);
@ -1240,15 +1265,10 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir)
ep->d_ino = newinum;
if (!OFSFMT(vdp))
ep->d_type = newtype;
oip->i_effnlink--;
if (DOINGSOFTDEP(vdp)) {
softdep_change_linkcnt(oip);
softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
bdwrite(bp);
} else {
oip->i_nlink--;
DIP_SET(oip, i_nlink, oip->i_nlink);
oip->i_flag |= IN_CHANGE;
if (DOINGASYNC(vdp)) {
bdwrite(bp);
error = 0;
@ -1363,25 +1383,25 @@ ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino)
/*
* Check if source directory is in the path of the target directory.
* Target is supplied locked, source is unlocked.
* The target is always vput before returning.
*/
int
ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred)
ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct ucred *cred, ino_t *wait_ino)
{
struct vnode *vp, *vp1;
struct mount *mp;
struct vnode *tvp, *vp, *vp1;
int error;
ino_t dd_ino;
vp = ITOV(target);
if (target->i_number == source_ino) {
error = EEXIST;
goto out;
}
error = 0;
vp = tvp = ITOV(target);
mp = vp->v_mount;
*wait_ino = 0;
if (target->i_number == source_ino)
return (EEXIST);
if (target->i_number == parent_ino)
return (0);
if (target->i_number == ROOTINO)
goto out;
return (0);
error = 0;
for (;;) {
error = ufs_dir_dd_ino(vp, cred, &dd_ino);
if (error != 0)
@ -1392,9 +1412,13 @@ ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred)
}
if (dd_ino == ROOTINO)
break;
error = vn_vget_ino(vp, dd_ino, LK_EXCLUSIVE, &vp1);
if (error != 0)
if (dd_ino == parent_ino)
break;
error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, &vp1);
if (error != 0) {
*wait_ino = dd_ino;
break;
}
/* Recheck that ".." still points to vp1 after relock of vp */
error = ufs_dir_dd_ino(vp, cred, &dd_ino);
if (error != 0) {
@ -1406,14 +1430,14 @@ ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred)
vput(vp1);
continue;
}
vput(vp);
if (vp != tvp)
vput(vp);
vp = vp1;
}
out:
if (error == ENOTDIR)
printf("checkpath: .. not a directory\n");
if (vp != NULL)
panic("checkpath: .. not a directory\n");
if (vp != tvp)
vput(vp);
return (error);
}

View file

@ -114,6 +114,8 @@ static vop_close_t ufsfifo_close;
static vop_kqfilter_t ufsfifo_kqfilter;
static vop_pathconf_t ufsfifo_pathconf;
SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
/*
* A virgin directory (no blushing please).
*/
@ -974,6 +976,9 @@ ufs_link(ap)
error = EXDEV;
goto out;
}
if (VTOI(tdvp)->i_effnlink < 2)
panic("ufs_link: Bad link count %d on parent",
VTOI(tdvp)->i_effnlink);
ip = VTOI(vp);
if ((nlink_t)ip->i_nlink >= LINK_MAX) {
error = EMLINK;
@ -988,11 +993,11 @@ ufs_link(ap)
DIP_SET(ip, i_nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(vp))
softdep_change_linkcnt(ip);
softdep_setup_link(VTOI(tdvp), ip);
error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
if (!error) {
ufs_makedirentry(ip, cnp, &newdir);
error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
}
if (error) {
@ -1001,7 +1006,7 @@ ufs_link(ap)
DIP_SET(ip, i_nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(vp))
softdep_change_linkcnt(ip);
softdep_revert_link(VTOI(tdvp), ip);
}
out:
return (error);
@ -1043,7 +1048,7 @@ ufs_whiteout(ap)
newdir.d_namlen = cnp->cn_namelen;
bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
newdir.d_type = DT_WHT;
error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
break;
case DELETE:
@ -1062,6 +1067,11 @@ ufs_whiteout(ap)
return (error);
}
static volatile int rename_restarts;
SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
__DEVOLATILE(int *, &rename_restarts), 0,
"Times rename had to restart due to lock contention");
/*
* Rename system call.
* rename("foo", "bar");
@ -1101,111 +1111,183 @@ ufs_rename(ap)
struct vnode *tdvp = ap->a_tdvp;
struct vnode *fvp = ap->a_fvp;
struct vnode *fdvp = ap->a_fdvp;
struct vnode *nvp;
struct componentname *tcnp = ap->a_tcnp;
struct componentname *fcnp = ap->a_fcnp;
struct thread *td = fcnp->cn_thread;
struct inode *ip, *xp, *dp;
struct inode *fip, *tip, *tdp, *fdp;
struct direct newdir;
int doingdirectory = 0, oldparent = 0, newparent = 0;
off_t endoff;
int doingdirectory, newparent;
int error = 0, ioflag;
ino_t fvp_ino;
struct mount *mp;
ino_t ino;
#ifdef INVARIANTS
if ((tcnp->cn_flags & HASBUF) == 0 ||
(fcnp->cn_flags & HASBUF) == 0)
panic("ufs_rename: no name");
#endif
endoff = 0;
mp = tdvp->v_mount;
VOP_UNLOCK(tdvp, 0);
if (tvp && tvp != tdvp)
VOP_UNLOCK(tvp, 0);
/*
* Check for cross-device rename.
*/
if ((fvp->v_mount != tdvp->v_mount) ||
(tvp && (fvp->v_mount != tvp->v_mount))) {
error = EXDEV;
abortit:
if (tdvp == tvp)
vrele(tdvp);
else
vput(tdvp);
if (tvp)
vput(tvp);
vrele(fdvp);
vrele(fvp);
return (error);
mp = NULL;
goto releout;
}
error = vfs_busy(mp, 0);
if (error) {
mp = NULL;
goto releout;
}
relock:
/*
* We need to acquire 2 to 4 locks depending on whether tvp is NULL
* and fdvp and tdvp are the same directory. Subsequently we need
* to double-check all paths and in the directory rename case we
* need to verify that we are not creating a directory loop. To
* handle this we acquire all but fdvp using non-blocking
* acquisitions. If we fail to acquire any lock in the path we will
* drop all held locks, acquire the new lock in a blocking fashion,
* and then release it and restart the rename. This acquire/release
* step ensures that we do not spin on a lock waiting for release.
*/
error = vn_lock(fdvp, LK_EXCLUSIVE);
if (error)
goto releout;
if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
VOP_UNLOCK(fdvp, 0);
error = vn_lock(tdvp, LK_EXCLUSIVE);
if (error)
goto releout;
VOP_UNLOCK(tdvp, 0);
atomic_add_int(&rename_restarts, 1);
goto relock;
}
/*
* Re-resolve fvp to be certain it still exists and fetch the
* correct vnode.
*/
error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
if (error) {
VOP_UNLOCK(fdvp, 0);
VOP_UNLOCK(tdvp, 0);
goto releout;
}
error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
if (error) {
VOP_UNLOCK(fdvp, 0);
VOP_UNLOCK(tdvp, 0);
if (error != EBUSY)
goto releout;
error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
if (error != 0)
goto releout;
VOP_UNLOCK(nvp, 0);
vrele(fvp);
fvp = nvp;
atomic_add_int(&rename_restarts, 1);
goto relock;
}
vrele(fvp);
fvp = nvp;
/*
* Re-resolve tvp and acquire the vnode lock if present.
*/
error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
if (error != 0 && error != EJUSTRETURN) {
VOP_UNLOCK(fdvp, 0);
VOP_UNLOCK(tdvp, 0);
VOP_UNLOCK(fvp, 0);
goto releout;
}
/*
* If tvp disappeared we just carry on.
*/
if (error == EJUSTRETURN && tvp != NULL) {
vrele(tvp);
tvp = NULL;
}
/*
* Get the tvp ino if the lookup succeeded. We may have to restart
* if the non-blocking acquire fails.
*/
if (error == 0) {
nvp = NULL;
error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
if (tvp)
vrele(tvp);
tvp = nvp;
if (error) {
VOP_UNLOCK(fdvp, 0);
VOP_UNLOCK(tdvp, 0);
VOP_UNLOCK(fvp, 0);
if (error != EBUSY)
goto releout;
error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
if (error != 0)
goto releout;
VOP_UNLOCK(nvp, 0);
atomic_add_int(&rename_restarts, 1);
goto relock;
}
}
fdp = VTOI(fdvp);
fip = VTOI(fvp);
tdp = VTOI(tdvp);
tip = NULL;
if (tvp)
tip = VTOI(tvp);
if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
(VTOI(tdvp)->i_flags & APPEND))) {
error = EPERM;
goto abortit;
goto unlockout;
}
/*
* Renaming a file to itself has no effect. The upper layers should
* not call us in that case. Temporarily just warn if they do.
* not call us in that case. However, things could change after
* we drop the locks above.
*/
if (fvp == tvp) {
printf("ufs_rename: fvp == tvp (can't happen)\n");
error = 0;
goto abortit;
goto unlockout;
}
if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
goto abortit;
dp = VTOI(fdvp);
ip = VTOI(fvp);
if (ip->i_nlink >= LINK_MAX) {
VOP_UNLOCK(fvp, 0);
doingdirectory = 0;
newparent = 0;
ino = fip->i_number;
if (fip->i_nlink >= LINK_MAX) {
error = EMLINK;
goto abortit;
goto unlockout;
}
if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
|| (dp->i_flags & APPEND)) {
VOP_UNLOCK(fvp, 0);
if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
|| (fdp->i_flags & APPEND)) {
error = EPERM;
goto abortit;
goto unlockout;
}
if ((ip->i_mode & IFMT) == IFDIR) {
if ((fip->i_mode & IFMT) == IFDIR) {
/*
* Avoid ".", "..", and aliases of "." for obvious reasons.
*/
if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
(ip->i_flag & IN_RENAME)) {
VOP_UNLOCK(fvp, 0);
fdp == fip ||
(fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
error = EINVAL;
goto abortit;
goto unlockout;
}
ip->i_flag |= IN_RENAME;
oldparent = dp->i_number;
if (fdp->i_number != tdp->i_number)
newparent = tdp->i_number;
doingdirectory = 1;
}
vrele(fdvp);
/*
* When the target exists, both the directory
* and target vnodes are returned locked.
*/
dp = VTOI(tdvp);
xp = NULL;
if (tvp)
xp = VTOI(tvp);
/*
* 1) Bump link count while we're moving stuff
* around. If we crash somewhere before
* completing our work, the link count
* may be wrong, but correctable.
*/
ip->i_effnlink++;
ip->i_nlink++;
DIP_SET(ip, i_nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(fvp))
softdep_change_linkcnt(ip);
if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
DOINGASYNC(fvp)))) != 0) {
VOP_UNLOCK(fvp, 0);
goto bad;
if (fvp->v_mountedhere != NULL || (tvp && tvp->v_mountedhere != NULL)) {
error = EXDEV;
goto unlockout;
}
/*
@ -1214,35 +1296,55 @@ abortit:
* directory hierarchy above the target, as this would
* orphan everything below the source directory. Also
* the user must have write permission in the source so
* as to be able to change "..". We must repeat the call
* to namei, as the parent directory is unlocked by the
* call to checkpath().
* as to be able to change "..".
*/
error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
fvp_ino = ip->i_number;
VOP_UNLOCK(fvp, 0);
if (oldparent != dp->i_number)
newparent = dp->i_number;
if (doingdirectory && newparent) {
if (error) /* write access check above */
goto bad;
if (xp != NULL)
vput(tvp);
error = ufs_checkpath(fvp_ino, dp, tcnp->cn_cred);
error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
if (error)
goto out;
goto unlockout;
error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
&ino);
/*
* We encountered a lock that we have to wait for. Unlock
* everything else and VGET before restarting.
*/
if (ino) {
VOP_UNLOCK(fdvp, 0);
VOP_UNLOCK(fvp, 0);
VOP_UNLOCK(tdvp, 0);
if (tvp)
VOP_UNLOCK(tvp, 0);
error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
if (error == 0)
vput(nvp);
atomic_add_int(&rename_restarts, 1);
goto relock;
}
if (error)
goto unlockout;
if ((tcnp->cn_flags & SAVESTART) == 0)
panic("ufs_rename: lost to startdir");
VREF(tdvp);
error = relookup(tdvp, &tvp, tcnp);
if (error)
goto out;
vrele(tdvp);
dp = VTOI(tdvp);
xp = NULL;
if (tvp)
xp = VTOI(tvp);
}
if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
tdp->i_effnlink == 0)
panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
/*
* 1) Bump link count while we're moving stuff
* around. If we crash somewhere before
* completing our work, the link count
* may be wrong, but correctable.
*/
fip->i_effnlink++;
fip->i_nlink++;
DIP_SET(fip, i_nlink, fip->i_nlink);
fip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(fvp))
softdep_setup_link(tdp, fip);
error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)));
if (error)
goto bad;
/*
* 2) If target doesn't exist, link the target
* to the source and unlink the source.
@ -1250,52 +1352,37 @@ abortit:
* entry to reference the source inode and
* expunge the original entry's existence.
*/
if (xp == NULL) {
if (dp->i_dev != ip->i_dev)
if (tip == NULL) {
if (tdp->i_dev != fip->i_dev)
panic("ufs_rename: EXDEV");
/*
* Account for ".." in new directory.
* When source and destination have the same
* parent we don't fool with the link count.
*/
if (doingdirectory && newparent) {
if ((nlink_t)dp->i_nlink >= LINK_MAX) {
/*
* Account for ".." in new directory.
* When source and destination have the same
* parent we don't adjust the link count. The
* actual link modification is completed when
* .. is rewritten below.
*/
if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
error = EMLINK;
goto bad;
}
dp->i_effnlink++;
dp->i_nlink++;
DIP_SET(dp, i_nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(dp);
error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
DOINGASYNC(tdvp)));
if (error)
goto bad;
}
ufs_makedirentry(ip, tcnp, &newdir);
error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
if (error) {
if (doingdirectory && newparent) {
dp->i_effnlink--;
dp->i_nlink--;
DIP_SET(dp, i_nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(dp);
(void)UFS_UPDATE(tdvp, 1);
}
ufs_makedirentry(fip, tcnp, &newdir);
error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
if (error)
goto bad;
}
vput(tdvp);
/* Set up tdvp for directory compaction if needed. */
if (tdp->i_count && tdp->i_endoff &&
tdp->i_endoff < tdp->i_size)
endoff = tdp->i_endoff;
} else {
if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev)
panic("ufs_rename: EXDEV");
/*
* Short circuit rename(foo, foo).
*/
if (xp->i_number == ip->i_number)
if (tip->i_number == fip->i_number)
panic("ufs_rename: same file");
/*
* If the parent directory is "sticky", then the caller
@ -1303,7 +1390,7 @@ abortit:
* destination of the rename. This implements append-only
* directories.
*/
if ((dp->i_mode & S_ISTXT) &&
if ((tdp->i_mode & S_ISTXT) &&
VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
error = EPERM;
@ -1314,9 +1401,9 @@ abortit:
* to it. Also, ensure source and target are compatible
* (both directories, or both not directories).
*/
if ((xp->i_mode&IFMT) == IFDIR) {
if ((xp->i_effnlink > 2) ||
!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
if ((tip->i_mode & IFMT) == IFDIR) {
if ((tip->i_effnlink > 2) ||
!ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
error = ENOTEMPTY;
goto bad;
}
@ -1329,20 +1416,30 @@ abortit:
error = EISDIR;
goto bad;
}
error = ufs_dirrewrite(dp, xp, ip->i_number,
IFTODT(ip->i_mode),
(doingdirectory && newparent) ? newparent : doingdirectory);
if (error)
goto bad;
if (doingdirectory) {
if (!newparent) {
dp->i_effnlink--;
tdp->i_effnlink--;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(dp);
softdep_change_linkcnt(tdp);
}
xp->i_effnlink--;
tip->i_effnlink--;
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(xp);
softdep_change_linkcnt(tip);
}
error = ufs_dirrewrite(tdp, tip, fip->i_number,
IFTODT(fip->i_mode),
(doingdirectory && newparent) ? newparent : doingdirectory);
if (error) {
if (doingdirectory) {
if (!newparent) {
tdp->i_effnlink++;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
}
tip->i_effnlink++;
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(tip);
}
}
if (doingdirectory && !DOINGSOFTDEP(tvp)) {
/*
@ -1357,115 +1454,107 @@ abortit:
* them now.
*/
if (!newparent) {
dp->i_nlink--;
DIP_SET(dp, i_nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE;
tdp->i_nlink--;
DIP_SET(tdp, i_nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
}
xp->i_nlink--;
DIP_SET(xp, i_nlink, xp->i_nlink);
xp->i_flag |= IN_CHANGE;
tip->i_nlink--;
DIP_SET(tip, i_nlink, tip->i_nlink);
tip->i_flag |= IN_CHANGE;
ioflag = IO_NORMAL;
if (!DOINGASYNC(tvp))
ioflag |= IO_SYNC;
/* Don't go to bad here as the new link exists. */
if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
tcnp->cn_cred, tcnp->cn_thread)) != 0)
goto bad;
goto unlockout;
}
vput(tdvp);
vput(tvp);
xp = NULL;
}
/*
* 3) Unlink the source.
* 3) Unlink the source. We have to resolve the path again to
* fix up the directory offset and count for ufs_dirremove.
*/
fcnp->cn_flags &= ~MODMASK;
fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
if ((fcnp->cn_flags & SAVESTART) == 0)
panic("ufs_rename: lost from startdir");
VREF(fdvp);
error = relookup(fdvp, &fvp, fcnp);
if (error == 0)
vrele(fdvp);
if (fvp != NULL) {
xp = VTOI(fvp);
dp = VTOI(fdvp);
} else {
/*
* From name has disappeared. IN_RENAME is not sufficient
* to protect against directory races due to timing windows,
* so we have to remove the panic. XXX the only real way
* to solve this issue is at a much higher level. By the
* time we hit ufs_rename() it's too late.
*/
#if 0
if (doingdirectory)
panic("ufs_rename: lost dir entry");
#endif
vrele(ap->a_fvp);
return (0);
if (fdvp == tdvp) {
error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
if (error)
panic("ufs_rename: from entry went away!");
if (ino != fip->i_number)
panic("ufs_rename: ino mismatch %d != %d\n", ino,
fip->i_number);
}
/*
* Ensure that the directory entry still exists and has not
* changed while the new name has been entered. If the source is
* a file then the entry may have been unlinked or renamed. In
* either case there is no further work to be done. If the source
* is a directory then it cannot have been rmdir'ed; the IN_RENAME
* flag ensures that it cannot be moved by another rename or removed
* by a rmdir.
* If the source is a directory with a
* new parent, the link count of the old
* parent directory must be decremented
* and ".." set to point to the new parent.
*/
if (xp != ip) {
if (doingdirectory && newparent) {
/*
* From name resolves to a different inode. IN_RENAME is
* not sufficient protection against timing window races
* so we can't panic here. XXX the only real way
* to solve this issue is at a much higher level. By the
* time we hit ufs_rename() it's too late.
* If tip exists we simply use its link; otherwise we must
* add a new one.
*/
#if 0
if (doingdirectory)
panic("ufs_rename: lost dir entry");
#endif
} else {
/*
* If the source is a directory with a
* new parent, the link count of the old
* parent directory must be decremented
* and ".." set to point to the new parent.
*/
if (doingdirectory && newparent) {
xp->i_offset = mastertemplate.dot_reclen;
ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
cache_purge(fdvp);
if (tip == NULL) {
tdp->i_effnlink++;
tdp->i_nlink++;
DIP_SET(tdp, i_nlink, tdp->i_nlink);
tdp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_setup_dotdot_link(tdp, fip);
error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
DOINGASYNC(tdvp)));
/* Don't go to bad here as the new link exists. */
if (error)
goto unlockout;
}
error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
xp->i_flag &= ~IN_RENAME;
fip->i_offset = mastertemplate.dot_reclen;
ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
cache_purge(fdvp);
}
if (dp)
vput(fdvp);
if (xp)
vput(fvp);
vrele(ap->a_fvp);
error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
unlockout:
vput(fdvp);
vput(fvp);
if (tvp)
vput(tvp);
/*
* If compaction or fsync was requested do it now that other locks
* are no longer needed.
*/
if (error == 0 && endoff != 0) {
#ifdef UFS_DIRHASH
if (tdp->i_dirhash != NULL)
ufsdirhash_dirtrunc(tdp, endoff);
#endif
UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred,
td);
}
if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
error = VOP_FSYNC(tdvp, MNT_WAIT, td);
vput(tdvp);
if (mp)
vfs_unbusy(mp);
return (error);
bad:
if (xp)
vput(ITOV(xp));
vput(ITOV(dp));
out:
if (doingdirectory)
ip->i_flag &= ~IN_RENAME;
if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
ip->i_effnlink--;
ip->i_nlink--;
DIP_SET(ip, i_nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
ip->i_flag &= ~IN_RENAME;
if (DOINGSOFTDEP(fvp))
softdep_change_linkcnt(ip);
vput(fvp);
} else
vrele(fvp);
fip->i_effnlink--;
fip->i_nlink--;
DIP_SET(fip, i_nlink, fip->i_nlink);
fip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(fvp))
softdep_revert_link(tdp, fip);
goto unlockout;
releout:
vrele(fdvp);
vrele(fvp);
vrele(tdvp);
if (tvp)
vrele(tvp);
if (mp)
vfs_unbusy(mp);
return (error);
}
@ -1767,8 +1856,7 @@ ufs_mkdir(ap)
ip->i_effnlink = 2;
ip->i_nlink = 2;
DIP_SET(ip, i_nlink, 2);
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip);
if (cnp->cn_flags & ISWHITEOUT) {
ip->i_flags |= UF_OPAQUE;
DIP_SET(ip, i_flags, ip->i_flags);
@ -1784,8 +1872,8 @@ ufs_mkdir(ap)
DIP_SET(dp, i_nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(dvp))
softdep_change_linkcnt(dp);
error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
softdep_setup_mkdir(dp, ip);
error = UFS_UPDATE(dvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
if (error)
goto bad;
#ifdef MAC
@ -1863,7 +1951,7 @@ ufs_mkdir(ap)
else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp))))
goto bad;
ufs_makedirentry(ip, cnp, &newdir);
error = ufs_direnter(dvp, tvp, &newdir, cnp, bp);
error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0);
bad:
if (error == 0) {
@ -1873,8 +1961,6 @@ bad:
dp->i_nlink--;
DIP_SET(dp, i_nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(dvp))
softdep_change_linkcnt(dp);
/*
* No need to do an explicit VOP_TRUNCATE here, vrele will
* do this for us because we set the link count to 0.
@ -1884,7 +1970,8 @@ bad:
DIP_SET(ip, i_nlink, 0);
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip);
softdep_revert_mkdir(dp, ip);
vput(tvp);
}
out:
@ -1920,10 +2007,13 @@ ufs_rmdir(ap)
* tries to remove a locally mounted on directory).
*/
error = 0;
if ((ip->i_flag & IN_RENAME) || ip->i_effnlink < 2) {
if (ip->i_effnlink < 2) {
error = EINVAL;
goto out;
}
if (dp->i_effnlink < 3)
panic("ufs_dirrem: Bad link count %d on parent",
dp->i_effnlink);
if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
error = ENOTEMPTY;
goto out;
@ -1947,18 +2037,14 @@ ufs_rmdir(ap)
*/
dp->i_effnlink--;
ip->i_effnlink--;
if (DOINGSOFTDEP(vp)) {
softdep_change_linkcnt(dp);
softdep_change_linkcnt(ip);
}
if (DOINGSOFTDEP(vp))
softdep_setup_rmdir(dp, ip);
error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
if (error) {
dp->i_effnlink++;
ip->i_effnlink++;
if (DOINGSOFTDEP(vp)) {
softdep_change_linkcnt(dp);
softdep_change_linkcnt(ip);
}
if (DOINGSOFTDEP(vp))
softdep_revert_rmdir(dp, ip);
goto out;
}
cache_purge(dvp);
@ -2464,6 +2550,9 @@ ufs_makeinode(mode, dvp, vpp, cnp)
if ((mode & IFMT) == 0)
mode |= IFREG;
if (VTOI(dvp)->i_effnlink < 2)
panic("ufs_makeinode: Bad link count %d on parent",
VTOI(dvp)->i_effnlink);
error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
if (error)
return (error);
@ -2539,7 +2628,7 @@ ufs_makeinode(mode, dvp, vpp, cnp)
ip->i_nlink = 1;
DIP_SET(ip, i_nlink, 1);
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip);
softdep_setup_create(VTOI(dvp), ip);
if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) {
ip->i_mode &= ~ISGID;
@ -2579,7 +2668,7 @@ ufs_makeinode(mode, dvp, vpp, cnp)
}
#endif /* !UFS_ACL */
ufs_makedirentry(ip, cnp, &newdir);
error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0);
if (error)
goto bad;
*vpp = tvp;
@ -2595,7 +2684,7 @@ bad:
DIP_SET(ip, i_nlink, 0);
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip);
softdep_revert_create(VTOI(dvp), ip);
vput(tvp);
return (error);
}

View file

@ -57,6 +57,10 @@ struct ucred;
struct uio;
struct vnode;
struct ufs_extattr_per_mount;
struct jblocks;
struct inodedep;
TAILQ_HEAD(inodedeplst, inodedep);
/* This structure describes the UFS specific mount structure data. */
struct ufsmount {
@ -75,6 +79,11 @@ struct ufsmount {
long um_numindirdeps; /* outstanding indirdeps */
struct workhead softdep_workitem_pending; /* softdep work queue */
struct worklist *softdep_worklist_tail; /* Tail pointer for above */
struct workhead softdep_journal_pending; /* journal work queue */
struct worklist *softdep_journal_tail; /* Tail pointer for above */
struct jblocks *softdep_jblocks; /* Journal block information */
struct inodedeplst softdep_unlinked; /* Unlinked inodes */
int softdep_on_journal; /* Items on the journal list */
int softdep_on_worklist; /* Items on the worklist */
int softdep_on_worklist_inprogress; /* Busy items on worklist */
int softdep_deps; /* Total dependency count */

View file

@ -136,8 +136,6 @@ ffs_dinode1_swap(struct ufs1_dinode *o, struct ufs1_dinode *n)
n->di_mode = bswap16(o->di_mode);
n->di_nlink = bswap16(o->di_nlink);
n->di_u.oldids[0] = bswap16(o->di_u.oldids[0]);
n->di_u.oldids[1] = bswap16(o->di_u.oldids[1]);
n->di_size = bswap64(o->di_size);
n->di_atime = bswap32(o->di_atime);
n->di_atimensec = bswap32(o->di_atimensec);