mirror of
https://github.com/opnsense/src.git
synced 2026-05-28 04:12:45 -04:00
This is David Schultz's swapoff code which I am finally able to commit.
This should be considered highly experimental for the moment. Submitted by: David Schultz <dschultz@uclink.Berkeley.EDU> MFC after: 3 weeks
This commit is contained in:
parent
389d2b6e21
commit
92da00bb24
19 changed files with 633 additions and 84 deletions
|
|
@ -522,6 +522,7 @@ int setruid(uid_t);
|
||||||
void setusershell(void);
|
void setusershell(void);
|
||||||
int strtofflags(char **, u_long *, u_long *);
|
int strtofflags(char **, u_long *, u_long *);
|
||||||
int swapon(const char *);
|
int swapon(const char *);
|
||||||
|
int swapoff(const char *);
|
||||||
int syscall(int, ...);
|
int syscall(int, ...);
|
||||||
off_t __syscall(quad_t, ...);
|
off_t __syscall(quad_t, ...);
|
||||||
int ttyslot(void);
|
int ttyslot(void);
|
||||||
|
|
|
||||||
|
|
@ -131,6 +131,7 @@ MLINKS+=shmat.2 shmdt.2
|
||||||
MLINKS+=stat.2 fstat.2 stat.2 lstat.2
|
MLINKS+=stat.2 fstat.2 stat.2 lstat.2
|
||||||
MLINKS+=statfs.2 fstatfs.2
|
MLINKS+=statfs.2 fstatfs.2
|
||||||
MLINKS+=syscall.2 __syscall.2
|
MLINKS+=syscall.2 __syscall.2
|
||||||
|
MLINKS+=swapon.2 swapoff.2
|
||||||
MLINKS+=truncate.2 ftruncate.2
|
MLINKS+=truncate.2 ftruncate.2
|
||||||
MLINKS+=utimes.2 futimes.2 utimes.2 lutimes.2
|
MLINKS+=utimes.2 futimes.2 utimes.2 lutimes.2
|
||||||
MLINKS+=wait.2 wait3.2 wait.2 wait4.2 wait.2 waitpid.2
|
MLINKS+=wait.2 wait3.2 wait.2 wait4.2 wait.2 waitpid.2
|
||||||
|
|
|
||||||
|
|
@ -36,14 +36,16 @@
|
||||||
.Dt SWAPON 2
|
.Dt SWAPON 2
|
||||||
.Os
|
.Os
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
.Nm swapon
|
.Nm swapon , swapoff
|
||||||
.Nd add a swap device for interleaved paging/swapping
|
.Nd control devices for interleaved paging/swapping
|
||||||
.Sh LIBRARY
|
.Sh LIBRARY
|
||||||
.Lb libc
|
.Lb libc
|
||||||
.Sh SYNOPSIS
|
.Sh SYNOPSIS
|
||||||
.In unistd.h
|
.In unistd.h
|
||||||
.Ft int
|
.Ft int
|
||||||
.Fn swapon "const char *special"
|
.Fn swapon "const char *special"
|
||||||
|
.Ft int
|
||||||
|
.Fn swapoff "const char *special"
|
||||||
.Sh DESCRIPTION
|
.Sh DESCRIPTION
|
||||||
.Fn Swapon
|
.Fn Swapon
|
||||||
makes the block device
|
makes the block device
|
||||||
|
|
@ -55,13 +57,22 @@ configuration time. The size of the swap area on
|
||||||
.Fa special
|
.Fa special
|
||||||
is calculated at the time the device is first made available
|
is calculated at the time the device is first made available
|
||||||
for swapping.
|
for swapping.
|
||||||
|
.Pp
|
||||||
|
The
|
||||||
|
.Fn swapoff
|
||||||
|
system call disables paging and swapping on the given device.
|
||||||
|
All associated swap metadata are deallocated, and the device
|
||||||
|
is made available for other purposes.
|
||||||
.Sh RETURN VALUES
|
.Sh RETURN VALUES
|
||||||
If an error has occurred, a value of -1 is returned and
|
If an error has occurred, a value of -1 is returned and
|
||||||
.Va errno
|
.Va errno
|
||||||
is set to indicate the error.
|
is set to indicate the error.
|
||||||
.Sh ERRORS
|
.Sh ERRORS
|
||||||
.Fn Swapon
|
Both
|
||||||
succeeds unless:
|
.Fn swapon
|
||||||
|
and
|
||||||
|
.Fn swapoff
|
||||||
|
can fail if:
|
||||||
.Bl -tag -width Er
|
.Bl -tag -width Er
|
||||||
.It Bq Er ENOTDIR
|
.It Bq Er ENOTDIR
|
||||||
A component of the path prefix is not a directory.
|
A component of the path prefix is not a directory.
|
||||||
|
|
@ -76,6 +87,19 @@ Search permission is denied for a component of the path prefix.
|
||||||
Too many symbolic links were encountered in translating the pathname.
|
Too many symbolic links were encountered in translating the pathname.
|
||||||
.It Bq Er EPERM
|
.It Bq Er EPERM
|
||||||
The caller is not the super-user.
|
The caller is not the super-user.
|
||||||
|
.It Bq Er EFAULT
|
||||||
|
.Fa Special
|
||||||
|
points outside the process's allocated address space.
|
||||||
|
.El
|
||||||
|
.Pp
|
||||||
|
Additionally,
|
||||||
|
.Fn swapon
|
||||||
|
can fail for the following reasons:
|
||||||
|
.Bl -tag -width Er
|
||||||
|
.It Bq Er EINVAL
|
||||||
|
The system has reached the boot-time limit on the number of
|
||||||
|
swap devices,
|
||||||
|
.Va vm.nswapdev .
|
||||||
.It Bq Er ENOTBLK
|
.It Bq Er ENOTBLK
|
||||||
.Fa Special
|
.Fa Special
|
||||||
is not a block device.
|
is not a block device.
|
||||||
|
|
@ -84,11 +108,6 @@ The device specified by
|
||||||
.Fa special
|
.Fa special
|
||||||
has already
|
has already
|
||||||
been made available for swapping
|
been made available for swapping
|
||||||
.It Bq Er EINVAL
|
|
||||||
The device configured by
|
|
||||||
.Fa special
|
|
||||||
was not
|
|
||||||
configured into the system as a swap device.
|
|
||||||
.It Bq Er ENXIO
|
.It Bq Er ENXIO
|
||||||
The major device number of
|
The major device number of
|
||||||
.Fa special
|
.Fa special
|
||||||
|
|
@ -96,20 +115,28 @@ is out of range (this indicates no device driver exists
|
||||||
for the associated hardware).
|
for the associated hardware).
|
||||||
.It Bq Er EIO
|
.It Bq Er EIO
|
||||||
An I/O error occurred while opening the swap device.
|
An I/O error occurred while opening the swap device.
|
||||||
.It Bq Er EFAULT
|
.El
|
||||||
.Fa Special
|
.Pp
|
||||||
points outside the process's allocated address space.
|
Lastly,
|
||||||
|
.Fn swapoff
|
||||||
|
can fail if:
|
||||||
|
.Bl -tag -width Er
|
||||||
|
.It Bq Er EINVAL
|
||||||
|
The system is not currently swapping to
|
||||||
|
.Fa special .
|
||||||
|
.It Bq Er ENOMEM
|
||||||
|
Not enough virtual memory is available to safely disable
|
||||||
|
paging and swapping to the given device.
|
||||||
.El
|
.El
|
||||||
.Sh SEE ALSO
|
.Sh SEE ALSO
|
||||||
.Xr config 8 ,
|
.Xr config 8 ,
|
||||||
.Xr swapon 8
|
.Xr swapon 8 ,
|
||||||
.Sh BUGS
|
.Xr sysctl 8
|
||||||
There is no way to stop swapping on a disk so that the pack may be
|
|
||||||
dismounted.
|
|
||||||
.Pp
|
|
||||||
This call will be upgraded in future versions of the system.
|
|
||||||
.Sh HISTORY
|
.Sh HISTORY
|
||||||
The
|
The
|
||||||
.Fn swapon
|
.Fn swapon
|
||||||
function call appeared in
|
function call appeared in
|
||||||
.Bx 4.0 .
|
.Bx 4.0 .
|
||||||
|
.Fn Swapoff
|
||||||
|
appeared in
|
||||||
|
.Fx 5.0 .
|
||||||
|
|
|
||||||
|
|
@ -3,5 +3,7 @@
|
||||||
|
|
||||||
PROG= swapon
|
PROG= swapon
|
||||||
MAN= swapon.8
|
MAN= swapon.8
|
||||||
|
LINKS= ${BINDIR}/swapon ${BINDIR}/swapoff
|
||||||
|
MLINKS= swapon.8 swapoff.8
|
||||||
|
|
||||||
.include <bsd.prog.mk>
|
.include <bsd.prog.mk>
|
||||||
|
|
|
||||||
|
|
@ -36,39 +36,46 @@
|
||||||
.Dt SWAPON 8
|
.Dt SWAPON 8
|
||||||
.Os
|
.Os
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
.Nm swapon
|
.Nm swapon , swapoff
|
||||||
.Nd "specify additional device for paging and swapping"
|
.Nd "specify devices for paging and swapping"
|
||||||
.Sh SYNOPSIS
|
.Sh SYNOPSIS
|
||||||
.Nm
|
.Nm swap[on|off]
|
||||||
.Fl a
|
.Fl a
|
||||||
.Nm
|
.Nm swap[on|off]
|
||||||
.Ar special_file ...
|
.Ar special_file ...
|
||||||
.Sh DESCRIPTION
|
.Sh DESCRIPTION
|
||||||
The
|
The
|
||||||
.Nm
|
.Nm swapon
|
||||||
utility is used to specify additional devices on which paging and swapping
|
utility is used to specify additional devices on which paging and swapping
|
||||||
are to take place.
|
are to take place.
|
||||||
The system begins by swapping and paging on only a single device
|
The system begins by swapping and paging on only a single device
|
||||||
so that only one disk is required at bootstrap time.
|
so that only one disk is required at bootstrap time.
|
||||||
Calls to
|
Calls to
|
||||||
.Nm
|
.Nm swapon
|
||||||
normally occur in the system multi-user initialization file
|
normally occur in the system multi-user initialization file
|
||||||
.Pa /etc/rc
|
.Pa /etc/rc
|
||||||
making all swap devices available, so that the paging and swapping
|
making all swap devices available, so that the paging and swapping
|
||||||
activity is interleaved across several devices.
|
activity is interleaved across several devices.
|
||||||
.Pp
|
.Pp
|
||||||
|
The
|
||||||
|
.Nm swapoff
|
||||||
|
utility disables paging and swapping on a device.
|
||||||
|
Calls to
|
||||||
|
.Nm swapoff
|
||||||
|
succeed only if disabling the device would leave enough
|
||||||
|
remaining virtual memory to accommodate all running programs.
|
||||||
|
.Pp
|
||||||
Normally, the first form is used:
|
Normally, the first form is used:
|
||||||
.Bl -tag -width indent
|
.Bl -tag -width indent
|
||||||
.It Fl a
|
.It Fl a
|
||||||
All devices marked as ``sw''
|
All devices marked as ``sw''
|
||||||
swap devices in
|
swap devices in
|
||||||
.Pa /etc/fstab
|
.Pa /etc/fstab
|
||||||
are made available unless their ``noauto'' option is also set.
|
are added to or removed from the pool of available swap
|
||||||
|
unless their ``noauto'' option is also set.
|
||||||
.El
|
.El
|
||||||
.Pp
|
.Pp
|
||||||
The second form gives individual block devices as given
|
The second form is used to configure or disable individual devices.
|
||||||
in the system swap configuration table. The call makes only this space
|
|
||||||
available to the system for swap allocation.
|
|
||||||
.Sh SEE ALSO
|
.Sh SEE ALSO
|
||||||
.Xr swapon 2 ,
|
.Xr swapon 2 ,
|
||||||
.Xr fstab 5 ,
|
.Xr fstab 5 ,
|
||||||
|
|
@ -85,12 +92,12 @@ memory disk devices
|
||||||
.It Pa /etc/fstab
|
.It Pa /etc/fstab
|
||||||
ASCII file system description table
|
ASCII file system description table
|
||||||
.El
|
.El
|
||||||
.Sh BUGS
|
|
||||||
There is no way to stop paging and swapping on a device.
|
|
||||||
It is therefore not possible to dismount swap devices which are
|
|
||||||
mounted during system operation.
|
|
||||||
.Sh HISTORY
|
.Sh HISTORY
|
||||||
The
|
The
|
||||||
.Nm
|
.Nm swapon
|
||||||
utility appeared in
|
utility appeared in
|
||||||
.Bx 4.0 .
|
.Bx 4.0 .
|
||||||
|
The
|
||||||
|
.Nm swapoff
|
||||||
|
utility appeared in
|
||||||
|
.Fx 5.0 .
|
||||||
|
|
|
||||||
|
|
@ -53,8 +53,9 @@ static const char rcsid[] =
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
static void usage(void);
|
static void usage(const char *);
|
||||||
int add(char *name, int ignoreebusy);
|
static int is_swapoff(const char *);
|
||||||
|
int swap_on_off(char *name, int ignoreebusy, int do_swapoff);
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
|
|
@ -62,6 +63,10 @@ main(int argc, char **argv)
|
||||||
struct fstab *fsp;
|
struct fstab *fsp;
|
||||||
int stat;
|
int stat;
|
||||||
int ch, doall;
|
int ch, doall;
|
||||||
|
int do_swapoff;
|
||||||
|
char *pname = argv[0];
|
||||||
|
|
||||||
|
do_swapoff = is_swapoff(pname);
|
||||||
|
|
||||||
doall = 0;
|
doall = 0;
|
||||||
while ((ch = getopt(argc, argv, "a")) != -1)
|
while ((ch = getopt(argc, argv, "a")) != -1)
|
||||||
|
|
@ -71,7 +76,7 @@ main(int argc, char **argv)
|
||||||
break;
|
break;
|
||||||
case '?':
|
case '?':
|
||||||
default:
|
default:
|
||||||
usage();
|
usage(pname);
|
||||||
}
|
}
|
||||||
argv += optind;
|
argv += optind;
|
||||||
|
|
||||||
|
|
@ -82,23 +87,24 @@ main(int argc, char **argv)
|
||||||
continue;
|
continue;
|
||||||
if (strstr(fsp->fs_mntops, "noauto"))
|
if (strstr(fsp->fs_mntops, "noauto"))
|
||||||
continue;
|
continue;
|
||||||
if (add(fsp->fs_spec, 1))
|
if (swap_on_off(fsp->fs_spec, 1, do_swapoff))
|
||||||
stat = 1;
|
stat = 1;
|
||||||
else
|
else
|
||||||
printf("swapon: adding %s as swap device\n",
|
printf("%s: %sing %s as swap device\n",
|
||||||
|
pname, do_swapoff ? "remov" : "add",
|
||||||
fsp->fs_spec);
|
fsp->fs_spec);
|
||||||
}
|
}
|
||||||
else if (!*argv)
|
else if (!*argv)
|
||||||
usage();
|
usage(pname);
|
||||||
for (; *argv; ++argv)
|
for (; *argv; ++argv)
|
||||||
stat |= add(*argv, 0);
|
stat |= swap_on_off(*argv, 0, do_swapoff);
|
||||||
exit(stat);
|
exit(stat);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
add(char *name, int ignoreebusy)
|
swap_on_off(char *name, int ignoreebusy, int do_swapoff)
|
||||||
{
|
{
|
||||||
if (swapon(name) == -1) {
|
if ((do_swapoff ? swapoff(name) : swapon(name)) == -1) {
|
||||||
switch (errno) {
|
switch (errno) {
|
||||||
case EBUSY:
|
case EBUSY:
|
||||||
if (!ignoreebusy)
|
if (!ignoreebusy)
|
||||||
|
|
@ -114,8 +120,23 @@ add(char *name, int ignoreebusy)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
usage()
|
usage(const char *pname)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "usage: swapon [-a] [special_file ...]\n");
|
fprintf(stderr, "usage: %s [-a] [special_file ...]\n", pname);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Report whether the program name in s refers to "swapoff".
 *
 * Only the component after the last '/' is compared, so both "swapoff"
 * and "/sbin/swapoff" match.  Returns 1 on a match and 0 otherwise.
 */
static int
is_swapoff(const char *s)
{
	const char *base = strrchr(s, '/');

	/* No slash at all: the whole string is the program name. */
	base = (base != NULL) ? base + 1 : s;
	return (strcmp(base, "swapoff") == 0);
}
|
||||||
|
|
|
||||||
|
|
@ -594,3 +594,4 @@
|
||||||
421 UNIMPL BSD getcontext
|
421 UNIMPL BSD getcontext
|
||||||
422 UNIMPL BSD setcontext
|
422 UNIMPL BSD setcontext
|
||||||
423 UNIMPL BSD swapcontext
|
423 UNIMPL BSD swapcontext
|
||||||
|
424 MNOPROTO BSD swapoff
|
||||||
|
|
|
||||||
|
|
@ -594,3 +594,4 @@
|
||||||
421 UNIMPL BSD getcontext
|
421 UNIMPL BSD getcontext
|
||||||
422 UNIMPL BSD setcontext
|
422 UNIMPL BSD setcontext
|
||||||
423 UNIMPL BSD swapcontext
|
423 UNIMPL BSD swapcontext
|
||||||
|
424 MNOPROTO BSD swapoff
|
||||||
|
|
|
||||||
|
|
@ -594,3 +594,4 @@
|
||||||
421 UNIMPL BSD getcontext
|
421 UNIMPL BSD getcontext
|
||||||
422 UNIMPL BSD setcontext
|
422 UNIMPL BSD setcontext
|
||||||
423 UNIMPL BSD swapcontext
|
423 UNIMPL BSD swapcontext
|
||||||
|
424 MNOPROTO BSD swapoff
|
||||||
|
|
|
||||||
|
|
@ -93,7 +93,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
|
|
||||||
#define malloc(a,b,c) malloc(a)
|
#define malloc(a,b,c) calloc(a, 1)
|
||||||
#define free(a,b) free(a)
|
#define free(a,b) free(a)
|
||||||
|
|
||||||
typedef unsigned int u_daddr_t;
|
typedef unsigned int u_daddr_t;
|
||||||
|
|
@ -116,6 +116,9 @@ static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
|
||||||
daddr_t radix, int skip, daddr_t blk);
|
daddr_t radix, int skip, daddr_t blk);
|
||||||
static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
|
static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
|
||||||
daddr_t skip, blist_t dest, daddr_t count);
|
daddr_t skip, blist_t dest, daddr_t count);
|
||||||
|
static int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
|
||||||
|
static int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
|
||||||
|
daddr_t radix, int skip, daddr_t blk);
|
||||||
static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix,
|
static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix,
|
||||||
int skip, daddr_t count);
|
int skip, daddr_t count);
|
||||||
#ifndef _KERNEL
|
#ifndef _KERNEL
|
||||||
|
|
@ -165,13 +168,14 @@ blist_create(daddr_t blocks)
|
||||||
|
|
||||||
#if defined(BLIST_DEBUG)
|
#if defined(BLIST_DEBUG)
|
||||||
printf(
|
printf(
|
||||||
"BLIST representing %d blocks (%d MB of swap)"
|
"BLIST representing %lld blocks (%lld MB of swap)"
|
||||||
", requiring %dK of ram\n",
|
", requiring %lldK of ram\n",
|
||||||
bl->bl_blocks,
|
(long long)bl->bl_blocks,
|
||||||
bl->bl_blocks * 4 / 1024,
|
(long long)bl->bl_blocks * 4 / 1024,
|
||||||
(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
|
(long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
|
||||||
);
|
);
|
||||||
printf("BLIST raw radix tree contains %d records\n", bl->bl_rootblks);
|
printf("BLIST raw radix tree contains %lld records\n",
|
||||||
|
(long long)bl->bl_rootblks);
|
||||||
#endif
|
#endif
|
||||||
blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
|
blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
|
||||||
|
|
||||||
|
|
@ -225,6 +229,30 @@ blist_free(blist_t bl, daddr_t blkno, daddr_t count)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* blist_fill() - mark a region in the block bitmap as off-limits
|
||||||
|
* to the allocator (i.e. allocate it), ignoring any
|
||||||
|
* existing allocations. Return the number of blocks
|
||||||
|
* actually filled that were free before the call.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int
|
||||||
|
blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
|
||||||
|
{
|
||||||
|
int filled;
|
||||||
|
|
||||||
|
if (bl) {
|
||||||
|
if (bl->bl_radix == BLIST_BMAP_RADIX)
|
||||||
|
filled = blst_leaf_fill(bl->bl_root, blkno, count);
|
||||||
|
else
|
||||||
|
filled = blst_meta_fill(bl->bl_root, blkno, count,
|
||||||
|
bl->bl_radix, bl->bl_skip, 0);
|
||||||
|
bl->bl_free -= filled;
|
||||||
|
return filled;
|
||||||
|
} else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* blist_resize() - resize an existing radix tree to handle the
|
* blist_resize() - resize an existing radix tree to handle the
|
||||||
* specified number of blocks. This will reallocate
|
* specified number of blocks. This will reallocate
|
||||||
|
|
@ -507,9 +535,9 @@ blst_meta_free(
|
||||||
int next_skip = (skip >> BLIST_META_RADIX_SHIFT);
|
int next_skip = (skip >> BLIST_META_RADIX_SHIFT);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
printf("FREE (%x,%d) FROM (%x,%d)\n",
|
printf("FREE (%llx,%lld) FROM (%llx,%lld)\n",
|
||||||
freeBlk, count,
|
(long long)freeBlk, (long long)count,
|
||||||
blk, radix
|
(long long)blk, (long long)radix
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -678,6 +706,117 @@ static void blst_copy(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
|
||||||
|
*
|
||||||
|
* This routine allocates all blocks in the specified range
|
||||||
|
* regardless of any existing allocations in that range. Returns
|
||||||
|
* the number of blocks allocated by the call.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
|
||||||
|
{
|
||||||
|
int n = blk & (BLIST_BMAP_RADIX - 1);
|
||||||
|
int nblks;
|
||||||
|
u_daddr_t mask, bitmap;
|
||||||
|
|
||||||
|
mask = ((u_daddr_t)-1 << n) &
|
||||||
|
((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
|
||||||
|
|
||||||
|
/* Count the number of blocks we're about to allocate */
|
||||||
|
bitmap = scan->u.bmu_bitmap & mask;
|
||||||
|
for (nblks = 0; bitmap != 0; nblks++)
|
||||||
|
bitmap &= bitmap - 1;
|
||||||
|
|
||||||
|
scan->u.bmu_bitmap &= ~mask;
|
||||||
|
return nblks;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* BLIST_META_FILL() - allocate specific blocks at a meta node
|
||||||
|
*
|
||||||
|
* This routine allocates the specified range of blocks,
|
||||||
|
* regardless of any existing allocations in the range. The
|
||||||
|
* range must be within the extent of this node. Returns the
|
||||||
|
* number of blocks allocated by the call.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
blst_meta_fill(
|
||||||
|
blmeta_t *scan,
|
||||||
|
daddr_t allocBlk,
|
||||||
|
daddr_t count,
|
||||||
|
daddr_t radix,
|
||||||
|
int skip,
|
||||||
|
daddr_t blk
|
||||||
|
) {
|
||||||
|
int i;
|
||||||
|
int next_skip = (skip >> BLIST_META_RADIX_SHIFT);
|
||||||
|
int nblks = 0;
|
||||||
|
|
||||||
|
if (count == radix || scan->u.bmu_avail == 0) {
|
||||||
|
/*
|
||||||
|
* ALL-ALLOCATED special case
|
||||||
|
*/
|
||||||
|
nblks = scan->u.bmu_avail;
|
||||||
|
scan->u.bmu_avail = 0;
|
||||||
|
scan->bm_bighint = count;
|
||||||
|
return nblks;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scan->u.bmu_avail == radix) {
|
||||||
|
radix >>= BLIST_META_RADIX_SHIFT;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ALL-FREE special case, initialize sublevel
|
||||||
|
*/
|
||||||
|
for (i = 1; i <= skip; i += next_skip) {
|
||||||
|
if (scan[i].bm_bighint == (daddr_t)-1)
|
||||||
|
break;
|
||||||
|
if (next_skip == 1) {
|
||||||
|
scan[i].u.bmu_bitmap = (u_daddr_t)-1;
|
||||||
|
scan[i].bm_bighint = BLIST_BMAP_RADIX;
|
||||||
|
} else {
|
||||||
|
scan[i].bm_bighint = radix;
|
||||||
|
scan[i].u.bmu_avail = radix;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
radix >>= BLIST_META_RADIX_SHIFT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count > radix)
|
||||||
|
panic("blist_meta_fill: allocation too large");
|
||||||
|
|
||||||
|
i = (allocBlk - blk) / radix;
|
||||||
|
blk += i * radix;
|
||||||
|
i = i * next_skip + 1;
|
||||||
|
|
||||||
|
while (i <= skip && blk < allocBlk + count) {
|
||||||
|
daddr_t v;
|
||||||
|
|
||||||
|
v = blk + radix - allocBlk;
|
||||||
|
if (v > count)
|
||||||
|
v = count;
|
||||||
|
|
||||||
|
if (scan->bm_bighint == (daddr_t)-1)
|
||||||
|
panic("blst_meta_fill: filling unexpected range");
|
||||||
|
|
||||||
|
if (next_skip == 1) {
|
||||||
|
nblks += blst_leaf_fill(&scan[i], allocBlk, v);
|
||||||
|
} else {
|
||||||
|
nblks += blst_meta_fill(&scan[i], allocBlk, v,
|
||||||
|
radix, next_skip - 1, blk);
|
||||||
|
}
|
||||||
|
count -= v;
|
||||||
|
allocBlk += v;
|
||||||
|
blk += radix;
|
||||||
|
i += next_skip;
|
||||||
|
}
|
||||||
|
scan->u.bmu_avail -= nblks;
|
||||||
|
return nblks;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BLST_RADIX_INIT() - initialize radix tree
|
* BLST_RADIX_INIT() - initialize radix tree
|
||||||
*
|
*
|
||||||
|
|
@ -768,41 +907,41 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
|
||||||
|
|
||||||
if (radix == BLIST_BMAP_RADIX) {
|
if (radix == BLIST_BMAP_RADIX) {
|
||||||
printf(
|
printf(
|
||||||
"%*.*s(%04x,%d): bitmap %08x big=%d\n",
|
"%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
|
||||||
tab, tab, "",
|
tab, tab, "",
|
||||||
blk, radix,
|
(long long)blk, (long long)radix,
|
||||||
scan->u.bmu_bitmap,
|
(long long)scan->u.bmu_bitmap,
|
||||||
scan->bm_bighint
|
(long long)scan->bm_bighint
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scan->u.bmu_avail == 0) {
|
if (scan->u.bmu_avail == 0) {
|
||||||
printf(
|
printf(
|
||||||
"%*.*s(%04x,%d) ALL ALLOCATED\n",
|
"%*.*s(%08llx,%lld) ALL ALLOCATED\n",
|
||||||
tab, tab, "",
|
tab, tab, "",
|
||||||
blk,
|
(long long)blk,
|
||||||
radix
|
(long long)radix
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (scan->u.bmu_avail == radix) {
|
if (scan->u.bmu_avail == radix) {
|
||||||
printf(
|
printf(
|
||||||
"%*.*s(%04x,%d) ALL FREE\n",
|
"%*.*s(%08llx,%lld) ALL FREE\n",
|
||||||
tab, tab, "",
|
tab, tab, "",
|
||||||
blk,
|
(long long)blk,
|
||||||
radix
|
(long long)radix
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf(
|
printf(
|
||||||
"%*.*s(%04x,%d): subtree (%d/%d) big=%d {\n",
|
"%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
|
||||||
tab, tab, "",
|
tab, tab, "",
|
||||||
blk, radix,
|
(long long)blk, (long long)radix,
|
||||||
scan->u.bmu_avail,
|
(long long)scan->u.bmu_avail,
|
||||||
radix,
|
(long long)radix,
|
||||||
scan->bm_bighint
|
(long long)scan->bm_bighint
|
||||||
);
|
);
|
||||||
|
|
||||||
radix >>= BLIST_META_RADIX_SHIFT;
|
radix >>= BLIST_META_RADIX_SHIFT;
|
||||||
|
|
@ -812,9 +951,9 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
|
||||||
for (i = 1; i <= skip; i += next_skip) {
|
for (i = 1; i <= skip; i += next_skip) {
|
||||||
if (scan[i].bm_bighint == (daddr_t)-1) {
|
if (scan[i].bm_bighint == (daddr_t)-1) {
|
||||||
printf(
|
printf(
|
||||||
"%*.*s(%04x,%d): Terminator\n",
|
"%*.*s(%08llx,%lld): Terminator\n",
|
||||||
tab, tab, "",
|
tab, tab, "",
|
||||||
blk, radix
|
(long long)blk, (long long)radix
|
||||||
);
|
);
|
||||||
lastState = 0;
|
lastState = 0;
|
||||||
break;
|
break;
|
||||||
|
|
@ -866,13 +1005,14 @@ main(int ac, char **av)
|
||||||
daddr_t count = 0;
|
daddr_t count = 0;
|
||||||
|
|
||||||
|
|
||||||
printf("%d/%d/%d> ", bl->bl_free, size, bl->bl_radix);
|
printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
|
||||||
|
(long long)size, (long long)bl->bl_radix);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
if (fgets(buf, sizeof(buf), stdin) == NULL)
|
if (fgets(buf, sizeof(buf), stdin) == NULL)
|
||||||
break;
|
break;
|
||||||
switch(buf[0]) {
|
switch(buf[0]) {
|
||||||
case 'r':
|
case 'r':
|
||||||
if (sscanf(buf + 1, "%d", &count) == 1) {
|
if (sscanf(buf + 1, "%lld", &count) == 1) {
|
||||||
blist_resize(&bl, count, 1);
|
blist_resize(&bl, count, 1);
|
||||||
} else {
|
} else {
|
||||||
printf("?\n");
|
printf("?\n");
|
||||||
|
|
@ -881,26 +1021,37 @@ main(int ac, char **av)
|
||||||
blist_print(bl);
|
blist_print(bl);
|
||||||
break;
|
break;
|
||||||
case 'a':
|
case 'a':
|
||||||
if (sscanf(buf + 1, "%d", &count) == 1) {
|
if (sscanf(buf + 1, "%lld", &count) == 1) {
|
||||||
daddr_t blk = blist_alloc(bl, count);
|
daddr_t blk = blist_alloc(bl, count);
|
||||||
printf(" R=%04x\n", blk);
|
printf(" R=%08llx\n", (long long)blk);
|
||||||
} else {
|
} else {
|
||||||
printf("?\n");
|
printf("?\n");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
if (sscanf(buf + 1, "%x %d", &da, &count) == 2) {
|
if (sscanf(buf + 1, "%llx %lld",
|
||||||
|
(long long *)&da, (long long *)&count) == 2) {
|
||||||
blist_free(bl, da, count);
|
blist_free(bl, da, count);
|
||||||
} else {
|
} else {
|
||||||
printf("?\n");
|
printf("?\n");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'l':
|
||||||
|
if (sscanf(buf + 1, "%llx %lld",
|
||||||
|
(long long *)&da, (long long *)&count) == 2) {
|
||||||
|
printf(" n=%d\n",
|
||||||
|
blist_fill(bl, da, count));
|
||||||
|
} else {
|
||||||
|
printf("?\n");
|
||||||
|
}
|
||||||
|
break;
|
||||||
case '?':
|
case '?':
|
||||||
case 'h':
|
case 'h':
|
||||||
puts(
|
puts(
|
||||||
"p -print\n"
|
"p -print\n"
|
||||||
"a %d -allocate\n"
|
"a %d -allocate\n"
|
||||||
"f %x %d -free\n"
|
"f %x %d -free\n"
|
||||||
|
"l %x %d -fill\n"
|
||||||
"r %d -resize\n"
|
"r %d -resize\n"
|
||||||
"h/? -help"
|
"h/? -help"
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -612,6 +612,7 @@
|
||||||
422 MSTD BSD { int setcontext(const struct __ucontext *ucp); }
|
422 MSTD BSD { int setcontext(const struct __ucontext *ucp); }
|
||||||
423 MSTD BSD { int swapcontext(struct __ucontext *oucp, \
|
423 MSTD BSD { int swapcontext(struct __ucontext *oucp, \
|
||||||
const struct __ucontext *ucp); }
|
const struct __ucontext *ucp); }
|
||||||
|
424 MSTD BSD { int swapoff(const char *name); }
|
||||||
|
|
||||||
; Please copy any additions and changes to the following compatibility tables:
|
; Please copy any additions and changes to the following compatibility tables:
|
||||||
; sys/ia64/ia32/syscalls.master (take a best guess)
|
; sys/ia64/ia32/syscalls.master (take a best guess)
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@
|
||||||
* (void) blist_destroy(blist)
|
* (void) blist_destroy(blist)
|
||||||
* blkno = blist_alloc(blist, count)
|
* blkno = blist_alloc(blist, count)
|
||||||
* (void) blist_free(blist, blkno, count)
|
* (void) blist_free(blist, blkno, count)
|
||||||
|
* nblks = blist_fill(blist, blkno, count)
|
||||||
* (void) blist_resize(&blist, count, freeextra)
|
* (void) blist_resize(&blist, count, freeextra)
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
|
|
@ -78,6 +79,7 @@ extern blist_t blist_create(daddr_t blocks);
|
||||||
extern void blist_destroy(blist_t blist);
|
extern void blist_destroy(blist_t blist);
|
||||||
extern daddr_t blist_alloc(blist_t blist, daddr_t count);
|
extern daddr_t blist_alloc(blist_t blist, daddr_t count);
|
||||||
extern void blist_free(blist_t blist, daddr_t blkno, daddr_t count);
|
extern void blist_free(blist_t blist, daddr_t blkno, daddr_t count);
|
||||||
|
extern int blist_fill(blist_t bl, daddr_t blkno, daddr_t count);
|
||||||
extern void blist_print(blist_t blist);
|
extern void blist_print(blist_t blist);
|
||||||
extern void blist_resize(blist_t *pblist, daddr_t count, int freenew);
|
extern void blist_resize(blist_t *pblist, daddr_t count, int freenew);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -274,6 +274,7 @@ struct swdevt {
|
||||||
};
|
};
|
||||||
#define SW_FREED 0x01
|
#define SW_FREED 0x01
|
||||||
#define SW_SEQUENTIAL 0x02
|
#define SW_SEQUENTIAL 0x02
|
||||||
|
#define SW_CLOSING 0x04
|
||||||
#define sw_freed sw_flags /* XXX compat */
|
#define sw_freed sw_flags /* XXX compat */
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
|
|
|
||||||
|
|
@ -274,6 +274,7 @@ struct swdevt {
|
||||||
};
|
};
|
||||||
#define SW_FREED 0x01
|
#define SW_FREED 0x01
|
||||||
#define SW_SEQUENTIAL 0x02
|
#define SW_SEQUENTIAL 0x02
|
||||||
|
#define SW_CLOSING 0x04
|
||||||
#define sw_freed sw_flags /* XXX compat */
|
#define sw_freed sw_flags /* XXX compat */
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
|
|
|
||||||
|
|
@ -206,6 +206,8 @@ static __inline daddr_t swp_pager_getswapspace(int npages);
|
||||||
/*
|
/*
|
||||||
* Metadata functions
|
* Metadata functions
|
||||||
*/
|
*/
|
||||||
|
static __inline struct swblock **
|
||||||
|
swp_pager_hash(vm_object_t object, vm_pindex_t index);
|
||||||
static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t);
|
static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t);
|
||||||
static void swp_pager_meta_free(vm_object_t, vm_pindex_t, daddr_t);
|
static void swp_pager_meta_free(vm_object_t, vm_pindex_t, daddr_t);
|
||||||
static void swp_pager_meta_free_all(vm_object_t);
|
static void swp_pager_meta_free_all(vm_object_t);
|
||||||
|
|
@ -512,12 +514,22 @@ swp_pager_freeswapspace(blk, npages)
|
||||||
daddr_t blk;
|
daddr_t blk;
|
||||||
int npages;
|
int npages;
|
||||||
{
|
{
|
||||||
|
struct swdevt *sp = &swdevt[BLK2DEVIDX(blk)];
|
||||||
|
|
||||||
GIANT_REQUIRED;
|
GIANT_REQUIRED;
|
||||||
|
|
||||||
|
/* per-swap area stats */
|
||||||
|
sp->sw_used -= npages;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we are attempting to stop swapping on this device, we
|
||||||
|
* don't want to mark any blocks free lest they be reused.
|
||||||
|
*/
|
||||||
|
if (sp->sw_flags & SW_CLOSING)
|
||||||
|
return;
|
||||||
|
|
||||||
blist_free(swapblist, blk, npages);
|
blist_free(swapblist, blk, npages);
|
||||||
vm_swap_size += npages;
|
vm_swap_size += npages;
|
||||||
/* per-swap area stats */
|
|
||||||
swdevt[BLK2DEVIDX(blk)].sw_used -= npages;
|
|
||||||
swp_sizecheck();
|
swp_sizecheck();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1624,6 +1636,149 @@ swp_pager_async_iodone(bp)
|
||||||
splx(s);
|
splx(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* swap_pager_isswapped:
|
||||||
|
*
|
||||||
|
* Return 1 if at least one page in the given object is paged
|
||||||
|
* out to the given swap device.
|
||||||
|
*
|
||||||
|
* This routine may not block.
|
||||||
|
*/
|
||||||
|
int swap_pager_isswapped(vm_object_t object, int devidx) {
|
||||||
|
daddr_t index = 0;
|
||||||
|
int bcount;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (bcount = 0; bcount < object->un_pager.swp.swp_bcount; bcount++) {
|
||||||
|
struct swblock *swap;
|
||||||
|
|
||||||
|
if ((swap = *swp_pager_hash(object, index)) != NULL) {
|
||||||
|
for (i = 0; i < SWAP_META_PAGES; ++i) {
|
||||||
|
daddr_t v = swap->swb_pages[i];
|
||||||
|
if (v != SWAPBLK_NONE &&
|
||||||
|
BLK2DEVIDX(v) == devidx)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
index += SWAP_META_PAGES;
|
||||||
|
if (index > 0x20000000)
|
||||||
|
panic("swap_pager_isswapped: failed to locate all swap meta blocks");
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in
|
||||||
|
*
|
||||||
|
* This routine dissociates the page at the given index within a
|
||||||
|
* swap block from its backing store, paging it in if necessary.
|
||||||
|
* If the page is paged in, it is placed in the inactive queue,
|
||||||
|
* since it had its backing store ripped out from under it.
|
||||||
|
* We also attempt to swap in all other pages in the swap block,
|
||||||
|
* we only guarantee that the one at the specified index is
|
||||||
|
* paged in.
|
||||||
|
*
|
||||||
|
* XXX - The code to page the whole block in doesn't work, so we
|
||||||
|
* revert to the one-by-one behavior for now. Sigh.
|
||||||
|
*/
|
||||||
|
static __inline void
|
||||||
|
swp_pager_force_pagein(struct swblock *swap, int idx)
|
||||||
|
{
|
||||||
|
vm_object_t object;
|
||||||
|
vm_page_t m;
|
||||||
|
vm_pindex_t pindex;
|
||||||
|
|
||||||
|
object = swap->swb_object;
|
||||||
|
pindex = swap->swb_index;
|
||||||
|
|
||||||
|
vm_object_pip_add(object, 1);
|
||||||
|
m = vm_page_grab(object, pindex + idx, VM_ALLOC_NORMAL|VM_ALLOC_RETRY);
|
||||||
|
if (m->valid == VM_PAGE_BITS_ALL) {
|
||||||
|
vm_object_pip_subtract(object, 1);
|
||||||
|
vm_page_lock_queues();
|
||||||
|
vm_page_activate(m);
|
||||||
|
vm_page_dirty(m);
|
||||||
|
vm_page_wakeup(m);
|
||||||
|
vm_page_unlock_queues();
|
||||||
|
vm_pager_page_unswapped(m);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (swap_pager_getpages(object, &m, 1, 0) !=
|
||||||
|
VM_PAGER_OK)
|
||||||
|
panic("swap_pager_force_pagein: read from swap failed");/*XXX*/
|
||||||
|
vm_object_pip_subtract(object, 1);
|
||||||
|
|
||||||
|
vm_page_lock_queues();
|
||||||
|
vm_page_dirty(m);
|
||||||
|
vm_page_dontneed(m);
|
||||||
|
vm_page_wakeup(m);
|
||||||
|
vm_page_unlock_queues();
|
||||||
|
vm_pager_page_unswapped(m);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* swap_pager_swapoff:
|
||||||
|
*
|
||||||
|
* Page in all of the pages that have been paged out to the
|
||||||
|
* given device. The corresponding blocks in the bitmap must be
|
||||||
|
* marked as allocated and the device must be flagged SW_CLOSING.
|
||||||
|
* There may be no processes swapped out to the device.
|
||||||
|
*
|
||||||
|
* The sw_used parameter points to the field in the swdev structure
|
||||||
|
* that contains a count of the number of blocks still allocated
|
||||||
|
* on the device. If we encounter objects with a nonzero pip count
|
||||||
|
* in our scan, we use this number to determine if we're really done.
|
||||||
|
*
|
||||||
|
* This routine may block.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
swap_pager_swapoff(int devidx, int *sw_used)
|
||||||
|
{
|
||||||
|
struct swblock **pswap;
|
||||||
|
struct swblock *swap;
|
||||||
|
vm_object_t waitobj;
|
||||||
|
daddr_t v;
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
GIANT_REQUIRED;
|
||||||
|
|
||||||
|
full_rescan:
|
||||||
|
waitobj = NULL;
|
||||||
|
for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */
|
||||||
|
restart:
|
||||||
|
pswap = &swhash[i];
|
||||||
|
while ((swap = *pswap) != NULL) {
|
||||||
|
for (j = 0; j < SWAP_META_PAGES; ++j) {
|
||||||
|
v = swap->swb_pages[j];
|
||||||
|
if (v != SWAPBLK_NONE &&
|
||||||
|
BLK2DEVIDX(v) == devidx)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (j < SWAP_META_PAGES) {
|
||||||
|
swp_pager_force_pagein(swap, j);
|
||||||
|
goto restart;
|
||||||
|
} else if (swap->swb_object->paging_in_progress) {
|
||||||
|
if (!waitobj)
|
||||||
|
waitobj = swap->swb_object;
|
||||||
|
}
|
||||||
|
pswap = &swap->swb_hnext;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (waitobj && *sw_used) {
|
||||||
|
/*
|
||||||
|
* We wait on an arbitrary object to clock our rescans
|
||||||
|
* to the rate of paging completion.
|
||||||
|
*/
|
||||||
|
vm_object_pip_wait(waitobj, "swpoff");
|
||||||
|
goto full_rescan;
|
||||||
|
}
|
||||||
|
if (*sw_used)
|
||||||
|
panic("swapoff: failed to locate %d swap blocks", *sw_used);
|
||||||
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
* SWAP META DATA *
|
* SWAP META DATA *
|
||||||
************************************************************************
|
************************************************************************
|
||||||
|
|
|
||||||
|
|
@ -83,9 +83,11 @@ extern struct pagerlst swap_pager_un_object_list;
|
||||||
extern int swap_pager_full;
|
extern int swap_pager_full;
|
||||||
extern struct blist *swapblist;
|
extern struct blist *swapblist;
|
||||||
extern struct uma_zone *swap_zone;
|
extern struct uma_zone *swap_zone;
|
||||||
|
extern int nswap_lowat, nswap_hiwat;
|
||||||
|
|
||||||
void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
|
void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
|
||||||
boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after);
|
boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after);
|
||||||
|
void swap_pager_swapoff(int devidx, int *sw_used);
|
||||||
|
|
||||||
int swap_pager_swp_alloc(vm_object_t, int);
|
int swap_pager_swp_alloc(vm_object_t, int);
|
||||||
void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int);
|
void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int);
|
||||||
|
|
|
||||||
|
|
@ -91,6 +91,7 @@
|
||||||
#include <vm/vm_kern.h>
|
#include <vm/vm_kern.h>
|
||||||
#include <vm/vm_extern.h>
|
#include <vm/vm_extern.h>
|
||||||
#include <vm/vm_pager.h>
|
#include <vm/vm_pager.h>
|
||||||
|
#include <vm/swap_pager.h>
|
||||||
|
|
||||||
#include <sys/user.h>
|
#include <sys/user.h>
|
||||||
|
|
||||||
|
|
@ -324,6 +325,45 @@ vm_proc_swapin(struct proc *p)
|
||||||
up = (vm_offset_t)p->p_uarea;
|
up = (vm_offset_t)p->p_uarea;
|
||||||
pmap_qenter(up, ma, UAREA_PAGES);
|
pmap_qenter(up, ma, UAREA_PAGES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Swap in the UAREAs of all processes swapped out to the given device.
|
||||||
|
* The pages in the UAREA are marked dirty and their swap metadata is freed.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
vm_proc_swapin_all(int devidx)
|
||||||
|
{
|
||||||
|
struct proc *p;
|
||||||
|
vm_object_t object;
|
||||||
|
vm_page_t m;
|
||||||
|
|
||||||
|
retry:
|
||||||
|
sx_slock(&allproc_lock);
|
||||||
|
FOREACH_PROC_IN_SYSTEM(p) {
|
||||||
|
PROC_LOCK(p);
|
||||||
|
mtx_lock_spin(&sched_lock);
|
||||||
|
|
||||||
|
object = p->p_upages_obj;
|
||||||
|
if (object != NULL &&
|
||||||
|
swap_pager_isswapped(p->p_upages_obj, devidx)) {
|
||||||
|
sx_sunlock(&allproc_lock);
|
||||||
|
faultin(p);
|
||||||
|
mtx_unlock_spin(&sched_lock);
|
||||||
|
PROC_UNLOCK(p);
|
||||||
|
vm_page_lock_queues();
|
||||||
|
TAILQ_FOREACH(m, &object->memq, listq)
|
||||||
|
vm_page_dirty(m);
|
||||||
|
vm_page_unlock_queues();
|
||||||
|
swap_pager_freespace(object, 0,
|
||||||
|
object->un_pager.swp.swp_bcount);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
|
||||||
|
mtx_unlock_spin(&sched_lock);
|
||||||
|
PROC_UNLOCK(p);
|
||||||
|
}
|
||||||
|
sx_sunlock(&allproc_lock);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
|
|
@ -104,6 +104,12 @@ extern void pagedaemon_wakeup(void);
|
||||||
extern void vm_wait(void);
|
extern void vm_wait(void);
|
||||||
extern void vm_waitpfault(void);
|
extern void vm_waitpfault(void);
|
||||||
|
|
||||||
|
/* XXX This is probably misplaced. */
|
||||||
|
#ifndef NO_SWAPPING
|
||||||
|
void vm_proc_swapin_all(int);
|
||||||
|
int swap_pager_isswapped(vm_object_t, int);
|
||||||
|
#endif /* !NO_SWAPPING */
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
void vm_pageout_page(vm_page_t, vm_object_t);
|
void vm_pageout_page(vm_page_t, vm_object_t);
|
||||||
void vm_pageout_cluster(vm_page_t, vm_object_t);
|
void vm_pageout_cluster(vm_page_t, vm_object_t);
|
||||||
|
|
|
||||||
140
sys/vm/vm_swap.c
140
sys/vm/vm_swap.c
|
|
@ -36,6 +36,7 @@
|
||||||
|
|
||||||
#include "opt_mac.h"
|
#include "opt_mac.h"
|
||||||
#include "opt_swap.h"
|
#include "opt_swap.h"
|
||||||
|
#include "opt_vm.h"
|
||||||
|
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#include <sys/systm.h>
|
#include <sys/systm.h>
|
||||||
|
|
@ -58,6 +59,7 @@
|
||||||
#include <vm/vm.h>
|
#include <vm/vm.h>
|
||||||
#include <vm/vm_extern.h>
|
#include <vm/vm_extern.h>
|
||||||
#include <vm/vm_param.h>
|
#include <vm/vm_param.h>
|
||||||
|
#include <vm/vm_pageout.h>
|
||||||
#include <vm/swap_pager.h>
|
#include <vm/swap_pager.h>
|
||||||
#include <vm/uma.h>
|
#include <vm/uma.h>
|
||||||
|
|
||||||
|
|
@ -73,6 +75,8 @@ struct swdevt *swdevt = should_be_malloced;
|
||||||
static int nswap; /* first block after the interleaved devs */
|
static int nswap; /* first block after the interleaved devs */
|
||||||
int nswdev = NSWAPDEV;
|
int nswdev = NSWAPDEV;
|
||||||
int vm_swap_size;
|
int vm_swap_size;
|
||||||
|
static int swdev_syscall_active = 0; /* serialize swap(on|off) */
|
||||||
|
|
||||||
|
|
||||||
static int swapdev_strategy(struct vop_strategy_args *ap);
|
static int swapdev_strategy(struct vop_strategy_args *ap);
|
||||||
struct vnode *swapdev_vp;
|
struct vnode *swapdev_vp;
|
||||||
|
|
@ -165,11 +169,12 @@ swapdev_strategy(ap)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create a special vnode op vector for swapdev_vp - we only use
|
* Create a special vnode op vector for swapdev_vp - we only use
|
||||||
* VOP_STRATEGY(), everything else returns an error.
|
* VOP_STRATEGY() and reclaim; everything else returns an error.
|
||||||
*/
|
*/
|
||||||
vop_t **swapdev_vnodeop_p;
|
vop_t **swapdev_vnodeop_p;
|
||||||
static struct vnodeopv_entry_desc swapdev_vnodeop_entries[] = {
|
static struct vnodeopv_entry_desc swapdev_vnodeop_entries[] = {
|
||||||
{ &vop_default_desc, (vop_t *) vop_defaultop },
|
{ &vop_default_desc, (vop_t *) vop_defaultop },
|
||||||
|
{ &vop_reclaim_desc, (vop_t *) vop_null },
|
||||||
{ &vop_strategy_desc, (vop_t *) swapdev_strategy },
|
{ &vop_strategy_desc, (vop_t *) swapdev_strategy },
|
||||||
{ NULL, NULL }
|
{ NULL, NULL }
|
||||||
};
|
};
|
||||||
|
|
@ -208,19 +213,23 @@ swapon(td, uap)
|
||||||
if (error)
|
if (error)
|
||||||
goto done2;
|
goto done2;
|
||||||
|
|
||||||
|
while (swdev_syscall_active)
|
||||||
|
tsleep(&swdev_syscall_active, PUSER - 1, "swpon", 0);
|
||||||
|
swdev_syscall_active = 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Swap metadata may not fit in the KVM if we have physical
|
* Swap metadata may not fit in the KVM if we have physical
|
||||||
* memory of >1GB.
|
* memory of >1GB.
|
||||||
*/
|
*/
|
||||||
if (swap_zone == NULL) {
|
if (swap_zone == NULL) {
|
||||||
error = ENOMEM;
|
error = ENOMEM;
|
||||||
goto done2;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, td);
|
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, td);
|
||||||
error = namei(&nd);
|
error = namei(&nd);
|
||||||
if (error)
|
if (error)
|
||||||
goto done2;
|
goto done;
|
||||||
|
|
||||||
NDFREE(&nd, NDF_ONLY_PNBUF);
|
NDFREE(&nd, NDF_ONLY_PNBUF);
|
||||||
vp = nd.ni_vp;
|
vp = nd.ni_vp;
|
||||||
|
|
@ -239,6 +248,9 @@ swapon(td, uap)
|
||||||
|
|
||||||
if (error)
|
if (error)
|
||||||
vrele(vp);
|
vrele(vp);
|
||||||
|
done:
|
||||||
|
swdev_syscall_active = 0;
|
||||||
|
wakeup_one(&swdev_syscall_active);
|
||||||
done2:
|
done2:
|
||||||
mtx_unlock(&Giant);
|
mtx_unlock(&Giant);
|
||||||
return (error);
|
return (error);
|
||||||
|
|
@ -252,8 +264,6 @@ done2:
|
||||||
*
|
*
|
||||||
* The new swap code uses page-sized blocks. The old swap code used
|
* The new swap code uses page-sized blocks. The old swap code used
|
||||||
* DEV_BSIZE'd chunks.
|
* DEV_BSIZE'd chunks.
|
||||||
*
|
|
||||||
* XXX locking when multiple swapon's run in parallel
|
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
swaponvp(td, vp, dev, nblks)
|
swaponvp(td, vp, dev, nblks)
|
||||||
|
|
@ -330,7 +340,7 @@ swaponvp(td, vp, dev, nblks)
|
||||||
sp->sw_vp = vp;
|
sp->sw_vp = vp;
|
||||||
sp->sw_dev = dev2udev(dev);
|
sp->sw_dev = dev2udev(dev);
|
||||||
sp->sw_device = dev;
|
sp->sw_device = dev;
|
||||||
sp->sw_flags |= SW_FREED;
|
sp->sw_flags = SW_FREED;
|
||||||
sp->sw_nblks = nblks;
|
sp->sw_nblks = nblks;
|
||||||
sp->sw_used = 0;
|
sp->sw_used = 0;
|
||||||
|
|
||||||
|
|
@ -356,9 +366,127 @@ swaponvp(td, vp, dev, nblks)
|
||||||
vm_swap_size += blk;
|
vm_swap_size += blk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
swap_pager_full = 0;
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SYSCALL: swapoff(devname)
|
||||||
|
*
|
||||||
|
* Disable swapping on the given device.
|
||||||
|
*/
|
||||||
|
#ifndef _SYS_SYSPROTO_H_
|
||||||
|
struct swapoff_args {
|
||||||
|
char *name;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MPSAFE
|
||||||
|
*/
|
||||||
|
/* ARGSUSED */
|
||||||
|
int
|
||||||
|
swapoff(td, uap)
|
||||||
|
struct thread *td;
|
||||||
|
struct swapoff_args *uap;
|
||||||
|
{
|
||||||
|
struct vnode *vp;
|
||||||
|
struct nameidata nd;
|
||||||
|
struct swdevt *sp;
|
||||||
|
swblk_t dvbase, vsbase;
|
||||||
|
u_long nblks, aligned_nblks, blk;
|
||||||
|
int error, index;
|
||||||
|
|
||||||
|
mtx_lock(&Giant);
|
||||||
|
|
||||||
|
error = suser(td);
|
||||||
|
if (error)
|
||||||
|
goto done2;
|
||||||
|
|
||||||
|
while (swdev_syscall_active)
|
||||||
|
tsleep(&swdev_syscall_active, PUSER - 1, "swpoff", 0);
|
||||||
|
swdev_syscall_active = 1;
|
||||||
|
|
||||||
|
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, td);
|
||||||
|
error = namei(&nd);
|
||||||
|
if (error)
|
||||||
|
goto done;
|
||||||
|
NDFREE(&nd, NDF_ONLY_PNBUF);
|
||||||
|
vp = nd.ni_vp;
|
||||||
|
|
||||||
|
for (sp = swdevt, index = 0 ; index < nswdev; index++, sp++) {
|
||||||
|
if (sp->sw_vp == vp)
|
||||||
|
goto found;
|
||||||
|
}
|
||||||
|
error = EINVAL;
|
||||||
|
goto done;
|
||||||
|
found:
|
||||||
|
nblks = sp->sw_nblks;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can turn off this swap device safely only if the
|
||||||
|
* available virtual memory in the system will fit the amount
|
||||||
|
* of data we will have to page back in, plus an epsilon so
|
||||||
|
* the system doesn't become critically low on swap space.
|
||||||
|
*/
|
||||||
|
if (cnt.v_free_count + cnt.v_cache_count + vm_swap_size <
|
||||||
|
nblks + nswap_lowat) {
|
||||||
|
error = ENOMEM;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prevent further allocations on this device.
|
||||||
|
*/
|
||||||
|
sp->sw_flags |= SW_CLOSING;
|
||||||
|
for (dvbase = dmmax; dvbase < nblks; dvbase += dmmax) {
|
||||||
|
blk = min(nblks - dvbase, dmmax);
|
||||||
|
vsbase = index * dmmax + dvbase * nswdev;
|
||||||
|
vm_swap_size -= blist_fill(swapblist, vsbase, blk);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Page in the contents of the device and close it.
|
||||||
|
*/
|
||||||
|
#ifndef NO_SWAPPING
|
||||||
|
vm_proc_swapin_all(index);
|
||||||
|
#endif /* !NO_SWAPPING */
|
||||||
|
swap_pager_swapoff(index, &sp->sw_used);
|
||||||
|
|
||||||
|
VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
|
||||||
|
vrele(vp);
|
||||||
|
sp->sw_vp = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Resize the bitmap based on the new largest swap device,
|
||||||
|
* or free the bitmap if there are no more devices.
|
||||||
|
*/
|
||||||
|
for (sp = swdevt, nblks = 0; sp < swdevt + nswdev; sp++) {
|
||||||
|
if (sp->sw_vp == NULL)
|
||||||
|
continue;
|
||||||
|
nblks = max(nblks, sp->sw_nblks);
|
||||||
|
}
|
||||||
|
|
||||||
|
aligned_nblks = (nblks + (dmmax - 1)) & ~(u_long)(dmmax - 1);
|
||||||
|
nswap = aligned_nblks * nswdev;
|
||||||
|
|
||||||
|
if (nswap == 0) {
|
||||||
|
blist_destroy(swapblist);
|
||||||
|
swapblist = NULL;
|
||||||
|
vrele(swapdev_vp);
|
||||||
|
swapdev_vp = NULL;
|
||||||
|
} else
|
||||||
|
blist_resize(&swapblist, nswap, 0);
|
||||||
|
|
||||||
|
done:
|
||||||
|
swdev_syscall_active = 0;
|
||||||
|
wakeup_one(&swdev_syscall_active);
|
||||||
|
done2:
|
||||||
|
mtx_unlock(&Giant);
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS)
|
sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue