mirror of
https://github.com/opnsense/src.git
synced 2026-06-09 00:32:25 -04:00
Move the dirty queues inside the per-domain structure. This resolves a bug
where we had not hit global dirty limits but a single queue was starved for space by dirty buffers. A single buf_daemon is maintained for now. Add a bd_speedup() when we are low on bufspace. This can happen due to SUJ keeping many bufs locked until a cg block is written. Document this with a comment. Fix sysctls to work with per-domain variables. Add more ddb debugging. Reported by: pho Reviewed by: kib Tested by: pho Sponsored by: Netflix, Dell/EMC Isilon Differential Revision: https://reviews.freebsd.org/D14705
This commit is contained in:
parent
b521cf275c
commit
3cec5c77d6
1 changed files with 333 additions and 145 deletions
|
|
@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#include <sys/systm.h>
|
#include <sys/systm.h>
|
||||||
#include <sys/bio.h>
|
#include <sys/bio.h>
|
||||||
|
#include <sys/bitset.h>
|
||||||
#include <sys/conf.h>
|
#include <sys/conf.h>
|
||||||
#include <sys/counter.h>
|
#include <sys/counter.h>
|
||||||
#include <sys/buf.h>
|
#include <sys/buf.h>
|
||||||
|
|
@ -100,6 +101,52 @@ struct buf_ops buf_ops_bio = {
|
||||||
.bop_bdflush = bufbdflush,
|
.bop_bdflush = bufbdflush,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct bufqueue {
|
||||||
|
struct mtx_padalign bq_lock;
|
||||||
|
TAILQ_HEAD(, buf) bq_queue;
|
||||||
|
uint8_t bq_index;
|
||||||
|
uint16_t bq_subqueue;
|
||||||
|
int bq_len;
|
||||||
|
} __aligned(CACHE_LINE_SIZE);
|
||||||
|
|
||||||
|
#define BQ_LOCKPTR(bq) (&(bq)->bq_lock)
|
||||||
|
#define BQ_LOCK(bq) mtx_lock(BQ_LOCKPTR((bq)))
|
||||||
|
#define BQ_UNLOCK(bq) mtx_unlock(BQ_LOCKPTR((bq)))
|
||||||
|
#define BQ_ASSERT_LOCKED(bq) mtx_assert(BQ_LOCKPTR((bq)), MA_OWNED)
|
||||||
|
|
||||||
|
struct bufdomain {
|
||||||
|
struct bufqueue bd_subq[MAXCPU + 1]; /* Per-cpu sub queues + global */
|
||||||
|
struct bufqueue bd_dirtyq;
|
||||||
|
struct bufqueue *bd_cleanq;
|
||||||
|
struct mtx_padalign bd_run_lock;
|
||||||
|
/* Constants */
|
||||||
|
long bd_maxbufspace;
|
||||||
|
long bd_hibufspace;
|
||||||
|
long bd_lobufspace;
|
||||||
|
long bd_bufspacethresh;
|
||||||
|
int bd_hifreebuffers;
|
||||||
|
int bd_lofreebuffers;
|
||||||
|
int bd_hidirtybuffers;
|
||||||
|
int bd_lodirtybuffers;
|
||||||
|
int bd_dirtybufthresh;
|
||||||
|
int bd_lim;
|
||||||
|
/* atomics */
|
||||||
|
int bd_wanted;
|
||||||
|
int __aligned(CACHE_LINE_SIZE) bd_numdirtybuffers;
|
||||||
|
int __aligned(CACHE_LINE_SIZE) bd_running;
|
||||||
|
long __aligned(CACHE_LINE_SIZE) bd_bufspace;
|
||||||
|
int __aligned(CACHE_LINE_SIZE) bd_freebuffers;
|
||||||
|
} __aligned(CACHE_LINE_SIZE);
|
||||||
|
|
||||||
|
#define BD_LOCKPTR(bd) (&(bd)->bd_cleanq->bq_lock)
|
||||||
|
#define BD_LOCK(bd) mtx_lock(BD_LOCKPTR((bd)))
|
||||||
|
#define BD_UNLOCK(bd) mtx_unlock(BD_LOCKPTR((bd)))
|
||||||
|
#define BD_ASSERT_LOCKED(bd) mtx_assert(BD_LOCKPTR((bd)), MA_OWNED)
|
||||||
|
#define BD_RUN_LOCKPTR(bd) (&(bd)->bd_run_lock)
|
||||||
|
#define BD_RUN_LOCK(bd) mtx_lock(BD_RUN_LOCKPTR((bd)))
|
||||||
|
#define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd)))
|
||||||
|
#define BD_DOMAIN(bd) (bd - bdomain)
|
||||||
|
|
||||||
static struct buf *buf; /* buffer header pool */
|
static struct buf *buf; /* buffer header pool */
|
||||||
extern struct buf *swbuf; /* Swap buffer header pool. */
|
extern struct buf *swbuf; /* Swap buffer header pool. */
|
||||||
caddr_t unmapped_buf;
|
caddr_t unmapped_buf;
|
||||||
|
|
@ -123,8 +170,8 @@ static int vfs_bio_clcheck(struct vnode *vp, int size,
|
||||||
daddr_t lblkno, daddr_t blkno);
|
daddr_t lblkno, daddr_t blkno);
|
||||||
static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int,
|
static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int,
|
||||||
void (*)(struct buf *));
|
void (*)(struct buf *));
|
||||||
static int buf_flush(struct vnode *vp, int);
|
static int buf_flush(struct vnode *vp, struct bufdomain *, int);
|
||||||
static int flushbufqueues(struct vnode *, int, int);
|
static int flushbufqueues(struct vnode *, struct bufdomain *, int, int);
|
||||||
static void buf_daemon(void);
|
static void buf_daemon(void);
|
||||||
static __inline void bd_wakeup(void);
|
static __inline void bd_wakeup(void);
|
||||||
static int sysctl_runningspace(SYSCTL_HANDLER_ARGS);
|
static int sysctl_runningspace(SYSCTL_HANDLER_ARGS);
|
||||||
|
|
@ -133,6 +180,16 @@ static void bufkva_free(struct buf *);
|
||||||
static int buf_import(void *, void **, int, int, int);
|
static int buf_import(void *, void **, int, int, int);
|
||||||
static void buf_release(void *, void **, int);
|
static void buf_release(void *, void **, int);
|
||||||
static void maxbcachebuf_adjust(void);
|
static void maxbcachebuf_adjust(void);
|
||||||
|
static inline struct bufdomain *bufdomain(struct buf *);
|
||||||
|
static void bq_remove(struct bufqueue *bq, struct buf *bp);
|
||||||
|
static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock);
|
||||||
|
static int buf_recycle(struct bufdomain *, bool kva);
|
||||||
|
static void bq_init(struct bufqueue *bq, int qindex, int cpu,
|
||||||
|
const char *lockname);
|
||||||
|
static void bd_init(struct bufdomain *bd);
|
||||||
|
static int bd_flushall(struct bufdomain *bd);
|
||||||
|
static int sysctl_bufdomain_long(SYSCTL_HANDLER_ARGS);
|
||||||
|
static int sysctl_bufdomain_int(SYSCTL_HANDLER_ARGS);
|
||||||
|
|
||||||
static int sysctl_bufspace(SYSCTL_HANDLER_ARGS);
|
static int sysctl_bufspace(SYSCTL_HANDLER_ARGS);
|
||||||
int vmiodirenable = TRUE;
|
int vmiodirenable = TRUE;
|
||||||
|
|
@ -147,7 +204,9 @@ static counter_u64_t bufkvaspace;
|
||||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace,
|
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace,
|
||||||
"Kernel virtual memory used for buffers");
|
"Kernel virtual memory used for buffers");
|
||||||
static long maxbufspace;
|
static long maxbufspace;
|
||||||
SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW, &maxbufspace, 0,
|
SYSCTL_PROC(_vfs, OID_AUTO, maxbufspace,
|
||||||
|
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &maxbufspace,
|
||||||
|
__offsetof(struct bufdomain, bd_maxbufspace), sysctl_bufdomain_long, "L",
|
||||||
"Maximum allowed value of bufspace (including metadata)");
|
"Maximum allowed value of bufspace (including metadata)");
|
||||||
static long bufmallocspace;
|
static long bufmallocspace;
|
||||||
SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0,
|
SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0,
|
||||||
|
|
@ -156,14 +215,20 @@ static long maxbufmallocspace;
|
||||||
SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace,
|
SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace,
|
||||||
0, "Maximum amount of malloced memory for buffers");
|
0, "Maximum amount of malloced memory for buffers");
|
||||||
static long lobufspace;
|
static long lobufspace;
|
||||||
SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RW, &lobufspace, 0,
|
SYSCTL_PROC(_vfs, OID_AUTO, lobufspace,
|
||||||
|
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &lobufspace,
|
||||||
|
__offsetof(struct bufdomain, bd_lobufspace), sysctl_bufdomain_long, "L",
|
||||||
"Minimum amount of buffers we want to have");
|
"Minimum amount of buffers we want to have");
|
||||||
long hibufspace;
|
long hibufspace;
|
||||||
SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RW, &hibufspace, 0,
|
SYSCTL_PROC(_vfs, OID_AUTO, hibufspace,
|
||||||
|
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &hibufspace,
|
||||||
|
__offsetof(struct bufdomain, bd_hibufspace), sysctl_bufdomain_long, "L",
|
||||||
"Maximum allowed value of bufspace (excluding metadata)");
|
"Maximum allowed value of bufspace (excluding metadata)");
|
||||||
long bufspacethresh;
|
long bufspacethresh;
|
||||||
SYSCTL_LONG(_vfs, OID_AUTO, bufspacethresh, CTLFLAG_RW, &bufspacethresh,
|
SYSCTL_PROC(_vfs, OID_AUTO, bufspacethresh,
|
||||||
0, "Bufspace consumed before waking the daemon to free some");
|
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &bufspacethresh,
|
||||||
|
__offsetof(struct bufdomain, bd_bufspacethresh), sysctl_bufdomain_long, "L",
|
||||||
|
"Bufspace consumed before waking the daemon to free some");
|
||||||
static counter_u64_t buffreekvacnt;
|
static counter_u64_t buffreekvacnt;
|
||||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt,
|
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt,
|
||||||
"Number of times we have freed the KVA space from some buffer");
|
"Number of times we have freed the KVA space from some buffer");
|
||||||
|
|
@ -190,26 +255,37 @@ SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_RW, &altbufferflushes,
|
||||||
static int recursiveflushes;
|
static int recursiveflushes;
|
||||||
SYSCTL_INT(_vfs, OID_AUTO, recursiveflushes, CTLFLAG_RW, &recursiveflushes,
|
SYSCTL_INT(_vfs, OID_AUTO, recursiveflushes, CTLFLAG_RW, &recursiveflushes,
|
||||||
0, "Number of flushes skipped due to being recursive");
|
0, "Number of flushes skipped due to being recursive");
|
||||||
static int numdirtybuffers;
|
static int sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS);
|
||||||
SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0,
|
SYSCTL_PROC(_vfs, OID_AUTO, numdirtybuffers,
|
||||||
|
CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RD, NULL, 0, sysctl_numdirtybuffers, "I",
|
||||||
"Number of buffers that are dirty (has unwritten changes) at the moment");
|
"Number of buffers that are dirty (has unwritten changes) at the moment");
|
||||||
static int lodirtybuffers;
|
static int lodirtybuffers;
|
||||||
SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0,
|
/*
 * vfs.lodirtybuffers is backed by an int and serviced by
 * sysctl_bufdomain_int(), which moves the value with sysctl_handle_int().
 * The OID must therefore be typed CTLTYPE_INT with format "I"; typing it
 * as a long makes userland and the handler disagree on the value size.
 */
SYSCTL_PROC(_vfs, OID_AUTO, lodirtybuffers,
    CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &lodirtybuffers,
    __offsetof(struct bufdomain, bd_lodirtybuffers), sysctl_bufdomain_int, "I",
    "How many buffers we want to have free before bufdaemon can sleep");
|
||||||
static int hidirtybuffers;
|
static int hidirtybuffers;
|
||||||
SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0,
|
/*
 * vfs.hidirtybuffers is backed by an int and serviced by
 * sysctl_bufdomain_int(), which moves the value with sysctl_handle_int().
 * The OID must therefore be typed CTLTYPE_INT with format "I"; typing it
 * as a long makes userland and the handler disagree on the value size.
 */
SYSCTL_PROC(_vfs, OID_AUTO, hidirtybuffers,
    CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &hidirtybuffers,
    __offsetof(struct bufdomain, bd_hidirtybuffers), sysctl_bufdomain_int, "I",
    "When the number of dirty buffers is considered severe");
|
||||||
int dirtybufthresh;
|
int dirtybufthresh;
|
||||||
SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh,
|
/*
 * vfs.dirtybufthresh is backed by an int and serviced by
 * sysctl_bufdomain_int(), which moves the value with sysctl_handle_int().
 * The OID must therefore be typed CTLTYPE_INT with format "I"; typing it
 * as a long makes userland and the handler disagree on the value size.
 */
SYSCTL_PROC(_vfs, OID_AUTO, dirtybufthresh,
    CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &dirtybufthresh,
    __offsetof(struct bufdomain, bd_dirtybufthresh), sysctl_bufdomain_int, "I",
    "Number of bdwrite to bawrite conversions to clear dirty buffers");
|
||||||
static int numfreebuffers;
|
static int numfreebuffers;
|
||||||
SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0,
|
SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0,
|
||||||
"Number of free buffers");
|
"Number of free buffers");
|
||||||
static int lofreebuffers;
|
static int lofreebuffers;
|
||||||
SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0,
|
/*
 * vfs.lofreebuffers is backed by an int and serviced by
 * sysctl_bufdomain_int(), which moves the value with sysctl_handle_int().
 * The OID must therefore be typed CTLTYPE_INT with format "I"; typing it
 * as a long makes userland and the handler disagree on the value size.
 */
SYSCTL_PROC(_vfs, OID_AUTO, lofreebuffers,
    CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &lofreebuffers,
    __offsetof(struct bufdomain, bd_lofreebuffers), sysctl_bufdomain_int, "I",
    "Target number of free buffers");
|
||||||
static int hifreebuffers;
|
static int hifreebuffers;
|
||||||
SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0,
|
/*
 * vfs.hifreebuffers is backed by an int and serviced by
 * sysctl_bufdomain_int(), which moves the value with sysctl_handle_int().
 * The OID must therefore be typed CTLTYPE_INT with format "I"; typing it
 * as a long makes userland and the handler disagree on the value size.
 */
SYSCTL_PROC(_vfs, OID_AUTO, hifreebuffers,
    CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &hifreebuffers,
    __offsetof(struct bufdomain, bd_hifreebuffers), sysctl_bufdomain_int, "I",
    "Threshold for clean buffer recycling");
|
||||||
static counter_u64_t getnewbufcalls;
|
static counter_u64_t getnewbufcalls;
|
||||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RD,
|
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RD,
|
||||||
|
|
@ -294,65 +370,18 @@ static int bdirtywait;
|
||||||
#define QUEUE_CLEAN 3 /* non-B_DELWRI buffers */
|
#define QUEUE_CLEAN 3 /* non-B_DELWRI buffers */
|
||||||
#define QUEUE_SENTINEL 4 /* not an queue index, but mark for sentinel */
|
#define QUEUE_SENTINEL 4 /* not an queue index, but mark for sentinel */
|
||||||
|
|
||||||
struct bufqueue {
|
/* Maximum number of buffer domains. */
|
||||||
struct mtx_padalign bq_lock;
|
#define BUF_DOMAINS 8
|
||||||
TAILQ_HEAD(, buf) bq_queue;
|
|
||||||
uint8_t bq_index;
|
|
||||||
uint16_t bq_subqueue;
|
|
||||||
int bq_len;
|
|
||||||
} __aligned(CACHE_LINE_SIZE);
|
|
||||||
|
|
||||||
#define BQ_LOCKPTR(bq) (&(bq)->bq_lock)
|
struct bufdomainset bdlodirty; /* Domains > lodirty */
|
||||||
#define BQ_LOCK(bq) mtx_lock(BQ_LOCKPTR((bq)))
|
struct bufdomainset bdhidirty; /* Domains > hidirty */
|
||||||
#define BQ_UNLOCK(bq) mtx_unlock(BQ_LOCKPTR((bq)))
|
|
||||||
#define BQ_ASSERT_LOCKED(bq) mtx_assert(BQ_LOCKPTR((bq)), MA_OWNED)
|
|
||||||
|
|
||||||
struct bufqueue __exclusive_cache_line bqempty;
|
|
||||||
struct bufqueue __exclusive_cache_line bqdirty;
|
|
||||||
|
|
||||||
struct bufdomain {
|
|
||||||
struct bufqueue bd_subq[MAXCPU + 1]; /* Per-cpu sub queues + global */
|
|
||||||
struct bufqueue *bd_cleanq;
|
|
||||||
struct mtx_padalign bd_run_lock;
|
|
||||||
/* Constants */
|
|
||||||
long bd_maxbufspace;
|
|
||||||
long bd_hibufspace;
|
|
||||||
long bd_lobufspace;
|
|
||||||
long bd_bufspacethresh;
|
|
||||||
int bd_hifreebuffers;
|
|
||||||
int bd_lofreebuffers;
|
|
||||||
int bd_lim;
|
|
||||||
/* atomics */
|
|
||||||
int bd_wanted;
|
|
||||||
int __aligned(CACHE_LINE_SIZE) bd_running;
|
|
||||||
long __aligned(CACHE_LINE_SIZE) bd_bufspace;
|
|
||||||
int __aligned(CACHE_LINE_SIZE) bd_freebuffers;
|
|
||||||
} __aligned(CACHE_LINE_SIZE);
|
|
||||||
|
|
||||||
#define BD_LOCKPTR(bd) (&(bd)->bd_cleanq->bq_lock)
|
|
||||||
#define BD_LOCK(bd) mtx_lock(BD_LOCKPTR((bd)))
|
|
||||||
#define BD_UNLOCK(bd) mtx_unlock(BD_LOCKPTR((bd)))
|
|
||||||
#define BD_ASSERT_LOCKED(bd) mtx_assert(BD_LOCKPTR((bd)), MA_OWNED)
|
|
||||||
#define BD_RUN_LOCKPTR(bd) (&(bd)->bd_run_lock)
|
|
||||||
#define BD_RUN_LOCK(bd) mtx_lock(BD_RUN_LOCKPTR((bd)))
|
|
||||||
#define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd)))
|
|
||||||
#define BD_DOMAIN(bd) (bd - bdclean)
|
|
||||||
|
|
||||||
/* Maximum number of clean buffer domains. */
|
|
||||||
#define CLEAN_DOMAINS 8
|
|
||||||
|
|
||||||
/* Configured number of clean queues. */
|
/* Configured number of clean queues. */
|
||||||
static int __read_mostly clean_domains;
|
static int __read_mostly buf_domains;
|
||||||
|
|
||||||
struct bufdomain __exclusive_cache_line bdclean[CLEAN_DOMAINS];
|
BITSET_DEFINE(bufdomainset, BUF_DOMAINS);
|
||||||
|
struct bufdomain __exclusive_cache_line bdomain[BUF_DOMAINS];
|
||||||
static void bq_remove(struct bufqueue *bq, struct buf *bp);
|
struct bufqueue __exclusive_cache_line bqempty;
|
||||||
static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock);
|
|
||||||
static int buf_recycle(struct bufdomain *, bool kva);
|
|
||||||
static void bq_init(struct bufqueue *bq, int qindex, int cpu,
|
|
||||||
const char *lockname);
|
|
||||||
static void bd_init(struct bufdomain *bd);
|
|
||||||
static int bd_flushall(struct bufdomain *bd);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* per-cpu empty buffer cache.
|
* per-cpu empty buffer cache.
|
||||||
|
|
@ -393,6 +422,44 @@ sysctl_runningspace(SYSCTL_HANDLER_ARGS)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sysctl_bufdomain_int(SYSCTL_HANDLER_ARGS)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
int value;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
value = *(int *)arg1;
|
||||||
|
error = sysctl_handle_int(oidp, &value, 0, req);
|
||||||
|
if (error != 0 || req->newptr == NULL)
|
||||||
|
return (error);
|
||||||
|
*(int *)arg1 = value;
|
||||||
|
for (i = 0; i < buf_domains; i++)
|
||||||
|
*(int *)(((uintptr_t)&bdomain[i]) + arg2) =
|
||||||
|
value / buf_domains;
|
||||||
|
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sysctl_bufdomain_long(SYSCTL_HANDLER_ARGS)
|
||||||
|
{
|
||||||
|
long value;
|
||||||
|
int error;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
value = *(long *)arg1;
|
||||||
|
error = sysctl_handle_long(oidp, &value, 0, req);
|
||||||
|
if (error != 0 || req->newptr == NULL)
|
||||||
|
return (error);
|
||||||
|
*(long *)arg1 = value;
|
||||||
|
for (i = 0; i < buf_domains; i++)
|
||||||
|
*(long *)(((uintptr_t)&bdomain[i]) + arg2) =
|
||||||
|
value / buf_domains;
|
||||||
|
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
|
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
|
||||||
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
|
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
|
||||||
static int
|
static int
|
||||||
|
|
@ -403,8 +470,8 @@ sysctl_bufspace(SYSCTL_HANDLER_ARGS)
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
lvalue = 0;
|
lvalue = 0;
|
||||||
for (i = 0; i < clean_domains; i++)
|
for (i = 0; i < buf_domains; i++)
|
||||||
lvalue += bdclean[i].bd_bufspace;
|
lvalue += bdomain[i].bd_bufspace;
|
||||||
if (sizeof(int) == sizeof(long) || req->oldlen >= sizeof(long))
|
if (sizeof(int) == sizeof(long) || req->oldlen >= sizeof(long))
|
||||||
return (sysctl_handle_long(oidp, &lvalue, 0, req));
|
return (sysctl_handle_long(oidp, &lvalue, 0, req));
|
||||||
if (lvalue > INT_MAX)
|
if (lvalue > INT_MAX)
|
||||||
|
|
@ -421,12 +488,24 @@ sysctl_bufspace(SYSCTL_HANDLER_ARGS)
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
lvalue = 0;
|
lvalue = 0;
|
||||||
for (i = 0; i < clean_domains; i++)
|
for (i = 0; i < buf_domains; i++)
|
||||||
lvalue += bdclean[i].bd_bufspace;
|
lvalue += bdomain[i].bd_bufspace;
|
||||||
return (sysctl_handle_long(oidp, &lvalue, 0, req));
|
return (sysctl_handle_long(oidp, &lvalue, 0, req));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static int
|
||||||
|
sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS)
|
||||||
|
{
|
||||||
|
int value;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
value = 0;
|
||||||
|
for (i = 0; i < buf_domains; i++)
|
||||||
|
value += bdomain[i].bd_numdirtybuffers;
|
||||||
|
return (sysctl_handle_int(oidp, &value, 0, req));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* bdirtywakeup:
|
* bdirtywakeup:
|
||||||
*
|
*
|
||||||
|
|
@ -443,6 +522,42 @@ bdirtywakeup(void)
|
||||||
mtx_unlock(&bdirtylock);
|
mtx_unlock(&bdirtylock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* bd_clear:
|
||||||
|
*
|
||||||
|
* Clear a domain from the appropriate bitsets when dirtybuffers
|
||||||
|
* is decremented.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
bd_clear(struct bufdomain *bd)
|
||||||
|
{
|
||||||
|
|
||||||
|
mtx_lock(&bdirtylock);
|
||||||
|
if (bd->bd_numdirtybuffers <= bd->bd_lodirtybuffers)
|
||||||
|
BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty);
|
||||||
|
if (bd->bd_numdirtybuffers <= bd->bd_hidirtybuffers)
|
||||||
|
BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty);
|
||||||
|
mtx_unlock(&bdirtylock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* bd_set:
|
||||||
|
*
|
||||||
|
* Set a domain in the appropriate bitsets when dirtybuffers
|
||||||
|
* is incremented.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
bd_set(struct bufdomain *bd)
|
||||||
|
{
|
||||||
|
|
||||||
|
mtx_lock(&bdirtylock);
|
||||||
|
if (bd->bd_numdirtybuffers > bd->bd_lodirtybuffers)
|
||||||
|
BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty);
|
||||||
|
if (bd->bd_numdirtybuffers > bd->bd_hidirtybuffers)
|
||||||
|
BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty);
|
||||||
|
mtx_unlock(&bdirtylock);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* bdirtysub:
|
* bdirtysub:
|
||||||
*
|
*
|
||||||
|
|
@ -450,12 +565,17 @@ bdirtywakeup(void)
|
||||||
* threads blocked in bwillwrite().
|
* threads blocked in bwillwrite().
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
bdirtysub(void)
|
bdirtysub(struct buf *bp)
|
||||||
{
|
{
|
||||||
|
struct bufdomain *bd;
|
||||||
|
int num;
|
||||||
|
|
||||||
if (atomic_fetchadd_int(&numdirtybuffers, -1) ==
|
bd = bufdomain(bp);
|
||||||
(lodirtybuffers + hidirtybuffers) / 2)
|
num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, -1);
|
||||||
|
if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2)
|
||||||
bdirtywakeup();
|
bdirtywakeup();
|
||||||
|
if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers)
|
||||||
|
bd_clear(bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -465,16 +585,21 @@ bdirtysub(void)
|
||||||
* daemon if needed.
|
* daemon if needed.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
bdirtyadd(void)
|
bdirtyadd(struct buf *bp)
|
||||||
{
|
{
|
||||||
|
struct bufdomain *bd;
|
||||||
|
int num;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Only do the wakeup once as we cross the boundary. The
|
* Only do the wakeup once as we cross the boundary. The
|
||||||
* buf daemon will keep running until the condition clears.
|
* buf daemon will keep running until the condition clears.
|
||||||
*/
|
*/
|
||||||
if (atomic_fetchadd_int(&numdirtybuffers, 1) ==
|
bd = bufdomain(bp);
|
||||||
(lodirtybuffers + hidirtybuffers) / 2)
|
num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, 1);
|
||||||
|
if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2)
|
||||||
bd_wakeup();
|
bd_wakeup();
|
||||||
|
if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers)
|
||||||
|
bd_set(bd);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -539,11 +664,11 @@ bufspace_adjust(struct buf *bp, int bufsize)
|
||||||
|
|
||||||
KASSERT((bp->b_flags & B_MALLOC) == 0,
|
KASSERT((bp->b_flags & B_MALLOC) == 0,
|
||||||
("bufspace_adjust: malloc buf %p", bp));
|
("bufspace_adjust: malloc buf %p", bp));
|
||||||
bd = &bdclean[bp->b_domain];
|
bd = bufdomain(bp);
|
||||||
diff = bufsize - bp->b_bufsize;
|
diff = bufsize - bp->b_bufsize;
|
||||||
if (diff < 0) {
|
if (diff < 0) {
|
||||||
atomic_subtract_long(&bd->bd_bufspace, -diff);
|
atomic_subtract_long(&bd->bd_bufspace, -diff);
|
||||||
} else {
|
} else if (diff > 0) {
|
||||||
space = atomic_fetchadd_long(&bd->bd_bufspace, diff);
|
space = atomic_fetchadd_long(&bd->bd_bufspace, diff);
|
||||||
/* Wake up the daemon on the transition. */
|
/* Wake up the daemon on the transition. */
|
||||||
if (space < bd->bd_bufspacethresh &&
|
if (space < bd->bd_bufspacethresh &&
|
||||||
|
|
@ -638,7 +763,7 @@ bufspace_wait(struct bufdomain *bd, struct vnode *vp, int gbflags,
|
||||||
* recursion.
|
* recursion.
|
||||||
*/
|
*/
|
||||||
td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
|
td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
|
||||||
fl = buf_flush(vp, flushbufqtarget);
|
fl = buf_flush(vp, bd, flushbufqtarget);
|
||||||
td->td_pflags &= norunbuf;
|
td->td_pflags &= norunbuf;
|
||||||
BD_LOCK(bd);
|
BD_LOCK(bd);
|
||||||
if (fl != 0)
|
if (fl != 0)
|
||||||
|
|
@ -700,6 +825,15 @@ bufspace_daemon(void *arg)
|
||||||
if (buf_recycle(bd, false) != 0) {
|
if (buf_recycle(bd, false) != 0) {
|
||||||
if (bd_flushall(bd))
|
if (bd_flushall(bd))
|
||||||
continue;
|
continue;
|
||||||
|
/*
|
||||||
|
* Speedup dirty if we've run out of clean
|
||||||
|
* buffers. This is possible in particular
|
||||||
|
* because softdep may hold many bufs locked
|
||||||
|
* pending writes to other bufs which are
|
||||||
|
* marked for delayed write, exhausting
|
||||||
|
* clean space until they are written.
|
||||||
|
*/
|
||||||
|
bd_speedup();
|
||||||
BD_LOCK(bd);
|
BD_LOCK(bd);
|
||||||
if (bd->bd_wanted) {
|
if (bd->bd_wanted) {
|
||||||
msleep(&bd->bd_wanted, BD_LOCKPTR(bd),
|
msleep(&bd->bd_wanted, BD_LOCKPTR(bd),
|
||||||
|
|
@ -1025,7 +1159,6 @@ bufinit(void)
|
||||||
("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf,
|
("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf,
|
||||||
MAXBSIZE));
|
MAXBSIZE));
|
||||||
bq_init(&bqempty, QUEUE_EMPTY, -1, "bufq empty lock");
|
bq_init(&bqempty, QUEUE_EMPTY, -1, "bufq empty lock");
|
||||||
bq_init(&bqdirty, QUEUE_DIRTY, -1, "bufq dirty lock");
|
|
||||||
mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF);
|
mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF);
|
||||||
mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF);
|
mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF);
|
||||||
mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF);
|
mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF);
|
||||||
|
|
@ -1093,7 +1226,6 @@ bufinit(void)
|
||||||
*/
|
*/
|
||||||
hidirtybuffers = nbuf / 4 + 20;
|
hidirtybuffers = nbuf / 4 + 20;
|
||||||
dirtybufthresh = hidirtybuffers * 9 / 10;
|
dirtybufthresh = hidirtybuffers * 9 / 10;
|
||||||
numdirtybuffers = 0;
|
|
||||||
/*
|
/*
|
||||||
* To support extreme low-memory systems, make sure hidirtybuffers
|
* To support extreme low-memory systems, make sure hidirtybuffers
|
||||||
* cannot eat up all available buffer space. This occurs when our
|
* cannot eat up all available buffer space. This occurs when our
|
||||||
|
|
@ -1128,22 +1260,26 @@ bufinit(void)
|
||||||
* One queue per-256mb up to the max. More queues gives better
|
* One queue per-256mb up to the max. More queues gives better
|
||||||
* concurrency but less accurate LRU.
|
* concurrency but less accurate LRU.
|
||||||
*/
|
*/
|
||||||
clean_domains = MIN(howmany(maxbufspace, 256*1024*1024), CLEAN_DOMAINS);
|
buf_domains = MIN(howmany(maxbufspace, 256*1024*1024), BUF_DOMAINS);
|
||||||
for (i = 0 ; i < clean_domains; i++) {
|
for (i = 0 ; i < buf_domains; i++) {
|
||||||
struct bufdomain *bd;
|
struct bufdomain *bd;
|
||||||
|
|
||||||
bd = &bdclean[i];
|
bd = &bdomain[i];
|
||||||
bd_init(bd);
|
bd_init(bd);
|
||||||
bd->bd_freebuffers = nbuf / clean_domains;
|
bd->bd_freebuffers = nbuf / buf_domains;
|
||||||
bd->bd_hifreebuffers = hifreebuffers / clean_domains;
|
bd->bd_hifreebuffers = hifreebuffers / buf_domains;
|
||||||
bd->bd_lofreebuffers = lofreebuffers / clean_domains;
|
bd->bd_lofreebuffers = lofreebuffers / buf_domains;
|
||||||
bd->bd_bufspace = 0;
|
bd->bd_bufspace = 0;
|
||||||
bd->bd_maxbufspace = maxbufspace / clean_domains;
|
bd->bd_maxbufspace = maxbufspace / buf_domains;
|
||||||
bd->bd_hibufspace = hibufspace / clean_domains;
|
bd->bd_hibufspace = hibufspace / buf_domains;
|
||||||
bd->bd_lobufspace = lobufspace / clean_domains;
|
bd->bd_lobufspace = lobufspace / buf_domains;
|
||||||
bd->bd_bufspacethresh = bufspacethresh / clean_domains;
|
bd->bd_bufspacethresh = bufspacethresh / buf_domains;
|
||||||
|
bd->bd_numdirtybuffers = 0;
|
||||||
|
bd->bd_hidirtybuffers = hidirtybuffers / buf_domains;
|
||||||
|
bd->bd_lodirtybuffers = lodirtybuffers / buf_domains;
|
||||||
|
bd->bd_dirtybufthresh = dirtybufthresh / buf_domains;
|
||||||
/* Don't allow more than 2% of bufs in the per-cpu caches. */
|
/* Don't allow more than 2% of bufs in the per-cpu caches. */
|
||||||
bd->bd_lim = nbuf / clean_domains / 50 / mp_ncpus;
|
bd->bd_lim = nbuf / buf_domains / 50 / mp_ncpus;
|
||||||
}
|
}
|
||||||
getnewbufcalls = counter_u64_alloc(M_WAITOK);
|
getnewbufcalls = counter_u64_alloc(M_WAITOK);
|
||||||
getnewbufrestarts = counter_u64_alloc(M_WAITOK);
|
getnewbufrestarts = counter_u64_alloc(M_WAITOK);
|
||||||
|
|
@ -1327,6 +1463,13 @@ bpmap_qenter(struct buf *bp)
|
||||||
(vm_offset_t)(bp->b_offset & PAGE_MASK));
|
(vm_offset_t)(bp->b_offset & PAGE_MASK));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline struct bufdomain *
|
||||||
|
bufdomain(struct buf *bp)
|
||||||
|
{
|
||||||
|
|
||||||
|
return (&bdomain[bp->b_domain]);
|
||||||
|
}
|
||||||
|
|
||||||
static struct bufqueue *
|
static struct bufqueue *
|
||||||
bufqueue(struct buf *bp)
|
bufqueue(struct buf *bp)
|
||||||
{
|
{
|
||||||
|
|
@ -1339,9 +1482,9 @@ bufqueue(struct buf *bp)
|
||||||
case QUEUE_EMPTY:
|
case QUEUE_EMPTY:
|
||||||
return (&bqempty);
|
return (&bqempty);
|
||||||
case QUEUE_DIRTY:
|
case QUEUE_DIRTY:
|
||||||
return (&bqdirty);
|
return (&bufdomain(bp)->bd_dirtyq);
|
||||||
case QUEUE_CLEAN:
|
case QUEUE_CLEAN:
|
||||||
return (&bdclean[bp->b_domain].bd_subq[bp->b_subqueue]);
|
return (&bufdomain(bp)->bd_subq[bp->b_subqueue]);
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -1404,14 +1547,14 @@ binsfree(struct buf *bp, int qindex)
|
||||||
bq_remove(bq, bp);
|
bq_remove(bq, bp);
|
||||||
BQ_UNLOCK(bq);
|
BQ_UNLOCK(bq);
|
||||||
}
|
}
|
||||||
|
bd = bufdomain(bp);
|
||||||
if (qindex == QUEUE_CLEAN) {
|
if (qindex == QUEUE_CLEAN) {
|
||||||
bd = &bdclean[bp->b_domain];
|
|
||||||
if (bd->bd_lim != 0)
|
if (bd->bd_lim != 0)
|
||||||
bq = &bd->bd_subq[PCPU_GET(cpuid)];
|
bq = &bd->bd_subq[PCPU_GET(cpuid)];
|
||||||
else
|
else
|
||||||
bq = bd->bd_cleanq;
|
bq = bd->bd_cleanq;
|
||||||
} else
|
} else
|
||||||
bq = &bqdirty;
|
bq = &bd->bd_dirtyq;
|
||||||
bq_insert(bq, bp, true);
|
bq_insert(bq, bp, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1439,7 +1582,7 @@ buf_free(struct buf *bp)
|
||||||
if (!LIST_EMPTY(&bp->b_dep))
|
if (!LIST_EMPTY(&bp->b_dep))
|
||||||
buf_deallocate(bp);
|
buf_deallocate(bp);
|
||||||
bufkva_free(bp);
|
bufkva_free(bp);
|
||||||
atomic_add_int(&bdclean[bp->b_domain].bd_freebuffers, 1);
|
atomic_add_int(&bufdomain(bp)->bd_freebuffers, 1);
|
||||||
BUF_UNLOCK(bp);
|
BUF_UNLOCK(bp);
|
||||||
uma_zfree(buf_zone, bp);
|
uma_zfree(buf_zone, bp);
|
||||||
}
|
}
|
||||||
|
|
@ -1715,9 +1858,10 @@ bd_init(struct bufdomain *bd)
|
||||||
int domain;
|
int domain;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
domain = bd - bdclean;
|
domain = bd - bdomain;
|
||||||
bd->bd_cleanq = &bd->bd_subq[mp_maxid + 1];
|
bd->bd_cleanq = &bd->bd_subq[mp_maxid + 1];
|
||||||
bq_init(bd->bd_cleanq, QUEUE_CLEAN, mp_maxid + 1, "bufq clean lock");
|
bq_init(bd->bd_cleanq, QUEUE_CLEAN, mp_maxid + 1, "bufq clean lock");
|
||||||
|
bq_init(&bd->bd_dirtyq, QUEUE_DIRTY, -1, "bufq dirty lock");
|
||||||
for (i = 0; i <= mp_maxid; i++)
|
for (i = 0; i <= mp_maxid; i++)
|
||||||
bq_init(&bd->bd_subq[i], QUEUE_CLEAN, i,
|
bq_init(&bd->bd_subq[i], QUEUE_CLEAN, i,
|
||||||
"bufq clean subqueue lock");
|
"bufq clean subqueue lock");
|
||||||
|
|
@ -1809,7 +1953,7 @@ bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock)
|
||||||
if (bp->b_qindex != QUEUE_NONE)
|
if (bp->b_qindex != QUEUE_NONE)
|
||||||
panic("bq_insert: free buffer %p onto another queue?", bp);
|
panic("bq_insert: free buffer %p onto another queue?", bp);
|
||||||
|
|
||||||
bd = &bdclean[bp->b_domain];
|
bd = bufdomain(bp);
|
||||||
if (bp->b_flags & B_AGE) {
|
if (bp->b_flags & B_AGE) {
|
||||||
/* Place this buf directly on the real queue. */
|
/* Place this buf directly on the real queue. */
|
||||||
if (bq->bq_index == QUEUE_CLEAN)
|
if (bq->bq_index == QUEUE_CLEAN)
|
||||||
|
|
@ -1926,8 +2070,8 @@ bufkva_reclaim(vmem_t *vmem, int flags)
|
||||||
|
|
||||||
done = false;
|
done = false;
|
||||||
for (i = 0; i < 5; i++) {
|
for (i = 0; i < 5; i++) {
|
||||||
for (q = 0; q < clean_domains; q++)
|
for (q = 0; q < buf_domains; q++)
|
||||||
if (buf_recycle(&bdclean[q], true) != 0)
|
if (buf_recycle(&bdomain[q], true) != 0)
|
||||||
done = true;
|
done = true;
|
||||||
if (done)
|
if (done)
|
||||||
break;
|
break;
|
||||||
|
|
@ -2319,7 +2463,7 @@ bdirty(struct buf *bp)
|
||||||
if ((bp->b_flags & B_DELWRI) == 0) {
|
if ((bp->b_flags & B_DELWRI) == 0) {
|
||||||
bp->b_flags |= /* XXX B_DONE | */ B_DELWRI;
|
bp->b_flags |= /* XXX B_DONE | */ B_DELWRI;
|
||||||
reassignbuf(bp);
|
reassignbuf(bp);
|
||||||
bdirtyadd();
|
bdirtyadd(bp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2347,7 +2491,7 @@ bundirty(struct buf *bp)
|
||||||
if (bp->b_flags & B_DELWRI) {
|
if (bp->b_flags & B_DELWRI) {
|
||||||
bp->b_flags &= ~B_DELWRI;
|
bp->b_flags &= ~B_DELWRI;
|
||||||
reassignbuf(bp);
|
reassignbuf(bp);
|
||||||
bdirtysub();
|
bdirtysub(bp);
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* Since it is now being written, we can clear its deferred write flag.
|
* Since it is now being written, we can clear its deferred write flag.
|
||||||
|
|
@ -2419,9 +2563,9 @@ void
|
||||||
bwillwrite(void)
|
bwillwrite(void)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (numdirtybuffers >= hidirtybuffers) {
|
if (buf_dirty_count_severe()) {
|
||||||
mtx_lock(&bdirtylock);
|
mtx_lock(&bdirtylock);
|
||||||
while (numdirtybuffers >= hidirtybuffers) {
|
while (buf_dirty_count_severe()) {
|
||||||
bdirtywait = 1;
|
bdirtywait = 1;
|
||||||
msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4),
|
msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4),
|
||||||
"flswai", 0);
|
"flswai", 0);
|
||||||
|
|
@ -2437,7 +2581,7 @@ int
|
||||||
buf_dirty_count_severe(void)
|
buf_dirty_count_severe(void)
|
||||||
{
|
{
|
||||||
|
|
||||||
return(numdirtybuffers >= hidirtybuffers);
|
return (!BIT_EMPTY(BUF_DOMAINS, &bdhidirty));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -2523,7 +2667,7 @@ brelse(struct buf *bp)
|
||||||
if (!LIST_EMPTY(&bp->b_dep))
|
if (!LIST_EMPTY(&bp->b_dep))
|
||||||
buf_deallocate(bp);
|
buf_deallocate(bp);
|
||||||
if (bp->b_flags & B_DELWRI)
|
if (bp->b_flags & B_DELWRI)
|
||||||
bdirtysub();
|
bdirtysub(bp);
|
||||||
bp->b_flags &= ~(B_DELWRI | B_CACHE);
|
bp->b_flags &= ~(B_DELWRI | B_CACHE);
|
||||||
if ((bp->b_flags & B_VMIO) == 0) {
|
if ((bp->b_flags & B_VMIO) == 0) {
|
||||||
allocbuf(bp, 0);
|
allocbuf(bp, 0);
|
||||||
|
|
@ -3138,9 +3282,9 @@ getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int maxsize, int gbflags)
|
||||||
else
|
else
|
||||||
metadata = false;
|
metadata = false;
|
||||||
if (vp == NULL)
|
if (vp == NULL)
|
||||||
bd = &bdclean[0];
|
bd = &bdomain[0];
|
||||||
else
|
else
|
||||||
bd = &bdclean[vp->v_bufobj.bo_domain];
|
bd = &bdomain[vp->v_bufobj.bo_domain];
|
||||||
|
|
||||||
counter_u64_add(getnewbufcalls, 1);
|
counter_u64_add(getnewbufcalls, 1);
|
||||||
reserved = false;
|
reserved = false;
|
||||||
|
|
@ -3186,11 +3330,11 @@ static struct kproc_desc buf_kp = {
|
||||||
SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);
|
SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);
|
||||||
|
|
||||||
static int
|
static int
|
||||||
buf_flush(struct vnode *vp, int target)
|
buf_flush(struct vnode *vp, struct bufdomain *bd, int target)
|
||||||
{
|
{
|
||||||
int flushed;
|
int flushed;
|
||||||
|
|
||||||
flushed = flushbufqueues(vp, target, 0);
|
flushed = flushbufqueues(vp, bd, target, 0);
|
||||||
if (flushed == 0) {
|
if (flushed == 0) {
|
||||||
/*
|
/*
|
||||||
* Could not find any buffers without rollback
|
* Could not find any buffers without rollback
|
||||||
|
|
@ -3199,7 +3343,7 @@ buf_flush(struct vnode *vp, int target)
|
||||||
*/
|
*/
|
||||||
if (vp != NULL && target > 2)
|
if (vp != NULL && target > 2)
|
||||||
target /= 2;
|
target /= 2;
|
||||||
flushbufqueues(vp, target, 1);
|
flushbufqueues(vp, bd, target, 1);
|
||||||
}
|
}
|
||||||
return (flushed);
|
return (flushed);
|
||||||
}
|
}
|
||||||
|
|
@ -3207,6 +3351,8 @@ buf_flush(struct vnode *vp, int target)
|
||||||
static void
|
static void
|
||||||
buf_daemon()
|
buf_daemon()
|
||||||
{
|
{
|
||||||
|
struct bufdomain *bd;
|
||||||
|
int speedupreq;
|
||||||
int lodirty;
|
int lodirty;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
|
@ -3219,11 +3365,11 @@ buf_daemon()
|
||||||
/*
|
/*
|
||||||
* Start the buf clean daemons as children threads.
|
* Start the buf clean daemons as children threads.
|
||||||
*/
|
*/
|
||||||
for (i = 0 ; i < clean_domains; i++) {
|
for (i = 0 ; i < buf_domains; i++) {
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
error = kthread_add((void (*)(void *))bufspace_daemon,
|
error = kthread_add((void (*)(void *))bufspace_daemon,
|
||||||
&bdclean[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i);
|
&bdomain[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i);
|
||||||
if (error)
|
if (error)
|
||||||
panic("error %d spawning bufspace daemon", error);
|
panic("error %d spawning bufspace daemon", error);
|
||||||
}
|
}
|
||||||
|
|
@ -3238,20 +3384,30 @@ buf_daemon()
|
||||||
mtx_unlock(&bdlock);
|
mtx_unlock(&bdlock);
|
||||||
|
|
||||||
kproc_suspend_check(bufdaemonproc);
|
kproc_suspend_check(bufdaemonproc);
|
||||||
lodirty = lodirtybuffers;
|
|
||||||
if (bd_speedupreq) {
|
|
||||||
lodirty = numdirtybuffers / 2;
|
|
||||||
bd_speedupreq = 0;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Do the flush. Limit the amount of in-transit I/O we
|
* Save speedupreq for this pass and reset to capture new
|
||||||
* allow to build up, otherwise we would completely saturate
|
* requests.
|
||||||
* the I/O system.
|
|
||||||
*/
|
*/
|
||||||
while (numdirtybuffers > lodirty) {
|
speedupreq = bd_speedupreq;
|
||||||
if (buf_flush(NULL, numdirtybuffers - lodirty) == 0)
|
bd_speedupreq = 0;
|
||||||
break;
|
|
||||||
kern_yield(PRI_USER);
|
/*
|
||||||
|
* Flush each domain sequentially according to its level and
|
||||||
|
* the speedup request.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < buf_domains; i++) {
|
||||||
|
bd = &bdomain[i];
|
||||||
|
if (speedupreq)
|
||||||
|
lodirty = bd->bd_numdirtybuffers / 2;
|
||||||
|
else
|
||||||
|
lodirty = bd->bd_lodirtybuffers;
|
||||||
|
while (bd->bd_numdirtybuffers > lodirty) {
|
||||||
|
if (buf_flush(NULL, bd,
|
||||||
|
bd->bd_numdirtybuffers - lodirty) == 0)
|
||||||
|
break;
|
||||||
|
kern_yield(PRI_USER);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -3265,7 +3421,7 @@ buf_daemon()
|
||||||
* to avoid endless loops on unlockable buffers.
|
* to avoid endless loops on unlockable buffers.
|
||||||
*/
|
*/
|
||||||
mtx_lock(&bdlock);
|
mtx_lock(&bdlock);
|
||||||
if (numdirtybuffers <= lodirtybuffers) {
|
if (!BIT_EMPTY(BUF_DOMAINS, &bdlodirty)) {
|
||||||
/*
|
/*
|
||||||
* We reached our low water mark, reset the
|
* We reached our low water mark, reset the
|
||||||
* request and sleep until we are needed again.
|
* request and sleep until we are needed again.
|
||||||
|
|
@ -3304,7 +3460,8 @@ SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps,
|
||||||
0, "Number of buffers flushed with dependecies that require rollbacks");
|
0, "Number of buffers flushed with dependecies that require rollbacks");
|
||||||
|
|
||||||
static int
|
static int
|
||||||
flushbufqueues(struct vnode *lvp, int target, int flushdeps)
|
flushbufqueues(struct vnode *lvp, struct bufdomain *bd, int target,
|
||||||
|
int flushdeps)
|
||||||
{
|
{
|
||||||
struct bufqueue *bq;
|
struct bufqueue *bq;
|
||||||
struct buf *sentinel;
|
struct buf *sentinel;
|
||||||
|
|
@ -3317,7 +3474,7 @@ flushbufqueues(struct vnode *lvp, int target, int flushdeps)
|
||||||
bool unlock;
|
bool unlock;
|
||||||
|
|
||||||
flushed = 0;
|
flushed = 0;
|
||||||
bq = &bqdirty;
|
bq = &bd->bd_dirtyq;
|
||||||
bp = NULL;
|
bp = NULL;
|
||||||
sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO);
|
sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO);
|
||||||
sentinel->b_qindex = QUEUE_SENTINEL;
|
sentinel->b_qindex = QUEUE_SENTINEL;
|
||||||
|
|
@ -3653,7 +3810,7 @@ bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags)
|
||||||
panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp);
|
panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp);
|
||||||
}
|
}
|
||||||
counter_u64_add(mappingrestarts, 1);
|
counter_u64_add(mappingrestarts, 1);
|
||||||
bufspace_wait(&bdclean[bp->b_domain], bp->b_vp, gbflags, 0, 0);
|
bufspace_wait(bufdomain(bp), bp->b_vp, gbflags, 0, 0);
|
||||||
}
|
}
|
||||||
has_addr:
|
has_addr:
|
||||||
if (need_mapping) {
|
if (need_mapping) {
|
||||||
|
|
@ -3851,7 +4008,7 @@ loop:
|
||||||
*/
|
*/
|
||||||
if (flags & GB_NOCREAT)
|
if (flags & GB_NOCREAT)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (bdclean[bo->bo_domain].bd_freebuffers == 0 &&
|
if (bdomain[bo->bo_domain].bd_freebuffers == 0 &&
|
||||||
TD_IS_IDLETHREAD(curthread))
|
TD_IS_IDLETHREAD(curthread))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
@ -3908,7 +4065,7 @@ loop:
|
||||||
if (gbincore(bo, blkno)) {
|
if (gbincore(bo, blkno)) {
|
||||||
BO_UNLOCK(bo);
|
BO_UNLOCK(bo);
|
||||||
bp->b_flags |= B_INVAL;
|
bp->b_flags |= B_INVAL;
|
||||||
bufspace_release(&bdclean[bp->b_domain], maxsize);
|
bufspace_release(bufdomain(bp), maxsize);
|
||||||
brelse(bp);
|
brelse(bp);
|
||||||
goto loop;
|
goto loop;
|
||||||
}
|
}
|
||||||
|
|
@ -3943,7 +4100,7 @@ loop:
|
||||||
}
|
}
|
||||||
|
|
||||||
allocbuf(bp, size);
|
allocbuf(bp, size);
|
||||||
bufspace_release(&bdclean[bp->b_domain], maxsize);
|
bufspace_release(bufdomain(bp), maxsize);
|
||||||
bp->b_flags &= ~B_DONE;
|
bp->b_flags &= ~B_DONE;
|
||||||
}
|
}
|
||||||
CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp);
|
CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp);
|
||||||
|
|
@ -3972,7 +4129,7 @@ geteblk(int size, int flags)
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
allocbuf(bp, size);
|
allocbuf(bp, size);
|
||||||
bufspace_release(&bdclean[bp->b_domain], maxsize);
|
bufspace_release(bufdomain(bp), maxsize);
|
||||||
bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */
|
bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */
|
||||||
BUF_ASSERT_HELD(bp);
|
BUF_ASSERT_HELD(bp);
|
||||||
return (bp);
|
return (bp);
|
||||||
|
|
@ -4841,7 +4998,7 @@ bufobj_init(struct bufobj *bo, void *private)
|
||||||
static volatile int bufobj_cleanq;
|
static volatile int bufobj_cleanq;
|
||||||
|
|
||||||
bo->bo_domain =
|
bo->bo_domain =
|
||||||
atomic_fetchadd_int(&bufobj_cleanq, 1) % clean_domains;
|
atomic_fetchadd_int(&bufobj_cleanq, 1) % buf_domains;
|
||||||
rw_init(BO_LOCKPTR(bo), "bufobj interlock");
|
rw_init(BO_LOCKPTR(bo), "bufobj interlock");
|
||||||
bo->bo_private = private;
|
bo->bo_private = private;
|
||||||
TAILQ_INIT(&bo->bo_clean.bv_hd);
|
TAILQ_INIT(&bo->bo_clean.bv_hd);
|
||||||
|
|
@ -5164,6 +5321,7 @@ DB_SHOW_COMMAND(buffer, db_show_buffer)
|
||||||
}
|
}
|
||||||
db_printf("\n");
|
db_printf("\n");
|
||||||
}
|
}
|
||||||
|
BUF_LOCKPRINTINFO(bp);
|
||||||
#if defined(FULL_BUF_TRACKING)
|
#if defined(FULL_BUF_TRACKING)
|
||||||
db_printf("b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt);
|
db_printf("b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt);
|
||||||
|
|
||||||
|
|
@ -5178,19 +5336,19 @@ DB_SHOW_COMMAND(buffer, db_show_buffer)
|
||||||
db_printf("b_io_tracking: %s\n", bp->b_io_tracking);
|
db_printf("b_io_tracking: %s\n", bp->b_io_tracking);
|
||||||
#endif
|
#endif
|
||||||
db_printf(" ");
|
db_printf(" ");
|
||||||
BUF_LOCKPRINTINFO(bp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DB_SHOW_COMMAND(bufqueues, bufqueues)
|
DB_SHOW_COMMAND(bufqueues, bufqueues)
|
||||||
{
|
{
|
||||||
struct bufdomain *bd;
|
struct bufdomain *bd;
|
||||||
int i, j;
|
struct buf *bp;
|
||||||
|
long total;
|
||||||
|
int i, j, cnt;
|
||||||
|
|
||||||
db_printf("bqempty: %d\n", bqempty.bq_len);
|
db_printf("bqempty: %d\n", bqempty.bq_len);
|
||||||
db_printf("bqdirty: %d\n", bqdirty.bq_len);
|
|
||||||
|
|
||||||
for (i = 0; i < clean_domains; i++) {
|
for (i = 0; i < buf_domains; i++) {
|
||||||
bd = &bdclean[i];
|
bd = &bdomain[i];
|
||||||
db_printf("Buf domain %d\n", i);
|
db_printf("Buf domain %d\n", i);
|
||||||
db_printf("\tfreebufs\t%d\n", bd->bd_freebuffers);
|
db_printf("\tfreebufs\t%d\n", bd->bd_freebuffers);
|
||||||
db_printf("\tlofreebufs\t%d\n", bd->bd_lofreebuffers);
|
db_printf("\tlofreebufs\t%d\n", bd->bd_lofreebuffers);
|
||||||
|
|
@ -5202,13 +5360,43 @@ DB_SHOW_COMMAND(bufqueues, bufqueues)
|
||||||
db_printf("\tlobufspace\t%ld\n", bd->bd_lobufspace);
|
db_printf("\tlobufspace\t%ld\n", bd->bd_lobufspace);
|
||||||
db_printf("\tbufspacethresh\t%ld\n", bd->bd_bufspacethresh);
|
db_printf("\tbufspacethresh\t%ld\n", bd->bd_bufspacethresh);
|
||||||
db_printf("\n");
|
db_printf("\n");
|
||||||
db_printf("\tcleanq count\t%d\n", bd->bd_cleanq->bq_len);
|
db_printf("\tnumdirtybuffers\t%d\n", bd->bd_numdirtybuffers);
|
||||||
|
db_printf("\tlodirtybuffers\t%d\n", bd->bd_lodirtybuffers);
|
||||||
|
db_printf("\thidirtybuffers\t%d\n", bd->bd_hidirtybuffers);
|
||||||
|
db_printf("\tdirtybufthresh\t%d\n", bd->bd_dirtybufthresh);
|
||||||
|
db_printf("\n");
|
||||||
|
total = 0;
|
||||||
|
TAILQ_FOREACH(bp, &bd->bd_cleanq->bq_queue, b_freelist)
|
||||||
|
total += bp->b_bufsize;
|
||||||
|
db_printf("\tcleanq count\t%d (%ld)\n",
|
||||||
|
bd->bd_cleanq->bq_len, total);
|
||||||
|
total = 0;
|
||||||
|
TAILQ_FOREACH(bp, &bd->bd_dirtyq.bq_queue, b_freelist)
|
||||||
|
total += bp->b_bufsize;
|
||||||
|
db_printf("\tdirtyq count\t%d (%ld)\n",
|
||||||
|
bd->bd_dirtyq.bq_len, total);
|
||||||
db_printf("\twakeup\t\t%d\n", bd->bd_wanted);
|
db_printf("\twakeup\t\t%d\n", bd->bd_wanted);
|
||||||
db_printf("\tlim\t\t%d\n", bd->bd_lim);
|
db_printf("\tlim\t\t%d\n", bd->bd_lim);
|
||||||
db_printf("\tCPU ");
|
db_printf("\tCPU ");
|
||||||
for (j = 0; j <= mp_maxid; j++)
|
for (j = 0; j <= mp_maxid; j++)
|
||||||
db_printf("%d, ", bd->bd_subq[j].bq_len);
|
db_printf("%d, ", bd->bd_subq[j].bq_len);
|
||||||
db_printf("\n");
|
db_printf("\n");
|
||||||
|
cnt = 0;
|
||||||
|
total = 0;
|
||||||
|
for (j = 0; j < nbuf; j++)
|
||||||
|
if (buf[j].b_domain == i && BUF_ISLOCKED(&buf[j])) {
|
||||||
|
cnt++;
|
||||||
|
total += buf[j].b_bufsize;
|
||||||
|
}
|
||||||
|
db_printf("\tLocked buffers: %d space %ld\n", cnt, total);
|
||||||
|
cnt = 0;
|
||||||
|
total = 0;
|
||||||
|
for (j = 0; j < nbuf; j++)
|
||||||
|
if (buf[j].b_domain == i) {
|
||||||
|
cnt++;
|
||||||
|
total += buf[j].b_bufsize;
|
||||||
|
}
|
||||||
|
db_printf("\tTotal buffers: %d space %ld\n", cnt, total);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue