mirror of
https://github.com/opnsense/src.git
synced 2026-05-28 04:12:45 -04:00
Un-staticize runningbufwakeup() and staticize updateproc.
Add a new private thread flag to indicate that the thread should not sleep if runningbufspace is too large. Set this flag on the bufdaemon and syncer threads so that they skip the waitrunningbufspace() call in bufwrite() rather than checking the proc pointer vs. the known proc pointers for these two threads. A way of preventing these threads from being starved for I/O but still placing limits on their outstanding I/O would be desirable. Set this flag in ffs_copyonwrite() to prevent bufwrite() calls from blocking on the runningbufspace check while holding snaplk. This prevents snaplk from being held for an arbitrarily long period of time if runningbufspace is high and greatly reduces the contention for snaplk. The disadvantage is that ffs_copyonwrite() can start a large amount of I/O if there are a large number of snapshots, which could cause a deadlock in other parts of the code. Call runningbufwakeup() in ffs_copyonwrite() to decrement runningbufspace before attempting to grab snaplk so that I/O requests waiting on snaplk are not counted in runningbufspace as being in-progress. Increment runningbufspace again before actually launching the original I/O request. Prior to the above two changes, the system could deadlock if enough I/O requests were blocked by snaplk to prevent runningbufspace from falling below lorunningspace and one of the bawrite() calls in ffs_copyonwrite() blocked in waitrunningbufspace() while holding snaplk. See <http://www.holm.cc/stress/log/cons143.html>
This commit is contained in:
parent
9e241c5ef2
commit
6c8b634f1d
5 changed files with 23 additions and 9 deletions
|
|
@ -318,7 +318,7 @@ bufspacewakeup(void)
|
|||
* runningbufwakeup() - in-progress I/O accounting.
|
||||
*
|
||||
*/
|
||||
static __inline void
|
||||
void
|
||||
runningbufwakeup(struct buf *bp)
|
||||
{
|
||||
|
||||
|
|
@ -847,8 +847,7 @@ bufwrite(struct buf *bp)
|
|||
* or syncer daemon trying to clean up as that can lead
|
||||
* to deadlock.
|
||||
*/
|
||||
if (curthread->td_proc != bufdaemonproc &&
|
||||
curthread->td_proc != updateproc)
|
||||
if ((curthread->td_pflags & TDP_NORUNNINGBUF) == 0)
|
||||
waitrunningbufspace();
|
||||
}
|
||||
|
||||
|
|
@ -1964,6 +1963,7 @@ buf_daemon()
|
|||
/*
|
||||
* This process is allowed to take the buffer cache to the limit
|
||||
*/
|
||||
curthread->td_pflags |= TDP_NORUNNINGBUF;
|
||||
mtx_lock(&bdlock);
|
||||
for (;;) {
|
||||
bd_request = 0;
|
||||
|
|
|
|||
|
|
@ -1524,7 +1524,7 @@ sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS)
|
|||
SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0,
|
||||
sysctl_vfs_worklist_len, "I", "Syncer thread worklist length");
|
||||
|
||||
struct proc *updateproc;
|
||||
static struct proc *updateproc;
|
||||
static void sched_sync(void);
|
||||
static struct kproc_desc up_kp = {
|
||||
"syncer",
|
||||
|
|
@ -1601,6 +1601,7 @@ sched_sync(void)
|
|||
first_printf = 1;
|
||||
syncer_state = SYNCER_RUNNING;
|
||||
starttime = time_uptime;
|
||||
td->td_pflags |= TDP_NORUNNINGBUF;
|
||||
|
||||
EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc,
|
||||
SHUTDOWN_PRI_LAST);
|
||||
|
|
|
|||
|
|
@ -477,6 +477,7 @@ extern int nswbuf; /* Number of swap I/O buffer headers. */
|
|||
extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
|
||||
extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
|
||||
|
||||
void runningbufwakeup(struct buf *);
|
||||
caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est);
|
||||
void bufinit(void);
|
||||
void bwillwrite(void);
|
||||
|
|
|
|||
|
|
@ -378,6 +378,7 @@ struct thread {
|
|||
#define TDP_SCHED4 0x00008000 /* Reserved for scheduler private use */
|
||||
#define TDP_GEOM 0x00010000 /* Settle GEOM before finishing syscall */
|
||||
#define TDP_SOFTDEP 0x00020000 /* Stuck processing softdep worklist */
|
||||
#define TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */
|
||||
|
||||
/*
|
||||
* Reasons that the current thread can not be run yet.
|
||||
|
|
@ -833,7 +834,6 @@ TAILQ_HEAD(threadqueue, thread);
|
|||
extern struct proclist allproc; /* List of all processes. */
|
||||
extern struct proclist zombproc; /* List of zombie processes. */
|
||||
extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */
|
||||
extern struct proc *updateproc; /* Process slot for syncer (sic). */
|
||||
|
||||
extern struct uma_zone *proc_zone;
|
||||
|
||||
|
|
|
|||
|
|
@ -1997,6 +1997,12 @@ ffs_copyonwrite(devvp, bp)
|
|||
VI_UNLOCK(devvp);
|
||||
return (0);
|
||||
}
|
||||
/*
|
||||
* Since I/O on bp isn't yet in progress and it may be blocked
|
||||
* for a long time waiting on snaplk, back it out of
|
||||
* runningbufspace, possibly waking other threads waiting for space.
|
||||
*/
|
||||
runningbufwakeup(bp);
|
||||
/*
|
||||
* Not in the precomputed list, so check the snapshots.
|
||||
*/
|
||||
|
|
@ -2028,7 +2034,7 @@ retry:
|
|||
goto retry;
|
||||
}
|
||||
snapshot_locked = 1;
|
||||
td->td_pflags |= TDP_COWINPROGRESS;
|
||||
td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
|
||||
error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
|
||||
fs->fs_bsize, KERNCRED, BA_METAONLY, &ibp);
|
||||
td->td_pflags &= ~TDP_COWINPROGRESS;
|
||||
|
|
@ -2065,7 +2071,7 @@ retry:
|
|||
goto retry;
|
||||
}
|
||||
snapshot_locked = 1;
|
||||
td->td_pflags |= TDP_COWINPROGRESS;
|
||||
td->td_pflags |= TDP_COWINPROGRESS | TDP_NORUNNINGBUF;
|
||||
error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn),
|
||||
fs->fs_bsize, KERNCRED, 0, &cbp);
|
||||
td->td_pflags &= ~TDP_COWINPROGRESS;
|
||||
|
|
@ -2120,10 +2126,16 @@ retry:
|
|||
if (dopersistence && VTOI(vp)->i_effnlink > 0)
|
||||
(void) ffs_syncvnode(vp, MNT_WAIT);
|
||||
}
|
||||
if (snapshot_locked)
|
||||
if (snapshot_locked) {
|
||||
lockmgr(vp->v_vnlock, LK_RELEASE, NULL, td);
|
||||
else
|
||||
td->td_pflags &= ~TDP_NORUNNINGBUF;
|
||||
} else
|
||||
VI_UNLOCK(devvp);
|
||||
/*
|
||||
* I/O on bp will now be started, so count it in runningbufspace.
|
||||
*/
|
||||
if (bp->b_runningbufspace)
|
||||
atomic_add_int(&runningbufspace, bp->b_runningbufspace);
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue