mirror of
https://github.com/opnsense/src.git
synced 2026-05-28 04:12:45 -04:00
amd64 pmap: per-domain pv chunk list
This significantly reduces contention since chunks get created and removed all the time. See the review for sample results.

Reviewed by: kib
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D21976
This commit is contained in:
parent
639ec13157
commit
08ded448cf
1 changed files with 121 additions and 57 deletions
|
|
@ -165,6 +165,12 @@ __FBSDID("$FreeBSD$");
|
|||
#include <machine/sysarch.h>
|
||||
#include <machine/tss.h>
|
||||
|
||||
#ifdef NUMA
|
||||
#define PMAP_MEMDOM MAXMEMDOM
|
||||
#else
|
||||
#define PMAP_MEMDOM 1
|
||||
#endif
|
||||
|
||||
static __inline boolean_t
|
||||
pmap_type_guest(pmap_t pmap)
|
||||
{
|
||||
|
|
@ -420,8 +426,30 @@ static int pmap_initialized;
|
|||
* Data for the pv entry allocation mechanism.
|
||||
* Updates to pv_invl_gen are protected by the pv list lock but reads are not.
|
||||
*/
|
||||
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
|
||||
static struct mtx __exclusive_cache_line pv_chunks_mutex;
|
||||
#ifdef NUMA
|
||||
static __inline int
|
||||
pc_to_domain(struct pv_chunk *pc)
|
||||
{
|
||||
|
||||
return (_vm_phys_domain(DMAP_TO_PHYS((vm_offset_t)pc)));
|
||||
}
|
||||
#else
|
||||
static __inline int
|
||||
pc_to_domain(struct pv_chunk *pc __unused)
|
||||
{
|
||||
|
||||
return (0);
|
||||
}
|
||||
#endif
|
||||
|
||||
struct pv_chunks_list {
|
||||
struct mtx pvc_lock;
|
||||
TAILQ_HEAD(pch, pv_chunk) pvc_list;
|
||||
int active_reclaims;
|
||||
} __aligned(CACHE_LINE_SIZE);
|
||||
|
||||
struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
|
||||
|
||||
#ifdef NUMA
|
||||
struct pmap_large_md_page {
|
||||
struct rwlock pv_lock;
|
||||
|
|
@ -2044,10 +2072,12 @@ pmap_init(void)
|
|||
}
|
||||
|
||||
/*
|
||||
* Initialize the pv chunk list mutex.
|
||||
* Initialize pv chunk lists.
|
||||
*/
|
||||
mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
|
||||
|
||||
for (i = 0; i < PMAP_MEMDOM; i++) {
|
||||
mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL, MTX_DEF);
|
||||
TAILQ_INIT(&pv_chunks[i].pvc_list);
|
||||
}
|
||||
pmap_init_pv_table();
|
||||
|
||||
pmap_initialized = 1;
|
||||
|
|
@ -4150,8 +4180,9 @@ reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked_pmap, bool start_di)
|
|||
* exacerbating the shortage of free pv entries.
|
||||
*/
|
||||
static vm_page_t
|
||||
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
|
||||
reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain)
|
||||
{
|
||||
struct pv_chunks_list *pvc;
|
||||
struct pv_chunk *pc, *pc_marker, *pc_marker_end;
|
||||
struct pv_chunk_header pc_marker_b, pc_marker_end_b;
|
||||
struct md_page *pvh;
|
||||
|
|
@ -4166,7 +4197,6 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
|
|||
uint64_t inuse;
|
||||
int bit, field, freed;
|
||||
bool start_di;
|
||||
static int active_reclaims = 0;
|
||||
|
||||
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
|
||||
KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
|
||||
|
|
@ -4186,10 +4216,11 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
|
|||
*/
|
||||
start_di = pmap_not_in_di();
|
||||
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
active_reclaims++;
|
||||
TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
|
||||
TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru);
|
||||
pvc = &pv_chunks[domain];
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
pvc->active_reclaims++;
|
||||
TAILQ_INSERT_HEAD(&pvc->pvc_list, pc_marker, pc_lru);
|
||||
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc_marker_end, pc_lru);
|
||||
while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
|
||||
SLIST_EMPTY(&free)) {
|
||||
next_pmap = pc->pc_pmap;
|
||||
|
|
@ -4202,7 +4233,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
|
|||
*/
|
||||
goto next_chunk;
|
||||
}
|
||||
mtx_unlock(&pv_chunks_mutex);
|
||||
mtx_unlock(&pvc->pvc_lock);
|
||||
|
||||
/*
|
||||
* A pv_chunk can only be removed from the pc_lru list
|
||||
|
|
@ -4219,17 +4250,17 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
|
|||
PMAP_LOCK(pmap);
|
||||
if (start_di)
|
||||
pmap_delayed_invl_start();
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
continue;
|
||||
} else if (pmap != locked_pmap) {
|
||||
if (PMAP_TRYLOCK(pmap)) {
|
||||
if (start_di)
|
||||
pmap_delayed_invl_start();
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
continue;
|
||||
} else {
|
||||
pmap = NULL; /* pmap is not locked */
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
pc = TAILQ_NEXT(pc_marker, pc_lru);
|
||||
if (pc == NULL ||
|
||||
pc->pc_pmap != next_pmap)
|
||||
|
|
@ -4286,7 +4317,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
|
|||
}
|
||||
}
|
||||
if (freed == 0) {
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
goto next_chunk;
|
||||
}
|
||||
/* Every freed mapping is for a 4 KB page. */
|
||||
|
|
@ -4303,19 +4334,19 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
|
|||
/* Entire chunk is free; return it. */
|
||||
m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
|
||||
dump_drop_page(m_pc->phys_addr);
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
|
||||
break;
|
||||
}
|
||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
/* One freed pv entry in locked_pmap is sufficient. */
|
||||
if (pmap == locked_pmap)
|
||||
break;
|
||||
next_chunk:
|
||||
TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
|
||||
TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
|
||||
if (active_reclaims == 1 && pmap != NULL) {
|
||||
TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
|
||||
TAILQ_INSERT_AFTER(&pvc->pvc_list, pc, pc_marker, pc_lru);
|
||||
if (pvc->active_reclaims == 1 && pmap != NULL) {
|
||||
/*
|
||||
* Rotate the pv chunks list so that we do not
|
||||
* scan the same pv chunks that could not be
|
||||
|
|
@ -4323,17 +4354,17 @@ next_chunk:
|
|||
* and/or superpage mapping) on every
|
||||
* invocation of reclaim_pv_chunk().
|
||||
*/
|
||||
while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) {
|
||||
while ((pc = TAILQ_FIRST(&pvc->pvc_list)) != pc_marker) {
|
||||
MPASS(pc->pc_pmap != NULL);
|
||||
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
|
||||
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
|
||||
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
|
||||
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
|
||||
}
|
||||
}
|
||||
}
|
||||
TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
|
||||
TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
|
||||
active_reclaims--;
|
||||
mtx_unlock(&pv_chunks_mutex);
|
||||
TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
|
||||
TAILQ_REMOVE(&pvc->pvc_list, pc_marker_end, pc_lru);
|
||||
pvc->active_reclaims--;
|
||||
mtx_unlock(&pvc->pvc_lock);
|
||||
reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di);
|
||||
if (m_pc == NULL && !SLIST_EMPTY(&free)) {
|
||||
m_pc = SLIST_FIRST(&free);
|
||||
|
|
@ -4345,6 +4376,23 @@ next_chunk:
|
|||
return (m_pc);
|
||||
}
|
||||
|
||||
static vm_page_t
|
||||
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
|
||||
{
|
||||
vm_page_t m;
|
||||
int i, domain;
|
||||
|
||||
domain = PCPU_GET(domain);
|
||||
for (i = 0; i < vm_ndomains; i++) {
|
||||
m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain);
|
||||
if (m != NULL)
|
||||
break;
|
||||
domain = (domain + 1) % vm_ndomains;
|
||||
}
|
||||
|
||||
return (m);
|
||||
}
|
||||
|
||||
/*
|
||||
* free the pv_entry back to the free list
|
||||
*/
|
||||
|
|
@ -4394,29 +4442,37 @@ free_pv_chunk_dequeued(struct pv_chunk *pc)
|
|||
static void
|
||||
free_pv_chunk(struct pv_chunk *pc)
|
||||
{
|
||||
struct pv_chunks_list *pvc;
|
||||
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
|
||||
mtx_unlock(&pv_chunks_mutex);
|
||||
pvc = &pv_chunks[pc_to_domain(pc)];
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
|
||||
mtx_unlock(&pvc->pvc_lock);
|
||||
free_pv_chunk_dequeued(pc);
|
||||
}
|
||||
|
||||
static void
|
||||
free_pv_chunk_batch(struct pv_chunklist *batch)
|
||||
{
|
||||
struct pv_chunks_list *pvc;
|
||||
struct pv_chunk *pc, *npc;
|
||||
int i;
|
||||
|
||||
if (TAILQ_EMPTY(batch))
|
||||
return;
|
||||
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
TAILQ_FOREACH(pc, batch, pc_list) {
|
||||
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
|
||||
for (i = 0; i < vm_ndomains; i++) {
|
||||
if (TAILQ_EMPTY(&batch[i]))
|
||||
continue;
|
||||
pvc = &pv_chunks[i];
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
TAILQ_FOREACH(pc, &batch[i], pc_list) {
|
||||
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
|
||||
}
|
||||
mtx_unlock(&pvc->pvc_lock);
|
||||
}
|
||||
mtx_unlock(&pv_chunks_mutex);
|
||||
|
||||
TAILQ_FOREACH_SAFE(pc, batch, pc_list, npc) {
|
||||
free_pv_chunk_dequeued(pc);
|
||||
for (i = 0; i < vm_ndomains; i++) {
|
||||
TAILQ_FOREACH_SAFE(pc, &batch[i], pc_list, npc) {
|
||||
free_pv_chunk_dequeued(pc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4431,6 +4487,7 @@ free_pv_chunk_batch(struct pv_chunklist *batch)
|
|||
static pv_entry_t
|
||||
get_pv_entry(pmap_t pmap, struct rwlock **lockp)
|
||||
{
|
||||
struct pv_chunks_list *pvc;
|
||||
int bit, field;
|
||||
pv_entry_t pv;
|
||||
struct pv_chunk *pc;
|
||||
|
|
@ -4482,9 +4539,10 @@ retry:
|
|||
pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
|
||||
pc->pc_map[1] = PC_FREE1;
|
||||
pc->pc_map[2] = PC_FREE2;
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
|
||||
mtx_unlock(&pv_chunks_mutex);
|
||||
pvc = &pv_chunks[_vm_phys_domain(m->phys_addr)];
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
|
||||
mtx_unlock(&pvc->pvc_lock);
|
||||
pv = &pc->pc_pventry[0];
|
||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
|
||||
PV_STAT(atomic_add_long(&pv_entry_count, 1));
|
||||
|
|
@ -4530,10 +4588,11 @@ popcnt_pc_map_pq(uint64_t *map)
|
|||
static void
|
||||
reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
|
||||
{
|
||||
struct pch new_tail;
|
||||
struct pv_chunks_list *pvc;
|
||||
struct pch new_tail[PMAP_MEMDOM];
|
||||
struct pv_chunk *pc;
|
||||
vm_page_t m;
|
||||
int avail, free;
|
||||
int avail, free, i;
|
||||
bool reclaimed;
|
||||
|
||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
|
||||
|
|
@ -4545,7 +4604,8 @@ reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
|
|||
* reclaim_pv_chunk() could recycle one of these chunks. In
|
||||
* contrast, these chunks must be added to the pmap upon allocation.
|
||||
*/
|
||||
TAILQ_INIT(&new_tail);
|
||||
for (i = 0; i < PMAP_MEMDOM; i++)
|
||||
TAILQ_INIT(&new_tail[i]);
|
||||
retry:
|
||||
avail = 0;
|
||||
TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
|
||||
|
|
@ -4580,7 +4640,7 @@ retry:
|
|||
pc->pc_map[1] = PC_FREE1;
|
||||
pc->pc_map[2] = PC_FREE2;
|
||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
|
||||
TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
|
||||
TAILQ_INSERT_TAIL(&new_tail[pc_to_domain(pc)], pc, pc_lru);
|
||||
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
|
||||
|
||||
/*
|
||||
|
|
@ -4591,10 +4651,13 @@ retry:
|
|||
if (reclaimed)
|
||||
goto retry;
|
||||
}
|
||||
if (!TAILQ_EMPTY(&new_tail)) {
|
||||
mtx_lock(&pv_chunks_mutex);
|
||||
TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
|
||||
mtx_unlock(&pv_chunks_mutex);
|
||||
for (i = 0; i < vm_ndomains; i++) {
|
||||
if (TAILQ_EMPTY(&new_tail[i]))
|
||||
continue;
|
||||
pvc = &pv_chunks[i];
|
||||
mtx_lock(&pvc->pvc_lock);
|
||||
TAILQ_CONCAT(&pvc->pvc_list, &new_tail[i], pc_lru);
|
||||
mtx_unlock(&pvc->pvc_lock);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -7020,7 +7083,7 @@ pmap_remove_pages(pmap_t pmap)
|
|||
pt_entry_t *pte, tpte;
|
||||
pt_entry_t PG_M, PG_RW, PG_V;
|
||||
struct spglist free;
|
||||
struct pv_chunklist free_chunks;
|
||||
struct pv_chunklist free_chunks[PMAP_MEMDOM];
|
||||
vm_page_t m, mpte, mt;
|
||||
pv_entry_t pv;
|
||||
struct md_page *pvh;
|
||||
|
|
@ -7028,7 +7091,7 @@ pmap_remove_pages(pmap_t pmap)
|
|||
struct rwlock *lock;
|
||||
int64_t bit;
|
||||
uint64_t inuse, bitmask;
|
||||
int allfree, field, freed, idx;
|
||||
int allfree, field, freed, i, idx;
|
||||
boolean_t superpage;
|
||||
vm_paddr_t pa;
|
||||
|
||||
|
|
@ -7056,7 +7119,8 @@ pmap_remove_pages(pmap_t pmap)
|
|||
PG_V = pmap_valid_bit(pmap);
|
||||
PG_RW = pmap_rw_bit(pmap);
|
||||
|
||||
TAILQ_INIT(&free_chunks);
|
||||
for (i = 0; i < PMAP_MEMDOM; i++)
|
||||
TAILQ_INIT(&free_chunks[i]);
|
||||
SLIST_INIT(&free);
|
||||
PMAP_LOCK(pmap);
|
||||
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
|
||||
|
|
@ -7184,14 +7248,14 @@ pmap_remove_pages(pmap_t pmap)
|
|||
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
|
||||
if (allfree) {
|
||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
|
||||
TAILQ_INSERT_TAIL(&free_chunks, pc, pc_list);
|
||||
TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc, pc_list);
|
||||
}
|
||||
}
|
||||
if (lock != NULL)
|
||||
rw_wunlock(lock);
|
||||
pmap_invalidate_all(pmap);
|
||||
pmap_pkru_deassign_all(pmap);
|
||||
free_pv_chunk_batch(&free_chunks);
|
||||
free_pv_chunk_batch((struct pv_chunklist *)&free_chunks);
|
||||
PMAP_UNLOCK(pmap);
|
||||
vm_page_free_pages_toq(&free, true);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue