diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h index 164b0f07293..b071c63926e 100644 --- a/sys/sys/eventhandler.h +++ b/sys/sys/eventhandler.h @@ -277,4 +277,11 @@ typedef void (*ada_probe_veto_fn)(void *, struct cam_path *, struct ata_params *, int *); EVENTHANDLER_DECLARE(ada_probe_veto, ada_probe_veto_fn); +/* Swap device events */ +struct swdevt; +typedef void (*swapon_fn)(void *, struct swdevt *); +typedef void (*swapoff_fn)(void *, struct swdevt *); +EVENTHANDLER_DECLARE(swapon, swapon_fn); +EVENTHANDLER_DECLARE(swapoff, swapoff_fn); + #endif /* _SYS_EVENTHANDLER_H_ */ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index ad306a6c6af..33b3ab1359a 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1632,6 +1632,13 @@ swap_pager_isswapped(vm_object_t object, struct swdevt *sp) return (0); } +int +swap_pager_nswapdev(void) +{ + + return (nswapdev); +} + /* * SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in * @@ -1750,6 +1757,7 @@ restart: pause("swpoff", hz / 20); goto full_rescan; } + EVENTHANDLER_INVOKE(swapoff, sp); } /************************************************************************ @@ -2209,6 +2217,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks, swapon_check_swzone(swap_total / PAGE_SIZE); swp_sizecheck(); mtx_unlock(&sw_dev_mtx); + EVENTHANDLER_INVOKE(swapon, sp); } /* diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index 83567f47609..75bde77f8d2 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -83,6 +83,7 @@ vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex); void swap_pager_freespace(vm_object_t, vm_pindex_t, vm_size_t); void swap_pager_swap_init(void); int swap_pager_isswapped(vm_object_t, struct swdevt *); +int swap_pager_nswapdev(void); int swap_pager_reserve(vm_object_t, vm_pindex_t, vm_size_t); void swap_pager_status(int *total, int *used); void swapoff_all(void); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index a0da9bb40e3..503b02be29f 
100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -393,6 +393,11 @@ vm_page_domain_init(struct vm_domain *vmd) "vm laundry pagequeue"; *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) = &vm_cnt.v_laundry_count; + *__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) = + "vm unswappable pagequeue"; + /* Unswappable dirty pages are counted as being in the laundry. */ + *__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) = + &vm_cnt.v_laundry_count; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; @@ -2578,7 +2583,7 @@ vm_page_enqueue(uint8_t queue, vm_page_t m) KASSERT(queue < PQ_COUNT, ("vm_page_enqueue: invalid queue %u request for page %p", queue, m)); - if (queue == PQ_LAUNDRY) + if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE) pq = &vm_dom[0].vmd_pagequeues[queue]; else pq = &vm_phys_domain(m)->vmd_pagequeues[queue]; @@ -2946,6 +2951,23 @@ vm_page_launder(vm_page_t m) } } +/* + * vm_page_unswappable + * + * Put a page in the PQ_UNSWAPPABLE holding queue. 
+ */ +void +vm_page_unswappable(vm_page_t m) +{ + + vm_page_assert_locked(m); + KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0, + ("page %p already unswappable", m)); + if (m->queue != PQ_NONE) + vm_page_dequeue(m); + vm_page_enqueue(PQ_UNSWAPPABLE, m); +} + /* * vm_page_try_to_free() * @@ -3534,13 +3556,14 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) db_printf("pq_free %d\n", vm_cnt.v_free_count); for (dom = 0; dom < vm_ndomains; dom++) { db_printf( - "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d\n", + "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n", dom, vm_dom[dom].vmd_page_count, vm_dom[dom].vmd_free_count, vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt, - vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt); + vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt, + vm_dom[dom].vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt); } } diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 6e177194a53..f53e41ea689 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -207,7 +207,8 @@ struct vm_page { #define PQ_INACTIVE 0 #define PQ_ACTIVE 1 #define PQ_LAUNDRY 2 -#define PQ_COUNT 3 +#define PQ_UNSWAPPABLE 3 +#define PQ_COUNT 4 TAILQ_HEAD(pglist, vm_page); SLIST_HEAD(spglist, vm_page); @@ -347,7 +348,7 @@ extern struct mtx_padalign pa_lock[]; #include /* - * Each pageable resident page falls into one of four lists: + * Each pageable resident page falls into one of five lists: * * free * Available for allocation now. @@ -360,6 +361,10 @@ extern struct mtx_padalign pa_lock[]; * This is the list of pages that should be * paged out next. * + * unswappable + * Dirty anonymous pages that cannot be paged + * out because no swap device is configured. + * * active * Pages that are "active", i.e., they have been * recently referenced. 
@@ -483,6 +488,7 @@ vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); +void vm_page_unswappable(vm_page_t m); boolean_t vm_page_unwire(vm_page_t m, uint8_t queue); void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_wire (vm_page_t); @@ -707,7 +713,7 @@ static inline bool vm_page_in_laundry(vm_page_t m) { - return (m->queue == PQ_LAUNDRY); + return (m->queue == PQ_LAUNDRY || m->queue == PQ_UNSWAPPABLE); } #endif /* _KERNEL */ diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index a62aebdd28e..e21ee9595d9 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -182,6 +182,7 @@ static int vm_pageout_update_period; static int disable_swap_pageouts; static int lowmem_period = 10; static time_t lowmem_uptime; +static int swapdev_enabled; #if defined(NO_SWAPPING) static int vm_swap_enabled = 0; @@ -568,12 +569,24 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, case VM_PAGER_ERROR: case VM_PAGER_FAIL: /* - * If the page couldn't be paged out, then reactivate - * it so that it doesn't clog the laundry and inactive - * queues. (We will try paging it out again later). + * If the page couldn't be paged out to swap because the + * pager wasn't able to find space, place the page in + * the PQ_UNSWAPPABLE holding queue. This is an + * optimization that prevents the page daemon from + * wasting CPU cycles on pages that cannot be reclaimed + * because no swap device is configured. + * + * Otherwise, reactivate the page so that it doesn't + * clog the laundry and inactive queues. (We will try + * paging it out again later.) 
*/ vm_page_lock(mt); - vm_page_activate(mt); + if (object->type == OBJT_SWAP && + pageout_status[i] == VM_PAGER_FAIL) { + vm_page_unswappable(mt); + numpagedout++; + } else + vm_page_activate(mt); vm_page_unlock(mt); if (eio != NULL && i >= mreq && i - mreq < runlen) *eio = TRUE; @@ -600,6 +613,21 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, return (numpagedout); } +static void +vm_pageout_swapon(void *arg __unused, struct swdevt *sp __unused) +{ + + atomic_store_rel_int(&swapdev_enabled, 1); +} + +static void +vm_pageout_swapoff(void *arg __unused, struct swdevt *sp __unused) +{ + + if (swap_pager_nswapdev() == 1) + atomic_store_rel_int(&swapdev_enabled, 0); +} + #if !defined(NO_SWAPPING) /* * vm_pageout_object_deactivate_pages @@ -893,7 +921,7 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) vnodes_skipped = 0; /* - * Scan the laundry queue for pages eligible to be laundered. We stop + * Scan the laundry queues for pages eligible to be laundered. We stop * once the target number of dirty pages have been laundered, or once * we've reached the end of the queue. A single iteration of this loop * may cause more than one page to be laundered because of clustering. @@ -901,11 +929,18 @@ vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall) * maxscan ensures that we don't re-examine requeued pages. Any * additional pages written as part of a cluster are subtracted from * maxscan since they must be taken from the laundry queue. + * + * As an optimization, we avoid laundering from PQ_UNSWAPPABLE when no + * swap devices are configured. 
*/ - pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; - maxscan = pq->pq_cnt; + if (atomic_load_acq_int(&swapdev_enabled)) + pq = &vmd->vmd_pagequeues[PQ_UNSWAPPABLE]; + else + pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; +scan: vm_pagequeue_lock(pq); + maxscan = pq->pq_cnt; queue_locked = true; for (m = TAILQ_FIRST(&pq->pq_pl); m != NULL && maxscan-- > 0 && launder > 0; @@ -1070,6 +1105,11 @@ relock_queue: } vm_pagequeue_unlock(pq); + if (launder > 0 && pq == &vmd->vmd_pagequeues[PQ_UNSWAPPABLE]) { + pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; + goto scan; + } + /* * Wakeup the sync daemon if we skipped a vnode in a writeable object * and we didn't launder enough pages. @@ -1131,6 +1171,14 @@ vm_pageout_laundry_worker(void *arg) target = 0; last_launder = 0; + /* + * Calls to these handlers are serialized by the swap syscall lock. + */ + (void)EVENTHANDLER_REGISTER(swapon, vm_pageout_swapon, domain, + EVENTHANDLER_PRI_ANY); + (void)EVENTHANDLER_REGISTER(swapoff, vm_pageout_swapoff, domain, + EVENTHANDLER_PRI_ANY); + /* * The pageout laundry worker is never done, so loop forever. */ @@ -1492,18 +1540,22 @@ drop_page: /* * Wake up the laundry thread so that it can perform any needed * laundering. If we didn't meet our target, we're in shortfall and - * need to launder more aggressively. + * need to launder more aggressively. If PQ_LAUNDRY is empty and no + * swap devices are configured, the laundry thread has no work to do, so + * don't bother waking it up. 
*/ if (vm_laundry_request == VM_LAUNDRY_IDLE && starting_page_shortage > 0) { pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY]; vm_pagequeue_lock(pq); - if (page_shortage > 0) { - vm_laundry_request = VM_LAUNDRY_SHORTFALL; - PCPU_INC(cnt.v_pdshortfalls); - } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL) - vm_laundry_request = VM_LAUNDRY_BACKGROUND; - wakeup(&vm_laundry_request); + if (pq->pq_cnt > 0 || atomic_load_acq_int(&swapdev_enabled)) { + if (page_shortage > 0) { + vm_laundry_request = VM_LAUNDRY_SHORTFALL; + PCPU_INC(cnt.v_pdshortfalls); + } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL) + vm_laundry_request = VM_LAUNDRY_BACKGROUND; + wakeup(&vm_laundry_request); + } vm_pagequeue_unlock(pq); }