From 9110db818a1e36cffed5208b6d005d9c1b84236a Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Mon, 21 Oct 2013 16:44:53 +0000 Subject: [PATCH] Add a resource limit for the total number of kqueues available to the user. Kqueue now saves the ucred of the allocating thread, to correctly decrement the counter on close. Under some specific, non-real-world usage scenarios for kqueue, it is possible for the kqueues to consume memory proportional to the square of the number of file descriptors available to the process. The limit allows an administrator to prevent such abuse. This is the kernel-mode side of the change, with the user-mode enabling commit following. Reported and tested by: pho Discussed with: jmg Sponsored by: The FreeBSD Foundation MFC after: 2 weeks --- sys/kern/kern_event.c | 24 +++++++++++++++++++++++- sys/kern/kern_resource.c | 18 ++++++++++++++++++ sys/sys/eventvar.h | 1 + sys/sys/resource.h | 4 +++- sys/sys/resourcevar.h | 2 ++ 5 files changed, 47 insertions(+), 2 deletions(-) diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 4c068bf0f31..a80e50d1fab 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -699,9 +700,23 @@ sys_kqueue(struct thread *td, struct kqueue_args *uap) struct filedesc *fdp; struct kqueue *kq; struct file *fp; + struct proc *p; + struct ucred *cred; int fd, error; - fdp = td->td_proc->p_fd; + p = td->td_proc; + cred = td->td_ucred; + crhold(cred); + PROC_LOCK(p); + if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td->td_proc, + RLIMIT_KQUEUES))) { + PROC_UNLOCK(p); + crfree(cred); + return (EMFILE); + } + PROC_UNLOCK(p); + + fdp = p->p_fd; error = falloc(td, &fp, &fd, 0); if (error) goto done2; @@ -711,6 +726,7 @@ sys_kqueue(struct thread *td, struct kqueue_args *uap) mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK); TAILQ_INIT(&kq->kq_head); kq->kq_fdp = fdp; + kq->kq_cred = cred; 
knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); @@ -723,6 +739,10 @@ sys_kqueue(struct thread *td, struct kqueue_args *uap) td->td_retval[0] = fd; done2: + if (error != 0) { + chgkqcnt(cred->cr_ruidinfo, -1, 0); + crfree(cred); + } return (error); } @@ -1767,6 +1787,8 @@ kqueue_close(struct file *fp, struct thread *td) free(kq->kq_knlist, M_KQUEUE); funsetown(&kq->kq_sigio); + chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0); + crfree(kq->kq_cred); free(kq, M_KQUEUE); fp->f_data = NULL; diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 57ee671aa79..470eaedd52d 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -1432,3 +1432,21 @@ chgptscnt(uip, diff, max) } return (1); } + +int +chgkqcnt(struct uidinfo *uip, int diff, rlim_t max) +{ + + if (diff > 0 && max != 0) { + if (atomic_fetchadd_long(&uip->ui_kqcnt, (long)diff) + + diff > max) { + atomic_subtract_long(&uip->ui_kqcnt, (long)diff); + return (0); + } + } else { + atomic_add_long(&uip->ui_kqcnt, (long)diff); + if (uip->ui_kqcnt < 0) + printf("negative kqcnt for uid = %d\n", uip->ui_uid); + } + return (1); +} diff --git a/sys/sys/eventvar.h b/sys/sys/eventvar.h index ed1067f1015..ce79677f3be 100644 --- a/sys/sys/eventvar.h +++ b/sys/sys/eventvar.h @@ -60,6 +60,7 @@ struct kqueue { u_long kq_knhashmask; /* size of knhash */ struct klist *kq_knhash; /* hash table for knotes */ struct task kq_task; + struct ucred *kq_cred; }; #endif /* !_SYS_EVENTVAR_H_ */ diff --git a/sys/sys/resource.h b/sys/sys/resource.h index 89dc967c233..fb9e05b2486 100644 --- a/sys/sys/resource.h +++ b/sys/sys/resource.h @@ -103,8 +103,9 @@ struct __wrusage { #define RLIMIT_AS RLIMIT_VMEM /* standard name for RLIMIT_VMEM */ #define RLIMIT_NPTS 11 /* pseudo-terminals */ #define RLIMIT_SWAP 12 /* swap used */ +#define RLIMIT_KQUEUES 13 /* kqueues allocated */ -#define RLIM_NLIMITS 13 /* number of resource limits */ +#define RLIM_NLIMITS 14 /* number of resource 
limits */ #define RLIM_INFINITY ((rlim_t)(((uint64_t)1 << 63) - 1)) /* XXX Missing: RLIM_SAVED_MAX, RLIM_SAVED_CUR */ @@ -129,6 +130,7 @@ static const char *rlimit_ident[RLIM_NLIMITS] = { "vmem", "npts", "swap", + "kqueues", }; #endif diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h index 0c9194895a3..1a35a9e8d3a 100644 --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -99,6 +99,7 @@ struct uidinfo { long ui_sbsize; /* (b) socket buffer space consumed */ long ui_proccnt; /* (b) number of processes */ long ui_ptscnt; /* (b) number of pseudo-terminals */ + long ui_kqcnt; /* (b) number of kqueues */ uid_t ui_uid; /* (a) uid */ u_int ui_ref; /* (b) reference count */ struct racct *ui_racct; /* (a) resource accounting */ @@ -115,6 +116,7 @@ void addupc_intr(struct thread *td, uintfptr_t pc, u_int ticks); void addupc_task(struct thread *td, uintfptr_t pc, u_int ticks); void calccru(struct proc *p, struct timeval *up, struct timeval *sp); void calcru(struct proc *p, struct timeval *up, struct timeval *sp); +int chgkqcnt(struct uidinfo *uip, int diff, rlim_t max); int chgproccnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t maxval);