diff --git a/sys/netinet/sctp_bsd_addr.c b/sys/netinet/sctp_bsd_addr.c index e508a4ab986..c15384f5936 100644 --- a/sys/netinet/sctp_bsd_addr.c +++ b/sys/netinet/sctp_bsd_addr.c @@ -49,16 +49,6 @@ __FBSDID("$FreeBSD$"); #include /* Declare all of our malloc named types */ - -/* Note to Michael/Peter for mac-os, - * I think mac has this too since I - * do see the M_PCB type, so I - * will also put in the mac file the - * MALLOC_DECLARE. If this does not - * work for mac uncomment the defines for - * the strings that we use in Panda, I put - * them in comments in the mac-os file. - */ MALLOC_DEFINE(SCTP_M_MAP, "sctp_map", "sctp asoc map descriptor"); MALLOC_DEFINE(SCTP_M_STRMI, "sctp_stri", "sctp stream in array"); MALLOC_DEFINE(SCTP_M_STRMO, "sctp_stro", "sctp stream out array"); @@ -79,46 +69,77 @@ MALLOC_DEFINE(SCTP_M_MVRF, "sctp_mvrf", "sctp mvrf pcb list"); MALLOC_DEFINE(SCTP_M_ITER, "sctp_iter", "sctp iterator control"); MALLOC_DEFINE(SCTP_M_SOCKOPT, "sctp_socko", "sctp socket option"); -#if defined(SCTP_USE_THREAD_BASED_ITERATOR) +/* Global NON-VNET structure that controls the iterator */ +struct iterator_control sctp_it_ctl; +static int __sctp_thread_based_iterator_started = 0; + + +static void +sctp_cleanup_itqueue(void) +{ + struct sctp_iterator *it; + + while ((it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead)) != NULL) { + if (it->function_atend != NULL) { + (*it->function_atend) (it->pointer, it->val); + } + TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); + SCTP_FREE(it, SCTP_M_ITER); + } +} + + void sctp_wakeup_iterator(void) { - wakeup(&SCTP_BASE_INFO(iterator_running)); + wakeup(&sctp_it_ctl.iterator_running); } static void sctp_iterator_thread(void *v) { - CURVNET_SET((struct vnet *)v); SCTP_IPI_ITERATOR_WQ_LOCK(); - SCTP_BASE_INFO(iterator_running) = 0; while (1) { - msleep(&SCTP_BASE_INFO(iterator_running), - &SCTP_BASE_INFO(ipi_iterator_wq_mtx), + msleep(&sctp_it_ctl.iterator_running, + &sctp_it_ctl.ipi_iterator_wq_mtx, 0, "waiting_for_work", 0); - if (SCTP_BASE_INFO(threads_must_exit)) { + if (sctp_it_ctl.iterator_flags & SCTP_ITERATOR_MUST_EXIT) { SCTP_IPI_ITERATOR_WQ_DESTROY(); + SCTP_ITERATOR_LOCK_DESTROY(); + sctp_cleanup_itqueue(); + __sctp_thread_based_iterator_started = 0; kthread_exit(); } sctp_iterator_worker(); } - CURVNET_RESTORE(); } void sctp_startup_iterator(void) { + if (__sctp_thread_based_iterator_started) { + /* You only get one */ + return; + } + /* init the iterator head */ + __sctp_thread_based_iterator_started = 1; + sctp_it_ctl.iterator_running = 0; + sctp_it_ctl.iterator_flags = 0; + sctp_it_ctl.cur_it = NULL; + SCTP_ITERATOR_LOCK_INIT(); + SCTP_IPI_ITERATOR_WQ_INIT(); + TAILQ_INIT(&sctp_it_ctl.iteratorhead); + int ret; ret = kproc_create(sctp_iterator_thread, - (void *)curvnet, - &SCTP_BASE_INFO(thread_proc), + (void *)NULL, + &sctp_it_ctl.thread_proc, RFPROC, SCTP_KTHREAD_PAGES, SCTP_KTRHEAD_NAME); } -#endif #ifdef INET6 diff --git a/sys/netinet/sctp_bsd_addr.h b/sys/netinet/sctp_bsd_addr.h index 2260cf93aae..ae2fa52596c 100644 --- a/sys/netinet/sctp_bsd_addr.h +++ b/sys/netinet/sctp_bsd_addr.h @@ -37,12 +37,11 @@ __FBSDID("$FreeBSD$"); #if defined(_KERNEL) || defined(__Userspace__) -#if defined(SCTP_USE_THREAD_BASED_ITERATOR) +extern struct iterator_control sctp_it_ctl; void sctp_wakeup_iterator(void); void sctp_startup_iterator(void); -#endif #ifdef INET6 void sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa); diff --git a/sys/netinet/sctp_constants.h b/sys/netinet/sctp_constants.h index 7eae6774d3f..271e19702fb 100644 --- a/sys/netinet/sctp_constants.h +++ b/sys/netinet/sctp_constants.h @@ -87,10 +87,6 @@ __FBSDID("$FreeBSD$"); /* #define SCTP_AUDITING_ENABLED 1 used for debug/auditing */ #define SCTP_AUDIT_SIZE 256 -/* temporary disabled since it does not work with VNET. */ -#if 0 -#define SCTP_USE_THREAD_BASED_ITERATOR 1 -#endif #define SCTP_KTRHEAD_NAME "sctp_iterator" #define SCTP_KTHREAD_PAGES 0 @@ -572,7 +568,6 @@ __FBSDID("$FreeBSD$"); #define SCTP_TIMER_TYPE_EVENTWAKE 13 #define SCTP_TIMER_TYPE_STRRESET 14 #define SCTP_TIMER_TYPE_INPKILL 15 -#define SCTP_TIMER_TYPE_ITERATOR 16 #define SCTP_TIMER_TYPE_EARLYFR 17 #define SCTP_TIMER_TYPE_ASOCKILL 18 #define SCTP_TIMER_TYPE_ADDR_WQ 19 diff --git a/sys/netinet/sctp_lock_bsd.h b/sys/netinet/sctp_lock_bsd.h index 6db9057ef2a..75c382b6fc6 100644 --- a/sys/netinet/sctp_lock_bsd.h +++ b/sys/netinet/sctp_lock_bsd.h @@ -107,42 +107,36 @@ extern int sctp_logoff_stuff; #define SCTP_INP_INFO_WUNLOCK() rw_wunlock(&SCTP_BASE_INFO(ipi_ep_mtx)) -#define SCTP_IPI_ADDR_INIT() \ +#define SCTP_IPI_ADDR_INIT() \ rw_init(&SCTP_BASE_INFO(ipi_addr_mtx), "sctp-addr") - #define SCTP_IPI_ADDR_DESTROY() do { \ if(rw_wowned(&SCTP_BASE_INFO(ipi_addr_mtx))) { \ rw_wunlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } \ rw_destroy(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } while (0) - - - #define SCTP_IPI_ADDR_RLOCK() do { \ rw_rlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } while (0) - #define SCTP_IPI_ADDR_WLOCK() do { \ rw_wlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } while (0) - #define SCTP_IPI_ADDR_RUNLOCK() rw_runlock(&SCTP_BASE_INFO(ipi_addr_mtx)) #define SCTP_IPI_ADDR_WUNLOCK() rw_wunlock(&SCTP_BASE_INFO(ipi_addr_mtx)) #define SCTP_IPI_ITERATOR_WQ_INIT() \ - mtx_init(&SCTP_BASE_INFO(ipi_iterator_wq_mtx), "sctp-it-wq", "sctp_it_wq", MTX_DEF) + mtx_init(&sctp_it_ctl.ipi_iterator_wq_mtx, "sctp-it-wq", "sctp_it_wq", MTX_DEF) #define SCTP_IPI_ITERATOR_WQ_DESTROY() \ - mtx_destroy(&SCTP_BASE_INFO(ipi_iterator_wq_mtx)) + mtx_destroy(&sctp_it_ctl.ipi_iterator_wq_mtx) #define SCTP_IPI_ITERATOR_WQ_LOCK() do { \ - mtx_lock(&SCTP_BASE_INFO(ipi_iterator_wq_mtx)); \ + mtx_lock(&sctp_it_ctl.ipi_iterator_wq_mtx); \ } while (0) -#define SCTP_IPI_ITERATOR_WQ_UNLOCK() mtx_unlock(&SCTP_BASE_INFO(ipi_iterator_wq_mtx)) +#define SCTP_IPI_ITERATOR_WQ_UNLOCK() mtx_unlock(&sctp_it_ctl.ipi_iterator_wq_mtx) #define SCTP_IP_PKTLOG_INIT() \ @@ -300,25 +294,45 @@ extern int sctp_logoff_stuff; #endif #define SCTP_ITERATOR_LOCK_INIT() \ - mtx_init(&SCTP_BASE_INFO(it_mtx), "sctp-it", "iterator", MTX_DEF) + mtx_init(&sctp_it_ctl.it_mtx, "sctp-it", "iterator", MTX_DEF) #ifdef INVARIANTS #define SCTP_ITERATOR_LOCK() \ do { \ - if (mtx_owned(&SCTP_BASE_INFO(it_mtx))) \ + if (mtx_owned(&sctp_it_ctl.it_mtx)) \ panic("Iterator Lock"); \ - mtx_lock(&SCTP_BASE_INFO(it_mtx)); \ + mtx_lock(&sctp_it_ctl.it_mtx); \ } while (0) #else #define SCTP_ITERATOR_LOCK() \ do { \ - mtx_lock(&SCTP_BASE_INFO(it_mtx)); \ + mtx_lock(&sctp_it_ctl.it_mtx); \ } while (0) #endif -#define SCTP_ITERATOR_UNLOCK() mtx_unlock(&SCTP_BASE_INFO(it_mtx)) -#define SCTP_ITERATOR_LOCK_DESTROY() mtx_destroy(&SCTP_BASE_INFO(it_mtx)) +#define SCTP_ITERATOR_UNLOCK() mtx_unlock(&sctp_it_ctl.it_mtx) +#define SCTP_ITERATOR_LOCK_DESTROY() mtx_destroy(&sctp_it_ctl.it_mtx) + + +#define SCTP_WQ_ADDR_INIT() do { \ + mtx_init(&SCTP_BASE_INFO(wq_addr_mtx), "sctp-addr-wq","sctp_addr_wq",MTX_DEF); \ + } while (0) + +#define SCTP_WQ_ADDR_DESTROY() do { \ + if(mtx_owned(&SCTP_BASE_INFO(wq_addr_mtx))) { \ + mtx_unlock(&SCTP_BASE_INFO(wq_addr_mtx)); \ + } \ + mtx_destroy(&SCTP_BASE_INFO(wq_addr_mtx)); \ + } while (0) + +#define SCTP_WQ_ADDR_LOCK() do { \ + mtx_lock(&SCTP_BASE_INFO(wq_addr_mtx)); \ +} while (0) +#define SCTP_WQ_ADDR_UNLOCK() do { \ + mtx_unlock(&SCTP_BASE_INFO(wq_addr_mtx)); \ +} while (0) + #define SCTP_INCR_EP_COUNT() \ diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index bbf62e48791..a89573ab998 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -692,13 +692,11 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, (void)SCTP_GETTIME_TIMEVAL(&wi->start_time); wi->ifa = sctp_ifap; wi->action = SCTP_ADD_IP_ADDRESS; - SCTP_IPI_ITERATOR_WQ_LOCK(); - /* - * Should this really be a tailq? As it is we will process - * the newest first :-0 - */ + + SCTP_WQ_ADDR_LOCK(); LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr); - SCTP_IPI_ITERATOR_WQ_UNLOCK(); + SCTP_WQ_ADDR_UNLOCK(); + sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, (struct sctp_inpcb *)NULL, (struct sctp_tcb *)NULL, @@ -806,13 +804,13 @@ out_now: (void)SCTP_GETTIME_TIMEVAL(&wi->start_time); wi->ifa = sctp_ifap; wi->action = SCTP_DEL_IP_ADDRESS; - SCTP_IPI_ITERATOR_WQ_LOCK(); + SCTP_WQ_ADDR_LOCK(); /* * Should this really be a tailq? As it is we will process * the newest first :-0 */ LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr); - SCTP_IPI_ITERATOR_WQ_UNLOCK(); + SCTP_WQ_ADDR_UNLOCK(); sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, (struct sctp_inpcb *)NULL, @@ -3017,57 +3015,68 @@ continue_anyway: static void -sctp_iterator_inp_being_freed(struct sctp_inpcb *inp, struct sctp_inpcb *inp_next) +sctp_iterator_inp_being_freed(struct sctp_inpcb *inp) { - struct sctp_iterator *it; + struct sctp_iterator *it, *nit; /* * We enter with the only the ITERATOR_LOCK in place and a write * lock on the inp_info stuff. */ - + it = sctp_it_ctl.cur_it; + if (it && (it->vn != curvnet)) { + /* Its not looking at our VNET */ + return; + } + if (it && (it->inp == inp)) { + /* + * This is tricky and we hold the iterator lock, but when it + * returns and gets the lock (when we release it) the + * iterator will try to operate on inp. We need to stop that + * from happening. But of course the iterator has a + * reference on the stcb and inp. We can mark it and it will + * stop. + * + * If its a single iterator situation, we set the end iterator + * flag. Otherwise we set the iterator to go to the next + * inp. + * + */ + if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { + sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT; + } else { + sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_INP; + } + } /* - * Go through all iterators, we must do this since it is possible - * that some iterator does NOT have the lock, but is waiting for it. - * And the one that had the lock has either moved in the last - * iteration or we just cleared it above. We need to find all of - * those guys. The list of iterators should never be very big - * though. + * Now go through and remove any single reference to our inp that + * may be still pending on the list */ - TAILQ_FOREACH(it, &SCTP_BASE_INFO(iteratorhead), sctp_nxt_itr) { - if (it == inp->inp_starting_point_for_iterator) - /* skip this guy, he's special */ + SCTP_IPI_ITERATOR_WQ_LOCK(); + it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); + while (it) { + nit = TAILQ_NEXT(it, sctp_nxt_itr); + if (it->vn != curvnet) { + it = nit; continue; + } if (it->inp == inp) { - /* - * This is tricky and we DON'T lock the iterator. - * Reason is he's running but waiting for me since - * inp->inp_starting_point_for_iterator has the lock - * on me (the guy above we skipped). This tells us - * its is not running but waiting for - * inp->inp_starting_point_for_iterator to be - * released by the guy that does have our INP in a - * lock. - */ + /* This one points to me is it inp specific? */ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { - it->inp = NULL; - it->stcb = NULL; + /* Remove and free this one */ + TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, + it, sctp_nxt_itr); + if (it->function_atend != NULL) { + (*it->function_atend) (it->pointer, it->val); + } + SCTP_FREE(it, SCTP_M_ITER); } else { - /* set him up to do the next guy not me */ - it->inp = inp_next; - it->stcb = NULL; + it->inp = LIST_NEXT(it->inp, sctp_list); } } + it = nit; } - it = inp->inp_starting_point_for_iterator; - if (it) { - if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { - it->inp = NULL; - } else { - it->inp = inp_next; - } - it->stcb = NULL; - } + SCTP_IPI_ITERATOR_WQ_UNLOCK(); } /* release sctp_inpcb unbind the port */ @@ -3083,7 +3092,6 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) * all associations. d) finally the ep itself. */ struct sctp_pcb *m; - struct sctp_inpcb *inp_save; struct sctp_tcb *asoc, *nasoc; struct sctp_laddr *laddr, *nladdr; struct inpcb *ip_pcb; @@ -3100,6 +3108,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) sctp_log_closing(inp, NULL, 0); #endif SCTP_ITERATOR_LOCK(); + so = inp->sctp_socket; if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { /* been here before.. eeks.. get out of here */ @@ -3126,6 +3135,9 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) inp->sctp_flags |= SCTP_PCB_FLAGS_DONT_WAKE; inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT; inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT; + + /* mark any iterators on the list or being processed */ + sctp_iterator_inp_being_freed(inp); } sctp_timer_stop(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL, SCTP_FROM_SCTP_PCB + SCTP_LOC_1); @@ -3494,11 +3506,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys); } - inp_save = LIST_NEXT(inp, sctp_list); LIST_REMOVE(inp, sctp_list); - /* fix any iterators only after out of the list */ - sctp_iterator_inp_being_freed(inp, inp_save); /* * if we have an address list the following will free the list of * ifaddr's that are set into this ep. Again macro limitations here, @@ -5436,8 +5445,6 @@ sctp_pcb_init() /* init the empty list of (All) Endpoints */ LIST_INIT(&SCTP_BASE_INFO(listhead)); - /* init the iterator head */ - TAILQ_INIT(&SCTP_BASE_INFO(iteratorhead)); /* init the hash table of endpoints */ TUNABLE_INT_FETCH("net.inet.sctp.tcbhashsize", &SCTP_BASE_SYSCTL(sctp_hashtblsize)); @@ -5500,16 +5507,15 @@ sctp_pcb_init() /* Master Lock INIT for info structure */ SCTP_INP_INFO_LOCK_INIT(); SCTP_STATLOG_INIT_LOCK(); - SCTP_ITERATOR_LOCK_INIT(); SCTP_IPI_COUNT_INIT(); SCTP_IPI_ADDR_INIT(); - SCTP_IPI_ITERATOR_WQ_INIT(); #ifdef SCTP_PACKET_LOGGING SCTP_IP_PKTLOG_INIT(); #endif LIST_INIT(&SCTP_BASE_INFO(addr_wq)); + SCTP_WQ_ADDR_INIT(); /* not sure if we need all the counts */ SCTP_BASE_INFO(ipi_count_ep) = 0; /* assoc/tcb zone info */ @@ -5537,11 +5543,7 @@ sctp_pcb_init() LIST_INIT(&SCTP_BASE_INFO(vtag_timewait)[i]); } -#if defined(SCTP_USE_THREAD_BASED_ITERATOR) - SCTP_BASE_INFO(iterator_running) = 0; - SCTP_BASE_INFO(threads_must_exit) = 0; sctp_startup_iterator(); -#endif /* * INIT the default VRF which for BSD is the only one, other O/S's @@ -5565,30 +5567,49 @@ sctp_pcb_finish(void) struct sctpvtaghead *chain; struct sctp_tagblock *twait_block, *prev_twait_block; struct sctp_laddr *wi; - struct sctp_iterator *it; int i; -#if defined(SCTP_USE_THREAD_BASED_ITERATOR) - SCTP_BASE_INFO(threads_must_exit) = 1; - /* Wake the thread up so it will exit now */ - sctp_wakeup_iterator(); + /* + * Free BSD the it thread never exits but we do clean up. The only + * way freebsd reaches here if we have VRF's but we still add the + * ifdef to make it compile on old versions. + */ + { + struct sctp_iterator *it, *nit; + + SCTP_IPI_ITERATOR_WQ_LOCK(); + it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); + while (it) { + nit = TAILQ_NEXT(it, sctp_nxt_itr); + if (it->vn != curvnet) { + it = nit; + continue; + } + TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, + it, sctp_nxt_itr); + if (it->function_atend != NULL) { + (*it->function_atend) (it->pointer, it->val); + } + SCTP_FREE(it, SCTP_M_ITER); + it = nit; + } + SCTP_IPI_ITERATOR_WQ_UNLOCK(); + SCTP_ITERATOR_LOCK(); + if ((sctp_it_ctl.cur_it) && + (sctp_it_ctl.cur_it->vn == curvnet)) { + sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT; + } + SCTP_ITERATOR_UNLOCK(); + } -#endif SCTP_OS_TIMER_STOP(&SCTP_BASE_INFO(addr_wq_timer.timer)); - SCTP_IPI_ITERATOR_WQ_LOCK(); + SCTP_WQ_ADDR_LOCK(); while ((wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq))) != NULL) { LIST_REMOVE(wi, sctp_nxt_addr); SCTP_DECR_LADDR_COUNT(); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi); } - SCTP_IPI_ITERATOR_WQ_UNLOCK(); - while ((it = TAILQ_FIRST(&SCTP_BASE_INFO(iteratorhead))) != NULL) { - if (it->function_atend != NULL) { - (*it->function_atend) (it->pointer, it->val); - } - TAILQ_REMOVE(&SCTP_BASE_INFO(iteratorhead), it, sctp_nxt_itr); - SCTP_FREE(it, SCTP_M_ITER); - } + SCTP_WQ_ADDR_UNLOCK(); /* * free the vrf/ifn/ifa lists and hashes (be sure address monitor is @@ -5640,10 +5661,11 @@ sctp_pcb_finish(void) SCTP_IP_PKTLOG_DESTROY(); #endif SCTP_IPI_ADDR_DESTROY(); - SCTP_ITERATOR_LOCK_DESTROY(); SCTP_STATLOG_DESTROY(); SCTP_INP_INFO_LOCK_DESTROY(); + SCTP_WQ_ADDR_DESTROY(); + SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_ep)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asoc)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_laddr)); @@ -6632,6 +6654,7 @@ sctp_initiate_iterator(inp_func inpf, it->asoc_state = asoc_state; it->function_inp_end = inpe; it->no_chunk_output = chunk_output_off; + it->vn = curvnet; if (s_inp) { it->inp = s_inp; it->iterator_flags = SCTP_ITERATOR_DO_SINGLE_INP; @@ -6647,22 +6670,11 @@ sctp_initiate_iterator(inp_func inpf, if (it->inp) { SCTP_INP_INCR_REF(it->inp); } - TAILQ_INSERT_TAIL(&SCTP_BASE_INFO(iteratorhead), it, sctp_nxt_itr); -#if defined(SCTP_USE_THREAD_BASED_ITERATOR) - if (SCTP_BASE_INFO(iterator_running) == 0) { + TAILQ_INSERT_TAIL(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); + if (sctp_it_ctl.iterator_running == 0) { sctp_wakeup_iterator(); } SCTP_IPI_ITERATOR_WQ_UNLOCK(); -#else - if (it->inp) - SCTP_INP_DECR_REF(it->inp); - SCTP_IPI_ITERATOR_WQ_UNLOCK(); - /* Init the timer */ - SCTP_OS_TIMER_INIT(&it->tmr.timer); - /* add to the list of all iterators */ - sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR, (struct sctp_inpcb *)it, - NULL, NULL); -#endif /* sa_ignore MEMLEAK {memory is put on the tailq for the iterator} */ return (0); } diff --git a/sys/netinet/sctp_pcb.h b/sys/netinet/sctp_pcb.h index 1a468f8632d..6f657e8ef24 100644 --- a/sys/netinet/sctp_pcb.h +++ b/sys/netinet/sctp_pcb.h @@ -177,8 +177,6 @@ struct sctp_epinfo { struct sctppcbhead listhead; struct sctpladdr addr_wq; - struct sctpiterators iteratorhead; - int threads_must_exit; /* ep zone info */ sctp_zone_t ipi_zone_ep; sctp_zone_t ipi_zone_asoc; @@ -191,10 +189,10 @@ struct sctp_epinfo { sctp_zone_t ipi_zone_asconf_ack; struct rwlock ipi_ep_mtx; - struct mtx it_mtx; struct mtx ipi_iterator_wq_mtx; struct rwlock ipi_addr_mtx; struct mtx ipi_pktlog_mtx; + struct mtx wq_addr_mtx; uint32_t ipi_count_ep; /* assoc/tcb zone info */ @@ -228,14 +226,9 @@ struct sctp_epinfo { uint32_t ipi_free_chunks; uint32_t ipi_free_strmoq; - struct sctpvtaghead vtag_timewait[SCTP_STACK_VTAG_HASH_SIZE]; /* address work queue handling */ -#if defined(SCTP_USE_THREAD_BASED_ITERATOR) - uint32_t iterator_running; - SCTP_PROCESS_STRUCT thread_proc; -#endif struct sctp_timer addr_wq_timer; }; diff --git a/sys/netinet/sctp_structs.h b/sys/netinet/sctp_structs.h index cd798b521d8..ed443011a1d 100644 --- a/sys/netinet/sctp_structs.h +++ b/sys/netinet/sctp_structs.h @@ -108,9 +108,11 @@ typedef void (*end_func) (void *ptr, uint32_t val); struct sctp_iterator { TAILQ_ENTRY(sctp_iterator) sctp_nxt_itr; + struct vnet *vn; struct sctp_timer tmr; struct sctp_inpcb *inp; /* current endpoint */ struct sctp_tcb *stcb; /* current* assoc */ + struct sctp_inpcb *next_inp; /* special hook to skip to */ asoc_func function_assoc; /* per assoc function */ inp_func function_inp; /* per endpoint function */ inp_func function_inp_end; /* end INP function */ @@ -129,6 +131,7 @@ struct sctp_iterator { #define SCTP_ITERATOR_DO_ALL_INP 0x00000001 #define SCTP_ITERATOR_DO_SINGLE_INP 0x00000002 + TAILQ_HEAD(sctpiterators, sctp_iterator); struct sctp_copy_all { @@ -145,6 +148,20 @@ struct sctp_asconf_iterator { int cnt; }; +struct iterator_control { + struct mtx ipi_iterator_wq_mtx; + struct mtx it_mtx; + SCTP_PROCESS_STRUCT thread_proc; + struct sctpiterators iteratorhead; + struct sctp_iterator *cur_it; + uint32_t iterator_running; + uint32_t iterator_flags; +}; + +#define SCTP_ITERATOR_MUST_EXIT 0x00000001 +#define SCTP_ITERATOR_STOP_CUR_IT 0x00000002 +#define SCTP_ITERATOR_STOP_CUR_INP 0x00000004 + struct sctp_net_route { sctp_rtentry_t *ro_rt; void *ro_lle; diff --git a/sys/netinet/sctp_timer.c b/sys/netinet/sctp_timer.c index 334401838e1..646c1127c07 100644 --- a/sys/netinet/sctp_timer.c +++ b/sys/netinet/sctp_timer.c @@ -1880,143 +1880,3 @@ sctp_autoclose_timer(struct sctp_inpcb *inp, } } } - -void -sctp_iterator_timer(struct sctp_iterator *it) -{ - int iteration_count = 0; - int inp_skip = 0; - - /* - * only one iterator can run at a time. This is the only way we can - * cleanly pull ep's from underneath all the running interators when - * a ep is freed. - */ - SCTP_ITERATOR_LOCK(); - if (it->inp == NULL) { - /* iterator is complete */ -done_with_iterator: - SCTP_ITERATOR_UNLOCK(); - SCTP_INP_INFO_WLOCK(); - TAILQ_REMOVE(&SCTP_BASE_INFO(iteratorhead), it, sctp_nxt_itr); - /* stopping the callout is not needed, in theory */ - SCTP_INP_INFO_WUNLOCK(); - (void)SCTP_OS_TIMER_STOP(&it->tmr.timer); - if (it->function_atend != NULL) { - (*it->function_atend) (it->pointer, it->val); - } - SCTP_FREE(it, SCTP_M_ITER); - return; - } -select_a_new_ep: - SCTP_INP_WLOCK(it->inp); - while (((it->pcb_flags) && - ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) || - ((it->pcb_features) && - ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) { - /* endpoint flags or features don't match, so keep looking */ - if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { - SCTP_INP_WUNLOCK(it->inp); - goto done_with_iterator; - } - SCTP_INP_WUNLOCK(it->inp); - it->inp = LIST_NEXT(it->inp, sctp_list); - if (it->inp == NULL) { - goto done_with_iterator; - } - SCTP_INP_WLOCK(it->inp); - } - if ((it->inp->inp_starting_point_for_iterator != NULL) && - (it->inp->inp_starting_point_for_iterator != it)) { - SCTP_PRINTF("Iterator collision, waiting for one at %p\n", - it->inp); - SCTP_INP_WUNLOCK(it->inp); - goto start_timer_return; - } - /* mark the current iterator on the endpoint */ - it->inp->inp_starting_point_for_iterator = it; - SCTP_INP_WUNLOCK(it->inp); - SCTP_INP_RLOCK(it->inp); - /* now go through each assoc which is in the desired state */ - if (it->done_current_ep == 0) { - if (it->function_inp != NULL) - inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val); - it->done_current_ep = 1; - } - if (it->stcb == NULL) { - /* run the per instance function */ - it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list); - } - SCTP_INP_RUNLOCK(it->inp); - if ((inp_skip) || it->stcb == NULL) { - if (it->function_inp_end != NULL) { - inp_skip = (*it->function_inp_end) (it->inp, - it->pointer, - it->val); - } - goto no_stcb; - } - if ((it->stcb) && - (it->stcb->asoc.stcb_starting_point_for_iterator == it)) { - it->stcb->asoc.stcb_starting_point_for_iterator = NULL; - } - while (it->stcb) { - SCTP_TCB_LOCK(it->stcb); - if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) { - /* not in the right state... keep looking */ - SCTP_TCB_UNLOCK(it->stcb); - goto next_assoc; - } - /* mark the current iterator on the assoc */ - it->stcb->asoc.stcb_starting_point_for_iterator = it; - /* see if we have limited out the iterator loop */ - iteration_count++; - if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) { - start_timer_return: - /* set a timer to continue this later */ - if (it->stcb) - SCTP_TCB_UNLOCK(it->stcb); - sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR, - (struct sctp_inpcb *)it, NULL, NULL); - SCTP_ITERATOR_UNLOCK(); - return; - } - /* run function on this one */ - (*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val); - - /* - * we lie here, it really needs to have its own type but - * first I must verify that this won't effect things :-0 - */ - if (it->no_chunk_output == 0) - sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED); - - SCTP_TCB_UNLOCK(it->stcb); -next_assoc: - it->stcb = LIST_NEXT(it->stcb, sctp_tcblist); - if (it->stcb == NULL) { - if (it->function_inp_end != NULL) { - inp_skip = (*it->function_inp_end) (it->inp, - it->pointer, - it->val); - } - } - } -no_stcb: - /* done with all assocs on this endpoint, move on to next endpoint */ - it->done_current_ep = 0; - SCTP_INP_WLOCK(it->inp); - it->inp->inp_starting_point_for_iterator = NULL; - SCTP_INP_WUNLOCK(it->inp); - if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { - it->inp = NULL; - } else { - SCTP_INP_INFO_RLOCK(); - it->inp = LIST_NEXT(it->inp, sctp_list); - SCTP_INP_INFO_RUNLOCK(); - } - if (it->inp == NULL) { - goto done_with_iterator; - } - goto select_a_new_ep; -} diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c index 04f76732cf1..7a30a192f10 100644 --- a/sys/netinet/sctputil.c +++ b/sys/netinet/sctputil.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #define NUMBER_OF_MTU_SIZES 18 @@ -1255,7 +1256,6 @@ sctp_expand_mapping_array(struct sctp_association *asoc, uint32_t needed) } -#if defined(SCTP_USE_THREAD_BASED_ITERATOR) static void sctp_iterator_work(struct sctp_iterator *it) { @@ -1277,27 +1277,23 @@ done_with_iterator: return; } select_a_new_ep: - SCTP_INP_WLOCK(it->inp); + SCTP_INP_RLOCK(it->inp); while (((it->pcb_flags) && ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) || ((it->pcb_features) && ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) { /* endpoint flags or features don't match, so keep looking */ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { - SCTP_INP_WUNLOCK(it->inp); + SCTP_INP_RUNLOCK(it->inp); goto done_with_iterator; } - SCTP_INP_WUNLOCK(it->inp); + SCTP_INP_RUNLOCK(it->inp); it->inp = LIST_NEXT(it->inp, sctp_list); if (it->inp == NULL) { goto done_with_iterator; } - SCTP_INP_WLOCK(it->inp); + SCTP_INP_RLOCK(it->inp); } - - SCTP_INP_WUNLOCK(it->inp); - SCTP_INP_RLOCK(it->inp); - /* now go through each assoc which is in the desired state */ if (it->done_current_ep == 0) { if (it->function_inp != NULL) @@ -1330,13 +1326,34 @@ select_a_new_ep: /* Pause to let others grab the lock */ atomic_add_int(&it->stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(it->stcb); - SCTP_INP_INCR_REF(it->inp); SCTP_INP_RUNLOCK(it->inp); SCTP_ITERATOR_UNLOCK(); SCTP_ITERATOR_LOCK(); + if (sctp_it_ctl.iterator_flags) { + /* We won't be staying here */ + SCTP_INP_DECR_REF(it->inp); + atomic_add_int(&it->stcb->asoc.refcnt, -1); + if (sctp_it_ctl.iterator_flags & + SCTP_ITERATOR_MUST_EXIT) { + goto done_with_iterator; + } + if (sctp_it_ctl.iterator_flags & + SCTP_ITERATOR_STOP_CUR_IT) { + sctp_it_ctl.iterator_flags &= ~SCTP_ITERATOR_STOP_CUR_IT; + goto done_with_iterator; + } + if (sctp_it_ctl.iterator_flags & + SCTP_ITERATOR_STOP_CUR_INP) { + sctp_it_ctl.iterator_flags &= ~SCTP_ITERATOR_STOP_CUR_INP; + goto no_stcb; + } + /* If we reach here huh? */ + printf("Unknown it ctl flag %x\n", + sctp_it_ctl.iterator_flags); + sctp_it_ctl.iterator_flags = 0; + } SCTP_INP_RLOCK(it->inp); - SCTP_INP_DECR_REF(it->inp); SCTP_TCB_LOCK(it->stcb); atomic_add_int(&it->stcb->asoc.refcnt, -1); @@ -1368,8 +1385,6 @@ next_assoc: no_stcb: /* done with all assocs on this endpoint, move on to next endpoint */ it->done_current_ep = 0; - SCTP_INP_WLOCK(it->inp); - SCTP_INP_WUNLOCK(it->inp); if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { it->inp = NULL; } else { @@ -1390,27 +1405,28 @@ sctp_iterator_worker(void) /* This function is called with the WQ lock in place */ - SCTP_BASE_INFO(iterator_running) = 1; -again: - it = TAILQ_FIRST(&SCTP_BASE_INFO(iteratorhead)); + sctp_it_ctl.iterator_running = 1; + sctp_it_ctl.cur_it = it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); while (it) { /* now lets work on this one */ - TAILQ_REMOVE(&SCTP_BASE_INFO(iteratorhead), it, sctp_nxt_itr); + TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); SCTP_IPI_ITERATOR_WQ_UNLOCK(); + CURVNET_SET(it->vn); sctp_iterator_work(it); + + CURVNET_RESTORE(); SCTP_IPI_ITERATOR_WQ_LOCK(); + if (sctp_it_ctl.iterator_flags & SCTP_ITERATOR_MUST_EXIT) { + sctp_it_ctl.cur_it = NULL; + break; + } /* sa_ignore FREED_MEMORY */ - it = TAILQ_FIRST(&SCTP_BASE_INFO(iteratorhead)); + sctp_it_ctl.cur_it = it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); } - if (TAILQ_FIRST(&SCTP_BASE_INFO(iteratorhead))) { - goto again; - } - SCTP_BASE_INFO(iterator_running) = 0; + sctp_it_ctl.iterator_running = 0; return; } -#endif - static void sctp_handle_addr_wq(void) @@ -1431,7 +1447,8 @@ sctp_handle_addr_wq(void) } LIST_INIT(&asc->list_of_work); asc->cnt = 0; - SCTP_IPI_ITERATOR_WQ_LOCK(); + + SCTP_WQ_ADDR_LOCK(); wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq)); while (wi != NULL) { LIST_REMOVE(wi, sctp_nxt_addr); @@ -1439,7 +1456,8 @@ sctp_handle_addr_wq(void) asc->cnt++; wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq)); } - SCTP_IPI_ITERATOR_WQ_UNLOCK(); + SCTP_WQ_ADDR_UNLOCK(); + if (asc->cnt == 0) { SCTP_FREE(asc, SCTP_M_ASC_IT); } else { @@ -1470,7 +1488,6 @@ sctp_timeout_handler(void *t) #endif int did_output, type; - struct sctp_iterator *it = NULL; tmr = (struct sctp_timer *)t; inp = (struct sctp_inpcb *)tmr->ep; @@ -1509,10 +1526,6 @@ sctp_timeout_handler(void *t) } /* if this is an iterator timeout, get the struct and clear inp */ tmr->stopped_from = 0xa003; - if (tmr->type == SCTP_TIMER_TYPE_ITERATOR) { - it = (struct sctp_iterator *)inp; - inp = NULL; - } type = tmr->type; if (inp) { SCTP_INP_INCR_REF(inp); @@ -1611,10 +1624,6 @@ sctp_timeout_handler(void *t) case SCTP_TIMER_TYPE_ADDR_WQ: sctp_handle_addr_wq(); break; - case SCTP_TIMER_TYPE_ITERATOR: - SCTP_STAT_INCR(sctps_timoiterator); - sctp_iterator_timer(it); - break; case SCTP_TIMER_TYPE_SEND: if ((stcb == NULL) || (inp == NULL)) { break; @@ -1962,15 +1971,6 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, tmr = &SCTP_BASE_INFO(addr_wq_timer); to_ticks = SCTP_ADDRESS_TICK_DELAY; break; - case SCTP_TIMER_TYPE_ITERATOR: - { - struct sctp_iterator *it; - - it = (struct sctp_iterator *)inp; - tmr = &it->tmr; - to_ticks = SCTP_ITERATOR_TICKS; - } - break; case SCTP_TIMER_TYPE_SEND: /* Here we use the RTO timer */ { @@ -2327,14 +2327,6 @@ sctp_timer_stop(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, tmr = &net->fr_timer; SCTP_STAT_INCR(sctps_earlyfrstop); break; - case SCTP_TIMER_TYPE_ITERATOR: - { - struct sctp_iterator *it; - - it = (struct sctp_iterator *)inp; - tmr = &it->tmr; - } - break; case SCTP_TIMER_TYPE_SEND: if ((stcb == NULL) || (net == NULL)) { return; @@ -6154,13 +6146,13 @@ sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id) atomic_add_int(&ifa->refcount, 1); /* Now add it to the work queue */ - SCTP_IPI_ITERATOR_WQ_LOCK(); + SCTP_WQ_ADDR_LOCK(); /* * Should this really be a tailq? As it is we will process the * newest first :-0 */ LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr); - SCTP_IPI_ITERATOR_WQ_UNLOCK(); + SCTP_WQ_ADDR_UNLOCK(); sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, (struct sctp_inpcb *)NULL, (struct sctp_tcb *)NULL,