diff --git a/include/haproxy/server-t.h b/include/haproxy/server-t.h index 56d2edef7..74f206a92 100644 --- a/include/haproxy/server-t.h +++ b/include/haproxy/server-t.h @@ -280,6 +280,7 @@ struct srv_per_thread { struct ceb_root *safe_conns; /* Safe idle connections */ struct ceb_root *avail_conns; /* Connections in use, but with still new streams available */ struct server *srv; /* Back-pointer to the server */ + struct eb32_node idle_node; /* When to next do cleanup in the idle connections */ #ifdef USE_QUIC struct ist quic_retry_token; #endif @@ -402,7 +403,6 @@ struct server { * thread, and generally at the same time. */ THREAD_ALIGN(); - struct eb32_node idle_node; /* When to next do cleanup in the idle connections */ unsigned int curr_idle_conns; /* Current number of orphan idling connections, both the idle and the safe lists */ unsigned int curr_idle_nb; /* Current number of connections in the idle list */ unsigned int curr_safe_nb; /* Current number of connections in the safe list */ diff --git a/include/haproxy/server.h b/include/haproxy/server.h index 0d5892968..3f1ebe5ee 100644 --- a/include/haproxy/server.h +++ b/include/haproxy/server.h @@ -41,9 +41,9 @@ #include -__decl_thread(extern HA_SPINLOCK_T idle_conn_srv_lock); extern struct idle_conns idle_conns[MAX_THREADS]; -extern struct task *idle_conn_task; +extern struct task *idle_conn_task[MAX_THREADS]; +extern struct eb_root idle_conn_srv[MAX_THREADS]; extern struct mt_list servers_list; extern struct dict server_key_dict; diff --git a/src/cfgparse.c b/src/cfgparse.c index 2e684e15d..5e54500e6 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -2486,16 +2486,17 @@ init_proxies_list_stage1: /* At this point, target names have already been resolved. */ /***********************************************************/ - idle_conn_task = task_new_anywhere(); - if (!idle_conn_task) { - ha_alert("parsing : failed to allocate global idle connection task.\n"); - cfgerr++; - } - else { - idle_conn_task->process = srv_cleanup_idle_conns; - idle_conn_task->context = NULL; + for (int i = 0; i < global.nbthread; i++) { + idle_conn_srv[i] = EB_ROOT; + idle_conn_task[i] = task_new_on(i); + if (!idle_conn_task[i]) { + ha_alert("parsing : failed to allocate global idle connection task.\n"); + cfgerr++; + } + else { + idle_conn_task[i]->process = srv_cleanup_idle_conns; + idle_conn_task[i]->context = NULL; - for (i = 0; i < global.nbthread; i++) { idle_conns[i].cleanup_task = task_new_on(i); if (!idle_conns[i].cleanup_task) { ha_alert("parsing : failed to allocate idle connection tasks for thread '%d'.\n", i); diff --git a/src/haproxy.c b/src/haproxy.c index 6af7b031e..804237203 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -2810,6 +2810,7 @@ void deinit(void) struct cfg_postparser *pprs, *pprsb; char **tmp = init_env; int cur_fd; + int i; /* the user may want to skip this phase */ if (global.tune.options & GTUNE_QUICK_EXIT) @@ -2886,8 +2887,10 @@ void deinit(void) ha_free(&global.server_state_base); ha_free(&global.server_state_file); ha_free(&global.stats_file); - task_destroy(idle_conn_task); - idle_conn_task = NULL; + for (i = 0; i < global.nbthread; i++) { + task_destroy(idle_conn_task[i]); + idle_conn_task[i] = NULL; + } list_for_each_entry_safe(log, logb, &global.loggers, list) { LIST_DEL_INIT(&log->list); diff --git a/src/server.c b/src/server.c index 330b08427..957fc4c34 100644 --- a/src/server.c +++ b/src/server.c @@ -76,9 +76,8 @@ struct srv_kw_list srv_keywords = { .list = LIST_HEAD_INIT(srv_keywords.list) }; -__decl_thread(HA_SPINLOCK_T idle_conn_srv_lock); -struct eb_root idle_conn_srv = EB_ROOT; -struct task *idle_conn_task __read_mostly = NULL; +struct eb_root idle_conn_srv[MAX_THREADS]; +struct task *idle_conn_task[MAX_THREADS] __read_mostly = {}; struct mt_list servers_list = MT_LIST_HEAD_INIT(servers_list); static struct task *server_atomic_sync_task = NULL; static event_hdl_async_equeue server_atomic_sync_queue; @@ -3281,6 +3280,8 @@ struct server *srv_drop(struct server *srv) /* This BUG_ON() is invalid for now as server released on deinit will * trigger it as they are not properly removed from their tree. + * This is even more relevant now, as we would need to check the + * idle_node for each thread */ //BUG_ON(ceb_intree(&srv->addr_node) || // srv->idle_node.node.leaf_p || @@ -6652,7 +6653,8 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap cebuis_item_delete(&be->used_server_addr, addr_node, addr_key, srv); /* remove srv from idle_node tree for idle conn cleanup */ - eb32_delete(&srv->idle_node); + for (ret = 0; ret < global.nbthread; ret++) + eb32_delete(&srv->per_thr[ret].idle_node); /* set LSB bit (odd bit) for reuse_cnt */ srv_id_reuse_cnt |= 1; @@ -7545,20 +7547,16 @@ int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_saf HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); _HA_ATOMIC_INC(&srv->curr_idle_thr[tid]); - if (HA_ATOMIC_LOAD(&srv->idle_node.node.leaf_p) == NULL) { - HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock); - if (_HA_ATOMIC_LOAD(&srv->idle_node.node.leaf_p) == NULL) { - srv->idle_node.key = tick_add(srv->pool_purge_delay, + if (srv->per_thr[tid].idle_node.node.leaf_p == NULL) { + srv->per_thr[tid].idle_node.key = tick_add(srv->pool_purge_delay, now_ms); - eb32_insert(&idle_conn_srv, &srv->idle_node); - if (!task_in_wq(idle_conn_task) && ! - task_in_rq(idle_conn_task)) { - task_schedule(idle_conn_task, - srv->idle_node.key); - } - BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list)); + eb32_insert(&idle_conn_srv[tid], &srv->per_thr[tid].idle_node); + if (!task_in_wq(idle_conn_task[tid]) && + !task_in_rq(idle_conn_task[tid])) { + task_schedule(idle_conn_task[tid], + srv->per_thr[tid].idle_node.key); } - HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock); + BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list)); } return 1; } @@ -7580,24 +7578,26 @@ struct task *srv_cleanup_idle_conns(struct task *task, void *context, unsigned i { struct server *srv; struct eb32_node *eb; - int i; unsigned int next_wakeup; + int mytid = tid; next_wakeup = TICK_ETERNITY; - HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock); while (1) { + struct srv_per_thread *per_thr; int exceed_conns; int to_kill; int curr_idle; + int max_conn; + int removed; - eb = eb32_lookup_ge(&idle_conn_srv, now_ms - TIMER_LOOK_BACK); + eb = eb32_lookup_ge(&idle_conn_srv[mytid], now_ms - TIMER_LOOK_BACK); if (!eb) { /* we might have reached the end of the tree, typically because * is in the first half and we're first scanning the last * half. Let's loop back to the beginning of the tree now. */ - eb = eb32_first(&idle_conn_srv); + eb = eb32_first(&idle_conn_srv[mytid]); if (likely(!eb)) break; } @@ -7606,7 +7606,8 @@ struct task *srv_cleanup_idle_conns(struct task *task, void *context, unsigned i next_wakeup = eb->key; break; } - srv = eb32_entry(eb, struct server, idle_node); + per_thr = eb32_entry(eb, struct srv_per_thread, idle_node); + srv = per_thr->srv; /* Calculate how many idle connections we want to kill : * we want to remove half the difference between the total @@ -7619,47 +7620,41 @@ struct task *srv_cleanup_idle_conns(struct task *task, void *context, unsigned i exceed_conns = srv->curr_used_conns + curr_idle - MAX(srv->max_used_conns, srv->est_need_conns); exceed_conns = to_kill = exceed_conns / 2 + (exceed_conns & 1); - srv->est_need_conns = (srv->est_need_conns + srv->max_used_conns) / 2; + /* + * It is acceptable not to lock anything before modifying + * est_need_conns and max_used_conns, even if multiple threads + * are running that task at the same time, we don't need a + * very high precision here, it will converge over time. + */ + HA_ATOMIC_STORE(&srv->est_need_conns, (srv->est_need_conns + srv->max_used_conns) / 2); if (srv->est_need_conns < srv->max_used_conns) - srv->est_need_conns = srv->max_used_conns; + HA_ATOMIC_STORE(&srv->est_need_conns, srv->max_used_conns); HA_ATOMIC_STORE(&srv->max_used_conns, srv->curr_used_conns); if (exceed_conns <= 0) goto remove; - /* check all threads starting with ours */ - for (i = tid;;) { - int max_conn; - int removed; + max_conn = (exceed_conns * srv->curr_idle_thr[mytid]) / curr_idle + 1; - max_conn = (exceed_conns * srv->curr_idle_thr[i]) / - curr_idle + 1; + HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[mytid].idle_conns_lock); + removed = srv_migrate_conns_to_remove(srv, mytid, max_conn); + HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[mytid].idle_conns_lock); - HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock); - removed = srv_migrate_conns_to_remove(srv, i, max_conn); - HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock); - - if (removed) - task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER); - - if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid) - break; - } + if (removed) + task_wakeup(idle_conns[mytid].cleanup_task, TASK_WOKEN_OTHER); remove: - eb32_delete(&srv->idle_node); + eb32_delete(&srv->per_thr[mytid].idle_node); - if (srv->curr_idle_conns) { + if (!LIST_ISEMPTY(&srv->per_thr[mytid].idle_conn_list)) { /* There are still more idle connections, add the * server back in the tree. */ - srv->idle_node.key = tick_add(srv->pool_purge_delay, now_ms); - eb32_insert(&idle_conn_srv, &srv->idle_node); - next_wakeup = tick_first(next_wakeup, srv->idle_node.key); + srv->per_thr[mytid].idle_node.key = tick_add(srv->pool_purge_delay, now_ms); + eb32_insert(&idle_conn_srv[mytid], &srv->per_thr[mytid].idle_node); + next_wakeup = tick_first(next_wakeup, srv->per_thr[mytid].idle_node.key); } } - HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock); - task->expire = next_wakeup; return task; }