mirror of
https://github.com/haproxy/haproxy.git
synced 2026-04-15 21:59:41 -04:00
MINOR: activity: collect time spent waiting on a lock for each task
When DEBUG_THREAD > 0, and if task profiling is enabled, then each
locking attempt will measure the time it takes to obtain the lock, then
add that time to a thread_ctx accumulator that the scheduler will then
retrieve to update the current task's sched_activity entry. The value
will then appear averaged over the number of calls in the lkw_avg column
of "show profiling tasks", such as below:
Tasks activity over 48.298 sec till 0.000 sec ago:
function calls cpu_tot cpu_avg lkw_avg lat_avg
h1_io_cb 3200170 26.81s 8.377us - 32.73us <- sock_conn_iocb@src/sock.c:1099 tasklet_wakeup
sc_conn_io_cb 1657841 1.645s 992.0ns - 853.0us <- sc_app_chk_rcv_conn@src/stconn.c:844 tasklet_wakeup
process_stream 1600450 49.16s 30.71us 1.936us 1.392ms <- sc_notify@src/stconn.c:1206 task_wakeup
process_stream 1600321 7.770m 291.3us 209.1us 901.6us <- stream_new@src/stream.c:585 task_wakeup
sc_conn_io_cb 1599928 7.975s 4.984us - 65.77us <- h1_wake_stream_for_recv@src/mux_h1.c:3633 tasklet_wakeup
task_process_applet 997609 46.37s 46.48us 16.80us 113.0us <- sc_app_chk_snd_applet@src/stconn.c:1043 appctx_wakeup
process_table_expire 922074 48.79s 52.92us 7.275us 181.1us <- run_tasks_from_lists@src/task.c:670 task_queue
stktable_add_pend_updates 705423 1.511s 2.142us - 56.81us <- stktable_add_pend_updates@src/stick_table.c:869 tasklet_wakeup
task_process_applet 683511 34.75s 50.84us 18.37us 153.3us <- __process_running_peer_sync@src/peers.c:3579 appctx_wakeup
h1_io_cb 535395 198.1ms 370.0ns 72.00ns 930.4us <- h1_takeover@src/mux_h1.c:5659 tasklet_wakeup
It now makes it pretty obvious which tasks (hence call chains) spend their
time waiting on a lock and for what share of their execution time.
This commit is contained in:
parent
1956c544b5
commit
503084643f
3 changed files with 17 additions and 2 deletions
|
|
@ -360,7 +360,15 @@ static inline unsigned long thread_isolated()
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define _lock_wait(_LK_, lbl, expr) do { \
|
#define _lock_wait(_LK_, lbl, expr) do { \
|
||||||
|
uint64_t lock_start = 0; \
|
||||||
|
extern uint64_t now_mono_time(void); \
|
||||||
|
if (_LK_ != _LK_UN) { \
|
||||||
|
if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) \
|
||||||
|
lock_start = now_mono_time(); \
|
||||||
|
} \
|
||||||
(void)(expr); \
|
(void)(expr); \
|
||||||
|
if (_LK_ != _LK_UN && unlikely(lock_start)) \
|
||||||
|
th_ctx->lock_wait_total += now_mono_time() - lock_start; \
|
||||||
if (lbl != OTHER_LOCK) \
|
if (lbl != OTHER_LOCK) \
|
||||||
_lock_wait_common(_LK_, lbl); \
|
_lock_wait_common(_LK_, lbl); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
|
||||||
|
|
@ -161,13 +161,16 @@ struct thread_ctx {
|
||||||
|
|
||||||
uint32_t sched_wake_date; /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */
|
uint32_t sched_wake_date; /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */
|
||||||
uint64_t sched_call_date; /* current task/tasklet's call date in ns */
|
uint64_t sched_call_date; /* current task/tasklet's call date in ns */
|
||||||
|
uint64_t lock_wait_total; /* total time in ns spent waiting for a lock (task prof) */
|
||||||
|
|
||||||
uint64_t prev_mono_time; /* previous system wide monotonic time (leaving poll) */
|
uint64_t prev_mono_time; /* previous system wide monotonic time (leaving poll) */
|
||||||
uint64_t curr_mono_time; /* latest system wide monotonic time (leaving poll) */
|
uint64_t curr_mono_time; /* latest system wide monotonic time (leaving poll) */
|
||||||
|
|
||||||
ulong lock_history; /* history of used locks, see thread.h for more details */
|
ulong lock_history; /* history of used locks, see thread.h for more details */
|
||||||
|
|
||||||
// third cache line here on 64 bits: accessed mostly using atomic ops
|
/* around 56 unused bytes here */
|
||||||
|
|
||||||
|
// fourth cache line here on 64 bits: accessed mostly using atomic ops
|
||||||
ALWAYS_ALIGN(64);
|
ALWAYS_ALIGN(64);
|
||||||
struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */
|
struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */
|
||||||
unsigned int rqueue_ticks; /* Insertion counter for the run queue */
|
unsigned int rqueue_ticks; /* Insertion counter for the run queue */
|
||||||
|
|
|
||||||
|
|
@ -569,6 +569,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
|
||||||
process = t->process;
|
process = t->process;
|
||||||
t->calls++;
|
t->calls++;
|
||||||
|
|
||||||
|
th_ctx->lock_wait_total = 0;
|
||||||
th_ctx->sched_wake_date = t->wake_date;
|
th_ctx->sched_wake_date = t->wake_date;
|
||||||
if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) {
|
if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) {
|
||||||
/* take the most accurate clock we have, either
|
/* take the most accurate clock we have, either
|
||||||
|
|
@ -678,8 +679,11 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
|
||||||
__ha_barrier_store();
|
__ha_barrier_store();
|
||||||
|
|
||||||
/* stats are only registered for non-zero wake dates */
|
/* stats are only registered for non-zero wake dates */
|
||||||
if (unlikely(th_ctx->sched_wake_date))
|
if (unlikely(th_ctx->sched_wake_date)) {
|
||||||
HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
|
HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
|
||||||
|
if (th_ctx->lock_wait_total)
|
||||||
|
HA_ATOMIC_ADD(&profile_entry->lkw_time, th_ctx->lock_wait_total);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
th_ctx->current_queue = -1;
|
th_ctx->current_queue = -1;
|
||||||
th_ctx->sched_wake_date = TICK_ETERNITY;
|
th_ctx->sched_wake_date = TICK_ETERNITY;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue