mirror of
https://github.com/haproxy/haproxy.git
synced 2026-04-15 21:59:41 -04:00
MINOR: activity: collect time spent waiting on a lock for each task
When DEBUG_THREAD > 0, and if task profiling is enabled, then each
locking attempt will measure the time it takes to obtain the lock, then
add that time to a thread_ctx accumulator that the scheduler will then
retrieve to update the current task's sched_activity entry. The value
will then appear averaged over the number of calls in the lkw_avg column
of "show profiling tasks", such as below:
Tasks activity over 48.298 sec till 0.000 sec ago:
function calls cpu_tot cpu_avg lkw_avg lat_avg
h1_io_cb 3200170 26.81s 8.377us - 32.73us <- sock_conn_iocb@src/sock.c:1099 tasklet_wakeup
sc_conn_io_cb 1657841 1.645s 992.0ns - 853.0us <- sc_app_chk_rcv_conn@src/stconn.c:844 tasklet_wakeup
process_stream 1600450 49.16s 30.71us 1.936us 1.392ms <- sc_notify@src/stconn.c:1206 task_wakeup
process_stream 1600321 7.770m 291.3us 209.1us 901.6us <- stream_new@src/stream.c:585 task_wakeup
sc_conn_io_cb 1599928 7.975s 4.984us - 65.77us <- h1_wake_stream_for_recv@src/mux_h1.c:3633 tasklet_wakeup
task_process_applet 997609 46.37s 46.48us 16.80us 113.0us <- sc_app_chk_snd_applet@src/stconn.c:1043 appctx_wakeup
process_table_expire 922074 48.79s 52.92us 7.275us 181.1us <- run_tasks_from_lists@src/task.c:670 task_queue
stktable_add_pend_updates 705423 1.511s 2.142us - 56.81us <- stktable_add_pend_updates@src/stick_table.c:869 tasklet_wakeup
task_process_applet 683511 34.75s 50.84us 18.37us 153.3us <- __process_running_peer_sync@src/peers.c:3579 appctx_wakeup
h1_io_cb 535395 198.1ms 370.0ns 72.00ns 930.4us <- h1_takeover@src/mux_h1.c:5659 tasklet_wakeup
It now makes it pretty obvious which tasks (hence call chains) spend their
time waiting on a lock and for what share of their execution time.
This commit is contained in:
parent
1956c544b5
commit
503084643f
3 changed files with 17 additions and 2 deletions
|
|
@ -360,7 +360,15 @@ static inline unsigned long thread_isolated()
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define _lock_wait(_LK_, lbl, expr) do { \
|
#define _lock_wait(_LK_, lbl, expr) do { \
|
||||||
|
uint64_t lock_start = 0; \
|
||||||
|
extern uint64_t now_mono_time(void); \
|
||||||
|
if (_LK_ != _LK_UN) { \
|
||||||
|
if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) \
|
||||||
|
lock_start = now_mono_time(); \
|
||||||
|
} \
|
||||||
(void)(expr); \
|
(void)(expr); \
|
||||||
|
if (_LK_ != _LK_UN && unlikely(lock_start)) \
|
||||||
|
th_ctx->lock_wait_total += now_mono_time() - lock_start; \
|
||||||
if (lbl != OTHER_LOCK) \
|
if (lbl != OTHER_LOCK) \
|
||||||
_lock_wait_common(_LK_, lbl); \
|
_lock_wait_common(_LK_, lbl); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
|
||||||
|
|
@ -161,13 +161,16 @@ struct thread_ctx {
|
||||||
|
|
||||||
uint32_t sched_wake_date; /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */
|
uint32_t sched_wake_date; /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */
|
||||||
uint64_t sched_call_date; /* current task/tasklet's call date in ns */
|
uint64_t sched_call_date; /* current task/tasklet's call date in ns */
|
||||||
|
uint64_t lock_wait_total; /* total time in ns spent waiting for a lock (task prof) */
|
||||||
|
|
||||||
uint64_t prev_mono_time; /* previous system wide monotonic time (leaving poll) */
|
uint64_t prev_mono_time; /* previous system wide monotonic time (leaving poll) */
|
||||||
uint64_t curr_mono_time; /* latest system wide monotonic time (leaving poll) */
|
uint64_t curr_mono_time; /* latest system wide monotonic time (leaving poll) */
|
||||||
|
|
||||||
ulong lock_history; /* history of used locks, see thread.h for more details */
|
ulong lock_history; /* history of used locks, see thread.h for more details */
|
||||||
|
|
||||||
// third cache line here on 64 bits: accessed mostly using atomic ops
|
/* around 56 unused bytes here */
|
||||||
|
|
||||||
|
// fourth cache line here on 64 bits: accessed mostly using atomic ops
|
||||||
ALWAYS_ALIGN(64);
|
ALWAYS_ALIGN(64);
|
||||||
struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */
|
struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */
|
||||||
unsigned int rqueue_ticks; /* Insertion counter for the run queue */
|
unsigned int rqueue_ticks; /* Insertion counter for the run queue */
|
||||||
|
|
|
||||||
|
|
@ -569,6 +569,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
|
||||||
process = t->process;
|
process = t->process;
|
||||||
t->calls++;
|
t->calls++;
|
||||||
|
|
||||||
|
th_ctx->lock_wait_total = 0;
|
||||||
th_ctx->sched_wake_date = t->wake_date;
|
th_ctx->sched_wake_date = t->wake_date;
|
||||||
if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) {
|
if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) {
|
||||||
/* take the most accurate clock we have, either
|
/* take the most accurate clock we have, either
|
||||||
|
|
@ -678,8 +679,11 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
|
||||||
__ha_barrier_store();
|
__ha_barrier_store();
|
||||||
|
|
||||||
/* stats are only registered for non-zero wake dates */
|
/* stats are only registered for non-zero wake dates */
|
||||||
if (unlikely(th_ctx->sched_wake_date))
|
if (unlikely(th_ctx->sched_wake_date)) {
|
||||||
HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
|
HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
|
||||||
|
if (th_ctx->lock_wait_total)
|
||||||
|
HA_ATOMIC_ADD(&profile_entry->lkw_time, th_ctx->lock_wait_total);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
th_ctx->current_queue = -1;
|
th_ctx->current_queue = -1;
|
||||||
th_ctx->sched_wake_date = TICK_ETERNITY;
|
th_ctx->sched_wake_date = TICK_ETERNITY;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue