diff --git a/src/mworker.c b/src/mworker.c index 9a0292326..261b7578c 100644 --- a/src/mworker.c +++ b/src/mworker.c @@ -815,7 +815,7 @@ void mworker_cleanup_proc() struct cli_showproc_ctx { int debug; - int next_reload; /* reload number to resume from, 0 = from the beginning */ + int resume_reload; /* reload count of the last flushed old worker row, 0 = none yet */ }; /* Append a single worker row to trash (shared between current/old sections) */ @@ -850,7 +850,7 @@ static int cli_io_handler_show_proc(struct appctx *appctx) chunk_reset(&trash); - if (ctx->next_reload == 0) { + if (ctx->resume_reload == 0) { memprintf(&reloadtxt, "%d [failed: %d]", proc_self->reloads, proc_self->failedreloads); chunk_printf(&trash, "#%-14s %-15s %-15s %-15s %-15s", "", "", "", "", ""); if (ctx->debug) @@ -868,12 +868,12 @@ static int cli_io_handler_show_proc(struct appctx *appctx) ha_free(&uptime); /* displays current processes */ - if (ctx->next_reload == 0) + if (ctx->resume_reload == 0) chunk_appendf(&trash, "# workers\n"); list_for_each_entry(child, &proc_list, list) { /* don't display current worker if we only need the next ones */ - if (ctx->next_reload != 0) + if (ctx->resume_reload != 0) continue; if (!(child->options & PROC_O_TYPE_WORKER)) @@ -890,34 +890,69 @@ static int cli_io_handler_show_proc(struct appctx *appctx) return 0; /* displays old processes */ - if (old || ctx->next_reload) { /* there's more */ - if (ctx->next_reload == 0) + if (old || ctx->resume_reload) { /* there's more */ + int skip = ctx->resume_reload; /* if resuming, skip until we pass this reload count */ + int prev_reload = 0; /* previous LEAVING entry's reload count during skip phase */ + + if (!ctx->resume_reload) chunk_appendf(&trash, "# old workers\n"); list_for_each_entry(child, &proc_list, list) { - /* If we're resuming, skip entries that were already printed (reload >= ctx->next_reload) */ - if (ctx->next_reload && child->reloads >= ctx->next_reload) - continue; - if (!(child->options & PROC_O_TYPE_WORKER)) continue; - if (child->options & PROC_O_LEAVING) { - cli_append_worker_row(ctx, child, date.tv_sec); + if (!(child->options & PROC_O_LEAVING)) + continue; - /* Try to flush so we can resume after this reload on next page if the buffer is full. */ - if (applet_putchk(appctx, &trash) == -1) { - /* resume at this reload (exclude it on next pass) */ - ctx->next_reload = child->reloads; /* resume after entries >= this reload */ - return 0; + /* When resuming after a flush failure, skip entries + * up to and including the last successfully flushed + * row (identified by its reload count). This is + * direction-agnostic: works whether the list is in + * ascending or descending reload order. + * + * If the target entry was deleted from proc_list + * (e.g. process exited between handler calls), we + * detect that we've passed its former position when + * two consecutive LEAVING entries straddle the skip + * value — i.e. one has reloads > skip and the next + * has reloads < skip (or vice versa). In that case + * we stop skipping and emit the current entry. + */ + if (skip) { + if (child->reloads == skip) { + skip = 0; /* found it, resume from the next entry */ + prev_reload = 0; + continue; + } + if (prev_reload && + ((prev_reload > skip) != (child->reloads > skip))) { + /* Crossed where skip would have been — + * the entry was deleted. Stop skipping + * and fall through to emit this entry. + */ + skip = 0; + } else { + prev_reload = child->reloads; + continue; } - chunk_reset(&trash); } + cli_append_worker_row(ctx, child, date.tv_sec); + + if (applet_putchk(appctx, &trash) == -1) { + /* ctx->resume_reload already holds the last + * flushed row or 0; don't update it here so + * the failed row will be replayed. + */ + return 0; + } + /* This row was successfully flushed, remember it */ + ctx->resume_reload = child->reloads; + chunk_reset(&trash); } } /* dump complete: reset resume cursor so next 'show proc' starts from the top */ - ctx->next_reload = 0; + ctx->resume_reload = 0; return 1; }