pg_verifybackup: Enable WAL parsing for tar-format backups

Now that pg_waldump supports reading WAL from tar archives, remove the
restriction that forced --no-parse-wal for tar-format backups.

pg_verifybackup now automatically locates the WAL archive: it looks for
a separate pg_wal.tar first, then falls back to the main base.tar.  A
new --wal-path option (replacing the old --wal-directory, which is kept
as a silent alias) accepts either a directory or a tar archive path.

The default WAL directory preparation is deferred until the backup
format is known, since tar-format backups resolve the WAL path
differently from plain-format ones.

Author: Amul Sul <sulamul@gmail.com>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Euler Taveira <euler@eulerto.com>
Reviewed-by: Andrew Dunstan <andrew@dunslane.net>
Discussion: https://postgr.es/m/CAAJ_b94bqdWN3h2J-PzzzQ2Npbwct5ZQHggn_QoYGhC2rn-=WQ@mail.gmail.com
This commit is contained in:
Andrew Dunstan 2026-03-20 15:31:35 -04:00
parent b15c151398
commit b3cf461b3c
7 changed files with 91 additions and 57 deletions

View file

@ -36,10 +36,7 @@ PostgreSQL documentation
<literal>backup_manifest</literal> generated by the server at the time <literal>backup_manifest</literal> generated by the server at the time
of the backup. The backup may be stored either in the "plain" or the "tar" of the backup. The backup may be stored either in the "plain" or the "tar"
format; this includes tar-format backups compressed with any algorithm format; this includes tar-format backups compressed with any algorithm
supported by <application>pg_basebackup</application>. However, at present, supported by <application>pg_basebackup</application>.
<literal>WAL</literal> verification is supported only for plain-format
backups. Therefore, if the backup is stored in tar-format, the
<literal>-n, --no-parse-wal</literal> option should be used.
</para> </para>
<para> <para>
@ -261,12 +258,13 @@ PostgreSQL documentation
<varlistentry> <varlistentry>
<term><option>-w <replaceable class="parameter">path</replaceable></option></term> <term><option>-w <replaceable class="parameter">path</replaceable></option></term>
<term><option>--wal-directory=<replaceable class="parameter">path</replaceable></option></term> <term><option>--wal-path=<replaceable class="parameter">path</replaceable></option></term>
<listitem> <listitem>
<para> <para>
Try to parse WAL files stored in the specified directory, rather than Try to parse WAL files stored in the specified directory or tar
in <literal>pg_wal</literal>. This may be useful if the backup is archive, rather than in <literal>pg_wal</literal>. This may be
stored in a separate location from the WAL archive. useful if the backup is stored in a separate location from the WAL
archive.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>

View file

@ -74,7 +74,9 @@ pg_noreturn static void report_manifest_error(JsonManifestParseContext *context,
const char *fmt,...) const char *fmt,...)
pg_attribute_printf(2, 3); pg_attribute_printf(2, 3);
static void verify_tar_backup(verifier_context *context, DIR *dir); static void verify_tar_backup(verifier_context *context, DIR *dir,
char **base_archive_path,
char **wal_archive_path);
static void verify_plain_backup_directory(verifier_context *context, static void verify_plain_backup_directory(verifier_context *context,
char *relpath, char *fullpath, char *relpath, char *fullpath,
DIR *dir); DIR *dir);
@ -83,7 +85,9 @@ static void verify_plain_backup_file(verifier_context *context, char *relpath,
static void verify_control_file(const char *controlpath, static void verify_control_file(const char *controlpath,
uint64 manifest_system_identifier); uint64 manifest_system_identifier);
static void precheck_tar_backup_file(verifier_context *context, char *relpath, static void precheck_tar_backup_file(verifier_context *context, char *relpath,
char *fullpath, SimplePtrList *tarfiles); char *fullpath, SimplePtrList *tarfiles,
char **base_archive_path,
char **wal_archive_path);
static void verify_tar_file(verifier_context *context, char *relpath, static void verify_tar_file(verifier_context *context, char *relpath,
char *fullpath, astreamer *streamer); char *fullpath, astreamer *streamer);
static void report_extra_backup_files(verifier_context *context); static void report_extra_backup_files(verifier_context *context);
@ -93,7 +97,7 @@ static void verify_file_checksum(verifier_context *context,
uint8 *buffer); uint8 *buffer);
static void parse_required_wal(verifier_context *context, static void parse_required_wal(verifier_context *context,
char *pg_waldump_path, char *pg_waldump_path,
char *wal_directory); char *wal_path);
static astreamer *create_archive_verifier(verifier_context *context, static astreamer *create_archive_verifier(verifier_context *context,
char *archive_name, char *archive_name,
Oid tblspc_oid, Oid tblspc_oid,
@ -126,7 +130,8 @@ main(int argc, char **argv)
{"progress", no_argument, NULL, 'P'}, {"progress", no_argument, NULL, 'P'},
{"quiet", no_argument, NULL, 'q'}, {"quiet", no_argument, NULL, 'q'},
{"skip-checksums", no_argument, NULL, 's'}, {"skip-checksums", no_argument, NULL, 's'},
{"wal-directory", required_argument, NULL, 'w'}, {"wal-path", required_argument, NULL, 'w'},
{"wal-directory", required_argument, NULL, 'w'}, /* deprecated */
{NULL, 0, NULL, 0} {NULL, 0, NULL, 0}
}; };
@ -135,7 +140,9 @@ main(int argc, char **argv)
char *manifest_path = NULL; char *manifest_path = NULL;
bool no_parse_wal = false; bool no_parse_wal = false;
bool quiet = false; bool quiet = false;
char *wal_directory = NULL; char *wal_path = NULL;
char *base_archive_path = NULL;
char *wal_archive_path = NULL;
char *pg_waldump_path = NULL; char *pg_waldump_path = NULL;
DIR *dir; DIR *dir;
@ -221,8 +228,8 @@ main(int argc, char **argv)
context.skip_checksums = true; context.skip_checksums = true;
break; break;
case 'w': case 'w':
wal_directory = pstrdup(optarg); wal_path = pstrdup(optarg);
canonicalize_path(wal_directory); canonicalize_path(wal_path);
break; break;
default: default:
/* getopt_long already emitted a complaint */ /* getopt_long already emitted a complaint */
@ -285,10 +292,6 @@ main(int argc, char **argv)
manifest_path = psprintf("%s/backup_manifest", manifest_path = psprintf("%s/backup_manifest",
context.backup_directory); context.backup_directory);
/* By default, look for the WAL in the backup directory, too. */
if (wal_directory == NULL)
wal_directory = psprintf("%s/pg_wal", context.backup_directory);
/* /*
* Try to read the manifest. We treat any errors encountered while parsing * Try to read the manifest. We treat any errors encountered while parsing
* the manifest as fatal; there doesn't seem to be much point in trying to * the manifest as fatal; there doesn't seem to be much point in trying to
@ -331,17 +334,6 @@ main(int argc, char **argv)
pfree(path); pfree(path);
} }
/*
* XXX: In the future, we should consider enhancing pg_waldump to read WAL
* files from an archive.
*/
if (!no_parse_wal && context.format == 't')
{
pg_log_error("pg_waldump cannot read tar files");
pg_log_error_hint("You must use -n/--no-parse-wal when verifying a tar-format backup.");
exit(1);
}
/* /*
* Perform the appropriate type of verification based on the * Perform the appropriate type of verification based on the
* backup format. This will close 'dir'. * backup format. This will close 'dir'.
@ -350,7 +342,7 @@ main(int argc, char **argv)
verify_plain_backup_directory(&context, NULL, context.backup_directory, verify_plain_backup_directory(&context, NULL, context.backup_directory,
dir); dir);
else else
verify_tar_backup(&context, dir); verify_tar_backup(&context, dir, &base_archive_path, &wal_archive_path);
/* /*
* The "matched" flag should now be set on every entry in the hash table. * The "matched" flag should now be set on every entry in the hash table.
@ -368,12 +360,35 @@ main(int argc, char **argv)
if (context.format == 'p' && !context.skip_checksums) if (context.format == 'p' && !context.skip_checksums)
verify_backup_checksums(&context); verify_backup_checksums(&context);
/*
* By default, WAL files are expected to be found in the backup directory
* for plain-format backups. In the case of tar-format backups, if a
* separate WAL archive is not found, the WAL files are most likely
* included within the main data directory archive.
*/
if (wal_path == NULL)
{
if (context.format == 'p')
wal_path = psprintf("%s/pg_wal", context.backup_directory);
else if (wal_archive_path)
wal_path = wal_archive_path;
else if (base_archive_path)
wal_path = base_archive_path;
else
{
pg_log_error("WAL archive not found");
pg_log_error_hint("Specify the correct path using the option -w/--wal-path. "
"Or you must use -n/--no-parse-wal when verifying a tar-format backup.");
exit(1);
}
}
/* /*
* Try to parse the required ranges of WAL records, unless we were told * Try to parse the required ranges of WAL records, unless we were told
* not to do so. * not to do so.
*/ */
if (!no_parse_wal) if (!no_parse_wal)
parse_required_wal(&context, pg_waldump_path, wal_directory); parse_required_wal(&context, pg_waldump_path, wal_path);
/* /*
* If everything looks OK, tell the user this, unless we were asked to * If everything looks OK, tell the user this, unless we were asked to
@ -787,7 +802,8 @@ verify_control_file(const char *controlpath, uint64 manifest_system_identifier)
* close when we're done with it. * close when we're done with it.
*/ */
static void static void
verify_tar_backup(verifier_context *context, DIR *dir) verify_tar_backup(verifier_context *context, DIR *dir, char **base_archive_path,
char **wal_archive_path)
{ {
struct dirent *dirent; struct dirent *dirent;
SimplePtrList tarfiles = {NULL, NULL}; SimplePtrList tarfiles = {NULL, NULL};
@ -816,7 +832,8 @@ verify_tar_backup(verifier_context *context, DIR *dir)
char *fullpath; char *fullpath;
fullpath = psprintf("%s/%s", context->backup_directory, filename); fullpath = psprintf("%s/%s", context->backup_directory, filename);
precheck_tar_backup_file(context, filename, fullpath, &tarfiles); precheck_tar_backup_file(context, filename, fullpath, &tarfiles,
base_archive_path, wal_archive_path);
pfree(fullpath); pfree(fullpath);
} }
} }
@ -875,17 +892,21 @@ verify_tar_backup(verifier_context *context, DIR *dir)
* *
* The arguments to this function are mostly the same as the * The arguments to this function are mostly the same as the
* verify_plain_backup_file. The additional argument outputs a list of valid * verify_plain_backup_file. The additional argument outputs a list of valid
* tar files. * tar files, along with the full paths to the main archive and the WAL
* directory archive.
*/ */
static void static void
precheck_tar_backup_file(verifier_context *context, char *relpath, precheck_tar_backup_file(verifier_context *context, char *relpath,
char *fullpath, SimplePtrList *tarfiles) char *fullpath, SimplePtrList *tarfiles,
char **base_archive_path, char **wal_archive_path)
{ {
struct stat sb; struct stat sb;
Oid tblspc_oid = InvalidOid; Oid tblspc_oid = InvalidOid;
pg_compress_algorithm compress_algorithm; pg_compress_algorithm compress_algorithm;
tar_file *tar; tar_file *tar;
char *suffix = NULL; char *suffix = NULL;
bool is_base_archive = false;
bool is_wal_archive = false;
/* Should be tar format backup */ /* Should be tar format backup */
Assert(context->format == 't'); Assert(context->format == 't');
@ -918,9 +939,15 @@ precheck_tar_backup_file(verifier_context *context, char *relpath,
* extension such as .gz, .lz4, or .zst. * extension such as .gz, .lz4, or .zst.
*/ */
if (strncmp("base", relpath, 4) == 0) if (strncmp("base", relpath, 4) == 0)
{
suffix = relpath + 4; suffix = relpath + 4;
is_base_archive = true;
}
else if (strncmp("pg_wal", relpath, 6) == 0) else if (strncmp("pg_wal", relpath, 6) == 0)
{
suffix = relpath + 6; suffix = relpath + 6;
is_wal_archive = true;
}
else else
{ {
/* Expected a <tablespaceoid>.tar file here. */ /* Expected a <tablespaceoid>.tar file here. */
@ -953,8 +980,13 @@ precheck_tar_backup_file(verifier_context *context, char *relpath,
* Ignore WALs, as reading and verification will be handled through * Ignore WALs, as reading and verification will be handled through
* pg_waldump. * pg_waldump.
*/ */
if (strncmp("pg_wal", relpath, 6) == 0) if (is_wal_archive)
{
*wal_archive_path = pstrdup(fullpath);
return; return;
}
else if (is_base_archive)
*base_archive_path = pstrdup(fullpath);
/* /*
* Append the information to the list for complete verification at a later * Append the information to the list for complete verification at a later
@ -1188,7 +1220,7 @@ verify_file_checksum(verifier_context *context, manifest_file *m,
*/ */
static void static void
parse_required_wal(verifier_context *context, char *pg_waldump_path, parse_required_wal(verifier_context *context, char *pg_waldump_path,
char *wal_directory) char *wal_path)
{ {
manifest_data *manifest = context->manifest; manifest_data *manifest = context->manifest;
manifest_wal_range *this_wal_range = manifest->first_wal_range; manifest_wal_range *this_wal_range = manifest->first_wal_range;
@ -1198,7 +1230,7 @@ parse_required_wal(verifier_context *context, char *pg_waldump_path,
char *pg_waldump_cmd; char *pg_waldump_cmd;
pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%08X --end=%X/%08X\n", pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%08X --end=%X/%08X\n",
pg_waldump_path, wal_directory, this_wal_range->tli, pg_waldump_path, wal_path, this_wal_range->tli,
LSN_FORMAT_ARGS(this_wal_range->start_lsn), LSN_FORMAT_ARGS(this_wal_range->start_lsn),
LSN_FORMAT_ARGS(this_wal_range->end_lsn)); LSN_FORMAT_ARGS(this_wal_range->end_lsn));
fflush(NULL); fflush(NULL);
@ -1366,7 +1398,7 @@ usage(void)
printf(_(" -P, --progress show progress information\n")); printf(_(" -P, --progress show progress information\n"));
printf(_(" -q, --quiet do not print any output, except for errors\n")); printf(_(" -q, --quiet do not print any output, except for errors\n"));
printf(_(" -s, --skip-checksums skip checksum verification\n")); printf(_(" -s, --skip-checksums skip checksum verification\n"));
printf(_(" -w, --wal-directory=PATH use specified path for WAL files\n")); printf(_(" -w, --wal-path=PATH use specified path for WAL files\n"));
printf(_(" -V, --version output version information, then exit\n")); printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n")); printf(_(" -?, --help show this help, then exit\n"));
printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);

View file

@ -30,10 +30,6 @@ sub test_checksums
{ {
# Add switch to get a tar-format backup # Add switch to get a tar-format backup
push @backup, ('--format' => 'tar'); push @backup, ('--format' => 'tar');
# Add switch to skip WAL verification, which is not yet supported for
# tar-format backups
push @verify, ('--no-parse-wal');
} }
# A backup with a bogus algorithm should fail. # A backup with a bogus algorithm should fail.

View file

@ -193,10 +193,8 @@ for my $scenario (@scenario)
command_ok([ $tar, '-cf' => "$tar_backup_path/base.tar", '.' ]); command_ok([ $tar, '-cf' => "$tar_backup_path/base.tar", '.' ]);
chdir($cwd) || die "chdir: $!"; chdir($cwd) || die "chdir: $!";
# Now check that the backup no longer verifies. We must use -n
# here, because pg_waldump can't yet read WAL from a tarfile.
command_fails_like( command_fails_like(
[ 'pg_verifybackup', '--no-parse-wal', $tar_backup_path ], [ 'pg_verifybackup', $tar_backup_path ],
$scenario->{'fails_like'}, $scenario->{'fails_like'},
"corrupt backup fails verification: $name"); "corrupt backup fails verification: $name");

View file

@ -42,10 +42,10 @@ command_ok([ 'pg_verifybackup', '--no-parse-wal', $backup_path ],
command_ok( command_ok(
[ [
'pg_verifybackup', 'pg_verifybackup',
'--wal-directory' => $relocated_pg_wal, '--wal-path' => $relocated_pg_wal,
$backup_path $backup_path
], ],
'--wal-directory can be used to specify WAL directory'); '--wal-path can be used to specify WAL directory');
# Move directory back to original location. # Move directory back to original location.
rename($relocated_pg_wal, $original_pg_wal) || die "rename pg_wal back: $!"; rename($relocated_pg_wal, $original_pg_wal) || die "rename pg_wal back: $!";
@ -90,4 +90,20 @@ command_ok(
[ 'pg_verifybackup', $backup_path2 ], [ 'pg_verifybackup', $backup_path2 ],
'valid base backup with timeline > 1'); 'valid base backup with timeline > 1');
# Test WAL verification for a tar-format backup with a separate pg_wal.tar,
# as produced by pg_basebackup --format=tar --wal-method=stream.
my $backup_path3 = $primary->backup_dir . '/test_tar_wal';
$primary->command_ok(
[
'pg_basebackup',
'--pgdata' => $backup_path3,
'--no-sync',
'--format' => 'tar',
'--checkpoint' => 'fast'
],
"tar backup with separate pg_wal.tar");
command_ok(
[ 'pg_verifybackup', $backup_path3 ],
'WAL verification succeeds with separate pg_wal.tar');
done_testing(); done_testing();

View file

@ -47,7 +47,6 @@ my $tsoid = $primary->safe_psql(
SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1')); SELECT oid FROM pg_tablespace WHERE spcname = 'regress_ts1'));
my $backup_path = $primary->backup_dir . '/server-backup'; my $backup_path = $primary->backup_dir . '/server-backup';
my $extract_path = $primary->backup_dir . '/extracted-backup';
my @test_configuration = ( my @test_configuration = (
{ {
@ -123,14 +122,12 @@ for my $tc (@test_configuration)
# Verify tar backup. # Verify tar backup.
$primary->command_ok( $primary->command_ok(
[ [
'pg_verifybackup', '--no-parse-wal', 'pg_verifybackup', '--exit-on-error', $backup_path,
'--exit-on-error', $backup_path,
], ],
"verify backup, compression $method"); "verify backup, compression $method");
# Cleanup. # Cleanup.
rmtree($backup_path); rmtree($backup_path);
rmtree($extract_path);
} }
} }

View file

@ -32,7 +32,6 @@ print $jf $junk_data;
close $jf; close $jf;
my $backup_path = $primary->backup_dir . '/client-backup'; my $backup_path = $primary->backup_dir . '/client-backup';
my $extract_path = $primary->backup_dir . '/extracted-backup';
my @test_configuration = ( my @test_configuration = (
{ {
@ -137,13 +136,11 @@ for my $tc (@test_configuration)
# Verify tar backup. # Verify tar backup.
$primary->command_ok( $primary->command_ok(
[ [
'pg_verifybackup', '--no-parse-wal', 'pg_verifybackup', '--exit-on-error', $backup_path,
'--exit-on-error', $backup_path,
], ],
"verify backup, compression $method"); "verify backup, compression $method");
# Cleanup. # Cleanup.
rmtree($extract_path);
rmtree($backup_path); rmtree($backup_path);
} }
} }