Switch to FATAL error for missing checkpoint record without backup_label

Crash recovery started without a backup_label previously failed with a
PANIC if the checkpoint record could not be found.  This commit lowers
the error level generated to FATAL instead.

With recovery methods being more imaginative these days, this should
provide more flexibility when handling PostgreSQL recovery processing in
the event of a driver error, similarly to 15f68cebdc.  An extra
benefit of this change is that it becomes possible to add a test to
check that a FATAL is hit with an expected error message pattern.  With
the recovery code becoming more complicated over the last couple of
years, I suspect that this will be beneficial to cover in the long term.

The original PANIC behavior was introduced in the early days of crash
recovery, as of 4d14fe0048 (PANIC did not exist yet, the code used
STOP).

Author: Nitin Jadhav <nitinjadhavpostgres@gmail.com>
Discussion: https://postgr.es/m/CAMm1aWZbQ-Acp_xAxC7mX9uZZMH8+NpfepY9w=AOxbBVT9E=uA@mail.gmail.com
Committed by: Michael Paquier 2026-03-10 12:00:05 +09:00
parent 6307b096e2
commit 03facc1211
3 changed files with 61 additions and 1 deletions

@@ -735,7 +735,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 		 * can't read the last checkpoint because this allows us to
 		 * simplify processing around checkpoints.
 		 */
-		ereport(PANIC,
+		ereport(FATAL,
 				errmsg("could not locate a valid checkpoint record at %X/%08X",
 					   LSN_FORMAT_ARGS(CheckPointLoc)));
 	}

@@ -60,6 +60,7 @@ tests += {
       't/049_wait_for_lsn.pl',
       't/050_redo_segment_missing.pl',
       't/051_effective_wal_level.pl',
+      't/052_checkpoint_segment_missing.pl',
     ],
   },
 }

@@ -0,0 +1,59 @@
# Copyright (c) 2026, PostgreSQL Global Development Group
#
# Verify crash recovery behavior when the WAL segment containing the
# checkpoint record referenced by pg_controldata is missing. This
# checks the code path where there is no backup_label file, where the
# startup process should fail with FATAL and log a message about the
# missing checkpoint record.

use strict;
use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

my $node = PostgreSQL::Test::Cluster->new('testnode');
$node->init;
$node->append_conf('postgresql.conf', 'log_checkpoints = on');
$node->start;

# Force a checkpoint so that pg_controldata points to a checkpoint record
# we can target.
$node->safe_psql('postgres', 'CHECKPOINT;');

# Retrieve the checkpoint LSN and derive the WAL segment name.
my $checkpoint_walfile = $node->safe_psql('postgres',
"SELECT pg_walfile_name(checkpoint_lsn) FROM pg_control_checkpoint()");
ok($checkpoint_walfile ne '',
"derived checkpoint WAL file name: $checkpoint_walfile");

# Stop the node.
$node->stop('immediate');

# Remove the WAL segment containing the checkpoint record.
my $walpath = $node->data_dir . "/pg_wal/$checkpoint_walfile";
ok(-f $walpath, "checkpoint WAL file exists before deletion: $walpath");
unlink $walpath
or die "could not remove WAL file $walpath: $!";
ok(!-e $walpath, "checkpoint WAL file removed: $walpath");

# Use run_log instead of $node->start because this test expects the
# server to fail with an error during recovery.
run_log(
[
'pg_ctl',
'--pgdata' => $node->data_dir,
'--log' => $node->logfile,
'start',
]);

# Confirm that recovery has failed as expected.
my $logfile = slurp_file($node->logfile());
ok( $logfile =~
qr/FATAL: .* could not locate a valid checkpoint record at .*/,
"FATAL logged for missing checkpoint record (no backup_label path)");

done_testing();
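
As a side note, the log check in the TAP test above boils down to a plain regular-expression scan of the slurped server log. A minimal standalone Python sketch of the same match, using a fabricated sample log line for illustration (the real line comes from the server logfile):

```python
import re

# Fabricated sample of what the startup process would log on this code
# path (illustrative only; timestamp, PID and LSN are made up).
log_line = ("2026-03-10 12:00:05.000 JST [12345] FATAL:  "
            "could not locate a valid checkpoint record at 0/0154D068")

# Same pattern the TAP test applies to the server log.
pattern = re.compile(r"FATAL: .* could not locate a valid checkpoint record at .*")

print(bool(pattern.search(log_line)))  # prints: True
```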