mirror of
https://github.com/postgres/postgres.git
synced 2026-04-15 22:10:45 -04:00
Add file_extend_method=posix_fallocate,write_zeros.
Provide a way to disable the use of posix_fallocate() for relation
files. It was introduced by commit 4d330a61bb. The new setting
file_extend_method=write_zeros can be used as a workaround for problems
reported from the field:
* BTRFS compression is disabled by the use of posix_fallocate().
* XFS could produce spurious ENOSPC errors in some Linux kernel
versions, though that problem is reported to have been fixed.
The default remains file_extend_method=posix_fallocate where available,
as before. The write_zeros option behaves like PostgreSQL < 16, except
that the zero-filling is now multi-block.
Backpatch-through: 16
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reported-by: Dimitrios Apostolou <jimis@gmx.net>
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
This commit is contained in:
parent
e35add48cc
commit
f94e9141a0
7 changed files with 87 additions and 5 deletions
|
|
@ -2412,6 +2412,43 @@ include_dir 'conf.d'
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry id="guc-file-extend-method" xreflabel="file_extend_method">
|
||||||
|
<term><varname>file_extend_method</varname> (<type>enum</type>)
|
||||||
|
<indexterm>
|
||||||
|
<primary><varname>file_extend_method</varname> configuration parameter</primary>
|
||||||
|
</indexterm>
|
||||||
|
</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Specifies the method used to extend data files during bulk operations
|
||||||
|
such as <command>COPY</command>. The first available option is used as
|
||||||
|
the default, depending on the operating system:
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<literal>posix_fallocate</literal> (Unix) uses the standard POSIX
|
||||||
|
interface for allocating disk space, but is missing on some systems.
|
||||||
|
If it is present but the underlying file system doesn't support it,
|
||||||
|
this option silently falls back to <literal>write_zeros</literal>.
|
||||||
|
Current versions of BTRFS are known to disable compression when
|
||||||
|
this option is used.
|
||||||
|
This is the default on systems that have the function.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<literal>write_zeros</literal> extends files by writing out blocks
|
||||||
|
of zero bytes. This is the default on systems that don't have the
|
||||||
|
function <function>posix_fallocate</function>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
The <literal>write_zeros</literal> method is always used when data
|
||||||
|
files are extended by 8 blocks or fewer.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages">
|
<varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages">
|
||||||
<term><varname>max_notify_queue_pages</varname> (<type>integer</type>)
|
<term><varname>max_notify_queue_pages</varname> (<type>integer</type>)
|
||||||
<indexterm>
|
<indexterm>
|
||||||
|
|
|
||||||
|
|
@ -164,6 +164,9 @@ bool data_sync_retry = false;
|
||||||
/* How SyncDataDirectory() should do its job. */
|
/* How SyncDataDirectory() should do its job. */
|
||||||
int recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
|
int recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
|
||||||
|
|
||||||
|
/* How data files should be bulk-extended with zeros. */
|
||||||
|
int file_extend_method = DEFAULT_FILE_EXTEND_METHOD;
|
||||||
|
|
||||||
/* Which kinds of files should be opened with PG_O_DIRECT. */
|
/* Which kinds of files should be opened with PG_O_DIRECT. */
|
||||||
int io_direct_flags;
|
int io_direct_flags;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -602,13 +602,24 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||||
* that decision should be made though? For now just use a cutoff of
|
* that decision should be made though? For now just use a cutoff of
|
||||||
* 8, anything between 4 and 8 worked OK in some local testing.
|
* 8, anything between 4 and 8 worked OK in some local testing.
|
||||||
*/
|
*/
|
||||||
if (numblocks > 8)
|
if (numblocks > 8 &&
|
||||||
|
file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret = 0;
|
||||||
|
|
||||||
ret = FileFallocate(v->mdfd_vfd,
|
#ifdef HAVE_POSIX_FALLOCATE
|
||||||
seekpos, (pgoff_t) BLCKSZ * numblocks,
|
if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE)
|
||||||
WAIT_EVENT_DATA_FILE_EXTEND);
|
{
|
||||||
|
ret = FileFallocate(v->mdfd_vfd,
|
||||||
|
seekpos, (pgoff_t) BLCKSZ * numblocks,
|
||||||
|
WAIT_EVENT_DATA_FILE_EXTEND);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
elog(ERROR, "unsupported file_extend_method: %d",
|
||||||
|
file_extend_method);
|
||||||
|
}
|
||||||
if (ret != 0)
|
if (ret != 0)
|
||||||
{
|
{
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
|
|
|
||||||
|
|
@ -1042,6 +1042,13 @@
|
||||||
options => 'file_copy_method_options',
|
options => 'file_copy_method_options',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{ name => 'file_extend_method', type => 'enum', context => 'PGC_SIGHUP', group => 'RESOURCES_DISK',
|
||||||
|
short_desc => 'Selects the method used for extending data files.',
|
||||||
|
variable => 'file_extend_method',
|
||||||
|
boot_val => 'DEFAULT_FILE_EXTEND_METHOD',
|
||||||
|
options => 'file_extend_method_options',
|
||||||
|
},
|
||||||
|
|
||||||
{ name => 'from_collapse_limit', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER',
|
{ name => 'from_collapse_limit', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER',
|
||||||
short_desc => 'Sets the FROM-list size beyond which subqueries are not collapsed.',
|
short_desc => 'Sets the FROM-list size beyond which subqueries are not collapsed.',
|
||||||
long_desc => 'The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.',
|
long_desc => 'The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.',
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,7 @@
|
||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
#include "storage/bufpage.h"
|
#include "storage/bufpage.h"
|
||||||
#include "storage/copydir.h"
|
#include "storage/copydir.h"
|
||||||
|
#include "storage/fd.h"
|
||||||
#include "storage/io_worker.h"
|
#include "storage/io_worker.h"
|
||||||
#include "storage/large_object.h"
|
#include "storage/large_object.h"
|
||||||
#include "storage/pg_shmem.h"
|
#include "storage/pg_shmem.h"
|
||||||
|
|
@ -491,6 +492,14 @@ static const struct config_enum_entry file_copy_method_options[] = {
|
||||||
{NULL, 0, false}
|
{NULL, 0, false}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const struct config_enum_entry file_extend_method_options[] = {
|
||||||
|
#ifdef HAVE_POSIX_FALLOCATE
|
||||||
|
{"posix_fallocate", FILE_EXTEND_METHOD_POSIX_FALLOCATE, false},
|
||||||
|
#endif
|
||||||
|
{"write_zeros", FILE_EXTEND_METHOD_WRITE_ZEROS, false},
|
||||||
|
{NULL, 0, false}
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Options for enum values stored in other modules
|
* Options for enum values stored in other modules
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -179,6 +179,10 @@
|
||||||
# in kilobytes, or -1 for no limit
|
# in kilobytes, or -1 for no limit
|
||||||
|
|
||||||
#file_copy_method = copy # copy, clone (if supported by OS)
|
#file_copy_method = copy # copy, clone (if supported by OS)
|
||||||
|
#file_extend_method = posix_fallocate # the default is the first option supported
|
||||||
|
# by the operating system:
|
||||||
|
# posix_fallocate (most Unix-like systems)
|
||||||
|
# write_zeros
|
||||||
|
|
||||||
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
|
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
|
||||||
# for NOTIFY / LISTEN queue
|
# for NOTIFY / LISTEN queue
|
||||||
|
|
|
||||||
|
|
@ -55,12 +55,23 @@ typedef int File;
|
||||||
#define IO_DIRECT_WAL 0x02
|
#define IO_DIRECT_WAL 0x02
|
||||||
#define IO_DIRECT_WAL_INIT 0x04
|
#define IO_DIRECT_WAL_INIT 0x04
|
||||||
|
|
||||||
|
enum FileExtendMethod
|
||||||
|
{
|
||||||
|
#ifdef HAVE_POSIX_FALLOCATE
|
||||||
|
FILE_EXTEND_METHOD_POSIX_FALLOCATE,
|
||||||
|
#endif
|
||||||
|
FILE_EXTEND_METHOD_WRITE_ZEROS,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Default to the first available file_extend_method. */
|
||||||
|
#define DEFAULT_FILE_EXTEND_METHOD 0
|
||||||
|
|
||||||
/* GUC parameter */
|
/* GUC parameter */
|
||||||
extern PGDLLIMPORT int max_files_per_process;
|
extern PGDLLIMPORT int max_files_per_process;
|
||||||
extern PGDLLIMPORT bool data_sync_retry;
|
extern PGDLLIMPORT bool data_sync_retry;
|
||||||
extern PGDLLIMPORT int recovery_init_sync_method;
|
extern PGDLLIMPORT int recovery_init_sync_method;
|
||||||
extern PGDLLIMPORT int io_direct_flags;
|
extern PGDLLIMPORT int io_direct_flags;
|
||||||
|
extern PGDLLIMPORT int file_extend_method;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is private to fd.c, but exported for save/restore_backend_variables()
|
* This is private to fd.c, but exported for save/restore_backend_variables()
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue