postgresql/src/include/access/multixact.h
Alvaro Herrera 53c2a97a92
Improve performance of subsystems on top of SLRU
More precisely, what we do here is make the SLRU cache sizes
configurable with new GUCs, so that sites with high concurrency and big
ranges of transactions in flight (resp. multixacts/subtransactions) can
benefit from bigger caches.  In order for this to work with good
performance, two additional changes are made:

1. the cache is divided in "banks" (to borrow terminology from CPU
   caches), and algorithms such as eviction buffer search only affect
   one specific bank.  This forestalls the problem that linear searching
   for a specific buffer across the whole cache takes too long: we only
   have to search the specific bank, whose size is small.  This work is
   authored by Andrey Borodin.

2. Change the locking regime for the SLRU banks, so that each bank uses
   a separate LWLock.  This allows for increased scalability.  This work
   is authored by Dilip Kumar.  (A part of this was previously committed as
   d172b717c6f4.)

Special care is taken so that the algorithms that can potentially
traverse more than one bank release one bank's lock before acquiring the
next.  This should happen rarely, but particularly clog.c's group commit
feature needed code adjustment to cope with this.  I (Álvaro) also added
lots of comments to make sure the design is sound.

The new GUCs match the names introduced by bcdfa5f2e2 in the
pg_stat_slru view.

The default values for these parameters are similar to the previous
sizes of each SLRU.  commit_ts, clog and subtrans accept value 0, which
means to adjust by dividing shared_buffers by 512 (so 2MB for every 1GB
of shared_buffers), with a cap of 8MB.  (A new slru.c function
SimpleLruAutotuneBuffers() was added to support this.)  The cap was
previously 1MB for clog, so for sites with more than 512MB of shared
memory the total memory used increases, which is likely a good tradeoff.
However, other SLRUs (notably multixact ones) retain smaller sizes and
don't support a configured value of 0.  These values based on
shared_buffers may need to be revisited, but that's an easy change.

There was some resistance to adding these new GUCs: it would be better
to adjust to memory pressure automatically somehow, for example by
stealing memory from shared_buffers (where the caches can grow and
shrink naturally).  However, doing that seems to be a much larger
project and one which has made virtually no progress in several years,
and because this is such a pain point for so many users, here we take
the pragmatic approach.

Author: Andrey Borodin <x4mmm@yandex-team.ru>
Author: Dilip Kumar <dilipbalaut@gmail.com>
Reviewed-by: Amul Sul, Gilles Darold, Anastasia Lubennikova,
	Ivan Lazarev, Robert Haas, Thomas Munro, Tomas Vondra,
	Yura Sokolov, Васильев Дмитрий (Dmitry Vasiliev).
Discussion: https://postgr.es/m/2BEC2B3F-9B61-4C1D-9FB5-5FAB0F05EF86@yandex-team.ru
Discussion: https://postgr.es/m/CAFiTN-vzDvNz=ExGXz6gdyjtzGixKSqs0mKHMmaQ8sOSEFZ33A@mail.gmail.com
2024-02-28 17:05:31 +01:00

161 lines
5.3 KiB
C

/*
* multixact.h
*
* PostgreSQL multi-transaction-log manager
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/access/multixact.h
*/
#ifndef MULTIXACT_H
#define MULTIXACT_H
#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/sync.h"
/*
* The first two MultiXactId values are reserved to store the truncation Xid
* and epoch of the first segment, so we start assigning multixact values from
* 2.
*/
#define InvalidMultiXactId ((MultiXactId) 0)
#define FirstMultiXactId ((MultiXactId) 1)
#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF)
#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
/*
* Possible multixact lock modes ("status"). The first four modes are for
* tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
* next two are used for update and delete modes.
*/
typedef enum
{
MultiXactStatusForKeyShare = 0x00,
MultiXactStatusForShare = 0x01,
MultiXactStatusForNoKeyUpdate = 0x02,
MultiXactStatusForUpdate = 0x03,
/* an update that doesn't touch "key" columns */
MultiXactStatusNoKeyUpdate = 0x04,
/* other updates, and delete */
MultiXactStatusUpdate = 0x05,
} MultiXactStatus;
#define MaxMultiXactStatus MultiXactStatusUpdate
/* does a status value correspond to a tuple update? */
#define ISUPDATE_from_mxstatus(status) \
((status) > MultiXactStatusForUpdate)
typedef struct MultiXactMember
{
TransactionId xid;
MultiXactStatus status;
} MultiXactMember;
/* ----------------
* multixact-related XLOG entries
* ----------------
*/
#define XLOG_MULTIXACT_ZERO_OFF_PAGE 0x00
#define XLOG_MULTIXACT_ZERO_MEM_PAGE 0x10
#define XLOG_MULTIXACT_CREATE_ID 0x20
#define XLOG_MULTIXACT_TRUNCATE_ID 0x30
typedef struct xl_multixact_create
{
MultiXactId mid; /* new MultiXact's ID */
MultiXactOffset moff; /* its starting offset in members file */
int32 nmembers; /* number of member XIDs */
MultiXactMember members[FLEXIBLE_ARRAY_MEMBER];
} xl_multixact_create;
#define SizeOfMultiXactCreate (offsetof(xl_multixact_create, members))
typedef struct xl_multixact_truncate
{
Oid oldestMultiDB;
/* to-be-truncated range of multixact offsets */
MultiXactId startTruncOff; /* just for completeness' sake */
MultiXactId endTruncOff;
/* to-be-truncated range of multixact members */
MultiXactOffset startTruncMemb;
MultiXactOffset endTruncMemb;
} xl_multixact_truncate;
#define SizeOfMultiXactTruncate (sizeof(xl_multixact_truncate))
extern MultiXactId MultiXactIdCreate(TransactionId xid1,
MultiXactStatus status1, TransactionId xid2,
MultiXactStatus status2);
extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid,
MultiXactStatus status);
extern MultiXactId MultiXactIdCreateFromMembers(int nmembers,
MultiXactMember *members);
extern MultiXactId ReadNextMultiXactId(void);
extern void ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next);
extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly);
extern void MultiXactIdSetOldestMember(void);
extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
bool from_pgupgrade, bool isLockOnly);
extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1,
MultiXactId multi2);
extern int multixactoffsetssyncfiletag(const FileTag *ftag, char *path);
extern int multixactmemberssyncfiletag(const FileTag *ftag, char *path);
extern void AtEOXact_MultiXact(void);
extern void AtPrepare_MultiXact(void);
extern void PostPrepare_MultiXact(TransactionId xid);
extern Size MultiXactShmemSize(void);
extern void MultiXactShmemInit(void);
extern void BootStrapMultiXact(void);
extern void StartupMultiXact(void);
extern void TrimMultiXact(void);
extern void SetMultiXactIdLimit(MultiXactId oldest_datminmxid,
Oid oldest_datoid,
bool is_startup);
extern void MultiXactGetCheckptMulti(bool is_shutdown,
MultiXactId *nextMulti,
MultiXactOffset *nextMultiOffset,
MultiXactId *oldestMulti,
Oid *oldestMultiDB);
extern void CheckPointMultiXact(void);
extern MultiXactId GetOldestMultiXactId(void);
extern void TruncateMultiXact(MultiXactId newOldestMulti,
Oid newOldestMultiDB);
extern void MultiXactSetNextMXact(MultiXactId nextMulti,
MultiXactOffset nextMultiOffset);
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
extern int MultiXactMemberFreezeThreshold(void);
extern void multixact_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
extern void multixact_twophase_postcommit(TransactionId xid, uint16 info,
void *recdata, uint32 len);
extern void multixact_twophase_postabort(TransactionId xid, uint16 info,
void *recdata, uint32 len);
extern void multixact_redo(XLogReaderState *record);
extern void multixact_desc(StringInfo buf, XLogReaderState *record);
extern const char *multixact_identify(uint8 info);
extern char *mxid_to_string(MultiXactId multi, int nmembers,
MultiXactMember *members);
#endif /* MULTIXACT_H */