aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2024-02-28 17:05:31 +0100
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2024-02-28 17:05:31 +0100
commit53c2a97a92665be6bd7d70bd62ae6158fe4db96e (patch)
tree88d853f098fe925024b82e72f2beea523e24cbe6 /src/include
parent1c1eec0f2d88b7e823af959103b2100da493caa9 (diff)
downloadpostgresql-53c2a97a92665be6bd7d70bd62ae6158fe4db96e.tar.gz
postgresql-53c2a97a92665be6bd7d70bd62ae6158fe4db96e.zip
Improve performance of subsystems on top of SLRU
More precisely, what we do here is make the SLRU cache sizes configurable with new GUCs, so that sites with high concurrency and big ranges of transactions in flight (resp. multixacts/subtransactions) can benefit from bigger caches. In order for this to work with good performance, two additional changes are made: 1. The cache is divided into "banks" (to borrow terminology from CPU caches), and algorithms such as eviction buffer search only affect one specific bank. This forestalls the problem that linear searching for a specific buffer across the whole cache takes too long: we only have to search the specific bank, whose size is small. This work is authored by Andrey Borodin. 2. Change the locking regime for the SLRU banks, so that each bank uses a separate LWLock. This allows for increased scalability. This work is authored by Dilip Kumar. (A part of this was previously committed as d172b717c6f4.) Special care is taken so that the algorithms that can potentially traverse more than one bank release one bank's lock before acquiring the next. This should happen rarely, but particularly clog.c's group commit feature needed code adjustment to cope with this. I (Álvaro) also added lots of comments to make sure the design is sound. The new GUCs match the names introduced by bcdfa5f2e2f2 in the pg_stat_slru view. The default values for these parameters are similar to the previous sizes of each SLRU. commit_ts, clog and subtrans accept a value of 0, which means the cache is sized automatically by dividing shared_buffers by 512 (so 2MB for every 1GB of shared_buffers), with a cap of 8MB. (A new slru.c function SimpleLruAutotuneBuffers() was added to support this.) The cap was previously 1MB for clog, so for sites with more than 512MB of shared memory the total memory used increases, which is likely a good tradeoff. However, other SLRUs (notably multixact ones) retain smaller sizes and don't support a configured value of 0.
These values based on shared_buffers may need to be revisited, but that's an easy change. There was some resistance to adding these new GUCs: it would be better to adjust to memory pressure automatically somehow, for example by stealing memory from shared_buffers (where the caches can grow and shrink naturally). However, doing that seems to be a much larger project and one which has made virtually no progress in several years, and because this is such a pain point for so many users, here we take the pragmatic approach. Author: Andrey Borodin <x4mmm@yandex-team.ru> Author: Dilip Kumar <dilipbalaut@gmail.com> Reviewed-by: Amul Sul, Gilles Darold, Anastasia Lubennikova, Ivan Lazarev, Robert Haas, Thomas Munro, Tomas Vondra, Yura Sokolov, Васильев Дмитрий (Dmitry Vasiliev). Discussion: https://postgr.es/m/2BEC2B3F-9B61-4C1D-9FB5-5FAB0F05EF86@yandex-team.ru Discussion: https://postgr.es/m/CAFiTN-vzDvNz=ExGXz6gdyjtzGixKSqs0mKHMmaQ8sOSEFZ33A@mail.gmail.com
Diffstat (limited to 'src/include')
-rw-r--r--src/include/access/clog.h1
-rw-r--r--src/include/access/commit_ts.h1
-rw-r--r--src/include/access/multixact.h4
-rw-r--r--src/include/access/slru.h86
-rw-r--r--src/include/access/subtrans.h3
-rw-r--r--src/include/commands/async.h5
-rw-r--r--src/include/miscadmin.h8
-rw-r--r--src/include/storage/lwlock.h7
-rw-r--r--src/include/storage/predicate.h4
-rw-r--r--src/include/utils/guc_hooks.h11
10 files changed, 87 insertions, 43 deletions
diff --git a/src/include/access/clog.h b/src/include/access/clog.h
index becc365cb01..8e62917e498 100644
--- a/src/include/access/clog.h
+++ b/src/include/access/clog.h
@@ -40,7 +40,6 @@ extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status, XLogRecPtr lsn);
extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn);
-extern Size CLOGShmemBuffers(void);
extern Size CLOGShmemSize(void);
extern void CLOGShmemInit(void);
extern void BootStrapCLOG(void);
diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h
index 9c6f3a35ca2..82d3aa86275 100644
--- a/src/include/access/commit_ts.h
+++ b/src/include/access/commit_ts.h
@@ -27,7 +27,6 @@ extern bool TransactionIdGetCommitTsData(TransactionId xid,
extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
RepOriginId *nodeid);
-extern Size CommitTsShmemBuffers(void);
extern Size CommitTsShmemSize(void);
extern void CommitTsShmemInit(void);
extern void BootStrapCommitTs(void);
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 233f67dbcc1..7ffd256c744 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -29,10 +29,6 @@
#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
-/* Number of SLRU buffers to use for multixact */
-#define NUM_MULTIXACTOFFSET_BUFFERS 8
-#define NUM_MULTIXACTMEMBER_BUFFERS 16
-
/*
* Possible multixact lock modes ("status"). The first four modes are for
* tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 21094886547..8a8d1918733 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -17,6 +17,11 @@
#include "storage/lwlock.h"
#include "storage/sync.h"
+/*
+ * To avoid overflowing internal arithmetic and the size_t data type, the
+ * number of buffers must not exceed this number.
+ */
+#define SLRU_MAX_ALLOWED_BUFFERS ((1024 * 1024 * 1024) / BLCKSZ)
/*
* Define SLRU segment size. A page is the same BLCKSZ as is used everywhere
@@ -55,8 +60,6 @@ typedef enum
*/
typedef struct SlruSharedData
{
- LWLock *ControlLock;
-
/* Number of buffers managed by this SLRU structure */
int num_slots;
@@ -69,30 +72,41 @@ typedef struct SlruSharedData
bool *page_dirty;
int64 *page_number;
int *page_lru_count;
+
+ /* The buffer_locks protect the I/O on each buffer slot */
LWLockPadded *buffer_locks;
+ /* Locks protecting access to the in-memory buffer slots of each SLRU bank. */
+ LWLockPadded *bank_locks;
+
+ /*----------
+ * A bank-wise LRU counter is maintained because we do a victim buffer
+ * search within a bank. Furthermore, manipulating an individual bank
+ * counter avoids frequent cache invalidation since we update it every time
+ * we access the page.
+ *
+ * We mark a page "most recently used" by setting
+ * page_lru_count[slotno] = ++bank_cur_lru_count[bankno];
+ * The oldest page in the bank is therefore the one with the highest value
+ * of
+ * bank_cur_lru_count[bankno] - page_lru_count[slotno]
+ * The counts will eventually wrap around, but this calculation still
+ * works as long as no page's age exceeds INT_MAX counts.
+ *----------
+ */
+ int *bank_cur_lru_count;
+
/*
* Optional array of WAL flush LSNs associated with entries in the SLRU
* pages. If not zero/NULL, we must flush WAL before writing pages (true
- * for pg_xact, false for multixact, pg_subtrans, pg_notify). group_lsn[]
- * has lsn_groups_per_page entries per buffer slot, each containing the
+ * for pg_xact, false for everything else). group_lsn[] has
+ * lsn_groups_per_page entries per buffer slot, each containing the
* highest LSN known for a contiguous group of SLRU entries on that slot's
* page.
*/
XLogRecPtr *group_lsn;
int lsn_groups_per_page;
- /*----------
- * We mark a page "most recently used" by setting
- * page_lru_count[slotno] = ++cur_lru_count;
- * The oldest page is therefore the one with the highest value of
- * cur_lru_count - page_lru_count[slotno]
- * The counts will eventually wrap around, but this calculation still
- * works as long as no page's age exceeds INT_MAX counts.
- *----------
- */
- int cur_lru_count;
-
/*
* latest_page_number is the page number of the current end of the log;
* this is not critical data, since we use it only to avoid swapping out
@@ -115,6 +129,19 @@ typedef struct SlruCtlData
SlruShared shared;
/*
+ * Bitmask to determine bank number from page number.
+ */
+ bits16 bank_mask;
+
+ /*
+ * If true, use long segment filenames formed from lower 48 bits of the
+ * segment number, e.g. pg_xact/000000001234. Otherwise, use short
+ * filenames formed from lower 16 bits of the segment number e.g.
+ * pg_xact/1234.
+ */
+ bool long_segment_names;
+
+ /*
* Which sync handler function to use when handing sync requests over to
* the checkpointer. SYNC_HANDLER_NONE to disable fsync (eg pg_notify).
*/
@@ -133,27 +160,35 @@ typedef struct SlruCtlData
bool (*PagePrecedes) (int64, int64);
/*
- * If true, use long segment filenames formed from lower 48 bits of the
- * segment number, e.g. pg_xact/000000001234. Otherwise, use short
- * filenames formed from lower 16 bits of the segment number e.g.
- * pg_xact/1234.
- */
- bool long_segment_names;
-
- /*
* Dir is set during SimpleLruInit and does not change thereafter. Since
* it's always the same, it doesn't need to be in shared memory.
*/
char Dir[64];
+
} SlruCtlData;
typedef SlruCtlData *SlruCtl;
+/*
+ * Return the LWLock guarding the SLRU bank that the given page maps to.
+ *
+ * The bank number is pageno masked with ctl->bank_mask. This lock needs to
+ * be acquired to access the SLRU buffer slots in the respective bank.
+ */
+static inline LWLock *
+SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
+{
+ int bankno;
+
+ bankno = pageno & ctl->bank_mask;
+ return &(ctl->shared->bank_locks[bankno].lock);
+}
extern Size SimpleLruShmemSize(int nslots, int nlsns);
+extern int SimpleLruAutotuneBuffers(int divisor, int max);
extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
- LWLock *ctllock, const char *subdir, int tranche_id,
- SyncRequestHandler sync_handler,
+ const char *subdir, int buffer_tranche_id,
+ int bank_tranche_id, SyncRequestHandler sync_handler,
bool long_segment_names);
extern int SimpleLruZeroPage(SlruCtl ctl, int64 pageno);
extern int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok,
@@ -182,5 +217,6 @@ extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename,
int64 segpage, void *data);
extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage,
void *data);
+extern bool check_slru_buffers(const char *name, int *newval);
#endif /* SLRU_H */
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
index b0d2ad57e55..e2213cf3fd2 100644
--- a/src/include/access/subtrans.h
+++ b/src/include/access/subtrans.h
@@ -11,9 +11,6 @@
#ifndef SUBTRANS_H
#define SUBTRANS_H
-/* Number of SLRU buffers to use for subtrans */
-#define NUM_SUBTRANS_BUFFERS 32
-
extern void SubTransSetParent(TransactionId xid, TransactionId parent);
extern TransactionId SubTransGetParent(TransactionId xid);
extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
index 80b8583421b..78daa25fa08 100644
--- a/src/include/commands/async.h
+++ b/src/include/commands/async.h
@@ -15,11 +15,6 @@
#include <signal.h>
-/*
- * The number of SLRU page buffers we use for the notification queue.
- */
-#define NUM_NOTIFY_BUFFERS 8
-
extern PGDLLIMPORT bool Trace_notify;
extern PGDLLIMPORT int max_notify_queue_pages;
extern PGDLLIMPORT volatile sig_atomic_t notifyInterruptPending;
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 612fb5f42e0..756d144c323 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -179,6 +179,14 @@ extern PGDLLIMPORT int MaxConnections;
extern PGDLLIMPORT int max_worker_processes;
extern PGDLLIMPORT int max_parallel_workers;
+extern PGDLLIMPORT int commit_timestamp_buffers;
+extern PGDLLIMPORT int multixact_member_buffers;
+extern PGDLLIMPORT int multixact_offset_buffers;
+extern PGDLLIMPORT int notify_buffers;
+extern PGDLLIMPORT int serializable_buffers;
+extern PGDLLIMPORT int subtransaction_buffers;
+extern PGDLLIMPORT int transaction_buffers;
+
extern PGDLLIMPORT int MyProcPid;
extern PGDLLIMPORT pg_time_t MyStartTime;
extern PGDLLIMPORT TimestampTz MyStartTimestamp;
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 50a65e046d8..10bea8c5950 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -209,6 +209,13 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_LAUNCHER_HASH,
LWTRANCHE_DSM_REGISTRY_DSA,
LWTRANCHE_DSM_REGISTRY_HASH,
+ LWTRANCHE_COMMITTS_SLRU,
+ LWTRANCHE_MULTIXACTMEMBER_SLRU,
+ LWTRANCHE_MULTIXACTOFFSET_SLRU,
+ LWTRANCHE_NOTIFY_SLRU,
+ LWTRANCHE_SERIAL_SLRU,
+ LWTRANCHE_SUBTRANS_SLRU,
+ LWTRANCHE_XACT_SLRU,
LWTRANCHE_FIRST_USER_DEFINED,
} BuiltinTrancheIds;
diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h
index a7edd38fa9b..14ee9b94a2f 100644
--- a/src/include/storage/predicate.h
+++ b/src/include/storage/predicate.h
@@ -26,10 +26,6 @@ extern PGDLLIMPORT int max_predicate_locks_per_xact;
extern PGDLLIMPORT int max_predicate_locks_per_relation;
extern PGDLLIMPORT int max_predicate_locks_per_page;
-
-/* Number of SLRU buffers to use for Serial SLRU */
-#define NUM_SERIAL_BUFFERS 16
-
/*
* A handle used for sharing SERIALIZABLEXACT objects between the participants
* in a parallel query.
diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h
index 339c490300e..c8a7aa9a112 100644
--- a/src/include/utils/guc_hooks.h
+++ b/src/include/utils/guc_hooks.h
@@ -46,6 +46,8 @@ extern bool check_client_connection_check_interval(int *newval, void **extra,
extern bool check_client_encoding(char **newval, void **extra, GucSource source);
extern void assign_client_encoding(const char *newval, void *extra);
extern bool check_cluster_name(char **newval, void **extra, GucSource source);
+extern bool check_commit_ts_buffers(int *newval, void **extra,
+ GucSource source);
extern const char *show_data_directory_mode(void);
extern bool check_datestyle(char **newval, void **extra, GucSource source);
extern void assign_datestyle(const char *newval, void *extra);
@@ -91,6 +93,11 @@ extern bool check_max_worker_processes(int *newval, void **extra,
GucSource source);
extern bool check_max_stack_depth(int *newval, void **extra, GucSource source);
extern void assign_max_stack_depth(int newval, void *extra);
+extern bool check_multixact_member_buffers(int *newval, void **extra,
+ GucSource source);
+extern bool check_multixact_offset_buffers(int *newval, void **extra,
+ GucSource source);
+extern bool check_notify_buffers(int *newval, void **extra, GucSource source);
extern bool check_primary_slot_name(char **newval, void **extra,
GucSource source);
extern bool check_random_seed(double *newval, void **extra, GucSource source);
@@ -122,12 +129,15 @@ extern void assign_role(const char *newval, void *extra);
extern const char *show_role(void);
extern bool check_search_path(char **newval, void **extra, GucSource source);
extern void assign_search_path(const char *newval, void *extra);
+extern bool check_serial_buffers(int *newval, void **extra, GucSource source);
extern bool check_session_authorization(char **newval, void **extra, GucSource source);
extern void assign_session_authorization(const char *newval, void *extra);
extern void assign_session_replication_role(int newval, void *extra);
extern void assign_stats_fetch_consistency(int newval, void *extra);
extern bool check_ssl(bool *newval, void **extra, GucSource source);
extern bool check_stage_log_stats(bool *newval, void **extra, GucSource source);
+extern bool check_subtrans_buffers(int *newval, void **extra,
+ GucSource source);
extern bool check_synchronous_standby_names(char **newval, void **extra,
GucSource source);
extern void assign_synchronous_standby_names(const char *newval, void *extra);
@@ -152,6 +162,7 @@ extern const char *show_timezone(void);
extern bool check_timezone_abbreviations(char **newval, void **extra,
GucSource source);
extern void assign_timezone_abbreviations(const char *newval, void *extra);
+extern bool check_transaction_buffers(int *newval, void **extra, GucSource source);
extern bool check_transaction_deferrable(bool *newval, void **extra, GucSource source);
extern bool check_transaction_isolation(int *newval, void **extra, GucSource source);
extern bool check_transaction_read_only(bool *newval, void **extra, GucSource source);