about summary refs log tree commit diff
path: root/src/backend/storage
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage')
-rw-r--r-- src/backend/storage/aio/README.md | 5
-rw-r--r-- src/backend/storage/buffer/bufmgr.c | 13
-rw-r--r-- src/backend/storage/buffer/localbuf.c | 9
-rw-r--r-- src/backend/storage/ipc/latch.c | 8
-rw-r--r-- src/backend/storage/ipc/procarray.c | 20
-rw-r--r-- src/backend/storage/ipc/procsignal.c | 6
-rw-r--r-- src/backend/storage/lmgr/generate-lwlocknames.pl | 113
-rw-r--r-- src/backend/storage/lmgr/lwlock.c | 48
8 files changed, 127 insertions, 95 deletions
diff --git a/src/backend/storage/aio/README.md b/src/backend/storage/aio/README.md
index f10b5c7e31e..72ae3b3737d 100644
--- a/src/backend/storage/aio/README.md
+++ b/src/backend/storage/aio/README.md
@@ -94,7 +94,7 @@ pgaio_io_register_callbacks(ioh, PGAIO_HCB_SHARED_BUFFER_READV, 0);
*
* In this example we're reading only a single buffer, hence the 1.
*/
-pgaio_io_set_handle_data_32(ioh, (uint32 *) buffer, 1);
+pgaio_io_set_handle_data_32(ioh, (uint32 *) &buffer, 1);
/*
* Pass the AIO handle to lower-level function. When operating on the level of
@@ -119,8 +119,9 @@ pgaio_io_set_handle_data_32(ioh, (uint32 *) buffer, 1);
* e.g. due to reaching a limit on the number of unsubmitted IOs, and even
* complete before smgrstartreadv() returns.
*/
+void *page = BufferGetBlock(buffer);
smgrstartreadv(ioh, operation->smgr, forknum, blkno,
- BufferGetBlock(buffer), 1);
+ &page, 1);
/*
* To benefit from AIO, it is beneficial to perform other work, including
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6afdd28dba6..67431208e7f 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2743,12 +2743,10 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
* because mdread doesn't complain about reads beyond EOF (when
* zero_damaged_pages is ON) and so a previous attempt to read a block
* beyond EOF could have left a "valid" zero-filled buffer.
- * Unfortunately, we have also seen this case occurring because of
- * buggy Linux kernels that sometimes return an lseek(SEEK_END) result
- * that doesn't account for a recent write. In that situation, the
- * pre-existing buffer would contain valid data that we don't want to
- * overwrite. Since the legitimate cases should always have left a
- * zero-filled buffer, complain if not PageIsNew.
+ *
+ * This has also been observed when the relation was overwritten by an
+ * external process. Since the legitimate cases should always have left
+ * a zero-filled buffer, complain if not PageIsNew.
*/
if (existing_id >= 0)
{
@@ -2778,8 +2776,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
ereport(ERROR,
(errmsg("unexpected data beyond EOF in block %u of relation %s",
existing_hdr->tag.blockNum,
- relpath(bmr.smgr->smgr_rlocator, fork).str),
- errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
+ relpath(bmr.smgr->smgr_rlocator, fork).str)));
/*
* We *must* do smgr[zero]extend before succeeding, else the page
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 3da9c41ee1d..3c0d20f4659 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -932,10 +932,11 @@ GetLocalBufferStorage(void)
num_bufs = Min(num_bufs, MaxAllocSize / BLCKSZ);
/* Buffers should be I/O aligned. */
- cur_block = (char *)
- TYPEALIGN(PG_IO_ALIGN_SIZE,
- MemoryContextAlloc(LocalBufferContext,
- num_bufs * BLCKSZ + PG_IO_ALIGN_SIZE));
+ cur_block = MemoryContextAllocAligned(LocalBufferContext,
+ num_bufs * BLCKSZ,
+ PG_IO_ALIGN_SIZE,
+ 0);
+
next_buf_in_block = 0;
num_bufs_in_block = num_bufs;
}
diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index c6aefd2f688..beadeb5e46a 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -187,9 +187,11 @@ WaitLatch(Latch *latch, int wakeEvents, long timeout,
if (!(wakeEvents & WL_LATCH_SET))
latch = NULL;
ModifyWaitEvent(LatchWaitSet, LatchWaitSetLatchPos, WL_LATCH_SET, latch);
- ModifyWaitEvent(LatchWaitSet, LatchWaitSetPostmasterDeathPos,
- (wakeEvents & (WL_EXIT_ON_PM_DEATH | WL_POSTMASTER_DEATH)),
- NULL);
+
+ if (IsUnderPostmaster)
+ ModifyWaitEvent(LatchWaitSet, LatchWaitSetPostmasterDeathPos,
+ (wakeEvents & (WL_EXIT_ON_PM_DEATH | WL_POSTMASTER_DEATH)),
+ NULL);
if (WaitEventSetWait(LatchWaitSet,
(wakeEvents & WL_TIMEOUT) ? timeout : -1,
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 2418967def6..bf987aed8d3 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -2814,8 +2814,10 @@ GetRunningTransactionData(void)
*
* Similar to GetSnapshotData but returns just oldestActiveXid. We include
* all PGPROCs with an assigned TransactionId, even VACUUM processes.
- * We look at all databases, though there is no need to include WALSender
- * since this has no effect on hot standby conflicts.
+ *
+ * If allDbs is true, we look at all databases, though there is no need to
+ * include WALSender since this has no effect on hot standby conflicts. If
+ * allDbs is false, skip processes attached to other databases.
*
* This is never executed during recovery so there is no need to look at
* KnownAssignedXids.
@@ -2823,9 +2825,12 @@ GetRunningTransactionData(void)
* We don't worry about updating other counters, we want to keep this as
* simple as possible and leave GetSnapshotData() as the primary code for
* that bookkeeping.
+ *
+ * If inCommitOnly is true, only transactions that are currently in the
+ * commit critical section are considered when computing oldestActiveXid.
*/
TransactionId
-GetOldestActiveTransactionId(void)
+GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs)
{
ProcArrayStruct *arrayP = procArray;
TransactionId *other_xids = ProcGlobal->xids;
@@ -2852,6 +2857,8 @@ GetOldestActiveTransactionId(void)
for (index = 0; index < arrayP->numProcs; index++)
{
TransactionId xid;
+ int pgprocno = arrayP->pgprocnos[index];
+ PGPROC *proc = &allProcs[pgprocno];
/* Fetch xid just once - see GetNewTransactionId */
xid = UINT32_ACCESS_ONCE(other_xids[index]);
@@ -2859,6 +2866,13 @@ GetOldestActiveTransactionId(void)
if (!TransactionIdIsNormal(xid))
continue;
+ if (inCommitOnly &&
+ (proc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0)
+ continue;
+
+ if (!allDbs && proc->databaseId != MyDatabaseId)
+ continue;
+
if (TransactionIdPrecedes(xid, oldestRunningXid))
oldestRunningXid = xid;
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index a9bb540b55a..087821311cc 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -728,7 +728,11 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
void
SendCancelRequest(int backendPID, const uint8 *cancel_key, int cancel_key_len)
{
- Assert(backendPID != 0);
+ if (backendPID == 0)
+ {
+ ereport(LOG, (errmsg("invalid cancel request with PID 0")));
+ return;
+ }
/*
* See if we have a matching backend. Reading the pss_pid and
diff --git a/src/backend/storage/lmgr/generate-lwlocknames.pl b/src/backend/storage/lmgr/generate-lwlocknames.pl
index 4441b7cba0c..cd3e43c448a 100644
--- a/src/backend/storage/lmgr/generate-lwlocknames.pl
+++ b/src/backend/storage/lmgr/generate-lwlocknames.pl
@@ -10,7 +10,6 @@ use Getopt::Long;
my $output_path = '.';
my $lastlockidx = -1;
-my $continue = "\n";
GetOptions('outdir:s' => \$output_path);
@@ -28,18 +27,24 @@ print $h "/* there is deliberately not an #ifndef LWLOCKNAMES_H here */\n\n";
#
-# First, record the predefined LWLocks listed in wait_event_names.txt. We'll
-# cross-check those with the ones in lwlocklist.h.
+# First, record the predefined LWLocks and built-in tranches listed in
+# wait_event_names.txt. We'll cross-check those with the ones in lwlocklist.h.
#
+my @wait_event_tranches;
my @wait_event_lwlocks;
my $record_lwlocks = 0;
+my $in_tranches = 0;
while (<$wait_event_names>)
{
chomp;
# Check for end marker.
- last if /^# END OF PREDEFINED LWLOCKS/;
+ if (/^# END OF PREDEFINED LWLOCKS/)
+ {
+ $in_tranches = 1;
+ next;
+ }
# Skip comments and empty lines.
next if /^#/;
@@ -55,13 +60,29 @@ while (<$wait_event_names>)
# Go to the next line if we are not yet recording LWLocks.
next if not $record_lwlocks;
+ # Stop recording if we reach another section.
+ last if /^Section:/;
+
# Record the LWLock.
(my $waiteventname, my $waitevendocsentence) = split(/\t/, $_);
- push(@wait_event_lwlocks, $waiteventname);
+
+ if ($in_tranches)
+ {
+ push(@wait_event_tranches, $waiteventname);
+ }
+ else
+ {
+ push(@wait_event_lwlocks, $waiteventname);
+ }
}
+#
+# While gathering the list of predefined LWLocks, cross-check the lists in
+# lwlocklist.h with the wait events we just recorded.
+#
my $in_comment = 0;
-my $i = 0;
+my $lwlock_count = 0;
+my $tranche_count = 0;
while (<$lwlocklist>)
{
chomp;
@@ -82,40 +103,72 @@ while (<$lwlocklist>)
next;
}
- die "unable to parse lwlocklist.h line \"$_\""
- unless /^PG_LWLOCK\((\d+),\s+(\w+)\)$/;
+ #
+ # Gather list of predefined LWLocks and cross-check with the wait events.
+ #
+ if (/^PG_LWLOCK\((\d+),\s+(\w+)\)$/)
+ {
+ my ($lockidx, $lockname) = ($1, $2);
- (my $lockidx, my $lockname) = ($1, $2);
+ die "lwlocklist.h not in order" if $lockidx < $lastlockidx;
+ die "lwlocklist.h has duplicates" if $lockidx == $lastlockidx;
- die "lwlocklist.h not in order" if $lockidx < $lastlockidx;
- die "lwlocklist.h has duplicates" if $lockidx == $lastlockidx;
+ die "$lockname defined in lwlocklist.h but missing from "
+ . "wait_event_names.txt"
+ if $lwlock_count >= scalar @wait_event_lwlocks;
+ die "lists of predefined LWLocks do not match (first mismatch at "
+ . "$wait_event_lwlocks[$lwlock_count] in wait_event_names.txt and "
+ . "$lockname in lwlocklist.h)"
+ if $wait_event_lwlocks[$lwlock_count] ne $lockname;
- die "$lockname defined in lwlocklist.h but missing from "
- . "wait_event_names.txt"
- if $i >= scalar @wait_event_lwlocks;
- die "lists of predefined LWLocks do not match (first mismatch at "
- . "$wait_event_lwlocks[$i] in wait_event_names.txt and $lockname in "
- . "lwlocklist.h)"
- if $wait_event_lwlocks[$i] ne $lockname;
- $i++;
+ $lwlock_count++;
- while ($lastlockidx < $lockidx - 1)
+ while ($lastlockidx < $lockidx - 1)
+ {
+ ++$lastlockidx;
+ }
+ $lastlockidx = $lockidx;
+
+ # Add a "Lock" suffix to each lock name, as the C code depends on that.
+ printf $h "#define %-32s (&MainLWLockArray[$lockidx].lock)\n",
+ $lockname . "Lock";
+
+ next;
+ }
+
+ #
+ # Cross-check the built-in LWLock tranches with the wait events.
+ #
+ if (/^PG_LWLOCKTRANCHE\((\w+),\s+(\w+)\)$/)
{
- ++$lastlockidx;
- $continue = ",\n";
+ my ($tranche_id, $tranche_name) = ($1, $2);
+
+ die "$tranche_name defined in lwlocklist.h but missing from "
+ . "wait_event_names.txt"
+ if $tranche_count >= scalar @wait_event_tranches;
+ die
+ "lists of built-in LWLock tranches do not match (first mismatch at "
+ . "$wait_event_tranches[$tranche_count] in wait_event_names.txt and "
+ . "$tranche_name in lwlocklist.h)"
+ if $wait_event_tranches[$tranche_count] ne $tranche_name;
+
+ $tranche_count++;
+
+ next;
}
- $lastlockidx = $lockidx;
- $continue = ",\n";
- # Add a "Lock" suffix to each lock name, as the C code depends on that
- printf $h "#define %-32s (&MainLWLockArray[$lockidx].lock)\n",
- $lockname . "Lock";
+ die "unable to parse lwlocklist.h line \"$_\"";
}
die
- "$wait_event_lwlocks[$i] defined in wait_event_names.txt but missing from "
- . "lwlocklist.h"
- if $i < scalar @wait_event_lwlocks;
+ "$wait_event_lwlocks[$lwlock_count] defined in wait_event_names.txt but "
+ . " missing from lwlocklist.h"
+ if $lwlock_count < scalar @wait_event_lwlocks;
+
+die
+ "$wait_event_tranches[$tranche_count] defined in wait_event_names.txt but "
+ . "missing from lwlocklist.h"
+ if $tranche_count < scalar @wait_event_tranches;
print $h "\n";
printf $h "#define NUM_INDIVIDUAL_LWLOCKS %s\n", $lastlockidx + 1;
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 46f44bc4511..ec9c345ffdf 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -122,9 +122,8 @@ StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
* own tranche. We absorb the names of these tranches from there into
* BuiltinTrancheNames here.
*
- * 2. There are some predefined tranches for built-in groups of locks.
- * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
- * appear in BuiltinTrancheNames[] below.
+ * 2. There are some predefined tranches for built-in groups of locks defined
+ * in lwlocklist.h. We absorb the names of these tranches, too.
*
* 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
* or LWLockRegisterTranche. The names of these that are known in the current
@@ -135,49 +134,10 @@ StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
*/
static const char *const BuiltinTrancheNames[] = {
#define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
+#define PG_LWLOCKTRANCHE(id, lockname) [LWTRANCHE_##id] = CppAsString(lockname),
#include "storage/lwlocklist.h"
#undef PG_LWLOCK
- [LWTRANCHE_XACT_BUFFER] = "XactBuffer",
- [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer",
- [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer",
- [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer",
- [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer",
- [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer",
- [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer",
- [LWTRANCHE_WAL_INSERT] = "WALInsert",
- [LWTRANCHE_BUFFER_CONTENT] = "BufferContent",
- [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState",
- [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO",
- [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath",
- [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping",
- [LWTRANCHE_LOCK_MANAGER] = "LockManager",
- [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager",
- [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin",
- [LWTRANCHE_PARALLEL_BTREE_SCAN] = "ParallelBtreeScan",
- [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA",
- [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA",
- [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType",
- [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod",
- [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore",
- [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap",
- [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend",
- [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList",
- [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA",
- [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash",
- [LWTRANCHE_PGSTATS_DATA] = "PgStatsData",
- [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA",
- [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash",
- [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
- [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
- [LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU",
- [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
- [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
- [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
- [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
- [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
- [LWTRANCHE_XACT_SLRU] = "XactSLRU",
- [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
- [LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
+#undef PG_LWLOCKTRANCHE
};
StaticAssertDecl(lengthof(BuiltinTrancheNames) ==