aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/common/toast_compression.c14
-rw-r--r--src/backend/access/heap/heapam_xlog.c7
-rw-r--r--src/backend/access/heap/vacuumlazy.c118
-rw-r--r--src/backend/access/nbtree/nbtpreprocesskeys.c412
-rw-r--r--src/backend/access/nbtree/nbtsearch.c461
-rw-r--r--src/backend/access/nbtree/nbtutils.c293
-rw-r--r--src/backend/access/rmgrdesc/replorigindesc.c2
-rw-r--r--src/backend/access/rmgrdesc/xactdesc.c6
-rw-r--r--src/backend/access/rmgrdesc/xlogdesc.c6
-rw-r--r--src/backend/access/transam/clog.c67
-rw-r--r--src/backend/access/transam/commit_ts.c73
-rw-r--r--src/backend/access/transam/multixact.c136
-rw-r--r--src/backend/access/transam/slru.c25
-rw-r--r--src/backend/access/transam/subtrans.c34
-rw-r--r--src/backend/access/transam/timeline.c4
-rw-r--r--src/backend/access/transam/twophase.c253
-rw-r--r--src/backend/access/transam/xact.c13
-rw-r--r--src/backend/access/transam/xlog.c138
-rw-r--r--src/backend/access/transam/xlogbackup.c8
-rw-r--r--src/backend/access/transam/xloginsert.c12
-rw-r--r--src/backend/access/transam/xlogprefetcher.c16
-rw-r--r--src/backend/access/transam/xlogreader.c62
-rw-r--r--src/backend/access/transam/xlogrecovery.c178
-rw-r--r--src/backend/access/transam/xlogutils.c2
-rw-r--r--src/backend/backup/backup_manifest.c2
-rw-r--r--src/backend/backup/basebackup_copy.c2
-rw-r--r--src/backend/backup/basebackup_incremental.c14
-rw-r--r--src/backend/bootstrap/bootstrap.c2
-rw-r--r--src/backend/catalog/dependency.c11
-rw-r--r--src/backend/catalog/heap.c95
-rw-r--r--src/backend/catalog/index.c8
-rw-r--r--src/backend/catalog/system_views.sql10
-rw-r--r--src/backend/commands/analyze.c26
-rw-r--r--src/backend/commands/cluster.c2
-rw-r--r--src/backend/commands/copy.c42
-rw-r--r--src/backend/commands/copyfromparse.c19
-rw-r--r--src/backend/commands/copyto.c2
-rw-r--r--src/backend/commands/dbcommands.c14
-rw-r--r--src/backend/commands/indexcmds.c42
-rw-r--r--src/backend/commands/matview.c3
-rw-r--r--src/backend/commands/publicationcmds.c4
-rw-r--r--src/backend/commands/subscriptioncmds.c8
-rw-r--r--src/backend/commands/tablecmds.c82
-rw-r--r--src/backend/commands/tablespace.c2
-rw-r--r--src/backend/commands/typecmds.c14
-rw-r--r--src/backend/commands/vacuum.c121
-rw-r--r--src/backend/executor/execExprInterp.c1
-rw-r--r--src/backend/executor/execIndexing.c4
-rw-r--r--src/backend/jit/llvm/meson.build2
-rw-r--r--src/backend/lib/README4
-rw-r--r--src/backend/libpq/be-secure-openssl.c4
-rw-r--r--src/backend/libpq/hba.c38
-rw-r--r--src/backend/libpq/pg_ident.conf.sample26
-rw-r--r--src/backend/main/main.c16
-rw-r--r--src/backend/nodes/queryjumblefuncs.c187
-rw-r--r--src/backend/optimizer/path/costsize.c54
-rw-r--r--src/backend/optimizer/path/joinpath.c56
-rw-r--r--src/backend/optimizer/path/joinrels.c60
-rw-r--r--src/backend/optimizer/plan/createplan.c136
-rw-r--r--src/backend/optimizer/util/clauses.c7
-rw-r--r--src/backend/optimizer/util/paramassign.c109
-rw-r--r--src/backend/optimizer/util/pathnode.c73
-rw-r--r--src/backend/optimizer/util/placeholder.c40
-rw-r--r--src/backend/parser/gram.y99
-rw-r--r--src/backend/parser/parse_utilcmd.c22
-rw-r--r--src/backend/postmaster/autovacuum.c2
-rw-r--r--src/backend/postmaster/checkpointer.c77
-rw-r--r--src/backend/postmaster/pgarch.c12
-rw-r--r--src/backend/postmaster/postmaster.c28
-rw-r--r--src/backend/postmaster/walsummarizer.c28
-rw-r--r--src/backend/regex/regc_pg_locale.c429
-rw-r--r--src/backend/replication/libpqwalreceiver/libpqwalreceiver.c2
-rw-r--r--src/backend/replication/logical/launcher.c42
-rw-r--r--src/backend/replication/logical/logical.c14
-rw-r--r--src/backend/replication/logical/origin.c6
-rw-r--r--src/backend/replication/logical/reorderbuffer.c2
-rw-r--r--src/backend/replication/logical/slotsync.c12
-rw-r--r--src/backend/replication/logical/snapbuild.c46
-rw-r--r--src/backend/replication/logical/tablesync.c21
-rw-r--r--src/backend/replication/logical/worker.c27
-rw-r--r--src/backend/replication/pgoutput/pgoutput.c4
-rw-r--r--src/backend/replication/repl_gram.y4
-rw-r--r--src/backend/replication/repl_scanner.l2
-rw-r--r--src/backend/replication/slot.c74
-rw-r--r--src/backend/replication/slotfuncs.c2
-rw-r--r--src/backend/replication/syncrep.c4
-rw-r--r--src/backend/replication/walreceiver.c16
-rw-r--r--src/backend/replication/walsender.c32
-rw-r--r--src/backend/rewrite/rewriteHandler.c5
-rw-r--r--src/backend/storage/aio/method_io_uring.c210
-rw-r--r--src/backend/storage/aio/method_worker.c62
-rw-r--r--src/backend/storage/buffer/bufmgr.c18
-rw-r--r--src/backend/storage/buffer/localbuf.c21
-rw-r--r--src/backend/storage/file/fd.c19
-rw-r--r--src/backend/storage/ipc/dsm_registry.c314
-rw-r--r--src/backend/storage/ipc/procarray.c52
-rw-r--r--src/backend/storage/ipc/shmem.c4
-rw-r--r--src/backend/storage/ipc/standby.c4
-rw-r--r--src/backend/storage/lmgr/lock.c20
-rw-r--r--src/backend/storage/lmgr/lwlock.c4
-rw-r--r--src/backend/storage/lmgr/predicate.c11
-rw-r--r--src/backend/tcop/backend_startup.c6
-rw-r--r--src/backend/tcop/utility.c24
-rw-r--r--src/backend/tsearch/ts_locale.c4
-rw-r--r--src/backend/tsearch/wparser_def.c2
-rw-r--r--src/backend/utils/activity/pgstat_relation.c4
-rw-r--r--src/backend/utils/adt/Makefile1
-rw-r--r--src/backend/utils/adt/acl.c33
-rw-r--r--src/backend/utils/adt/bytea.c1143
-rw-r--r--src/backend/utils/adt/date.c86
-rw-r--r--src/backend/utils/adt/float.c22
-rw-r--r--src/backend/utils/adt/formatting.c5
-rw-r--r--src/backend/utils/adt/inet_net_pton.c3
-rw-r--r--src/backend/utils/adt/jsonb_util.c43
-rw-r--r--src/backend/utils/adt/like.c22
-rw-r--r--src/backend/utils/adt/like_support.c7
-rw-r--r--src/backend/utils/adt/meson.build1
-rw-r--r--src/backend/utils/adt/network.c2
-rw-r--r--src/backend/utils/adt/network_spgist.c1
-rw-r--r--src/backend/utils/adt/numeric.c20
-rw-r--r--src/backend/utils/adt/pg_locale.c122
-rw-r--r--src/backend/utils/adt/pg_locale_builtin.c113
-rw-r--r--src/backend/utils/adt/pg_locale_icu.c130
-rw-r--r--src/backend/utils/adt/pg_locale_libc.c361
-rw-r--r--src/backend/utils/adt/pg_lsn.c2
-rw-r--r--src/backend/utils/adt/regproc.c118
-rw-r--r--src/backend/utils/adt/ri_triggers.c2
-rw-r--r--src/backend/utils/adt/selfuncs.c3
-rw-r--r--src/backend/utils/adt/timestamp.c101
-rw-r--r--src/backend/utils/adt/varlena.c1098
-rw-r--r--src/backend/utils/adt/xml.c97
-rw-r--r--src/backend/utils/cache/catcache.c1
-rw-r--r--src/backend/utils/init/postinit.c11
-rw-r--r--src/backend/utils/misc/guc_tables.c14
-rw-r--r--src/backend/utils/misc/injection_point.c46
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample2
-rw-r--r--src/backend/utils/mmgr/dsa.c15
137 files changed, 5372 insertions, 3726 deletions
diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c
index 21f2f4af97e..926f1e4008a 100644
--- a/src/backend/access/common/toast_compression.c
+++ b/src/backend/access/common/toast_compression.c
@@ -25,11 +25,11 @@
/* GUC */
int default_toast_compression = TOAST_PGLZ_COMPRESSION;
-#define NO_LZ4_SUPPORT() \
+#define NO_COMPRESSION_SUPPORT(method) \
ereport(ERROR, \
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
- errmsg("compression method lz4 not supported"), \
- errdetail("This functionality requires the server to be built with lz4 support.")))
+ errmsg("compression method %s not supported", method), \
+ errdetail("This functionality requires the server to be built with %s support.", method)))
/*
* Compress a varlena using PGLZ.
@@ -139,7 +139,7 @@ struct varlena *
lz4_compress_datum(const struct varlena *value)
{
#ifndef USE_LZ4
- NO_LZ4_SUPPORT();
+ NO_COMPRESSION_SUPPORT("lz4");
return NULL; /* keep compiler quiet */
#else
int32 valsize;
@@ -182,7 +182,7 @@ struct varlena *
lz4_decompress_datum(const struct varlena *value)
{
#ifndef USE_LZ4
- NO_LZ4_SUPPORT();
+ NO_COMPRESSION_SUPPORT("lz4");
return NULL; /* keep compiler quiet */
#else
int32 rawsize;
@@ -215,7 +215,7 @@ struct varlena *
lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
{
#ifndef USE_LZ4
- NO_LZ4_SUPPORT();
+ NO_COMPRESSION_SUPPORT("lz4");
return NULL; /* keep compiler quiet */
#else
int32 rawsize;
@@ -289,7 +289,7 @@ CompressionNameToMethod(const char *compression)
else if (strcmp(compression, "lz4") == 0)
{
#ifndef USE_LZ4
- NO_LZ4_SUPPORT();
+ NO_COMPRESSION_SUPPORT("lz4");
#endif
return TOAST_LZ4_COMPRESSION;
}
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 30f4c2d3c67..eb4bd3d6ae3 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -438,6 +438,9 @@ heap_xlog_insert(XLogReaderState *record)
ItemPointerSetBlockNumber(&target_tid, blkno);
ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
+ /* No freezing in the heap_insert() code path */
+ Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
+
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
@@ -508,10 +511,6 @@ heap_xlog_insert(XLogReaderState *record)
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
- /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
- if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
- PageSetAllVisible(page);
-
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 09416450af9..14036c27e87 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -423,7 +423,7 @@ typedef struct LVSavedErrInfo
/* non-export function prototypes */
static void lazy_scan_heap(LVRelState *vacrel);
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
- VacuumParams *params);
+ const VacuumParams params);
static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
void *callback_private_data,
void *per_buffer_data);
@@ -431,7 +431,7 @@ static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
bool sharelock, Buffer vmbuffer);
-static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
+static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
Buffer vmbuffer, bool all_visible_according_to_vm,
bool *has_lpdead_items, bool *vm_page_frozen);
@@ -485,7 +485,7 @@ static void restore_vacuum_error_info(LVRelState *vacrel,
* vacuum options or for relfrozenxid/relminmxid advancement.
*/
static void
-heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
+heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
{
uint32 randseed;
BlockNumber allvisible;
@@ -504,7 +504,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
vacrel->eager_scan_remaining_successes = 0;
/* If eager scanning is explicitly disabled, just return. */
- if (params->max_eager_freeze_failure_rate == 0)
+ if (params.max_eager_freeze_failure_rate == 0)
return;
/*
@@ -581,11 +581,11 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
- Assert(params->max_eager_freeze_failure_rate > 0 &&
- params->max_eager_freeze_failure_rate <= 1);
+ Assert(params.max_eager_freeze_failure_rate > 0 &&
+ params.max_eager_freeze_failure_rate <= 1);
vacrel->eager_scan_max_fails_per_region =
- params->max_eager_freeze_failure_rate *
+ params.max_eager_freeze_failure_rate *
EAGER_SCAN_REGION_SIZE;
/*
@@ -612,7 +612,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
* and locked the relation.
*/
void
-heap_vacuum_rel(Relation rel, VacuumParams *params,
+heap_vacuum_rel(Relation rel, const VacuumParams params,
BufferAccessStrategy bstrategy)
{
LVRelState *vacrel;
@@ -634,9 +634,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
ErrorContextCallback errcallback;
char **indnames = NULL;
- verbose = (params->options & VACOPT_VERBOSE) != 0;
+ verbose = (params.options & VACOPT_VERBOSE) != 0;
instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
- params->log_min_duration >= 0));
+ params.log_min_duration >= 0));
if (instrument)
{
pg_rusage_init(&ru0);
@@ -699,9 +699,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* The truncate param allows user to avoid attempting relation truncation,
* though it can't force truncation to happen.
*/
- Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
- Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
- params->truncate != VACOPTVALUE_AUTO);
+ Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
+ Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
+ params.truncate != VACOPTVALUE_AUTO);
/*
* While VacuumFailSafeActive is reset to false before calling this, we
@@ -711,14 +711,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->consider_bypass_optimization = true;
vacrel->do_index_vacuuming = true;
vacrel->do_index_cleanup = true;
- vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
- if (params->index_cleanup == VACOPTVALUE_DISABLED)
+ vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
+ if (params.index_cleanup == VACOPTVALUE_DISABLED)
{
/* Force disable index vacuuming up-front */
vacrel->do_index_vacuuming = false;
vacrel->do_index_cleanup = false;
}
- else if (params->index_cleanup == VACOPTVALUE_ENABLED)
+ else if (params.index_cleanup == VACOPTVALUE_ENABLED)
{
/* Force index vacuuming. Note that failsafe can still bypass. */
vacrel->consider_bypass_optimization = false;
@@ -726,7 +726,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
else
{
/* Default/auto, make all decisions dynamically */
- Assert(params->index_cleanup == VACOPTVALUE_AUTO);
+ Assert(params.index_cleanup == VACOPTVALUE_AUTO);
}
/* Initialize page counters explicitly (be tidy) */
@@ -789,7 +789,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
*/
vacrel->skippedallvis = false;
skipwithvm = true;
- if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
+ if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
{
/*
* Force aggressive mode, and disable skipping blocks using the
@@ -830,7 +830,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* is already dangerously old.)
*/
lazy_check_wraparound_failsafe(vacrel);
- dead_items_alloc(vacrel, params->nworkers);
+ dead_items_alloc(vacrel, params.nworkers);
/*
* Call lazy_scan_heap to perform all required heap pruning, index
@@ -947,9 +947,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
{
TimestampTz endtime = GetCurrentTimestamp();
- if (verbose || params->log_min_duration == 0 ||
+ if (verbose || params.log_min_duration == 0 ||
TimestampDifferenceExceeds(starttime, endtime,
- params->log_min_duration))
+ params.log_min_duration))
{
long secs_dur;
int usecs_dur;
@@ -984,10 +984,10 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* Aggressiveness already reported earlier, in dedicated
* VACUUM VERBOSE ereport
*/
- Assert(!params->is_wraparound);
+ Assert(!params.is_wraparound);
msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
}
- else if (params->is_wraparound)
+ else if (params.is_wraparound)
{
/*
* While it's possible for a VACUUM to be both is_wraparound
@@ -1245,6 +1245,7 @@ lazy_scan_heap(LVRelState *vacrel)
Buffer buf;
Page page;
uint8 blk_info = 0;
+ int ndeleted = 0;
bool has_lpdead_items;
void *per_buffer_data = NULL;
bool vm_page_frozen = false;
@@ -1387,10 +1388,10 @@ lazy_scan_heap(LVRelState *vacrel)
* line pointers previously marked LP_DEAD.
*/
if (got_cleanup_lock)
- lazy_scan_prune(vacrel, buf, blkno, page,
- vmbuffer,
- blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
- &has_lpdead_items, &vm_page_frozen);
+ ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
+ vmbuffer,
+ blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
+ &has_lpdead_items, &vm_page_frozen);
/*
* Count an eagerly scanned page as a failure or a success.
@@ -1428,7 +1429,7 @@ lazy_scan_heap(LVRelState *vacrel)
*/
if (vacrel->eager_scan_max_fails_per_region > 0)
ereport(vacrel->verbose ? INFO : DEBUG2,
- (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"",
+ (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
orig_eager_scan_success_limit,
vacrel->dbname, vacrel->relnamespace,
vacrel->relname)));
@@ -1481,7 +1482,7 @@ lazy_scan_heap(LVRelState *vacrel)
* table has indexes. There will only be newly-freed space if we
* held the cleanup lock and lazy_scan_prune() was called.
*/
- if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
+ if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
{
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
@@ -1872,8 +1873,6 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
*/
if (!PageIsAllVisible(page))
{
- uint8 old_vmbits;
-
START_CRIT_SECTION();
/* mark buffer dirty before writing a WAL record */
@@ -1893,24 +1892,16 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
log_newpage_buffer(buf, true);
PageSetAllVisible(page);
- old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
- InvalidXLogRecPtr,
- vmbuffer, InvalidTransactionId,
- VISIBILITYMAP_ALL_VISIBLE |
- VISIBILITYMAP_ALL_FROZEN);
+ visibilitymap_set(vacrel->rel, blkno, buf,
+ InvalidXLogRecPtr,
+ vmbuffer, InvalidTransactionId,
+ VISIBILITYMAP_ALL_VISIBLE |
+ VISIBILITYMAP_ALL_FROZEN);
END_CRIT_SECTION();
- /*
- * If the page wasn't already set all-visible and/or all-frozen in
- * the VM, count it as newly set for logging.
- */
- if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
- {
- vacrel->vm_new_visible_pages++;
- vacrel->vm_new_visible_frozen_pages++;
- }
- else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0)
- vacrel->vm_new_frozen_pages++;
+ /* Count the newly all-frozen pages for logging */
+ vacrel->vm_new_visible_pages++;
+ vacrel->vm_new_visible_frozen_pages++;
}
freespace = PageGetHeapFreeSpace(page);
@@ -1946,8 +1937,10 @@ cmpOffsetNumbers(const void *a, const void *b)
* *vm_page_frozen is set to true if the page is newly set all-frozen in the
* VM. The caller currently only uses this for determining whether an eagerly
* scanned page was successfully set all-frozen.
+ *
+ * Returns the number of tuples deleted from the page during HOT pruning.
*/
-static void
+static int
lazy_scan_prune(LVRelState *vacrel,
Buffer buf,
BlockNumber blkno,
@@ -2218,6 +2211,8 @@ lazy_scan_prune(LVRelState *vacrel,
*vm_page_frozen = true;
}
}
+
+ return presult.ndeleted;
}
/*
@@ -2915,7 +2910,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
&all_frozen))
{
- uint8 old_vmbits;
uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
if (all_frozen)
@@ -2925,25 +2919,15 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
}
PageSetAllVisible(page);
- old_vmbits = visibilitymap_set(vacrel->rel, blkno, buffer,
- InvalidXLogRecPtr,
- vmbuffer, visibility_cutoff_xid,
- flags);
+ visibilitymap_set(vacrel->rel, blkno, buffer,
+ InvalidXLogRecPtr,
+ vmbuffer, visibility_cutoff_xid,
+ flags);
- /*
- * If the page wasn't already set all-visible and/or all-frozen in the
- * VM, count it as newly set for logging.
- */
- if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
- {
- vacrel->vm_new_visible_pages++;
- if (all_frozen)
- vacrel->vm_new_visible_frozen_pages++;
- }
-
- else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
- all_frozen)
- vacrel->vm_new_frozen_pages++;
+ /* Count the newly set VM page for logging */
+ vacrel->vm_new_visible_pages++;
+ if (all_frozen)
+ vacrel->vm_new_visible_frozen_pages++;
}
/* Revert to the previous phase information for error traceback */
diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c
index a136e4bbfdf..21c519cd108 100644
--- a/src/backend/access/nbtree/nbtpreprocesskeys.c
+++ b/src/backend/access/nbtree/nbtpreprocesskeys.c
@@ -16,6 +16,7 @@
#include "postgres.h"
#include "access/nbtree.h"
+#include "common/int.h"
#include "lib/qunique.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
@@ -56,6 +57,8 @@ static void _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk,
BTArrayKeyInfo *array);
static void _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
BTArrayKeyInfo *array);
+static void _bt_unmark_keys(IndexScanDesc scan, int *keyDataMap);
+static int _bt_reorder_array_cmp(const void *a, const void *b);
static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys);
static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap);
static int _bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops_out,
@@ -96,7 +99,7 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg);
* incomplete sets of cross-type operators, we may fail to detect redundant
* or contradictory keys, but we can survive that.)
*
- * The output keys must be sorted by index attribute. Presently we expect
+ * Required output keys are sorted by index attribute. Presently we expect
* (but verify) that the input keys are already so sorted --- this is done
* by match_clauses_to_index() in indxpath.c. Some reordering of the keys
* within each attribute may be done as a byproduct of the processing here.
@@ -127,29 +130,36 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg);
* This has the potential to be much more efficient than a full index scan
* (though it behaves like a full scan when there's many distinct "x" values).
*
- * If possible, redundant keys are eliminated: we keep only the tightest
+ * Typically, redundant keys are eliminated: we keep only the tightest
* >/>= bound and the tightest </<= bound, and if there's an = key then
* that's the only one returned. (So, we return either a single = key,
* or one or two boundary-condition keys for each attr.) However, if we
* cannot compare two keys for lack of a suitable cross-type operator,
- * we cannot eliminate either. If there are two such keys of the same
- * operator strategy, the second one is just pushed into the output array
- * without further processing here. We may also emit both >/>= or both
- * </<= keys if we can't compare them. The logic about required keys still
- * works if we don't eliminate redundant keys.
- *
- * Note that one reason we need direction-sensitive required-key flags is
- * precisely that we may not be able to eliminate redundant keys. Suppose
- * we have "x > 4::int AND x > 10::bigint", and we are unable to determine
- * which key is more restrictive for lack of a suitable cross-type operator.
- * _bt_first will arbitrarily pick one of the keys to do the initial
- * positioning with. If it picks x > 4, then the x > 10 condition will fail
- * until we reach index entries > 10; but we can't stop the scan just because
- * x > 10 is failing. On the other hand, if we are scanning backwards, then
- * failure of either key is indeed enough to stop the scan. (In general, when
- * inequality keys are present, the initial-positioning code only promises to
- * position before the first possible match, not exactly at the first match,
- * for a forward scan; or after the last match for a backward scan.)
+ * we cannot eliminate either key.
+ *
+ * When all redundant keys could not be eliminated, we'll output a key array
+ * that can more or less be treated as if it had no redundant keys. Suppose
+ * we have "x > 4::int AND x > 10::bigint AND x < 70", and we are unable to
+ * determine which > key is more restrictive for lack of a suitable cross-type
+ * operator. We'll arbitrarily pick one of the > keys; the other > key won't
+ * be marked required. Obviously, the scan will be less efficient if we
+ * choose x > 4 over x > 10 -- but it can still largely proceed as if there
+ * was only a single > condition. "x > 10" will be placed at the end of the
+ * so->keyData[] output array. It'll always be evaluated last, after the keys
+ * that could be marked required in the usual way (after "x > 4 AND x < 70").
+ * This can sometimes result in so->keyData[] keys that aren't even in index
+ * attribute order (if the qual involves multiple attributes). The scan's
+ * required keys will still be in attribute order, though, so it can't matter.
+ *
+ * This scheme ensures that _bt_first always uses the same set of keys at the
+ * start of a forwards scan as those _bt_checkkeys uses to determine when to
+ * end a similar backwards scan (and vice-versa). _bt_advance_array_keys
+ * depends on this: it expects to be able to reliably predict what the next
+ * _bt_first call will do by testing whether _bt_checkkeys' routines report
+ * that the final tuple on the page is past the end of matches for the scan's
+ * keys with the scan direction flipped. If it is (if continuescan=false),
+ * then it follows that calling _bt_first will, at a minimum, relocate the
+ * scan to the very next leaf page (in the current scan direction).
*
* As a byproduct of this work, we can detect contradictory quals such
* as "x = 1 AND x > 2". If we see that, we return so->qual_ok = false,
@@ -188,7 +198,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
int numberOfEqualCols;
ScanKey inkeys;
BTScanKeyPreproc xform[BTMaxStrategyNumber];
- bool test_result;
+ bool test_result,
+ redundant_key_kept = false;
AttrNumber attno;
ScanKey arrayKeyData;
int *keyDataMap = NULL;
@@ -388,7 +399,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
xform[j].inkey = NULL;
xform[j].inkeyi = -1;
}
- /* else, cannot determine redundancy, keep both keys */
+ else
+ redundant_key_kept = true;
}
/* track number of attrs for which we have "=" keys */
numberOfEqualCols++;
@@ -409,6 +421,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
else
xform[BTLessStrategyNumber - 1].inkey = NULL;
}
+ else
+ redundant_key_kept = true;
}
/* try to keep only one of >, >= */
@@ -426,6 +440,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
else
xform[BTGreaterStrategyNumber - 1].inkey = NULL;
}
+ else
+ redundant_key_kept = true;
}
/*
@@ -466,25 +482,6 @@ _bt_preprocess_keys(IndexScanDesc scan)
/* check strategy this key's operator corresponds to */
j = inkey->sk_strategy - 1;
- /* if row comparison, push it directly to the output array */
- if (inkey->sk_flags & SK_ROW_HEADER)
- {
- ScanKey outkey = &so->keyData[new_numberOfKeys++];
-
- memcpy(outkey, inkey, sizeof(ScanKeyData));
- if (arrayKeyData)
- keyDataMap[new_numberOfKeys - 1] = i;
- if (numberOfEqualCols == attno - 1)
- _bt_mark_scankey_required(outkey);
-
- /*
- * We don't support RowCompare using equality; such a qual would
- * mess up the numberOfEqualCols tracking.
- */
- Assert(j != (BTEqualStrategyNumber - 1));
- continue;
- }
-
if (inkey->sk_strategy == BTEqualStrategyNumber &&
(inkey->sk_flags & SK_SEARCHARRAY))
{
@@ -593,9 +590,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
* the new scan key.
*
* Note: We do things this way around so that our arrays are
- * always in the same order as their corresponding scan keys,
- * even with incomplete opfamilies. _bt_advance_array_keys
- * depends on this.
+ * always in the same order as their corresponding scan keys.
+ * _bt_preprocess_array_keys_final expects this.
*/
ScanKey outkey = &so->keyData[new_numberOfKeys++];
@@ -607,6 +603,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
xform[j].inkey = inkey;
xform[j].inkeyi = i;
xform[j].arrayidx = arrayidx;
+ redundant_key_kept = true;
}
}
}
@@ -622,6 +619,15 @@ _bt_preprocess_keys(IndexScanDesc scan)
if (arrayKeyData)
_bt_preprocess_array_keys_final(scan, keyDataMap);
+ /*
+ * If there are remaining redundant inequality keys, we must make sure
+ * that each index attribute has no more than one required >/>= key, and
+ * no more than one required </<= key. Attributes that have one or more
+ * required = keys now must keep only one required key (the first = key).
+ */
+ if (unlikely(redundant_key_kept) && so->qual_ok)
+ _bt_unmark_keys(scan, keyDataMap);
+
/* Could pfree arrayKeyData/keyDataMap now, but not worth the cycles */
}
@@ -746,9 +752,12 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
*
* Depending on the operator type, the key may be required for both scan
* directions or just one. Also, if the key is a row comparison header,
- * we have to mark its first subsidiary ScanKey as required. (Subsequent
- * subsidiary ScanKeys are normally for lower-order columns, and thus
- * cannot be required, since they're after the first non-equality scankey.)
+ * we have to mark the appropriate subsidiary ScanKeys as required. In such
+ * cases, the first subsidiary key is required, but subsequent ones are
+ * required only as long as they correspond to successive index columns and
+ * match the leading column as to sort direction. Otherwise the row
+ * comparison ordering is different from the index ordering and so we can't
+ * stop the scan on the basis of those lower-order columns.
*
* Note: when we set required-key flag bits in a subsidiary scankey, we are
* scribbling on a data structure belonging to the index AM's caller, not on
@@ -786,12 +795,25 @@ _bt_mark_scankey_required(ScanKey skey)
if (skey->sk_flags & SK_ROW_HEADER)
{
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
+ AttrNumber attno = skey->sk_attno;
/* First subkey should be same column/operator as the header */
- Assert(subkey->sk_flags & SK_ROW_MEMBER);
- Assert(subkey->sk_attno == skey->sk_attno);
+ Assert(subkey->sk_attno == attno);
Assert(subkey->sk_strategy == skey->sk_strategy);
- subkey->sk_flags |= addflags;
+
+ for (;;)
+ {
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
+ if (subkey->sk_attno != attno)
+ break; /* non-adjacent key, so not required */
+ if (subkey->sk_strategy != skey->sk_strategy)
+ break; /* wrong direction, so not required */
+ subkey->sk_flags |= addflags;
+ if (subkey->sk_flags & SK_ROW_END)
+ break;
+ subkey++;
+ attno++;
+ }
}
}
@@ -847,8 +869,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
cmp_op;
StrategyNumber strat;
- Assert(!((leftarg->sk_flags | rightarg->sk_flags) &
- (SK_ROW_HEADER | SK_ROW_MEMBER)));
+ Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_MEMBER));
/*
* First, deal with cases where one or both args are NULL. This should
@@ -925,6 +946,16 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
}
/*
+ * We don't yet know how to determine redundancy when it involves a row
+ * compare key (barring simple cases involving IS NULL/IS NOT NULL)
+ */
+ if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_HEADER)
+ {
+ Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_BT_SKIP));
+ return false;
+ }
+
+ /*
* If either leftarg or rightarg are equality-type array scankeys, we need
* specialized handling (since by now we know that IS NULL wasn't used)
*/
@@ -1468,6 +1499,283 @@ _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
}
/*
+ * _bt_unmark_keys() -- make superfluous required keys nonrequired after all
+ *
+ * When _bt_preprocess_keys fails to eliminate one or more redundant keys, it
+ * calls here to make sure that no index attribute has more than one > or >=
+ * key marked required, and no more than one required < or <= key. Attributes
+ * with = keys will always get one = key as their required key. All other
+ * keys that were initially marked required get "unmarked" here. That way,
+ * _bt_first and _bt_checkkeys will reliably agree on which keys to use to
+ * start and/or to end the scan.
+ *
+ * We also relocate keys that become/started out nonrequired to the end of
+ * so->keyData[]. That way, _bt_first and _bt_checkkeys cannot fail to reach
+ * a required key due to some earlier nonrequired key getting in the way.
+ *
+ * Only call here when _bt_compare_scankey_args returned false at least once
+ * (otherwise, calling here will just waste cycles).
+ */
+static void
+_bt_unmark_keys(IndexScanDesc scan, int *keyDataMap)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ AttrNumber attno;
+ bool *unmarkikey;
+ int nunmark,
+ nunmarked,
+ nkept,
+ firsti;
+ ScanKey keepKeys,
+ unmarkKeys;
+ FmgrInfo *keepOrderProcs = NULL,
+ *unmarkOrderProcs = NULL;
+ bool haveReqEquals,
+ haveReqForward,
+ haveReqBackward;
+
+ /*
+ * Do an initial pass over so->keyData[] that determines which keys to
+ * keep as required. We expect so->keyData[] to still be in attribute
+ * order when we're called (though we don't expect any particular order
+ * among each attribute's keys).
+ *
+ * When both equality and inequality keys remain on a single attribute, we
+ * *must* make sure that exactly one of the equalities remains required.
+ * Any requiredness markings that we might leave on later keys/attributes
+ * are predicated on there being required = keys on all prior columns.
+ */
+ unmarkikey = palloc0(so->numberOfKeys * sizeof(bool));
+ nunmark = 0;
+
+ /* Set things up for first key's attribute */
+ attno = so->keyData[0].sk_attno;
+ firsti = 0;
+ haveReqEquals = false;
+ haveReqForward = false;
+ haveReqBackward = false;
+ for (int i = 0; i < so->numberOfKeys; i++)
+ {
+ ScanKey origkey = &so->keyData[i];
+
+ if (origkey->sk_attno != attno)
+ {
+ /* Reset for next attribute */
+ attno = origkey->sk_attno;
+ firsti = i;
+
+ haveReqEquals = false;
+ haveReqForward = false;
+ haveReqBackward = false;
+ }
+
+ /* Equalities get priority over inequalities */
+ if (haveReqEquals)
+ {
+ /*
+ * We already found the first "=" key for this attribute. We've
+ * already decided that all its other keys will be unmarked.
+ */
+ Assert(!(origkey->sk_flags & SK_SEARCHNULL));
+ unmarkikey[i] = true;
+ nunmark++;
+ continue;
+ }
+ else if ((origkey->sk_flags & SK_BT_REQFWD) &&
+ (origkey->sk_flags & SK_BT_REQBKWD))
+ {
+ /*
+ * Found the first "=" key for attno. All other attno keys will
+ * be unmarked.
+ */
+ Assert(origkey->sk_strategy == BTEqualStrategyNumber);
+
+ haveReqEquals = true;
+ for (int j = firsti; j < i; j++)
+ {
+ /* Unmark any prior inequality keys on attno after all */
+ if (!unmarkikey[j])
+ {
+ unmarkikey[j] = true;
+ nunmark++;
+ }
+ }
+ continue;
+ }
+
+ /* Deal with inequalities next */
+ if ((origkey->sk_flags & SK_BT_REQFWD) && !haveReqForward)
+ {
+ haveReqForward = true;
+ continue;
+ }
+ else if ((origkey->sk_flags & SK_BT_REQBKWD) && !haveReqBackward)
+ {
+ haveReqBackward = true;
+ continue;
+ }
+
+ /*
+ * We have either a redundant inequality key that will be unmarked, or
+ * we have a key that wasn't marked required in the first place
+ */
+ unmarkikey[i] = true;
+ nunmark++;
+ }
+
+ /* Should only be called when _bt_compare_scankey_args reported failure */
+ Assert(nunmark > 0);
+
+ /*
+ * Next, allocate temp arrays: one for required keys that'll remain
+ * required, the other for all remaining keys
+ */
+ unmarkKeys = palloc(nunmark * sizeof(ScanKeyData));
+ keepKeys = palloc((so->numberOfKeys - nunmark) * sizeof(ScanKeyData));
+ nunmarked = 0;
+ nkept = 0;
+ if (so->numArrayKeys)
+ {
+ unmarkOrderProcs = palloc(nunmark * sizeof(FmgrInfo));
+ keepOrderProcs = palloc((so->numberOfKeys - nunmark) * sizeof(FmgrInfo));
+ }
+
+ /*
+ * Next, copy the contents of so->keyData[] into the appropriate temp
+ * array.
+ *
+ * Scans with = array keys need us to maintain invariants around the order
+ * of so->orderProcs[] and so->arrayKeys[] relative to so->keyData[]. See
+ * _bt_preprocess_array_keys_final for a full explanation.
+ */
+ for (int i = 0; i < so->numberOfKeys; i++)
+ {
+ ScanKey origkey = &so->keyData[i];
+ ScanKey unmark;
+
+ if (!unmarkikey[i])
+ {
+ /*
+ * Key gets to keep its original requiredness markings.
+ *
+ * Key will stay in its original position, unless we're going to
+ * unmark an earlier key (in which case this key gets moved back).
+ */
+ memcpy(keepKeys + nkept, origkey, sizeof(ScanKeyData));
+
+ if (so->numArrayKeys)
+ {
+ keyDataMap[i] = nkept;
+ memcpy(keepOrderProcs + nkept, &so->orderProcs[i],
+ sizeof(FmgrInfo));
+ }
+
+ nkept++;
+ continue;
+ }
+
+ /*
+ * Key will be unmarked as needed, and moved to the end of the array,
+ * next to other keys that will become (or always were) nonrequired
+ */
+ unmark = unmarkKeys + nunmarked;
+ memcpy(unmark, origkey, sizeof(ScanKeyData));
+
+ if (so->numArrayKeys)
+ {
+ keyDataMap[i] = (so->numberOfKeys - nunmark) + nunmarked;
+ memcpy(&unmarkOrderProcs[nunmarked], &so->orderProcs[i],
+ sizeof(FmgrInfo));
+ }
+
+ /*
+ * Preprocessing only generates skip arrays when it knows that they'll
+ * be the only required = key on the attr. We'll never unmark them.
+ */
+ Assert(!(unmark->sk_flags & SK_BT_SKIP));
+
+ /*
+ * Also shouldn't have to unmark an IS NULL or an IS NOT NULL key.
+ * They aren't cross-type, so an incomplete opfamily can't matter.
+ */
+ Assert(!(unmark->sk_flags & SK_ISNULL) ||
+ !(unmark->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)));
+
+ /* Clear requiredness flags on redundant key (and on any subkeys) */
+ unmark->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD);
+ if (unmark->sk_flags & SK_ROW_HEADER)
+ {
+ ScanKey subkey = (ScanKey) DatumGetPointer(unmark->sk_argument);
+
+ Assert(subkey->sk_strategy == unmark->sk_strategy);
+ for (;;)
+ {
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
+ subkey->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD);
+ if (subkey->sk_flags & SK_ROW_END)
+ break;
+ subkey++;
+ }
+ }
+
+ nunmarked++;
+ }
+
+ /* Copy both temp arrays back into so->keyData[] to reorder */
+ Assert(nkept == so->numberOfKeys - nunmark);
+ Assert(nunmarked == nunmark);
+ memcpy(so->keyData, keepKeys, sizeof(ScanKeyData) * nkept);
+ memcpy(so->keyData + nkept, unmarkKeys, sizeof(ScanKeyData) * nunmarked);
+
+ /* Done with temp arrays */
+ pfree(unmarkikey);
+ pfree(keepKeys);
+ pfree(unmarkKeys);
+
+ /*
+ * Now copy so->orderProcs[] temp entries needed by scans with = array
+ * keys back (just like with the so->keyData[] temp arrays)
+ */
+ if (so->numArrayKeys)
+ {
+ memcpy(so->orderProcs, keepOrderProcs, sizeof(FmgrInfo) * nkept);
+ memcpy(so->orderProcs + nkept, unmarkOrderProcs,
+ sizeof(FmgrInfo) * nunmarked);
+
+ /* Also fix-up array->scan_key references */
+ for (int arridx = 0; arridx < so->numArrayKeys; arridx++)
+ {
+ BTArrayKeyInfo *array = &so->arrayKeys[arridx];
+
+ array->scan_key = keyDataMap[array->scan_key];
+ }
+
+ /*
+ * Sort so->arrayKeys[] based on its new BTArrayKeyInfo.scan_key
+ * offsets, so that its order matches so->keyData[] order as expected
+ */
+ qsort(so->arrayKeys, so->numArrayKeys, sizeof(BTArrayKeyInfo),
+ _bt_reorder_array_cmp);
+
+ /* Done with temp arrays */
+ pfree(unmarkOrderProcs);
+ pfree(keepOrderProcs);
+ }
+}
+
+/*
+ * qsort comparator for reordering so->arrayKeys[] BTArrayKeyInfo entries
+ */
+static int
+_bt_reorder_array_cmp(const void *a, const void *b)
+{
+ BTArrayKeyInfo *arraya = (BTArrayKeyInfo *) a;
+ BTArrayKeyInfo *arrayb = (BTArrayKeyInfo *) b;
+
+ return pg_cmp_s32(arraya->scan_key, arrayb->scan_key);
+}
+
+/*
* _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys
*
* If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 36544ecfd58..d69798795b4 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -892,9 +892,9 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
OffsetNumber offnum;
BTScanInsertData inskey;
ScanKey startKeys[INDEX_MAX_KEYS];
- ScanKeyData notnullkeys[INDEX_MAX_KEYS];
+ ScanKeyData notnullkey;
int keysz = 0;
- StrategyNumber strat_total;
+ StrategyNumber strat_total = InvalidStrategy;
BlockNumber blkno = InvalidBlockNumber,
lastcurrblkno;
@@ -960,46 +960,51 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
/*----------
* Examine the scan keys to discover where we need to start the scan.
+ * The selected scan keys (at most one per index column) are remembered by
+ * storing their addresses into the local startKeys[] array. The final
+ * startKeys[] entry's strategy is set in strat_total. (Actually, there
+ * are a couple of cases where we force a less/more restrictive strategy.)
*
- * We want to identify the keys that can be used as starting boundaries;
- * these are =, >, or >= keys for a forward scan or =, <, <= keys for
- * a backwards scan. We can use keys for multiple attributes so long as
- * the prior attributes had only =, >= (resp. =, <=) keys. Once we accept
- * a > or < boundary or find an attribute with no boundary (which can be
- * thought of as the same as "> -infinity"), we can't use keys for any
- * attributes to its right, because it would break our simplistic notion
- * of what initial positioning strategy to use.
+ * We must use the key that was marked required (in the direction opposite
+ * our own scan's) during preprocessing. Each index attribute can only
+ * have one such required key. In general, the keys that we use to find
+ * an initial position when scanning forwards are the same keys that end
+ * the scan on the leaf level when scanning backwards (and vice-versa).
*
* When the scan keys include cross-type operators, _bt_preprocess_keys
- * may not be able to eliminate redundant keys; in such cases we will
- * arbitrarily pick a usable one for each attribute. This is correct
- * but possibly not optimal behavior. (For example, with keys like
- * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
- * x=5 would be more efficient.) Since the situation only arises given
- * a poorly-worded query plus an incomplete opfamily, live with it.
+ * may not be able to eliminate redundant keys; in such cases it will
+ * arbitrarily pick a usable key for each attribute (and scan direction),
+ * ensuring that there is no more than one key required in each direction.
+ * We stop considering further keys once we reach the first nonrequired
+ * key (which must come after all required keys), so this can't affect us.
+ *
+ * The required keys that we use as starting boundaries have to be =, >,
+ * or >= keys for a forward scan or =, <, <= keys for a backwards scan.
+ * We can use keys for multiple attributes so long as the prior attributes
+ * had only =, >= (resp. =, <=) keys. These rules are very similar to the
+ * rules that preprocessing used to determine which keys to mark required.
+ * We cannot always use every required key as a positioning key, though.
+ * Skip arrays necessitate independently applying our own rules here.
+ * Skip arrays are always generally considered = array keys, but we'll
+ * nevertheless treat them as inequalities at certain points of the scan.
+ * When that happens, it _might_ have implications for the number of
+ * required keys that we can safely use for initial positioning purposes.
*
- * When both equality and inequality keys appear for a single attribute
- * (again, only possible when cross-type operators appear), we *must*
- * select one of the equality keys for the starting point, because
- * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
- * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
- * start at x=4, we will fail and stop before reaching x=10. If multiple
- * equality quals survive preprocessing, however, it doesn't matter which
- * one we use --- by definition, they are either redundant or
- * contradictory.
+ * For example, a forward scan with a skip array on its leading attribute
+ * (with no low_compare/high_compare) will have at least two required scan
+ * keys, but we won't use any of them as boundary keys during the scan's
+ * initial call here. Our positioning key during the first call here can
+ * be thought of as representing "> -infinity". Similarly, if such a skip
+ * array's low_compare is "a > 'foo'", then we position using "a > 'foo'"
+ * during the scan's initial call here; a lower-order key such as "b = 42"
+ * can't be used until the "a" array advances beyond MINVAL/low_compare.
*
- * In practice we rarely see any "attribute boundary key gaps" here.
- * Preprocessing can usually backfill skip array keys for any attributes
- * that were omitted from the original scan->keyData[] input keys. All
- * array keys are always considered = keys, but we'll sometimes need to
- * treat the current key value as if we were using an inequality strategy.
- * This happens with range skip arrays, which store inequality keys in the
- * array's low_compare/high_compare fields (used to find the first/last
- * set of matches, when = key will lack a usable sk_argument value).
- * These are always preferred over any redundant "standard" inequality
- * keys on the same column (per the usual rule about preferring = keys).
- * Note also that any column with an = skip array key can never have an
- * additional, contradictory = key.
+ * On the other hand, if such a skip array's low_compare was "a >= 'foo'",
+ * then we _can_ use "a >= 'foo' AND b = 42" during the initial call here.
+ * A subsequent call here might have us use "a = 'fop' AND b = 42". Note
+ * that we treat = and >= as equivalent when scanning forwards (just as we
+ * treat = and <= as equivalent when scanning backwards). We effectively
+ * do the same thing (though with a distinct "a" element/value) each time.
*
* All keys (with the exception of SK_SEARCHNULL keys and SK_BT_SKIP
* array keys whose array is "null_elem=true") imply a NOT NULL qualifier.
@@ -1011,41 +1016,38 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* traversing a lot of null entries at the start of the scan.
*
* In this loop, row-comparison keys are treated the same as keys on their
- * first (leftmost) columns. We'll add on lower-order columns of the row
- * comparison below, if possible.
+ * first (leftmost) columns. We'll add all lower-order columns of the row
+ * comparison that were marked required during preprocessing below.
*
- * The selected scan keys (at most one per index column) are remembered by
- * storing their addresses into the local startKeys[] array.
- *
- * _bt_checkkeys/_bt_advance_array_keys decide whether and when to start
- * the next primitive index scan (for scans with array keys) based in part
- * on an understanding of how it'll enable us to reposition the scan.
- * They're directly aware of how we'll sometimes cons up an explicit
- * SK_SEARCHNOTNULL key. They'll even end primitive scans by applying a
- * symmetric "deduce NOT NULL" rule of their own. This allows top-level
- * scans to skip large groups of NULLs through repeated deductions about
- * key strictness (for a required inequality key) and whether NULLs in the
- * key's index column are stored last or first (relative to non-NULLs).
+ * _bt_advance_array_keys needs to know exactly how we'll reposition the
+ * scan (should it opt to schedule another primitive index scan). It is
+ * critical that primscans only be scheduled when they'll definitely make
+ * some useful progress. _bt_advance_array_keys does this by calling
+ * _bt_checkkeys routines that report whether a tuple is past the end of
+ * matches for the scan's keys (given the scan's current array elements).
+ * If the page's final tuple is "after the end of matches" for a scan that
+ * uses the *opposite* scan direction, then it must follow that it's also
+ * "before the start of matches" for the actual current scan direction.
+ * It is therefore essential that all of our initial positioning rules are
+ * symmetric with _bt_checkkeys's corresponding continuescan=false rule.
* If you update anything here, _bt_checkkeys/_bt_advance_array_keys might
* need to be kept in sync.
*----------
*/
- strat_total = BTEqualStrategyNumber;
if (so->numberOfKeys > 0)
{
AttrNumber curattr;
- ScanKey chosen;
+ ScanKey bkey;
ScanKey impliesNN;
ScanKey cur;
/*
- * chosen is the so-far-chosen key for the current attribute, if any.
- * We don't cast the decision in stone until we reach keys for the
- * next attribute.
+ * bkey will be set to the key that preprocessing left behind as the
+ * boundary key for this attribute, in this scan direction (if any)
*/
cur = so->keyData;
curattr = 1;
- chosen = NULL;
+ bkey = NULL;
/* Also remember any scankey that implies a NOT NULL constraint */
impliesNN = NULL;
@@ -1058,23 +1060,29 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
{
if (i >= so->numberOfKeys || cur->sk_attno != curattr)
{
+ /* Done looking for the curattr boundary key */
+ Assert(bkey == NULL ||
+ (bkey->sk_attno == curattr &&
+ (bkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
+ Assert(impliesNN == NULL ||
+ (impliesNN->sk_attno == curattr &&
+ (impliesNN->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
+
/*
- * Done looking at keys for curattr.
- *
* If this is a scan key for a skip array whose current
* element is MINVAL, choose low_compare (when scanning
* backwards it'll be MAXVAL, and we'll choose high_compare).
*
- * Note: if the array's low_compare key makes 'chosen' NULL,
+ * Note: if the array's low_compare key makes 'bkey' NULL,
* then we behave as if the array's first element is -inf,
* except when !array->null_elem implies a usable NOT NULL
* constraint.
*/
- if (chosen != NULL &&
- (chosen->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))
+ if (bkey != NULL &&
+ (bkey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))
{
- int ikey = chosen - so->keyData;
- ScanKey skipequalitykey = chosen;
+ int ikey = bkey - so->keyData;
+ ScanKey skipequalitykey = bkey;
BTArrayKeyInfo *array = NULL;
for (int arridx = 0; arridx < so->numArrayKeys; arridx++)
@@ -1087,42 +1095,41 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (ScanDirectionIsForward(dir))
{
Assert(!(skipequalitykey->sk_flags & SK_BT_MAXVAL));
- chosen = array->low_compare;
+ bkey = array->low_compare;
}
else
{
Assert(!(skipequalitykey->sk_flags & SK_BT_MINVAL));
- chosen = array->high_compare;
+ bkey = array->high_compare;
}
- Assert(chosen == NULL ||
- chosen->sk_attno == skipequalitykey->sk_attno);
+ Assert(bkey == NULL ||
+ bkey->sk_attno == skipequalitykey->sk_attno);
if (!array->null_elem)
impliesNN = skipequalitykey;
else
- Assert(chosen == NULL && impliesNN == NULL);
+ Assert(bkey == NULL && impliesNN == NULL);
}
/*
* If we didn't find a usable boundary key, see if we can
* deduce a NOT NULL key
*/
- if (chosen == NULL && impliesNN != NULL &&
+ if (bkey == NULL && impliesNN != NULL &&
((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
ScanDirectionIsForward(dir) :
ScanDirectionIsBackward(dir)))
{
- /* Yes, so build the key in notnullkeys[keysz] */
- chosen = &notnullkeys[keysz];
- ScanKeyEntryInitialize(chosen,
+ /* Final startKeys[] entry will be deduced NOT NULL key */
+ bkey = &notnullkey;
+ ScanKeyEntryInitialize(bkey,
(SK_SEARCHNOTNULL | SK_ISNULL |
(impliesNN->sk_flags &
(SK_BT_DESC | SK_BT_NULLS_FIRST))),
curattr,
- ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
- BTGreaterStrategyNumber :
- BTLessStrategyNumber),
+ ScanDirectionIsForward(dir) ?
+ BTGreaterStrategyNumber : BTLessStrategyNumber,
InvalidOid,
InvalidOid,
InvalidOid,
@@ -1130,12 +1137,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
}
/*
- * If we still didn't find a usable boundary key, quit; else
- * save the boundary key pointer in startKeys.
+ * If preprocessing didn't leave a usable boundary key, quit;
+ * else save the boundary key pointer in startKeys[]
*/
- if (chosen == NULL)
+ if (bkey == NULL)
break;
- startKeys[keysz++] = chosen;
+ startKeys[keysz++] = bkey;
/*
* We can only consider adding more boundary keys when the one
@@ -1143,7 +1150,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* (during backwards scans we can only do so when the key that
* we just added to startKeys[] uses the = or <= strategy)
*/
- strat_total = chosen->sk_strategy;
+ strat_total = bkey->sk_strategy;
if (strat_total == BTGreaterStrategyNumber ||
strat_total == BTLessStrategyNumber)
break;
@@ -1154,19 +1161,19 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* make strat_total > or < (and stop adding boundary keys).
* This can only happen with opclasses that lack skip support.
*/
- if (chosen->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR))
+ if (bkey->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR))
{
- Assert(chosen->sk_flags & SK_BT_SKIP);
+ Assert(bkey->sk_flags & SK_BT_SKIP);
Assert(strat_total == BTEqualStrategyNumber);
if (ScanDirectionIsForward(dir))
{
- Assert(!(chosen->sk_flags & SK_BT_PRIOR));
+ Assert(!(bkey->sk_flags & SK_BT_PRIOR));
strat_total = BTGreaterStrategyNumber;
}
else
{
- Assert(!(chosen->sk_flags & SK_BT_NEXT));
+ Assert(!(bkey->sk_flags & SK_BT_NEXT));
strat_total = BTLessStrategyNumber;
}
@@ -1180,24 +1187,30 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
/*
* Done if that was the last scan key output by preprocessing.
- * Also done if there is a gap index attribute that lacks a
- * usable key (only possible when preprocessing was unable to
- * generate a skip array key to "fill in the gap").
+ * Also done if we've now examined all keys marked required.
*/
if (i >= so->numberOfKeys ||
- cur->sk_attno != curattr + 1)
+ !(cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
break;
/*
* Reset for next attr.
*/
+ Assert(cur->sk_attno == curattr + 1);
curattr = cur->sk_attno;
- chosen = NULL;
+ bkey = NULL;
impliesNN = NULL;
}
/*
- * Can we use this key as a starting boundary for this attr?
+ * If we've located the starting boundary key for curattr, we have
+ * no interest in curattr's other required key
+ */
+ if (bkey != NULL)
+ continue;
+
+ /*
+ * Is this key the starting boundary key for curattr?
*
* If not, does it imply a NOT NULL constraint? (Because
* SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
@@ -1207,27 +1220,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
{
case BTLessStrategyNumber:
case BTLessEqualStrategyNumber:
- if (chosen == NULL)
- {
- if (ScanDirectionIsBackward(dir))
- chosen = cur;
- else
- impliesNN = cur;
- }
+ if (ScanDirectionIsBackward(dir))
+ bkey = cur;
+ else if (impliesNN == NULL)
+ impliesNN = cur;
break;
case BTEqualStrategyNumber:
- /* override any non-equality choice */
- chosen = cur;
+ bkey = cur;
break;
case BTGreaterEqualStrategyNumber:
case BTGreaterStrategyNumber:
- if (chosen == NULL)
- {
- if (ScanDirectionIsForward(dir))
- chosen = cur;
- else
- impliesNN = cur;
- }
+ if (ScanDirectionIsForward(dir))
+ bkey = cur;
+ else if (impliesNN == NULL)
+ impliesNN = cur;
break;
}
}
@@ -1253,16 +1259,18 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
Assert(keysz <= INDEX_MAX_KEYS);
for (int i = 0; i < keysz; i++)
{
- ScanKey cur = startKeys[i];
+ ScanKey bkey = startKeys[i];
- Assert(cur->sk_attno == i + 1);
+ Assert(bkey->sk_attno == i + 1);
- if (cur->sk_flags & SK_ROW_HEADER)
+ if (bkey->sk_flags & SK_ROW_HEADER)
{
/*
* Row comparison header: look to the first row member instead
*/
- ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
+ ScanKey subkey = (ScanKey) DatumGetPointer(bkey->sk_argument);
+ bool loosen_strat = false,
+ tighten_strat = false;
/*
* Cannot be a NULL in the first row member: _bt_preprocess_keys
@@ -1270,122 +1278,160 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* ever getting this far
*/
Assert(subkey->sk_flags & SK_ROW_MEMBER);
- Assert(subkey->sk_attno == cur->sk_attno);
+ Assert(subkey->sk_attno == bkey->sk_attno);
Assert(!(subkey->sk_flags & SK_ISNULL));
/*
+ * This is either a > or >= key (during backwards scans it is
+ * either < or <=) that was marked required during preprocessing.
+ * Later so->keyData[] keys can't have been marked required, so
+ * our row compare header key must be the final startKeys[] entry.
+ */
+ Assert(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD));
+ Assert(i == keysz - 1);
+
+ /*
* The member scankeys are already in insertion format (ie, they
* have sk_func = 3-way-comparison function)
*/
memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData));
/*
- * If the row comparison is the last positioning key we accepted,
- * try to add additional keys from the lower-order row members.
- * (If we accepted independent conditions on additional index
- * columns, we use those instead --- doesn't seem worth trying to
- * determine which is more restrictive.) Note that this is OK
- * even if the row comparison is of ">" or "<" type, because the
- * condition applied to all but the last row member is effectively
- * ">=" or "<=", and so the extra keys don't break the positioning
- * scheme. But, by the same token, if we aren't able to use all
- * the row members, then the part of the row comparison that we
- * did use has to be treated as just a ">=" or "<=" condition, and
- * so we'd better adjust strat_total accordingly.
+ * Now look to later row compare members.
+ *
+ * If there's an "index attribute gap" between two row compare
+ * members, the second member won't have been marked required, and
+ * so can't be used as a starting boundary key here. The part of
+ * the row comparison that we do still use has to be treated as a
+ * ">=" or "<=" condition. For example, a qual "(a, c) > (1, 42)"
+ * with an omitted intervening index attribute "b" will use an
+ * insertion scan key "a >= 1". Even the first "a = 1" tuple on
+ * the leaf level might satisfy the row compare qual.
+ *
+ * We're able to use a _more_ restrictive strategy when we reach a
+ * NULL row compare member, since they're always unsatisfiable.
+ * For example, a qual "(a, b, c) >= (1, NULL, 77)" will use an
+ * insertion scan key "a > 1". All tuples where "a = 1" cannot
+ * possibly satisfy the row compare qual, so this is safe.
*/
- if (i == keysz - 1)
+ Assert(!(subkey->sk_flags & SK_ROW_END));
+ for (;;)
{
- bool used_all_subkeys = false;
+ subkey++;
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
- Assert(!(subkey->sk_flags & SK_ROW_END));
- for (;;)
+ if (subkey->sk_flags & SK_ISNULL)
{
- subkey++;
- Assert(subkey->sk_flags & SK_ROW_MEMBER);
- if (subkey->sk_attno != keysz + 1)
- break; /* out-of-sequence, can't use it */
- if (subkey->sk_strategy != cur->sk_strategy)
- break; /* wrong direction, can't use it */
- if (subkey->sk_flags & SK_ISNULL)
- break; /* can't use null keys */
- Assert(keysz < INDEX_MAX_KEYS);
- memcpy(inskey.scankeys + keysz, subkey,
- sizeof(ScanKeyData));
- keysz++;
- if (subkey->sk_flags & SK_ROW_END)
- {
- used_all_subkeys = true;
- break;
- }
+ /*
+ * NULL member key, can only use earlier keys.
+ *
+ * We deliberately avoid checking if this key is marked
+ * required. All earlier keys are required, and this key
+ * is unsatisfiable either way, so we can't miss anything.
+ */
+ tighten_strat = true;
+ break;
}
- if (!used_all_subkeys)
+
+ if (!(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
{
- switch (strat_total)
- {
- case BTLessStrategyNumber:
- strat_total = BTLessEqualStrategyNumber;
- break;
- case BTGreaterStrategyNumber:
- strat_total = BTGreaterEqualStrategyNumber;
- break;
- }
+ /* nonrequired member key, can only use earlier keys */
+ loosen_strat = true;
+ break;
}
- break; /* done with outer loop */
+
+ Assert(subkey->sk_attno == keysz + 1);
+ Assert(subkey->sk_strategy == bkey->sk_strategy);
+ Assert(keysz < INDEX_MAX_KEYS);
+
+ memcpy(inskey.scankeys + keysz, subkey,
+ sizeof(ScanKeyData));
+ keysz++;
+ if (subkey->sk_flags & SK_ROW_END)
+ break;
}
- }
- else
- {
- /*
- * Ordinary comparison key. Transform the search-style scan key
- * to an insertion scan key by replacing the sk_func with the
- * appropriate btree comparison function.
- *
- * If scankey operator is not a cross-type comparison, we can use
- * the cached comparison function; otherwise gotta look it up in
- * the catalogs. (That can't lead to infinite recursion, since no
- * indexscan initiated by syscache lookup will use cross-data-type
- * operators.)
- *
- * We support the convention that sk_subtype == InvalidOid means
- * the opclass input type; this is a hack to simplify life for
- * ScanKeyInit().
- */
- if (cur->sk_subtype == rel->rd_opcintype[i] ||
- cur->sk_subtype == InvalidOid)
+ Assert(!(loosen_strat && tighten_strat));
+ if (loosen_strat)
{
- FmgrInfo *procinfo;
-
- procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
- ScanKeyEntryInitializeWithInfo(inskey.scankeys + i,
- cur->sk_flags,
- cur->sk_attno,
- InvalidStrategy,
- cur->sk_subtype,
- cur->sk_collation,
- procinfo,
- cur->sk_argument);
+ /* Use less restrictive strategy (and fewer member keys) */
+ switch (strat_total)
+ {
+ case BTLessStrategyNumber:
+ strat_total = BTLessEqualStrategyNumber;
+ break;
+ case BTGreaterStrategyNumber:
+ strat_total = BTGreaterEqualStrategyNumber;
+ break;
+ }
}
- else
+ if (tighten_strat)
{
- RegProcedure cmp_proc;
-
- cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
- rel->rd_opcintype[i],
- cur->sk_subtype,
- BTORDER_PROC);
- if (!RegProcedureIsValid(cmp_proc))
- elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
- BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
- cur->sk_attno, RelationGetRelationName(rel));
- ScanKeyEntryInitialize(inskey.scankeys + i,
- cur->sk_flags,
- cur->sk_attno,
- InvalidStrategy,
- cur->sk_subtype,
- cur->sk_collation,
- cmp_proc,
- cur->sk_argument);
+ /* Use more restrictive strategy (and fewer member keys) */
+ switch (strat_total)
+ {
+ case BTLessEqualStrategyNumber:
+ strat_total = BTLessStrategyNumber;
+ break;
+ case BTGreaterEqualStrategyNumber:
+ strat_total = BTGreaterStrategyNumber;
+ break;
+ }
}
+
+ /* done adding to inskey (row comparison keys always come last) */
+ break;
+ }
+
+ /*
+ * Ordinary comparison key/search-style key.
+ *
+ * Transform the search-style scan key to an insertion scan key by
+ * replacing the sk_func with the appropriate btree 3-way-comparison
+ * function.
+ *
+ * If scankey operator is not a cross-type comparison, we can use the
+ * cached comparison function; otherwise gotta look it up in the
+ * catalogs. (That can't lead to infinite recursion, since no
+ * indexscan initiated by syscache lookup will use cross-data-type
+ * operators.)
+ *
+ * We support the convention that sk_subtype == InvalidOid means the
+ * opclass input type; this hack simplifies life for ScanKeyInit().
+ */
+ if (bkey->sk_subtype == rel->rd_opcintype[i] ||
+ bkey->sk_subtype == InvalidOid)
+ {
+ FmgrInfo *procinfo;
+
+ procinfo = index_getprocinfo(rel, bkey->sk_attno, BTORDER_PROC);
+ ScanKeyEntryInitializeWithInfo(inskey.scankeys + i,
+ bkey->sk_flags,
+ bkey->sk_attno,
+ InvalidStrategy,
+ bkey->sk_subtype,
+ bkey->sk_collation,
+ procinfo,
+ bkey->sk_argument);
+ }
+ else
+ {
+ RegProcedure cmp_proc;
+
+ cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
+ rel->rd_opcintype[i],
+ bkey->sk_subtype, BTORDER_PROC);
+ if (!RegProcedureIsValid(cmp_proc))
+ elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
+ BTORDER_PROC, rel->rd_opcintype[i], bkey->sk_subtype,
+ bkey->sk_attno, RelationGetRelationName(rel));
+ ScanKeyEntryInitialize(inskey.scankeys + i,
+ bkey->sk_flags,
+ bkey->sk_attno,
+ InvalidStrategy,
+ bkey->sk_subtype,
+ bkey->sk_collation,
+ cmp_proc,
+ bkey->sk_argument);
}
}
@@ -1474,6 +1520,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (!BufferIsValid(so->currPos.buf))
{
+ Assert(!so->needPrimScan);
+
/*
* We only get here if the index is completely empty. Lock relation
* because nothing finer to lock exists. Without a buffer lock, it's
@@ -1492,7 +1540,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (!BufferIsValid(so->currPos.buf))
{
- Assert(!so->needPrimScan);
_bt_parallel_done(scan);
return false;
}
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index c71d1b6f2e1..9aed207995f 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -44,7 +44,6 @@ static bool _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *arra
static bool _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array);
static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
bool *skip_array_set);
-static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir);
static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
bool readpagetup, int sktrig, bool *scanBehind);
@@ -52,7 +51,6 @@ static bool _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
int sktrig, bool sktrig_required);
#ifdef USE_ASSERT_CHECKING
-static bool _bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir);
static bool _bt_verify_keys_with_arraykeys(IndexScanDesc scan);
#endif
static bool _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
@@ -1035,73 +1033,6 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
}
/*
- * _bt_rewind_nonrequired_arrays() -- Rewind SAOP arrays not marked required
- *
- * Called when _bt_advance_array_keys decides to start a new primitive index
- * scan on the basis of the current scan position being before the position
- * that _bt_first is capable of repositioning the scan to by applying an
- * inequality operator required in the opposite-to-scan direction only.
- *
- * Although equality strategy scan keys (for both arrays and non-arrays alike)
- * are either marked required in both directions or in neither direction,
- * there is a sense in which non-required arrays behave like required arrays.
- * With a qual such as "WHERE a IN (100, 200) AND b >= 3 AND c IN (5, 6, 7)",
- * the scan key on "c" is non-required, but nevertheless enables positioning
- * the scan at the first tuple >= "(100, 3, 5)" on the leaf level during the
- * first descent of the tree by _bt_first. Later on, there could also be a
- * second descent, that places the scan right before tuples >= "(200, 3, 5)".
- * _bt_first must never be allowed to build an insertion scan key whose "c"
- * entry is set to a value other than 5, the "c" array's first element/value.
- * (Actually, it's the first in the current scan direction. This example uses
- * a forward scan.)
- *
- * Calling here resets the array scan key elements for the scan's non-required
- * arrays. This is strictly necessary for correctness in a subset of cases
- * involving "required in opposite direction"-triggered primitive index scans.
- * Not all callers are at risk of _bt_first using a non-required array like
- * this, but advancement always resets the arrays when another primitive scan
- * is scheduled, just to keep things simple. Array advancement even makes
- * sure to reset non-required arrays during scans that have no inequalities.
- * (Advancement still won't call here when there are no inequalities, though
- * that's just because it's all handled indirectly instead.)
- *
- * Note: _bt_verify_arrays_bt_first is called by an assertion to enforce that
- * everybody got this right.
- *
- * Note: In practice almost all SAOP arrays are marked required during
- * preprocessing (if necessary by generating skip arrays). It is hardly ever
- * truly necessary to call here, but consistently doing so is simpler.
- */
-static void
-_bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir)
-{
- Relation rel = scan->indexRelation;
- BTScanOpaque so = (BTScanOpaque) scan->opaque;
- int arrayidx = 0;
-
- for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
- {
- ScanKey cur = so->keyData + ikey;
- BTArrayKeyInfo *array = NULL;
-
- if (!(cur->sk_flags & SK_SEARCHARRAY) ||
- cur->sk_strategy != BTEqualStrategyNumber)
- continue;
-
- array = &so->arrayKeys[arrayidx++];
- Assert(array->scan_key == ikey);
-
- if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
- continue;
-
- Assert(array->num_elems != -1); /* No non-required skip arrays */
-
- _bt_array_set_low_or_high(rel, cur, array,
- ScanDirectionIsForward(dir));
- }
-}
-
-/*
* _bt_tuple_before_array_skeys() -- too early to advance required arrays?
*
* We always compare the tuple using the current array keys (which we assume
@@ -1380,8 +1311,6 @@ _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir)
*/
if (so->needPrimScan)
{
- Assert(_bt_verify_arrays_bt_first(scan, dir));
-
/*
* Flag was set -- must call _bt_first again, which will reset the
* scan's needPrimScan flag
@@ -2007,14 +1936,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
*/
else if (has_required_opposite_direction_only && pstate->finaltup &&
unlikely(!_bt_oppodir_checkkeys(scan, dir, pstate->finaltup)))
- {
- /*
- * Make sure that any SAOP arrays that were not marked required by
- * preprocessing are reset to their first element for this direction
- */
- _bt_rewind_nonrequired_arrays(scan, dir);
goto new_prim_scan;
- }
continue_scan:
@@ -2045,8 +1967,6 @@ continue_scan:
*/
so->oppositeDirCheck = has_required_opposite_direction_only;
- _bt_rewind_nonrequired_arrays(scan, dir);
-
/*
* skip by setting "look ahead" mechanism's offnum for forwards scans
* (backwards scans check scanBehind flag directly instead)
@@ -2143,48 +2063,6 @@ end_toplevel_scan:
#ifdef USE_ASSERT_CHECKING
/*
- * Verify that the scan's qual state matches what we expect at the point that
- * _bt_start_prim_scan is about to start a just-scheduled new primitive scan.
- *
- * We enforce a rule against non-required array scan keys: they must start out
- * with whatever element is the first for the scan's current scan direction.
- * See _bt_rewind_nonrequired_arrays comments for an explanation.
- */
-static bool
-_bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir)
-{
- BTScanOpaque so = (BTScanOpaque) scan->opaque;
- int arrayidx = 0;
-
- for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
- {
- ScanKey cur = so->keyData + ikey;
- BTArrayKeyInfo *array = NULL;
- int first_elem_dir;
-
- if (!(cur->sk_flags & SK_SEARCHARRAY) ||
- cur->sk_strategy != BTEqualStrategyNumber)
- continue;
-
- array = &so->arrayKeys[arrayidx++];
-
- if (((cur->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
- ((cur->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
- continue;
-
- if (ScanDirectionIsForward(dir))
- first_elem_dir = 0;
- else
- first_elem_dir = array->num_elems - 1;
-
- if (array->cur_elem != first_elem_dir)
- return false;
- }
-
- return _bt_verify_keys_with_arraykeys(scan);
-}
-
-/*
* Verify that the scan's "so->keyData[]" scan keys are in agreement with
* its array key state
*/
@@ -2194,6 +2072,7 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
BTScanOpaque so = (BTScanOpaque) scan->opaque;
int last_sk_attno = InvalidAttrNumber,
arrayidx = 0;
+ bool nonrequiredseen = false;
if (!so->qual_ok)
return false;
@@ -2217,8 +2096,16 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
if (array->num_elems != -1 &&
cur->sk_argument != array->elem_values[array->cur_elem])
return false;
- if (last_sk_attno > cur->sk_attno)
- return false;
+ if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))
+ {
+ if (last_sk_attno > cur->sk_attno)
+ return false;
+ if (nonrequiredseen)
+ return false;
+ }
+ else
+ nonrequiredseen = true;
+
last_sk_attno = cur->sk_attno;
}
@@ -2551,37 +2438,12 @@ _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate)
if (!(key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
{
/* Scan key isn't marked required (corner case) */
- Assert(!(key->sk_flags & SK_ROW_HEADER));
break; /* unsafe */
}
if (key->sk_flags & SK_ROW_HEADER)
{
- /*
- * RowCompare inequality.
- *
- * Only the first subkey from a RowCompare can ever be marked
- * required (that happens when the row header is marked required).
- * There is no simple, general way for us to transitively deduce
- * whether or not every tuple on the page satisfies a RowCompare
- * key based only on firsttup and lasttup -- so we just give up.
- */
- if (!start_past_saop_eq && !so->skipScan)
- break; /* unsafe to go further */
-
- /*
- * We have to be even more careful with RowCompares that come
- * after an array: we assume it's unsafe to even bypass the array.
- * Calling _bt_start_array_keys to recover the scan's arrays
- * following use of forcenonrequired mode isn't compatible with
- * _bt_check_rowcompare's continuescan=false behavior with NULL
- * row compare members. _bt_advance_array_keys must not make a
- * decision on the basis of a key not being satisfied in the
- * opposite-to-scan direction until the scan reaches a leaf page
- * where the same key begins to be satisfied in scan direction.
- * The _bt_first !used_all_subkeys behavior makes this limitation
- * hard to work around some other way.
- */
- return; /* completely unsafe to set pstate.startikey */
+ /* RowCompare inequalities currently aren't supported */
+ break; /* "unsafe" */
}
if (key->sk_strategy != BTEqualStrategyNumber)
{
@@ -3078,6 +2940,31 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
Assert(subkey->sk_flags & SK_ROW_MEMBER);
+ /* When a NULL row member is compared, the row never matches */
+ if (subkey->sk_flags & SK_ISNULL)
+ {
+ /*
+ * Unlike the simple-scankey case, this isn't a disallowed case
+ * (except when it's the first row element that has the NULL arg).
+ * But it can never match. If all the earlier row comparison
+ * columns are required for the scan direction, we can stop the
+ * scan, because there can't be another tuple that will succeed.
+ */
+ Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
+ subkey--;
+ if (forcenonrequired)
+ {
+ /* treating scan's keys as non-required */
+ }
+ else if ((subkey->sk_flags & SK_BT_REQFWD) &&
+ ScanDirectionIsForward(dir))
+ *continuescan = false;
+ else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
+ ScanDirectionIsBackward(dir))
+ *continuescan = false;
+ return false;
+ }
+
if (subkey->sk_attno > tupnatts)
{
/*
@@ -3087,11 +2974,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
* attribute passes the qual.
*/
Assert(BTreeTupleIsPivot(tuple));
- cmpresult = 0;
- if (subkey->sk_flags & SK_ROW_END)
- break;
- subkey++;
- continue;
+ return true;
}
datum = index_getattr(tuple,
@@ -3101,6 +2984,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
if (isNull)
{
+ int reqflags;
+
if (forcenonrequired)
{
/* treating scan's keys as non-required */
@@ -3111,15 +2996,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
* Since NULLs are sorted before non-NULLs, we know we have
* reached the lower limit of the range of values for this
* index attr. On a backward scan, we can stop if this qual
- * is one of the "must match" subset. We can stop regardless
- * of whether the qual is > or <, so long as it's required,
- * because it's not possible for any future tuples to pass. On
- * a forward scan, however, we must keep going, because we may
- * have initially positioned to the start of the index.
- * (_bt_advance_array_keys also relies on this behavior during
- * forward scans.)
+ * is one of the "must match" subset. However, on a forwards
+ * scan, we must keep going, because we may have initially
+ * positioned to the start of the index.
+ *
+ * All required NULLS FIRST > row members can use NULL tuple
+ * values to end backwards scans, just like with other values.
+ * A qual "WHERE (a, b, c) > (9, 42, 'foo')" can terminate a
+ * backwards scan upon reaching the index's rightmost "a = 9"
+ * tuple whose "b" column contains a NULL (if not sooner).
+ * Since "b" is NULLS FIRST, we can treat its NULLs as "<" 42.
+ */
+ reqflags = SK_BT_REQBKWD;
+
+ /*
+ * When a most significant required NULLS FIRST < row compare
+ * member sees NULL tuple values during a backwards scan, it
+ * signals the end of matches for the whole row compare/scan.
+ * A qual "WHERE (a, b, c) < (9, 42, 'foo')" will terminate a
+ * backwards scan upon reaching the rightmost tuple whose "a"
+ * column has a NULL. The "a" NULL value is "<" 9, and yet
+ * our < row compare will still end the scan. (This isn't
+ * safe with later/lower-order row members. Notice that it
+ * can only happen with an "a" NULL some time after the scan
+ * completely stops needing to use its "b" and "c" members.)
*/
- if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
+ if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument))
+ reqflags |= SK_BT_REQFWD; /* safe, first row member */
+
+ if ((subkey->sk_flags & reqflags) &&
ScanDirectionIsBackward(dir))
*continuescan = false;
}
@@ -3129,15 +3034,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
* Since NULLs are sorted after non-NULLs, we know we have
* reached the upper limit of the range of values for this
* index attr. On a forward scan, we can stop if this qual is
- * one of the "must match" subset. We can stop regardless of
- * whether the qual is > or <, so long as it's required,
- * because it's not possible for any future tuples to pass. On
- * a backward scan, however, we must keep going, because we
- * may have initially positioned to the end of the index.
- * (_bt_advance_array_keys also relies on this behavior during
- * backward scans.)
+ * one of the "must match" subset. However, on a backward
+ * scan, we must keep going, because we may have initially
+ * positioned to the end of the index.
+ *
+ * All required NULLS LAST < row members can use NULL tuple
+ * values to end forwards scans, just like with other values.
+ * A qual "WHERE (a, b, c) < (9, 42, 'foo')" can terminate a
+ * forwards scan upon reaching the index's leftmost "a = 9"
+ * tuple whose "b" column contains a NULL (if not sooner).
+ * Since "b" is NULLS LAST, we can treat its NULLs as ">" 42.
*/
- if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
+ reqflags = SK_BT_REQFWD;
+
+ /*
+ * When a most significant required NULLS LAST > row compare
+ * member sees NULL tuple values during a forwards scan, it
+ * signals the end of matches for the whole row compare/scan.
+ * A qual "WHERE (a, b, c) > (9, 42, 'foo')" will terminate a
+ * forwards scan upon reaching the leftmost tuple whose "a"
+ * column has a NULL. The "a" NULL value is ">" 9, and yet
+ * our > row compare will end the scan. (This isn't safe with
+ * later/lower-order row members. Notice that it can only
+ * happen with an "a" NULL some time after the scan completely
+ * stops needing to use its "b" and "c" members.)
+ */
+ if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument))
+ reqflags |= SK_BT_REQBKWD; /* safe, first row member */
+
+ if ((subkey->sk_flags & reqflags) &&
ScanDirectionIsForward(dir))
*continuescan = false;
}
@@ -3148,30 +3073,6 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
return false;
}
- if (subkey->sk_flags & SK_ISNULL)
- {
- /*
- * Unlike the simple-scankey case, this isn't a disallowed case
- * (except when it's the first row element that has the NULL arg).
- * But it can never match. If all the earlier row comparison
- * columns are required for the scan direction, we can stop the
- * scan, because there can't be another tuple that will succeed.
- */
- Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
- subkey--;
- if (forcenonrequired)
- {
- /* treating scan's keys as non-required */
- }
- else if ((subkey->sk_flags & SK_BT_REQFWD) &&
- ScanDirectionIsForward(dir))
- *continuescan = false;
- else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
- ScanDirectionIsBackward(dir))
- *continuescan = false;
- return false;
- }
-
/* Perform the test --- three-way comparison not bool operator */
cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
subkey->sk_collation,
diff --git a/src/backend/access/rmgrdesc/replorigindesc.c b/src/backend/access/rmgrdesc/replorigindesc.c
index 5dd74233996..35e3af2903e 100644
--- a/src/backend/access/rmgrdesc/replorigindesc.c
+++ b/src/backend/access/rmgrdesc/replorigindesc.c
@@ -29,7 +29,7 @@ replorigin_desc(StringInfo buf, XLogReaderState *record)
xlrec = (xl_replorigin_set *) rec;
- appendStringInfo(buf, "set %u; lsn %X/%X; force: %d",
+ appendStringInfo(buf, "set %u; lsn %X/%08X; force: %d",
xlrec->node_id,
LSN_FORMAT_ARGS(xlrec->remote_lsn),
xlrec->force);
diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c
index 305598e2865..f0f696855b9 100644
--- a/src/backend/access/rmgrdesc/xactdesc.c
+++ b/src/backend/access/rmgrdesc/xactdesc.c
@@ -359,7 +359,7 @@ xact_desc_commit(StringInfo buf, uint8 info, xl_xact_commit *xlrec, RepOriginId
if (parsed.xinfo & XACT_XINFO_HAS_ORIGIN)
{
- appendStringInfo(buf, "; origin: node %u, lsn %X/%X, at %s",
+ appendStringInfo(buf, "; origin: node %u, lsn %X/%08X, at %s",
origin_id,
LSN_FORMAT_ARGS(parsed.origin_lsn),
timestamptz_to_str(parsed.origin_timestamp));
@@ -384,7 +384,7 @@ xact_desc_abort(StringInfo buf, uint8 info, xl_xact_abort *xlrec, RepOriginId or
if (parsed.xinfo & XACT_XINFO_HAS_ORIGIN)
{
- appendStringInfo(buf, "; origin: node %u, lsn %X/%X, at %s",
+ appendStringInfo(buf, "; origin: node %u, lsn %X/%08X, at %s",
origin_id,
LSN_FORMAT_ARGS(parsed.origin_lsn),
timestamptz_to_str(parsed.origin_timestamp));
@@ -418,7 +418,7 @@ xact_desc_prepare(StringInfo buf, uint8 info, xl_xact_prepare *xlrec, RepOriginI
* way as PrepareRedoAdd().
*/
if (origin_id != InvalidRepOriginId)
- appendStringInfo(buf, "; origin: node %u, lsn %X/%X, at %s",
+ appendStringInfo(buf, "; origin: node %u, lsn %X/%08X, at %s",
origin_id,
LSN_FORMAT_ARGS(parsed.origin_lsn),
timestamptz_to_str(parsed.origin_timestamp));
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 58040f28656..cd6c2a2f650 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -65,7 +65,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
{
CheckPoint *checkpoint = (CheckPoint *) rec;
- appendStringInfo(buf, "redo %X/%X; "
+ appendStringInfo(buf, "redo %X/%08X; "
"tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
"oldest xid %u in DB %u; oldest multi %u in DB %u; "
"oldest/newest commit timestamp xid: %u/%u; "
@@ -111,7 +111,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
XLogRecPtr startpoint;
memcpy(&startpoint, rec, sizeof(XLogRecPtr));
- appendStringInfo(buf, "%X/%X", LSN_FORMAT_ARGS(startpoint));
+ appendStringInfo(buf, "%X/%08X", LSN_FORMAT_ARGS(startpoint));
}
else if (info == XLOG_PARAMETER_CHANGE)
{
@@ -156,7 +156,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
xl_overwrite_contrecord xlrec;
memcpy(&xlrec, rec, sizeof(xl_overwrite_contrecord));
- appendStringInfo(buf, "lsn %X/%X; time %s",
+ appendStringInfo(buf, "lsn %X/%08X; time %s",
LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
timestamptz_to_str(xlrec.overwrite_time));
}
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 48f10bec91e..e80fbe109cf 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -110,9 +110,7 @@ static SlruCtlData XactCtlData;
#define XactCtl (&XactCtlData)
-static int ZeroCLOGPage(int64 pageno, bool writeXlog);
static bool CLOGPagePrecedes(int64 page1, int64 page2);
-static void WriteZeroPageXlogRec(int64 pageno);
static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact,
Oid oldestXactDb);
static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
@@ -832,41 +830,8 @@ check_transaction_buffers(int *newval, void **extra, GucSource source)
void
BootStrapCLOG(void)
{
- int slotno;
- LWLock *lock = SimpleLruGetBankLock(XactCtl, 0);
-
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the commit log */
- slotno = ZeroCLOGPage(0, false);
-
- /* Make sure it's written out */
- SimpleLruWritePage(XactCtl, slotno);
- Assert(!XactCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of CLOG to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroCLOGPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(XactCtl, pageno);
-
- if (writeXlog)
- WriteZeroPageXlogRec(pageno);
-
- return slotno;
+ /* Zero the initial page and flush it to disk */
+ SimpleLruZeroAndWritePage(XactCtl, 0);
}
/*
@@ -974,8 +939,9 @@ ExtendCLOG(TransactionId newestXact)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroCLOGPage(pageno, true);
+ /* Zero the page and make a WAL entry about it */
+ SimpleLruZeroPage(XactCtl, pageno);
+ XLogSimpleInsertInt64(RM_CLOG_ID, CLOG_ZEROPAGE, pageno);
LWLockRelease(lock);
}
@@ -1068,17 +1034,6 @@ CLOGPagePrecedes(int64 page1, int64 page2)
/*
- * Write a ZEROPAGE xlog record
- */
-static void
-WriteZeroPageXlogRec(int64 pageno)
-{
- XLogBeginInsert();
- XLogRegisterData(&pageno, sizeof(pageno));
- (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
-}
-
-/*
* Write a TRUNCATE xlog record
*
* We must flush the xlog record to disk before returning --- see notes
@@ -1114,19 +1069,9 @@ clog_redo(XLogReaderState *record)
if (info == CLOG_ZEROPAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(XactCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroCLOGPage(pageno, false);
- SimpleLruWritePage(XactCtl, slotno);
- Assert(!XactCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(XactCtl, pageno);
}
else if (info == CLOG_TRUNCATE)
{
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 113fae1437a..370b38e048b 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -114,11 +114,9 @@ static void SetXidCommitTsInPage(TransactionId xid, int nsubxids,
static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
RepOriginId nodeid, int slotno);
static void error_commit_ts_disabled(void);
-static int ZeroCommitTsPage(int64 pageno, bool writeXlog);
static bool CommitTsPagePrecedes(int64 page1, int64 page2);
static void ActivateCommitTs(void);
static void DeactivateCommitTs(void);
-static void WriteZeroPageXlogRec(int64 pageno);
static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid);
/*
@@ -603,28 +601,6 @@ BootStrapCommitTs(void)
}
/*
- * Initialize (or reinitialize) a page of CommitTs to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroCommitTsPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(CommitTsCtl, pageno);
-
- if (writeXlog)
- WriteZeroPageXlogRec(pageno);
-
- return slotno;
-}
-
-/*
* This must be called ONCE during postmaster or standalone-backend startup,
* after StartupXLOG has initialized TransamVariables->nextXid.
*/
@@ -707,6 +683,13 @@ ActivateCommitTs(void)
TransactionId xid;
int64 pageno;
+ /*
+ * During bootstrap, we should not register commit timestamps so skip the
+ * activation in this case.
+ */
+ if (IsBootstrapProcessingMode())
+ return;
+
/* If we've done this already, there's nothing to do */
LWLockAcquire(CommitTsLock, LW_EXCLUSIVE);
if (commitTsShared->commitTsActive)
@@ -747,16 +730,7 @@ ActivateCommitTs(void)
/* Create the current segment file, if necessary */
if (!SimpleLruDoesPhysicalPageExist(CommitTsCtl, pageno))
- {
- LWLock *lock = SimpleLruGetBankLock(CommitTsCtl, pageno);
- int slotno;
-
- LWLockAcquire(lock, LW_EXCLUSIVE);
- slotno = ZeroCommitTsPage(pageno, false);
- SimpleLruWritePage(CommitTsCtl, slotno);
- Assert(!CommitTsCtl->shared->page_dirty[slotno]);
- LWLockRelease(lock);
- }
+ SimpleLruZeroAndWritePage(CommitTsCtl, pageno);
/* Change the activation status in shared memory. */
LWLockAcquire(CommitTsLock, LW_EXCLUSIVE);
@@ -867,8 +841,12 @@ ExtendCommitTs(TransactionId newestXact)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroCommitTsPage(pageno, !InRecovery);
+ /* Zero the page ... */
+ SimpleLruZeroPage(CommitTsCtl, pageno);
+
+ /* and make a WAL entry about that, unless we're in REDO */
+ if (!InRecovery)
+ XLogSimpleInsertInt64(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE, pageno);
LWLockRelease(lock);
}
@@ -983,17 +961,6 @@ CommitTsPagePrecedes(int64 page1, int64 page2)
/*
- * Write a ZEROPAGE xlog record
- */
-static void
-WriteZeroPageXlogRec(int64 pageno)
-{
- XLogBeginInsert();
- XLogRegisterData(&pageno, sizeof(pageno));
- (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE);
-}
-
-/*
* Write a TRUNCATE xlog record
*/
static void
@@ -1023,19 +990,9 @@ commit_ts_redo(XLogReaderState *record)
if (info == COMMIT_TS_ZEROPAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(CommitTsCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroCommitTsPage(pageno, false);
- SimpleLruWritePage(CommitTsCtl, slotno);
- Assert(!CommitTsCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(CommitTsCtl, pageno);
}
else if (info == COMMIT_TS_TRUNCATE)
{
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 3c06ac45532..3cb09c3d598 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -401,8 +401,6 @@ static void mXactCachePut(MultiXactId multi, int nmembers,
static char *mxstatus_to_string(MultiXactStatus status);
/* management of SLRU infrastructure */
-static int ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog);
-static int ZeroMultiXactMemberPage(int64 pageno, bool writeXlog);
static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
@@ -413,7 +411,6 @@ static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
MultiXactOffset start, uint32 distance);
static bool SetOffsetVacuumLimit(bool is_startup);
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
-static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
static void WriteMTruncateXlogRec(Oid oldestMultiDB,
MultiXactId startTruncOff,
MultiXactId endTruncOff,
@@ -1847,7 +1844,7 @@ AtPrepare_MultiXact(void)
* Clean up after successful PREPARE TRANSACTION
*/
void
-PostPrepare_MultiXact(TransactionId xid)
+PostPrepare_MultiXact(FullTransactionId fxid)
{
MultiXactId myOldestMember;
@@ -1858,7 +1855,7 @@ PostPrepare_MultiXact(TransactionId xid)
myOldestMember = OldestMemberMXactId[MyProcNumber];
if (MultiXactIdIsValid(myOldestMember))
{
- ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false);
+ ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
/*
* Even though storing MultiXactId is atomic, acquire lock to make
@@ -1896,10 +1893,10 @@ PostPrepare_MultiXact(TransactionId xid)
* Recover the state of a prepared transaction at startup
*/
void
-multixact_twophase_recover(TransactionId xid, uint16 info,
+multixact_twophase_recover(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
- ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false);
+ ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
MultiXactId oldestMember;
/*
@@ -1917,10 +1914,10 @@ multixact_twophase_recover(TransactionId xid, uint16 info,
* Similar to AtEOXact_MultiXact but for COMMIT PREPARED
*/
void
-multixact_twophase_postcommit(TransactionId xid, uint16 info,
+multixact_twophase_postcommit(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
- ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, true);
+ ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true);
Assert(len == sizeof(MultiXactId));
@@ -1932,10 +1929,10 @@ multixact_twophase_postcommit(TransactionId xid, uint16 info,
* This is actually just the same as the COMMIT case.
*/
void
-multixact_twophase_postabort(TransactionId xid, uint16 info,
+multixact_twophase_postabort(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
- multixact_twophase_postcommit(xid, info, recdata, len);
+ multixact_twophase_postcommit(fxid, info, recdata, len);
}
/*
@@ -2033,70 +2030,9 @@ check_multixact_member_buffers(int *newval, void **extra, GucSource source)
void
BootStrapMultiXact(void)
{
- int slotno;
- LWLock *lock;
-
- lock = SimpleLruGetBankLock(MultiXactOffsetCtl, 0);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the offsets log */
- slotno = ZeroMultiXactOffsetPage(0, false);
-
- /* Make sure it's written out */
- SimpleLruWritePage(MultiXactOffsetCtl, slotno);
- Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-
- lock = SimpleLruGetBankLock(MultiXactMemberCtl, 0);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the members log */
- slotno = ZeroMultiXactMemberPage(0, false);
-
- /* Make sure it's written out */
- SimpleLruWritePage(MultiXactMemberCtl, slotno);
- Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of MultiXactOffset to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
-
- if (writeXlog)
- WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
-
- return slotno;
-}
-
-/*
- * Ditto, for MultiXactMember
- */
-static int
-ZeroMultiXactMemberPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
-
- if (writeXlog)
- WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
-
- return slotno;
+ /* Zero the initial pages and flush them to disk */
+ SimpleLruZeroAndWritePage(MultiXactOffsetCtl, 0);
+ SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
}
/*
@@ -2134,7 +2070,7 @@ MaybeExtendOffsetSlru(void)
* with creating a new segment file even if the page we're writing is
* not the first in it, so this is enough.
*/
- slotno = ZeroMultiXactOffsetPage(pageno, false);
+ slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
SimpleLruWritePage(MultiXactOffsetCtl, slotno);
}
@@ -2568,8 +2504,10 @@ ExtendMultiXactOffset(MultiXactId multi)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroMultiXactOffsetPage(pageno, true);
+ /* Zero the page and make a WAL entry about it */
+ SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
+ XLogSimpleInsertInt64(RM_MULTIXACT_ID, XLOG_MULTIXACT_ZERO_OFF_PAGE,
+ pageno);
LWLockRelease(lock);
}
@@ -2611,8 +2549,10 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroMultiXactMemberPage(pageno, true);
+ /* Zero the page and make a WAL entry about it */
+ SimpleLruZeroPage(MultiXactMemberCtl, pageno);
+ XLogSimpleInsertInt64(RM_MULTIXACT_ID,
+ XLOG_MULTIXACT_ZERO_MEM_PAGE, pageno);
LWLockRelease(lock);
}
@@ -3348,18 +3288,6 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
}
/*
- * Write an xlog record reflecting the zeroing of either a MEMBERs or
- * OFFSETs page (info shows which)
- */
-static void
-WriteMZeroPageXlogRec(int64 pageno, uint8 info)
-{
- XLogBeginInsert();
- XLogRegisterData(&pageno, sizeof(pageno));
- (void) XLogInsert(RM_MULTIXACT_ID, info);
-}
-
-/*
* Write a TRUNCATE xlog record
*
* We must flush the xlog record to disk before returning --- see notes in
@@ -3401,36 +3329,16 @@ multixact_redo(XLogReaderState *record)
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroMultiXactOffsetPage(pageno, false);
- SimpleLruWritePage(MultiXactOffsetCtl, slotno);
- Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(MultiXactOffsetCtl, pageno);
}
else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroMultiXactMemberPage(pageno, false);
- SimpleLruWritePage(MultiXactMemberCtl, slotno);
- Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(MultiXactMemberCtl, pageno);
}
else if (info == XLOG_MULTIXACT_CREATE_ID)
{
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index fe56286d9a9..10ec259f382 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -434,6 +434,31 @@ SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
}
/*
+ * This is a convenience wrapper for the common case of zeroing a page and
+ * immediately flushing it to disk.
+ *
+ * Control lock is acquired and released here.
+ */
+void
+SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno)
+{
+ int slotno;
+ LWLock *lock;
+
+ lock = SimpleLruGetBankLock(ctl, pageno);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
+
+ /* Create and zero the page */
+ slotno = SimpleLruZeroPage(ctl, pageno);
+
+ /* Make sure it's written out */
+ SimpleLruWritePage(ctl, slotno);
+ Assert(!ctl->shared->page_dirty[slotno]);
+
+ LWLockRelease(lock);
+}
+
+/*
* Wait for any active I/O on a page slot to finish. (This does not
* guarantee that new I/O hasn't been started before we return, though.
* In fact the slot might not even contain the same page anymore.)
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 15153618fad..09aace9e09f 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -74,7 +74,6 @@ static SlruCtlData SubTransCtlData;
#define SubTransCtl (&SubTransCtlData)
-static int ZeroSUBTRANSPage(int64 pageno);
static bool SubTransPagePrecedes(int64 page1, int64 page2);
@@ -269,33 +268,8 @@ check_subtrans_buffers(int *newval, void **extra, GucSource source)
void
BootStrapSUBTRANS(void)
{
- int slotno;
- LWLock *lock = SimpleLruGetBankLock(SubTransCtl, 0);
-
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the subtrans log */
- slotno = ZeroSUBTRANSPage(0);
-
- /* Make sure it's written out */
- SimpleLruWritePage(SubTransCtl, slotno);
- Assert(!SubTransCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of SUBTRANS to zeroes.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroSUBTRANSPage(int64 pageno)
-{
- return SimpleLruZeroPage(SubTransCtl, pageno);
+ /* Zero the initial page and flush it to disk */
+ SimpleLruZeroAndWritePage(SubTransCtl, 0);
}
/*
@@ -335,7 +309,7 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
prevlock = lock;
}
- (void) ZeroSUBTRANSPage(startPage);
+ (void) SimpleLruZeroPage(SubTransCtl, startPage);
if (startPage == endPage)
break;
@@ -395,7 +369,7 @@ ExtendSUBTRANS(TransactionId newestXact)
LWLockAcquire(lock, LW_EXCLUSIVE);
/* Zero the page */
- ZeroSUBTRANSPage(pageno);
+ SimpleLruZeroPage(SubTransCtl, pageno);
LWLockRelease(lock);
}
diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
index a27f27cc037..186eb91f609 100644
--- a/src/backend/access/transam/timeline.c
+++ b/src/backend/access/transam/timeline.c
@@ -154,7 +154,7 @@ readTimeLineHistory(TimeLineID targetTLI)
if (*ptr == '\0' || *ptr == '#')
continue;
- nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
+ nfields = sscanf(fline, "%u\t%X/%08X", &tli, &switchpoint_hi, &switchpoint_lo);
if (nfields < 1)
{
@@ -399,7 +399,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
* parent file failed to end with one.
*/
snprintf(buffer, sizeof(buffer),
- "%s%u\t%X/%X\t%s\n",
+ "%s%u\t%X/%08X\t%s\n",
(srcfd < 0) ? "" : "\n",
parentTLI,
LSN_FORMAT_ARGS(switchpoint),
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 73a80559194..85cbe397cb2 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -159,7 +159,7 @@ typedef struct GlobalTransactionData
*/
XLogRecPtr prepare_start_lsn; /* XLOG offset of prepare record start */
XLogRecPtr prepare_end_lsn; /* XLOG offset of prepare record end */
- TransactionId xid; /* The GXACT id */
+ FullTransactionId fxid; /* The GXACT full xid */
Oid owner; /* ID of user that executed the xact */
ProcNumber locking_backend; /* backend currently working on the xact */
@@ -197,6 +197,7 @@ static GlobalTransaction MyLockedGxact = NULL;
static bool twophaseExitRegistered = false;
+static void PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning);
static void RecordTransactionCommitPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
@@ -216,19 +217,19 @@ static void RecordTransactionAbortPrepared(TransactionId xid,
int nstats,
xl_xact_stats_item *stats,
const char *gid);
-static void ProcessRecords(char *bufptr, TransactionId xid,
+static void ProcessRecords(char *bufptr, FullTransactionId fxid,
const TwoPhaseCallback callbacks[]);
static void RemoveGXact(GlobalTransaction gxact);
static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len);
-static char *ProcessTwoPhaseBuffer(TransactionId xid,
+static char *ProcessTwoPhaseBuffer(FullTransactionId fxid,
XLogRecPtr prepare_start_lsn,
bool fromdisk, bool setParent, bool setNextXid);
-static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid,
+static void MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid,
const char *gid, TimestampTz prepared_at, Oid owner,
Oid databaseid);
-static void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning);
-static void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
+static void RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning);
+static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len);
/*
* Initialization of shared memory
@@ -356,7 +357,7 @@ PostPrepare_Twophase(void)
* Reserve the GID for the given transaction.
*/
GlobalTransaction
-MarkAsPreparing(TransactionId xid, const char *gid,
+MarkAsPreparing(FullTransactionId fxid, const char *gid,
TimestampTz prepared_at, Oid owner, Oid databaseid)
{
GlobalTransaction gxact;
@@ -407,7 +408,7 @@ MarkAsPreparing(TransactionId xid, const char *gid,
gxact = TwoPhaseState->freeGXacts;
TwoPhaseState->freeGXacts = gxact->next;
- MarkAsPreparingGuts(gxact, xid, gid, prepared_at, owner, databaseid);
+ MarkAsPreparingGuts(gxact, fxid, gid, prepared_at, owner, databaseid);
gxact->ondisk = false;
@@ -430,11 +431,13 @@ MarkAsPreparing(TransactionId xid, const char *gid,
* Note: This function should be called with appropriate locks held.
*/
static void
-MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
- TimestampTz prepared_at, Oid owner, Oid databaseid)
+MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid,
+ const char *gid, TimestampTz prepared_at, Oid owner,
+ Oid databaseid)
{
PGPROC *proc;
int i;
+ TransactionId xid = XidFromFullTransactionId(fxid);
Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
@@ -479,7 +482,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
proc->subxidStatus.count = 0;
gxact->prepared_at = prepared_at;
- gxact->xid = xid;
+ gxact->fxid = fxid;
gxact->owner = owner;
gxact->locking_backend = MyProcNumber;
gxact->valid = false;
@@ -797,12 +800,12 @@ pg_prepared_xact(PG_FUNCTION_ARGS)
* caller had better hold it.
*/
static GlobalTransaction
-TwoPhaseGetGXact(TransactionId xid, bool lock_held)
+TwoPhaseGetGXact(FullTransactionId fxid, bool lock_held)
{
GlobalTransaction result = NULL;
int i;
- static TransactionId cached_xid = InvalidTransactionId;
+ static FullTransactionId cached_fxid = {InvalidTransactionId};
static GlobalTransaction cached_gxact = NULL;
Assert(!lock_held || LWLockHeldByMe(TwoPhaseStateLock));
@@ -811,7 +814,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held)
* During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called
* repeatedly for the same XID. We can save work with a simple cache.
*/
- if (xid == cached_xid)
+ if (FullTransactionIdEquals(fxid, cached_fxid))
return cached_gxact;
if (!lock_held)
@@ -821,7 +824,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held)
{
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
- if (gxact->xid == xid)
+ if (FullTransactionIdEquals(gxact->fxid, fxid))
{
result = gxact;
break;
@@ -832,9 +835,10 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held)
LWLockRelease(TwoPhaseStateLock);
if (result == NULL) /* should not happen */
- elog(ERROR, "failed to find GlobalTransaction for xid %u", xid);
+ elog(ERROR, "failed to find GlobalTransaction for xid %u",
+ XidFromFullTransactionId(fxid));
- cached_xid = xid;
+ cached_fxid = fxid;
cached_gxact = result;
return result;
@@ -881,7 +885,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid,
*have_more = true;
break;
}
- result = gxact->xid;
+ result = XidFromFullTransactionId(gxact->fxid);
}
}
@@ -892,7 +896,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid,
/*
* TwoPhaseGetDummyProcNumber
- * Get the dummy proc number for prepared transaction specified by XID
+ * Get the dummy proc number for prepared transaction
*
* Dummy proc numbers are similar to proc numbers of real backends. They
* start at MaxBackends, and are unique across all currently active real
@@ -900,24 +904,24 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid,
* TwoPhaseStateLock will not be taken, so the caller had better hold it.
*/
ProcNumber
-TwoPhaseGetDummyProcNumber(TransactionId xid, bool lock_held)
+TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held)
{
- GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held);
+ GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held);
return gxact->pgprocno;
}
/*
* TwoPhaseGetDummyProc
- * Get the PGPROC that represents a prepared transaction specified by XID
+ * Get the PGPROC that represents a prepared transaction
*
* If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
* caller had better hold it.
*/
PGPROC *
-TwoPhaseGetDummyProc(TransactionId xid, bool lock_held)
+TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held)
{
- GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held);
+ GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held);
return GetPGProcByNumber(gxact->pgprocno);
}
@@ -942,10 +946,8 @@ AdjustToFullTransactionId(TransactionId xid)
}
static inline int
-TwoPhaseFilePath(char *path, TransactionId xid)
+TwoPhaseFilePath(char *path, FullTransactionId fxid)
{
- FullTransactionId fxid = AdjustToFullTransactionId(xid);
-
return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X",
EpochFromFullTransactionId(fxid),
XidFromFullTransactionId(fxid));
@@ -1049,7 +1051,7 @@ void
StartPrepare(GlobalTransaction gxact)
{
PGPROC *proc = GetPGProcByNumber(gxact->pgprocno);
- TransactionId xid = gxact->xid;
+ TransactionId xid = XidFromFullTransactionId(gxact->fxid);
TwoPhaseFileHeader hdr;
TransactionId *children;
RelFileLocator *commitrels;
@@ -1281,10 +1283,11 @@ RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
* If it looks OK (has a valid magic number and CRC), return the palloc'd
* contents of the file, issuing an error when finding corrupted data. If
* missing_ok is true, which indicates that missing files can be safely
- * ignored, then return NULL. This state can be reached when doing recovery.
+ * ignored, then return NULL. This state can be reached when doing recovery
+ * after discarding two-phase files from frozen epochs.
*/
static char *
-ReadTwoPhaseFile(TransactionId xid, bool missing_ok)
+ReadTwoPhaseFile(FullTransactionId fxid, bool missing_ok)
{
char path[MAXPGPATH];
char *buf;
@@ -1296,7 +1299,7 @@ ReadTwoPhaseFile(TransactionId xid, bool missing_ok)
file_crc;
int r;
- TwoPhaseFilePath(path, xid);
+ TwoPhaseFilePath(path, fxid);
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
if (fd < 0)
@@ -1426,12 +1429,12 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
if (errormsg)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not read two-phase state from WAL at %X/%X: %s",
+ errmsg("could not read two-phase state from WAL at %X/%08X: %s",
LSN_FORMAT_ARGS(lsn), errormsg)));
else
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not read two-phase state from WAL at %X/%X",
+ errmsg("could not read two-phase state from WAL at %X/%08X",
LSN_FORMAT_ARGS(lsn))));
}
@@ -1439,7 +1442,7 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
(XLogRecGetInfo(xlogreader) & XLOG_XACT_OPMASK) != XLOG_XACT_PREPARE)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("expected two-phase state data is not present in WAL at %X/%X",
+ errmsg("expected two-phase state data is not present in WAL at %X/%08X",
LSN_FORMAT_ARGS(lsn))));
if (len != NULL)
@@ -1461,6 +1464,7 @@ StandbyTransactionIdIsPrepared(TransactionId xid)
char *buf;
TwoPhaseFileHeader *hdr;
bool result;
+ FullTransactionId fxid;
Assert(TransactionIdIsValid(xid));
@@ -1468,7 +1472,8 @@ StandbyTransactionIdIsPrepared(TransactionId xid)
return false; /* nothing to do */
/* Read and validate file */
- buf = ReadTwoPhaseFile(xid, true);
+ fxid = AdjustToFullTransactionId(xid);
+ buf = ReadTwoPhaseFile(fxid, true);
if (buf == NULL)
return false;
@@ -1488,6 +1493,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
{
GlobalTransaction gxact;
PGPROC *proc;
+ FullTransactionId fxid;
TransactionId xid;
bool ondisk;
char *buf;
@@ -1509,7 +1515,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
*/
gxact = LockGXact(gid, GetUserId());
proc = GetPGProcByNumber(gxact->pgprocno);
- xid = gxact->xid;
+ fxid = gxact->fxid;
+ xid = XidFromFullTransactionId(fxid);
/*
* Read and validate 2PC state data. State data will typically be stored
@@ -1517,7 +1524,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
* to disk if for some reason they have lived for a long time.
*/
if (gxact->ondisk)
- buf = ReadTwoPhaseFile(xid, false);
+ buf = ReadTwoPhaseFile(fxid, false);
else
XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
@@ -1636,11 +1643,11 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
/* And now do the callbacks */
if (isCommit)
- ProcessRecords(bufptr, xid, twophase_postcommit_callbacks);
+ ProcessRecords(bufptr, fxid, twophase_postcommit_callbacks);
else
- ProcessRecords(bufptr, xid, twophase_postabort_callbacks);
+ ProcessRecords(bufptr, fxid, twophase_postabort_callbacks);
- PredicateLockTwoPhaseFinish(xid, isCommit);
+ PredicateLockTwoPhaseFinish(fxid, isCommit);
/*
* Read this value while holding the two-phase lock, as the on-disk 2PC
@@ -1664,7 +1671,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
* And now we can clean up any files we may have left.
*/
if (ondisk)
- RemoveTwoPhaseFile(xid, true);
+ RemoveTwoPhaseFile(fxid, true);
MyLockedGxact = NULL;
@@ -1677,7 +1684,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
* Scan 2PC state data in memory and call the indicated callbacks for each 2PC record.
*/
static void
-ProcessRecords(char *bufptr, TransactionId xid,
+ProcessRecords(char *bufptr, FullTransactionId fxid,
const TwoPhaseCallback callbacks[])
{
for (;;)
@@ -1691,24 +1698,28 @@ ProcessRecords(char *bufptr, TransactionId xid,
bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk));
if (callbacks[record->rmid] != NULL)
- callbacks[record->rmid] (xid, record->info, bufptr, record->len);
+ callbacks[record->rmid] (fxid, record->info, bufptr, record->len);
bufptr += MAXALIGN(record->len);
}
}
/*
- * Remove the 2PC file for the specified XID.
+ * Remove the 2PC file.
*
* If giveWarning is false, do not complain about file-not-present;
* this is an expected case during WAL replay.
+ *
+ * This routine is used at early stages at recovery where future and
+ * past orphaned files are checked, hence the FullTransactionId to build
+ * a complete file name fit for the removal.
*/
static void
-RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
+RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning)
{
char path[MAXPGPATH];
- TwoPhaseFilePath(path, xid);
+ TwoPhaseFilePath(path, fxid);
if (unlink(path))
if (errno != ENOENT || giveWarning)
ereport(WARNING,
@@ -1723,7 +1734,7 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
* Note: content and len don't include CRC.
*/
static void
-RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
+RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len)
{
char path[MAXPGPATH];
pg_crc32c statefile_crc;
@@ -1734,7 +1745,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
COMP_CRC32C(statefile_crc, content, len);
FIN_CRC32C(statefile_crc);
- TwoPhaseFilePath(path, xid);
+ TwoPhaseFilePath(path, fxid);
fd = OpenTransientFile(path,
O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY);
@@ -1846,7 +1857,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
int len;
XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, &len);
- RecreateTwoPhaseFile(gxact->xid, buf, len);
+ RecreateTwoPhaseFile(gxact->fxid, buf, len);
gxact->ondisk = true;
gxact->prepare_start_lsn = InvalidXLogRecPtr;
gxact->prepare_end_lsn = InvalidXLogRecPtr;
@@ -1897,19 +1908,17 @@ restoreTwoPhaseData(void)
if (strlen(clde->d_name) == 16 &&
strspn(clde->d_name, "0123456789ABCDEF") == 16)
{
- TransactionId xid;
FullTransactionId fxid;
char *buf;
fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16));
- xid = XidFromFullTransactionId(fxid);
- buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr,
+ buf = ProcessTwoPhaseBuffer(fxid, InvalidXLogRecPtr,
true, false, false);
if (buf == NULL)
continue;
- PrepareRedoAdd(buf, InvalidXLogRecPtr,
+ PrepareRedoAdd(fxid, buf, InvalidXLogRecPtr,
InvalidXLogRecPtr, InvalidRepOriginId);
}
}
@@ -1968,9 +1977,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Assert(gxact->inredo);
- xid = gxact->xid;
-
- buf = ProcessTwoPhaseBuffer(xid,
+ buf = ProcessTwoPhaseBuffer(gxact->fxid,
gxact->prepare_start_lsn,
gxact->ondisk, false, true);
@@ -1981,6 +1988,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
* OK, we think this file is valid. Incorporate xid into the
* running-minimum result.
*/
+ xid = XidFromFullTransactionId(gxact->fxid);
if (TransactionIdPrecedes(xid, result))
result = xid;
@@ -2036,15 +2044,12 @@ StandbyRecoverPreparedTransactions(void)
LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
{
- TransactionId xid;
char *buf;
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
Assert(gxact->inredo);
- xid = gxact->xid;
-
- buf = ProcessTwoPhaseBuffer(xid,
+ buf = ProcessTwoPhaseBuffer(gxact->fxid,
gxact->prepare_start_lsn,
gxact->ondisk, true, false);
if (buf != NULL)
@@ -2077,16 +2082,14 @@ RecoverPreparedTransactions(void)
LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
{
- TransactionId xid;
char *buf;
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
+ FullTransactionId fxid = gxact->fxid;
char *bufptr;
TwoPhaseFileHeader *hdr;
TransactionId *subxids;
const char *gid;
- xid = gxact->xid;
-
/*
* Reconstruct subtrans state for the transaction --- needed because
* pg_subtrans is not preserved over a restart. Note that we are
@@ -2096,17 +2099,20 @@ RecoverPreparedTransactions(void)
* SubTransSetParent has been set before, if the prepared transaction
* generated xid assignment records.
*/
- buf = ProcessTwoPhaseBuffer(xid,
+ buf = ProcessTwoPhaseBuffer(gxact->fxid,
gxact->prepare_start_lsn,
gxact->ondisk, true, false);
if (buf == NULL)
continue;
ereport(LOG,
- (errmsg("recovering prepared transaction %u from shared memory", xid)));
+ (errmsg("recovering prepared transaction %u of epoch %u from shared memory",
+ XidFromFullTransactionId(gxact->fxid),
+ EpochFromFullTransactionId(gxact->fxid))));
hdr = (TwoPhaseFileHeader *) buf;
- Assert(TransactionIdEquals(hdr->xid, xid));
+ Assert(TransactionIdEquals(hdr->xid,
+ XidFromFullTransactionId(gxact->fxid)));
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
gid = (const char *) bufptr;
bufptr += MAXALIGN(hdr->gidlen);
@@ -2122,7 +2128,7 @@ RecoverPreparedTransactions(void)
* Recreate its GXACT and dummy PGPROC. But, check whether it was
* added in redo and already has a shmem entry for it.
*/
- MarkAsPreparingGuts(gxact, xid, gid,
+ MarkAsPreparingGuts(gxact, gxact->fxid, gid,
hdr->prepared_at,
hdr->owner, hdr->database);
@@ -2137,7 +2143,7 @@ RecoverPreparedTransactions(void)
/*
* Recover other state (notably locks) using resource managers.
*/
- ProcessRecords(bufptr, xid, twophase_recover_callbacks);
+ ProcessRecords(bufptr, fxid, twophase_recover_callbacks);
/*
* Release locks held by the standby process after we process each
@@ -2145,7 +2151,7 @@ RecoverPreparedTransactions(void)
* additional locks at any one time.
*/
if (InHotStandby)
- StandbyReleaseLockTree(xid, hdr->nsubxacts, subxids);
+ StandbyReleaseLockTree(hdr->xid, hdr->nsubxacts, subxids);
/*
* We're done with recovering this transaction. Clear MyLockedGxact,
@@ -2164,7 +2170,7 @@ RecoverPreparedTransactions(void)
/*
* ProcessTwoPhaseBuffer
*
- * Given a transaction id, read it either from disk or read it directly
+ * Given a FullTransactionId, read it either from disk or read it directly
* via shmem xlog record pointer using the provided "prepare_start_lsn".
*
* If setParent is true, set up subtransaction parent linkages.
@@ -2173,13 +2179,12 @@ RecoverPreparedTransactions(void)
* value scanned.
*/
static char *
-ProcessTwoPhaseBuffer(TransactionId xid,
+ProcessTwoPhaseBuffer(FullTransactionId fxid,
XLogRecPtr prepare_start_lsn,
bool fromdisk,
bool setParent, bool setNextXid)
{
FullTransactionId nextXid = TransamVariables->nextXid;
- TransactionId origNextXid = XidFromFullTransactionId(nextXid);
TransactionId *subxids;
char *buf;
TwoPhaseFileHeader *hdr;
@@ -2191,41 +2196,46 @@ ProcessTwoPhaseBuffer(TransactionId xid,
Assert(prepare_start_lsn != InvalidXLogRecPtr);
/* Already processed? */
- if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
+ if (TransactionIdDidCommit(XidFromFullTransactionId(fxid)) ||
+ TransactionIdDidAbort(XidFromFullTransactionId(fxid)))
{
if (fromdisk)
{
ereport(WARNING,
- (errmsg("removing stale two-phase state file for transaction %u",
- xid)));
- RemoveTwoPhaseFile(xid, true);
+ (errmsg("removing stale two-phase state file for transaction %u of epoch %u",
+ XidFromFullTransactionId(fxid),
+ EpochFromFullTransactionId(fxid))));
+ RemoveTwoPhaseFile(fxid, true);
}
else
{
ereport(WARNING,
- (errmsg("removing stale two-phase state from memory for transaction %u",
- xid)));
- PrepareRedoRemove(xid, true);
+ (errmsg("removing stale two-phase state from memory for transaction %u of epoch %u",
+ XidFromFullTransactionId(fxid),
+ EpochFromFullTransactionId(fxid))));
+ PrepareRedoRemoveFull(fxid, true);
}
return NULL;
}
/* Reject XID if too new */
- if (TransactionIdFollowsOrEquals(xid, origNextXid))
+ if (FullTransactionIdFollowsOrEquals(fxid, nextXid))
{
if (fromdisk)
{
ereport(WARNING,
- (errmsg("removing future two-phase state file for transaction %u",
- xid)));
- RemoveTwoPhaseFile(xid, true);
+ (errmsg("removing future two-phase state file for transaction %u of epoch %u",
+ XidFromFullTransactionId(fxid),
+ EpochFromFullTransactionId(fxid))));
+ RemoveTwoPhaseFile(fxid, true);
}
else
{
ereport(WARNING,
- (errmsg("removing future two-phase state from memory for transaction %u",
- xid)));
- PrepareRedoRemove(xid, true);
+ (errmsg("removing future two-phase state from memory for transaction %u of epoch %u",
+ XidFromFullTransactionId(fxid),
+ EpochFromFullTransactionId(fxid))));
+ PrepareRedoRemoveFull(fxid, true);
}
return NULL;
}
@@ -2233,7 +2243,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
if (fromdisk)
{
/* Read and validate file */
- buf = ReadTwoPhaseFile(xid, false);
+ buf = ReadTwoPhaseFile(fxid, false);
}
else
{
@@ -2243,18 +2253,20 @@ ProcessTwoPhaseBuffer(TransactionId xid,
/* Deconstruct header */
hdr = (TwoPhaseFileHeader *) buf;
- if (!TransactionIdEquals(hdr->xid, xid))
+ if (!TransactionIdEquals(hdr->xid, XidFromFullTransactionId(fxid)))
{
if (fromdisk)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
- errmsg("corrupted two-phase state file for transaction %u",
- xid)));
+ errmsg("corrupted two-phase state file for transaction %u of epoch %u",
+ XidFromFullTransactionId(fxid),
+ EpochFromFullTransactionId(fxid))));
else
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
- errmsg("corrupted two-phase state in memory for transaction %u",
- xid)));
+ errmsg("corrupted two-phase state in memory for transaction %u of epoch %u",
+ XidFromFullTransactionId(fxid),
+ EpochFromFullTransactionId(fxid))));
}
/*
@@ -2268,14 +2280,14 @@ ProcessTwoPhaseBuffer(TransactionId xid,
{
TransactionId subxid = subxids[i];
- Assert(TransactionIdFollows(subxid, xid));
+ Assert(TransactionIdFollows(subxid, XidFromFullTransactionId(fxid)));
/* update nextXid if needed */
if (setNextXid)
AdvanceNextFullTransactionIdPastXid(subxid);
if (setParent)
- SubTransSetParent(subxid, xid);
+ SubTransSetParent(subxid, XidFromFullTransactionId(fxid));
}
return buf;
@@ -2466,8 +2478,9 @@ RecordTransactionAbortPrepared(TransactionId xid,
* data, the entry is marked as located on disk.
*/
void
-PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
- XLogRecPtr end_lsn, RepOriginId origin_id)
+PrepareRedoAdd(FullTransactionId fxid, char *buf,
+ XLogRecPtr start_lsn, XLogRecPtr end_lsn,
+ RepOriginId origin_id)
{
TwoPhaseFileHeader *hdr = (TwoPhaseFileHeader *) buf;
char *bufptr;
@@ -2477,6 +2490,13 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
Assert(RecoveryInProgress());
+ if (!FullTransactionIdIsValid(fxid))
+ {
+ Assert(InRecovery);
+ fxid = FullTransactionIdFromAllowableAt(TransamVariables->nextXid,
+ hdr->xid);
+ }
+
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
gid = (const char *) bufptr;
@@ -2505,14 +2525,15 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
{
char path[MAXPGPATH];
- TwoPhaseFilePath(path, hdr->xid);
+ Assert(InRecovery);
+ TwoPhaseFilePath(path, fxid);
if (access(path, F_OK) == 0)
{
ereport(reachedConsistency ? ERROR : WARNING,
(errmsg("could not recover two-phase state file for transaction %u",
hdr->xid),
- errdetail("Two-phase state file has been found in WAL record %X/%X, but this transaction has already been restored from disk.",
+ errdetail("Two-phase state file has been found in WAL record %X/%08X, but this transaction has already been restored from disk.",
LSN_FORMAT_ARGS(start_lsn))));
return;
}
@@ -2536,7 +2557,7 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
gxact->prepared_at = hdr->prepared_at;
gxact->prepare_start_lsn = start_lsn;
gxact->prepare_end_lsn = end_lsn;
- gxact->xid = hdr->xid;
+ gxact->fxid = fxid;
gxact->owner = hdr->owner;
gxact->locking_backend = INVALID_PROC_NUMBER;
gxact->valid = false;
@@ -2555,11 +2576,13 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
false /* backward */ , false /* WAL */ );
}
- elog(DEBUG2, "added 2PC data in shared memory for transaction %u", gxact->xid);
+ elog(DEBUG2, "added 2PC data in shared memory for transaction %u of epoch %u",
+ XidFromFullTransactionId(gxact->fxid),
+ EpochFromFullTransactionId(gxact->fxid));
}
/*
- * PrepareRedoRemove
+ * PrepareRedoRemoveFull
*
* Remove the corresponding gxact entry from TwoPhaseState. Also remove
* the 2PC file if a prepared transaction was saved via an earlier checkpoint.
@@ -2567,8 +2590,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
* Caller must hold TwoPhaseStateLock in exclusive mode, because TwoPhaseState
* is updated.
*/
-void
-PrepareRedoRemove(TransactionId xid, bool giveWarning)
+static void
+PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning)
{
GlobalTransaction gxact = NULL;
int i;
@@ -2581,7 +2604,7 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning)
{
gxact = TwoPhaseState->prepXacts[i];
- if (gxact->xid == xid)
+ if (FullTransactionIdEquals(gxact->fxid, fxid))
{
Assert(gxact->inredo);
found = true;
@@ -2598,13 +2621,29 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning)
/*
* And now we can clean up any files we may have left.
*/
- elog(DEBUG2, "removing 2PC data for transaction %u", xid);
+ elog(DEBUG2, "removing 2PC data for transaction %u of epoch %u ",
+ XidFromFullTransactionId(fxid),
+ EpochFromFullTransactionId(fxid));
+
if (gxact->ondisk)
- RemoveTwoPhaseFile(xid, giveWarning);
+ RemoveTwoPhaseFile(fxid, giveWarning);
+
RemoveGXact(gxact);
}
/*
+ * Wrapper of PrepareRedoRemoveFull(), for TransactionIds.
+ */
+void
+PrepareRedoRemove(TransactionId xid, bool giveWarning)
+{
+ FullTransactionId fxid =
+ FullTransactionIdFromAllowableAt(TransamVariables->nextXid, xid);
+
+ PrepareRedoRemoveFull(fxid, giveWarning);
+}
+
+/*
* LookupGXact
* Check if the prepared transaction with the given GID, lsn and timestamp
* exists.
@@ -2648,7 +2687,7 @@ LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn,
* between publisher and subscriber.
*/
if (gxact->ondisk)
- buf = ReadTwoPhaseFile(gxact->xid, false);
+ buf = ReadTwoPhaseFile(gxact->fxid, false);
else
{
Assert(gxact->prepare_start_lsn);
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b885513f765..41601fcb280 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2515,7 +2515,7 @@ static void
PrepareTransaction(void)
{
TransactionState s = CurrentTransactionState;
- TransactionId xid = GetCurrentTransactionId();
+ FullTransactionId fxid = GetCurrentFullTransactionId();
GlobalTransaction gxact;
TimestampTz prepared_at;
@@ -2644,7 +2644,7 @@ PrepareTransaction(void)
* Reserve the GID for this transaction. This could fail if the requested
* GID is invalid or already in use.
*/
- gxact = MarkAsPreparing(xid, prepareGID, prepared_at,
+ gxact = MarkAsPreparing(fxid, prepareGID, prepared_at,
GetUserId(), MyDatabaseId);
prepareGID = NULL;
@@ -2694,7 +2694,7 @@ PrepareTransaction(void)
* ProcArrayClearTransaction(). Otherwise, a GetLockConflicts() would
* conclude "xact already committed or aborted" for our locks.
*/
- PostPrepare_Locks(xid);
+ PostPrepare_Locks(fxid);
/*
* Let others know about no transaction in progress by me. This has to be
@@ -2738,9 +2738,9 @@ PrepareTransaction(void)
PostPrepare_smgr();
- PostPrepare_MultiXact(xid);
+ PostPrepare_MultiXact(fxid);
- PostPrepare_PredicateLocks(xid);
+ PostPrepare_PredicateLocks(fxid);
ResourceOwnerRelease(TopTransactionResourceOwner,
RESOURCE_RELEASE_LOCKS,
@@ -6420,7 +6420,8 @@ xact_redo(XLogReaderState *record)
* gxact entry.
*/
LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
- PrepareRedoAdd(XLogRecGetData(record),
+ PrepareRedoAdd(InvalidFullTransactionId,
+ XLogRecGetData(record),
record->ReadRecPtr,
record->EndRecPtr,
XLogRecGetOrigin(record));
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 47ffc0a2307..8e7827c6ed9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -449,7 +449,6 @@ typedef struct XLogCtlData
/* Protected by info_lck: */
XLogwrtRqst LogwrtRqst;
XLogRecPtr RedoRecPtr; /* a recent copy of Insert->RedoRecPtr */
- FullTransactionId ckptFullXid; /* nextXid of latest checkpoint */
XLogRecPtr asyncXactLSN; /* LSN of newest async commit/abort */
XLogRecPtr replicationSlotMinLSN; /* oldest LSN needed by any slot */
@@ -1028,7 +1027,7 @@ XLogInsertRecord(XLogRecData *rdata,
oldCxt = MemoryContextSwitchTo(walDebugCxt);
initStringInfo(&buf);
- appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
+ appendStringInfo(&buf, "INSERT @ %X/%08X: ", LSN_FORMAT_ARGS(EndPos));
/*
* We have to piece together the WAL record data from the XLogRecData
@@ -1549,8 +1548,8 @@ WaitXLogInsertionsToFinish(XLogRecPtr upto)
if (upto > reservedUpto)
{
ereport(LOG,
- (errmsg("request to flush past end of generated WAL; request %X/%X, current position %X/%X",
- LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto))));
+ errmsg("request to flush past end of generated WAL; request %X/%08X, current position %X/%08X",
+ LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto)));
upto = reservedUpto;
}
@@ -1716,7 +1715,7 @@ GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
if (expectedEndPtr != endptr)
- elog(PANIC, "could not find WAL buffer for %X/%X",
+ elog(PANIC, "could not find WAL buffer for %X/%08X",
LSN_FORMAT_ARGS(ptr));
}
else
@@ -1776,7 +1775,7 @@ WALReadFromBuffers(char *dstbuf, XLogRecPtr startptr, Size count,
inserted = pg_atomic_read_u64(&XLogCtl->logInsertResult);
if (startptr + count > inserted)
ereport(ERROR,
- errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
+ errmsg("cannot read past end of generated WAL: requested %X/%08X, current position %X/%08X",
LSN_FORMAT_ARGS(startptr + count),
LSN_FORMAT_ARGS(inserted)));
@@ -2281,7 +2280,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
#ifdef WAL_DEBUG
if (XLOG_DEBUG && npages > 0)
{
- elog(DEBUG1, "initialized %d pages, up to %X/%X",
+ elog(DEBUG1, "initialized %d pages, up to %X/%08X",
npages, LSN_FORMAT_ARGS(NewPageEndPtr));
}
#endif
@@ -2347,25 +2346,6 @@ check_wal_segment_size(int *newval, void **extra, GucSource source)
}
/*
- * GUC check_hook for max_slot_wal_keep_size
- *
- * We don't allow the value of max_slot_wal_keep_size other than -1 during the
- * binary upgrade. See start_postmaster() in pg_upgrade for more details.
- */
-bool
-check_max_slot_wal_keep_size(int *newval, void **extra, GucSource source)
-{
- if (IsBinaryUpgrade && *newval != -1)
- {
- GUC_check_errdetail("\"%s\" must be set to -1 during binary upgrade mode.",
- "max_slot_wal_keep_size");
- return false;
- }
-
- return true;
-}
-
-/*
* At a checkpoint, how many WAL segments to recycle as preallocated future
* XLOG segments? Returns the highest segment that should be preallocated.
*/
@@ -2492,7 +2472,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
XLogRecPtr EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]);
if (LogwrtResult.Write >= EndPtr)
- elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
+ elog(PANIC, "xlog write request %X/%08X is past end of log %X/%08X",
LSN_FORMAT_ARGS(LogwrtResult.Write),
LSN_FORMAT_ARGS(EndPtr));
@@ -2892,7 +2872,7 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
if (!force && newMinRecoveryPoint < lsn)
elog(WARNING,
- "xlog min recovery request %X/%X is past current point %X/%X",
+ "xlog min recovery request %X/%08X is past current point %X/%08X",
LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint));
/* update control file */
@@ -2905,9 +2885,9 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
ereport(DEBUG2,
- (errmsg_internal("updated min recovery point to %X/%X on timeline %u",
- LSN_FORMAT_ARGS(newMinRecoveryPoint),
- newMinRecoveryPointTLI)));
+ errmsg_internal("updated min recovery point to %X/%08X on timeline %u",
+ LSN_FORMAT_ARGS(newMinRecoveryPoint),
+ newMinRecoveryPointTLI));
}
}
LWLockRelease(ControlFileLock);
@@ -2945,7 +2925,7 @@ XLogFlush(XLogRecPtr record)
#ifdef WAL_DEBUG
if (XLOG_DEBUG)
- elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
+ elog(LOG, "xlog flush request %X/%08X; write %X/%08X; flush %X/%08X",
LSN_FORMAT_ARGS(record),
LSN_FORMAT_ARGS(LogwrtResult.Write),
LSN_FORMAT_ARGS(LogwrtResult.Flush));
@@ -3078,7 +3058,7 @@ XLogFlush(XLogRecPtr record)
*/
if (LogwrtResult.Flush < record)
elog(ERROR,
- "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
+ "xlog flush request %X/%08X is not satisfied --- flushed only to %X/%08X",
LSN_FORMAT_ARGS(record),
LSN_FORMAT_ARGS(LogwrtResult.Flush));
}
@@ -3205,7 +3185,7 @@ XLogBackgroundFlush(void)
#ifdef WAL_DEBUG
if (XLOG_DEBUG)
- elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
+ elog(LOG, "xlog bg flush request write %X/%08X; flush: %X/%08X, current is write %X/%08X; flush %X/%08X",
LSN_FORMAT_ARGS(WriteRqst.Write),
LSN_FORMAT_ARGS(WriteRqst.Flush),
LSN_FORMAT_ARGS(LogwrtResult.Write),
@@ -5763,7 +5743,6 @@ StartupXLOG(void)
SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
SetCommitTsLimit(checkPoint.oldestCommitTsXid,
checkPoint.newestCommitTsXid);
- XLogCtl->ckptFullXid = checkPoint.nextXid;
/*
* Clear out any old relcache cache files. This is *necessary* if we do
@@ -6505,7 +6484,7 @@ PerformRecoveryXLogAction(void)
else
{
RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
- CHECKPOINT_IMMEDIATE |
+ CHECKPOINT_FAST |
CHECKPOINT_WAIT);
}
@@ -6814,7 +6793,7 @@ ShutdownXLOG(int code, Datum arg)
WalSndWaitStopping();
if (RecoveryInProgress())
- CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
+ CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_FAST);
else
{
/*
@@ -6826,7 +6805,7 @@ ShutdownXLOG(int code, Datum arg)
if (XLogArchivingActive())
RequestXLogSwitch(false);
- CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
+ CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_FAST);
}
}
@@ -6842,24 +6821,24 @@ LogCheckpointStart(int flags, bool restartpoint)
(errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
(flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
(flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
- (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
+ (flags & CHECKPOINT_FAST) ? " fast" : "",
(flags & CHECKPOINT_FORCE) ? " force" : "",
(flags & CHECKPOINT_WAIT) ? " wait" : "",
(flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
(flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
- (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
+ (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
else
ereport(LOG,
/* translator: the placeholders show checkpoint options */
(errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
(flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
(flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
- (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
+ (flags & CHECKPOINT_FAST) ? " fast" : "",
(flags & CHECKPOINT_FORCE) ? " force" : "",
(flags & CHECKPOINT_WAIT) ? " wait" : "",
(flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
(flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
- (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
+ (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
}
/*
@@ -6921,7 +6900,7 @@ LogCheckpointEnd(bool restartpoint)
"%d removed, %d recycled; write=%ld.%03d s, "
"sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
"longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
- "estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X",
+ "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
CheckpointStats.ckpt_bufs_written,
(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
CheckpointStats.ckpt_slru_written,
@@ -6945,7 +6924,7 @@ LogCheckpointEnd(bool restartpoint)
"%d removed, %d recycled; write=%ld.%03d s, "
"sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
"longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
- "estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X",
+ "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
CheckpointStats.ckpt_bufs_written,
(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
CheckpointStats.ckpt_slru_written,
@@ -7042,12 +7021,12 @@ update_checkpoint_display(int flags, bool restartpoint, bool reset)
* flags is a bitwise OR of the following:
* CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
* CHECKPOINT_END_OF_RECOVERY: checkpoint is for end of WAL recovery.
- * CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
- * ignoring checkpoint_completion_target parameter.
+ * CHECKPOINT_FAST: finish the checkpoint ASAP, ignoring
+ * checkpoint_completion_target parameter.
* CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occurred
* since the last one (implied by CHECKPOINT_IS_SHUTDOWN or
* CHECKPOINT_END_OF_RECOVERY).
- * CHECKPOINT_FLUSH_ALL: also flush buffers of unlogged tables.
+ * CHECKPOINT_FLUSH_UNLOGGED: also flush buffers of unlogged tables.
*
* Note: flags contains other bits, of interest here only for logging purposes.
* In particular note that this routine is synchronous and does not pay
@@ -7456,11 +7435,6 @@ CreateCheckPoint(int flags)
UpdateControlFile();
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
- SpinLockAcquire(&XLogCtl->info_lck);
- XLogCtl->ckptFullXid = checkPoint.nextXid;
- SpinLockRelease(&XLogCtl->info_lck);
-
/*
* We are now done with critical updates; no need for system panic if we
* have trouble while fooling with old log segments.
@@ -7641,7 +7615,7 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr,
if (!RecoveryInProgress())
elog(ERROR, "can only be used at end of recovery");
if (pagePtr % XLOG_BLCKSZ != 0)
- elog(ERROR, "invalid position for missing continuation record %X/%X",
+ elog(ERROR, "invalid position for missing continuation record %X/%08X",
LSN_FORMAT_ARGS(pagePtr));
/* The current WAL insert position should be right after the page header */
@@ -7652,7 +7626,7 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr,
startPos += SizeOfXLogShortPHD;
recptr = GetXLogInsertRecPtr();
if (recptr != startPos)
- elog(ERROR, "invalid WAL insert position %X/%X for OVERWRITE_CONTRECORD",
+ elog(ERROR, "invalid WAL insert position %X/%08X for OVERWRITE_CONTRECORD",
LSN_FORMAT_ARGS(recptr));
START_CRIT_SECTION();
@@ -7682,7 +7656,7 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr,
/* check that the record was inserted to the right place */
if (ProcLastRecPtr != startPos)
- elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%X",
+ elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%08X",
LSN_FORMAT_ARGS(ProcLastRecPtr));
XLogFlush(recptr);
@@ -7751,8 +7725,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
if (XLogHaveInvalidPages())
{
elog(DEBUG2,
- "could not record restart point at %X/%X because there "
- "are unresolved references to invalid pages",
+ "could not record restart point at %X/%08X because there are unresolved references to invalid pages",
LSN_FORMAT_ARGS(checkPoint->redo));
return;
}
@@ -7832,8 +7805,8 @@ CreateRestartPoint(int flags)
lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
{
ereport(DEBUG2,
- (errmsg_internal("skipping restartpoint, already performed at %X/%X",
- LSN_FORMAT_ARGS(lastCheckPoint.redo))));
+ errmsg_internal("skipping restartpoint, already performed at %X/%08X",
+ LSN_FORMAT_ARGS(lastCheckPoint.redo)));
UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
if (flags & CHECKPOINT_IS_SHUTDOWN)
@@ -8017,10 +7990,10 @@ CreateRestartPoint(int flags)
xtime = GetLatestXTime();
ereport((log_checkpoints ? LOG : DEBUG2),
- (errmsg("recovery restart point at %X/%X",
- LSN_FORMAT_ARGS(lastCheckPoint.redo)),
- xtime ? errdetail("Last completed transaction was at log time %s.",
- timestamptz_to_str(xtime)) : 0));
+ errmsg("recovery restart point at %X/%08X",
+ LSN_FORMAT_ARGS(lastCheckPoint.redo)),
+ xtime ? errdetail("Last completed transaction was at log time %s.",
+ timestamptz_to_str(xtime)) : 0);
/*
* Finally, execute archive_cleanup_command, if any.
@@ -8151,17 +8124,19 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
XLByteToSeg(recptr, currSegNo, wal_segment_size);
segno = currSegNo;
- /*
- * Calculate how many segments are kept by slots first, adjusting for
- * max_slot_wal_keep_size.
- */
+ /* Calculate how many segments are kept by slots. */
keep = XLogGetReplicationSlotMinimumLSN();
if (keep != InvalidXLogRecPtr && keep < recptr)
{
XLByteToSeg(keep, segno, wal_segment_size);
- /* Cap by max_slot_wal_keep_size ... */
- if (max_slot_wal_keep_size_mb >= 0)
+ /*
+ * Account for max_slot_wal_keep_size to avoid keeping more than
+ * configured. However, don't do that during a binary upgrade: if
+ * slots were to be invalidated because of this, it would not be
+ * possible to preserve logical ones during the upgrade.
+ */
+ if (max_slot_wal_keep_size_mb >= 0 && !IsBinaryUpgrade)
{
uint64 slot_keep_segs;
@@ -8281,8 +8256,8 @@ XLogRestorePoint(const char *rpName)
RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
ereport(LOG,
- (errmsg("restore point \"%s\" created at %X/%X",
- rpName, LSN_FORMAT_ARGS(RecPtr))));
+ errmsg("restore point \"%s\" created at %X/%08X",
+ rpName, LSN_FORMAT_ARGS(RecPtr)));
return RecPtr;
}
@@ -8534,11 +8509,6 @@ xlog_redo(XLogReaderState *record)
ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
- SpinLockAcquire(&XLogCtl->info_lck);
- XLogCtl->ckptFullXid = checkPoint.nextXid;
- SpinLockRelease(&XLogCtl->info_lck);
-
/*
* We should've already switched to the new TLI before replaying this
* record.
@@ -8595,11 +8565,6 @@ xlog_redo(XLogReaderState *record)
ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
- SpinLockAcquire(&XLogCtl->info_lck);
- XLogCtl->ckptFullXid = checkPoint.nextXid;
- SpinLockRelease(&XLogCtl->info_lck);
-
/* TLI should not change in an on-line checkpoint */
(void) GetCurrentReplayRecPtr(&replayTLI);
if (checkPoint.ThisTimeLineID != replayTLI)
@@ -8947,9 +8912,8 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
* backup state and tablespace map.
*
* Input parameters are "state" (the backup state), "fast" (if true, we do
- * the checkpoint in immediate mode to make it faster), and "tablespaces"
- * (if non-NULL, indicates a list of tablespaceinfo structs describing the
- * cluster's tablespaces.).
+ * the checkpoint in fast mode), and "tablespaces" (if non-NULL, indicates a
+ * list of tablespaceinfo structs describing the cluster's tablespaces.).
*
* The tablespace map contents are appended to passed-in parameter
* tablespace_map and the caller is responsible for including it in the backup
@@ -9077,11 +9041,11 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
* during recovery means that checkpointer is running, we can use
* RequestCheckpoint() to establish a restartpoint.
*
- * We use CHECKPOINT_IMMEDIATE only if requested by user (via
- * passing fast = true). Otherwise this can take awhile.
+ * We use CHECKPOINT_FAST only if requested by user (via passing
+ * fast = true). Otherwise this can take awhile.
*/
RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT |
- (fast ? CHECKPOINT_IMMEDIATE : 0));
+ (fast ? CHECKPOINT_FAST : 0));
/*
* Now we need to fetch the checkpoint record location, and also
diff --git a/src/backend/access/transam/xlogbackup.c b/src/backend/access/transam/xlogbackup.c
index 342590e0a46..cda4b38b7d6 100644
--- a/src/backend/access/transam/xlogbackup.c
+++ b/src/backend/access/transam/xlogbackup.c
@@ -42,7 +42,7 @@ build_backup_content(BackupState *state, bool ishistoryfile)
XLByteToSeg(state->startpoint, startsegno, wal_segment_size);
XLogFileName(startxlogfile, state->starttli, startsegno, wal_segment_size);
- appendStringInfo(result, "START WAL LOCATION: %X/%X (file %s)\n",
+ appendStringInfo(result, "START WAL LOCATION: %X/%08X (file %s)\n",
LSN_FORMAT_ARGS(state->startpoint), startxlogfile);
if (ishistoryfile)
@@ -52,11 +52,11 @@ build_backup_content(BackupState *state, bool ishistoryfile)
XLByteToSeg(state->stoppoint, stopsegno, wal_segment_size);
XLogFileName(stopxlogfile, state->stoptli, stopsegno, wal_segment_size);
- appendStringInfo(result, "STOP WAL LOCATION: %X/%X (file %s)\n",
+ appendStringInfo(result, "STOP WAL LOCATION: %X/%08X (file %s)\n",
LSN_FORMAT_ARGS(state->stoppoint), stopxlogfile);
}
- appendStringInfo(result, "CHECKPOINT LOCATION: %X/%X\n",
+ appendStringInfo(result, "CHECKPOINT LOCATION: %X/%08X\n",
LSN_FORMAT_ARGS(state->checkpointloc));
appendStringInfoString(result, "BACKUP METHOD: streamed\n");
appendStringInfo(result, "BACKUP FROM: %s\n",
@@ -81,7 +81,7 @@ build_backup_content(BackupState *state, bool ishistoryfile)
Assert(XLogRecPtrIsInvalid(state->istartpoint) == (state->istarttli == 0));
if (!XLogRecPtrIsInvalid(state->istartpoint))
{
- appendStringInfo(result, "INCREMENTAL FROM LSN: %X/%X\n",
+ appendStringInfo(result, "INCREMENTAL FROM LSN: %X/%08X\n",
LSN_FORMAT_ARGS(state->istartpoint));
appendStringInfo(result, "INCREMENTAL FROM TLI: %u\n",
state->istarttli);
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 5ee9d0b028e..c7571429e8e 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -530,6 +530,18 @@ XLogInsert(RmgrId rmid, uint8 info)
}
/*
+ * Simple wrapper to XLogInsert to insert a WAL record with elementary
+ * contents (only an int64 is supported as value currently).
+ */
+XLogRecPtr
+XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value)
+{
+ XLogBeginInsert();
+ XLogRegisterData(&value, sizeof(value));
+ return XLogInsert(rmid, info);
+}
+
+/*
* Assemble a WAL record from the registered data and buffers into an
* XLogRecData chain, ready for insertion with XLogInsertRecord().
*
diff --git a/src/backend/access/transam/xlogprefetcher.c b/src/backend/access/transam/xlogprefetcher.c
index 7735562db01..ed3aacabc98 100644
--- a/src/backend/access/transam/xlogprefetcher.c
+++ b/src/backend/access/transam/xlogprefetcher.c
@@ -546,7 +546,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing all readahead until %X/%X is replayed due to possible TLI change",
+ "suppressing all readahead until %X/%08X is replayed due to possible TLI change",
LSN_FORMAT_ARGS(record->lsn));
#endif
@@ -579,7 +579,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing prefetch in database %u until %X/%X is replayed due to raw file copy",
+ "suppressing prefetch in database %u until %X/%08X is replayed due to raw file copy",
rlocator.dbOid,
LSN_FORMAT_ARGS(record->lsn));
#endif
@@ -607,7 +607,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing prefetch in relation %u/%u/%u until %X/%X is replayed, which creates the relation",
+ "suppressing prefetch in relation %u/%u/%u until %X/%08X is replayed, which creates the relation",
xlrec->rlocator.spcOid,
xlrec->rlocator.dbOid,
xlrec->rlocator.relNumber,
@@ -630,7 +630,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, which truncates the relation",
+ "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, which truncates the relation",
xlrec->rlocator.spcOid,
xlrec->rlocator.dbOid,
xlrec->rlocator.relNumber,
@@ -729,7 +729,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
{
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing all prefetch in relation %u/%u/%u until %X/%X is replayed, because the relation does not exist on disk",
+ "suppressing all prefetch in relation %u/%u/%u until %X/%08X is replayed, because the relation does not exist on disk",
reln->smgr_rlocator.locator.spcOid,
reln->smgr_rlocator.locator.dbOid,
reln->smgr_rlocator.locator.relNumber,
@@ -750,7 +750,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
{
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, because the relation is too small",
+ "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, because the relation is too small",
reln->smgr_rlocator.locator.spcOid,
reln->smgr_rlocator.locator.dbOid,
reln->smgr_rlocator.locator.relNumber,
@@ -928,7 +928,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator,
{
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)",
+ "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (blocks >= %u filtered)",
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
LSN_FORMAT_ARGS(filter->filter_until_replayed),
filter->filter_from_block);
@@ -944,7 +944,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator,
{
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (whole database)",
+ "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (whole database)",
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
LSN_FORMAT_ARGS(filter->filter_until_replayed));
#endif
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 2790ade1f91..ac1f801b1eb 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -617,7 +617,7 @@ restart:
}
else if (targetRecOff < pageHeaderSize)
{
- report_invalid_record(state, "invalid record offset at %X/%X: expected at least %u, got %u",
+ report_invalid_record(state, "invalid record offset at %X/%08X: expected at least %u, got %u",
LSN_FORMAT_ARGS(RecPtr),
pageHeaderSize, targetRecOff);
goto err;
@@ -626,7 +626,7 @@ restart:
if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
targetRecOff == pageHeaderSize)
{
- report_invalid_record(state, "contrecord is requested by %X/%X",
+ report_invalid_record(state, "contrecord is requested by %X/%08X",
LSN_FORMAT_ARGS(RecPtr));
goto err;
}
@@ -667,7 +667,7 @@ restart:
if (total_len < SizeOfXLogRecord)
{
report_invalid_record(state,
- "invalid record length at %X/%X: expected at least %u, got %u",
+ "invalid record length at %X/%08X: expected at least %u, got %u",
LSN_FORMAT_ARGS(RecPtr),
(uint32) SizeOfXLogRecord, total_len);
goto err;
@@ -756,7 +756,7 @@ restart:
if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD))
{
report_invalid_record(state,
- "there is no contrecord flag at %X/%X",
+ "there is no contrecord flag at %X/%08X",
LSN_FORMAT_ARGS(RecPtr));
goto err;
}
@@ -769,7 +769,7 @@ restart:
total_len != (pageHeader->xlp_rem_len + gotlen))
{
report_invalid_record(state,
- "invalid contrecord length %u (expected %lld) at %X/%X",
+ "invalid contrecord length %u (expected %lld) at %X/%08X",
pageHeader->xlp_rem_len,
((long long) total_len) - gotlen,
LSN_FORMAT_ARGS(RecPtr));
@@ -1132,7 +1132,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
if (record->xl_tot_len < SizeOfXLogRecord)
{
report_invalid_record(state,
- "invalid record length at %X/%X: expected at least %u, got %u",
+ "invalid record length at %X/%08X: expected at least %u, got %u",
LSN_FORMAT_ARGS(RecPtr),
(uint32) SizeOfXLogRecord, record->xl_tot_len);
return false;
@@ -1140,7 +1140,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
if (!RmgrIdIsValid(record->xl_rmid))
{
report_invalid_record(state,
- "invalid resource manager ID %u at %X/%X",
+ "invalid resource manager ID %u at %X/%08X",
record->xl_rmid, LSN_FORMAT_ARGS(RecPtr));
return false;
}
@@ -1153,7 +1153,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
if (!(record->xl_prev < RecPtr))
{
report_invalid_record(state,
- "record with incorrect prev-link %X/%X at %X/%X",
+ "record with incorrect prev-link %X/%08X at %X/%08X",
LSN_FORMAT_ARGS(record->xl_prev),
LSN_FORMAT_ARGS(RecPtr));
return false;
@@ -1169,7 +1169,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
if (record->xl_prev != PrevRecPtr)
{
report_invalid_record(state,
- "record with incorrect prev-link %X/%X at %X/%X",
+ "record with incorrect prev-link %X/%08X at %X/%08X",
LSN_FORMAT_ARGS(record->xl_prev),
LSN_FORMAT_ARGS(RecPtr));
return false;
@@ -1207,7 +1207,7 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
if (!EQ_CRC32C(record->xl_crc, crc))
{
report_invalid_record(state,
- "incorrect resource manager data checksum in record at %X/%X",
+ "incorrect resource manager data checksum in record at %X/%08X",
LSN_FORMAT_ARGS(recptr));
return false;
}
@@ -1241,7 +1241,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
report_invalid_record(state,
- "invalid magic number %04X in WAL segment %s, LSN %X/%X, offset %u",
+ "invalid magic number %04X in WAL segment %s, LSN %X/%08X, offset %u",
hdr->xlp_magic,
fname,
LSN_FORMAT_ARGS(recptr),
@@ -1256,7 +1256,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
report_invalid_record(state,
- "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
+ "invalid info bits %04X in WAL segment %s, LSN %X/%08X, offset %u",
hdr->xlp_info,
fname,
LSN_FORMAT_ARGS(recptr),
@@ -1298,7 +1298,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
/* hmm, first page of file doesn't have a long header? */
report_invalid_record(state,
- "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
+ "invalid info bits %04X in WAL segment %s, LSN %X/%08X, offset %u",
hdr->xlp_info,
fname,
LSN_FORMAT_ARGS(recptr),
@@ -1318,7 +1318,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
report_invalid_record(state,
- "unexpected pageaddr %X/%X in WAL segment %s, LSN %X/%X, offset %u",
+ "unexpected pageaddr %X/%08X in WAL segment %s, LSN %X/%08X, offset %u",
LSN_FORMAT_ARGS(hdr->xlp_pageaddr),
fname,
LSN_FORMAT_ARGS(recptr),
@@ -1344,7 +1344,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
report_invalid_record(state,
- "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%X, offset %u",
+ "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%08X, offset %u",
hdr->xlp_tli,
state->latestPageTLI,
fname,
@@ -1756,7 +1756,7 @@ DecodeXLogRecord(XLogReaderState *state,
if (block_id <= decoded->max_block_id)
{
report_invalid_record(state,
- "out-of-order block_id %u at %X/%X",
+ "out-of-order block_id %u at %X/%08X",
block_id,
LSN_FORMAT_ARGS(state->ReadRecPtr));
goto err;
@@ -1780,14 +1780,14 @@ DecodeXLogRecord(XLogReaderState *state,
if (blk->has_data && blk->data_len == 0)
{
report_invalid_record(state,
- "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
+ "BKPBLOCK_HAS_DATA set, but no data included at %X/%08X",
LSN_FORMAT_ARGS(state->ReadRecPtr));
goto err;
}
if (!blk->has_data && blk->data_len != 0)
{
report_invalid_record(state,
- "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
+ "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%08X",
(unsigned int) blk->data_len,
LSN_FORMAT_ARGS(state->ReadRecPtr));
goto err;
@@ -1823,7 +1823,7 @@ DecodeXLogRecord(XLogReaderState *state,
blk->bimg_len == BLCKSZ))
{
report_invalid_record(state,
- "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X",
+ "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%08X",
(unsigned int) blk->hole_offset,
(unsigned int) blk->hole_length,
(unsigned int) blk->bimg_len,
@@ -1839,7 +1839,7 @@ DecodeXLogRecord(XLogReaderState *state,
(blk->hole_offset != 0 || blk->hole_length != 0))
{
report_invalid_record(state,
- "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X",
+ "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%08X",
(unsigned int) blk->hole_offset,
(unsigned int) blk->hole_length,
LSN_FORMAT_ARGS(state->ReadRecPtr));
@@ -1853,7 +1853,7 @@ DecodeXLogRecord(XLogReaderState *state,
blk->bimg_len == BLCKSZ)
{
report_invalid_record(state,
- "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%X",
+ "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%08X",
(unsigned int) blk->bimg_len,
LSN_FORMAT_ARGS(state->ReadRecPtr));
goto err;
@@ -1868,7 +1868,7 @@ DecodeXLogRecord(XLogReaderState *state,
blk->bimg_len != BLCKSZ)
{
report_invalid_record(state,
- "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%X",
+ "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%08X",
(unsigned int) blk->data_len,
LSN_FORMAT_ARGS(state->ReadRecPtr));
goto err;
@@ -1884,7 +1884,7 @@ DecodeXLogRecord(XLogReaderState *state,
if (rlocator == NULL)
{
report_invalid_record(state,
- "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
+ "BKPBLOCK_SAME_REL set but no previous rel at %X/%08X",
LSN_FORMAT_ARGS(state->ReadRecPtr));
goto err;
}
@@ -1896,7 +1896,7 @@ DecodeXLogRecord(XLogReaderState *state,
else
{
report_invalid_record(state,
- "invalid block_id %u at %X/%X",
+ "invalid block_id %u at %X/%08X",
block_id, LSN_FORMAT_ARGS(state->ReadRecPtr));
goto err;
}
@@ -1963,7 +1963,7 @@ DecodeXLogRecord(XLogReaderState *state,
shortdata_err:
report_invalid_record(state,
- "record with invalid length at %X/%X",
+ "record with invalid length at %X/%08X",
LSN_FORMAT_ARGS(state->ReadRecPtr));
err:
*errormsg = state->errormsg_buf;
@@ -2073,14 +2073,14 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
!record->record->blocks[block_id].in_use)
{
report_invalid_record(record,
- "could not restore image at %X/%X with invalid block %d specified",
+ "could not restore image at %X/%08X with invalid block %d specified",
LSN_FORMAT_ARGS(record->ReadRecPtr),
block_id);
return false;
}
if (!record->record->blocks[block_id].has_image)
{
- report_invalid_record(record, "could not restore image at %X/%X with invalid state, block %d",
+ report_invalid_record(record, "could not restore image at %X/%08X with invalid state, block %d",
LSN_FORMAT_ARGS(record->ReadRecPtr),
block_id);
return false;
@@ -2107,7 +2107,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0)
decomp_success = false;
#else
- report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
+ report_invalid_record(record, "could not restore image at %X/%08X compressed with %s not supported by build, block %d",
LSN_FORMAT_ARGS(record->ReadRecPtr),
"LZ4",
block_id);
@@ -2124,7 +2124,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
if (ZSTD_isError(decomp_result))
decomp_success = false;
#else
- report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
+ report_invalid_record(record, "could not restore image at %X/%08X compressed with %s not supported by build, block %d",
LSN_FORMAT_ARGS(record->ReadRecPtr),
"zstd",
block_id);
@@ -2133,7 +2133,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
}
else
{
- report_invalid_record(record, "could not restore image at %X/%X compressed with unknown method, block %d",
+ report_invalid_record(record, "could not restore image at %X/%08X compressed with unknown method, block %d",
LSN_FORMAT_ARGS(record->ReadRecPtr),
block_id);
return false;
@@ -2141,7 +2141,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
if (!decomp_success)
{
- report_invalid_record(record, "could not decompress image at %X/%X, block %d",
+ report_invalid_record(record, "could not decompress image at %X/%08X, block %d",
LSN_FORMAT_ARGS(record->ReadRecPtr),
block_id);
return false;
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 6ce979f2d8b..23878b2dd91 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -620,10 +620,10 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
* than ControlFile->checkPoint is used.
*/
ereport(LOG,
- (errmsg("starting backup recovery with redo LSN %X/%X, checkpoint LSN %X/%X, on timeline ID %u",
- LSN_FORMAT_ARGS(RedoStartLSN),
- LSN_FORMAT_ARGS(CheckPointLoc),
- CheckPointTLI)));
+ errmsg("starting backup recovery with redo LSN %X/%08X, checkpoint LSN %X/%08X, on timeline ID %u",
+ LSN_FORMAT_ARGS(RedoStartLSN),
+ LSN_FORMAT_ARGS(CheckPointLoc),
+ CheckPointTLI));
/*
* When a backup_label file is present, we want to roll forward from
@@ -636,8 +636,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
ereport(DEBUG1,
- (errmsg_internal("checkpoint record is at %X/%X",
- LSN_FORMAT_ARGS(CheckPointLoc))));
+ errmsg_internal("checkpoint record is at %X/%08X",
+ LSN_FORMAT_ARGS(CheckPointLoc)));
InRecovery = true; /* force recovery even if SHUTDOWNED */
/*
@@ -652,23 +652,23 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
if (!ReadRecord(xlogprefetcher, LOG, false,
checkPoint.ThisTimeLineID))
ereport(FATAL,
- (errmsg("could not find redo location %X/%X referenced by checkpoint record at %X/%X",
- LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)),
- errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
- "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
- "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
- DataDir, DataDir, DataDir, DataDir)));
+ errmsg("could not find redo location %X/%08X referenced by checkpoint record at %X/%08X",
+ LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)),
+ errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
+ "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
+ "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
+ DataDir, DataDir, DataDir, DataDir));
}
}
else
{
ereport(FATAL,
- (errmsg("could not locate required checkpoint record at %X/%X",
- LSN_FORMAT_ARGS(CheckPointLoc)),
- errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
- "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
- "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
- DataDir, DataDir, DataDir, DataDir)));
+ errmsg("could not locate required checkpoint record at %X/%08X",
+ LSN_FORMAT_ARGS(CheckPointLoc)),
+ errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
+ "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
+ "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
+ DataDir, DataDir, DataDir, DataDir));
wasShutdown = false; /* keep compiler quiet */
}
@@ -773,8 +773,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
*/
if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
ereport(LOG,
- (errmsg("restarting backup recovery with redo LSN %X/%X",
- LSN_FORMAT_ARGS(ControlFile->backupStartPoint))));
+ errmsg("restarting backup recovery with redo LSN %X/%08X",
+ LSN_FORMAT_ARGS(ControlFile->backupStartPoint)));
/* Get the last valid checkpoint record. */
CheckPointLoc = ControlFile->checkPoint;
@@ -786,8 +786,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
if (record != NULL)
{
ereport(DEBUG1,
- (errmsg_internal("checkpoint record is at %X/%X",
- LSN_FORMAT_ARGS(CheckPointLoc))));
+ errmsg_internal("checkpoint record is at %X/%08X",
+ LSN_FORMAT_ARGS(CheckPointLoc)));
}
else
{
@@ -798,8 +798,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
* simplify processing around checkpoints.
*/
ereport(PANIC,
- (errmsg("could not locate a valid checkpoint record at %X/%X",
- LSN_FORMAT_ARGS(CheckPointLoc))));
+ errmsg("could not locate a valid checkpoint record at %X/%08X",
+ LSN_FORMAT_ARGS(CheckPointLoc)));
}
memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
@@ -824,8 +824,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
recoveryTargetName)));
else if (recoveryTarget == RECOVERY_TARGET_LSN)
ereport(LOG,
- (errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"",
- LSN_FORMAT_ARGS(recoveryTargetLSN))));
+ errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%08X\"",
+ LSN_FORMAT_ARGS(recoveryTargetLSN)));
else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
ereport(LOG,
(errmsg("starting point-in-time recovery to earliest consistent point")));
@@ -855,7 +855,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
(errmsg("requested timeline %u is not a child of this server's history",
recoveryTargetTLI),
/* translator: %s is a backup_label file or a pg_control file */
- errdetail("Latest checkpoint in file \"%s\" is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X.",
+ errdetail("Latest checkpoint in file \"%s\" is at %X/%08X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%08X.",
haveBackupLabel ? "backup_label" : "pg_control",
LSN_FORMAT_ARGS(CheckPointLoc),
CheckPointTLI,
@@ -870,15 +870,15 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) !=
ControlFile->minRecoveryPointTLI)
ereport(FATAL,
- (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
- recoveryTargetTLI,
- LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint),
- ControlFile->minRecoveryPointTLI)));
+ errmsg("requested timeline %u does not contain minimum recovery point %X/%08X on timeline %u",
+ recoveryTargetTLI,
+ LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint),
+ ControlFile->minRecoveryPointTLI));
ereport(DEBUG1,
- (errmsg_internal("redo record is at %X/%X; shutdown %s",
- LSN_FORMAT_ARGS(checkPoint.redo),
- wasShutdown ? "true" : "false")));
+ errmsg_internal("redo record is at %X/%08X; shutdown %s",
+ LSN_FORMAT_ARGS(checkPoint.redo),
+ wasShutdown ? "true" : "false"));
ereport(DEBUG1,
(errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u",
U64FromFullTransactionId(checkPoint.nextXid),
@@ -1253,14 +1253,14 @@ read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI,
* is pretty crude, but we are not expecting any variability in the file
* format).
*/
- if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %08X%16s)%c",
+ if (fscanf(lfp, "START WAL LOCATION: %X/%08X (file %08X%16s)%c",
&hi, &lo, &tli_from_walseg, startxlogfilename, &ch) != 5 || ch != '\n')
ereport(FATAL,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
RedoStartLSN = ((uint64) hi) << 32 | lo;
RedoStartTLI = tli_from_walseg;
- if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%X%c",
+ if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%08X%c",
&hi, &lo, &ch) != 3 || ch != '\n')
ereport(FATAL,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
@@ -1332,7 +1332,7 @@ read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI,
tli_from_file, BACKUP_LABEL_FILE)));
}
- if (fscanf(lfp, "INCREMENTAL FROM LSN: %X/%X\n", &hi, &lo) > 0)
+ if (fscanf(lfp, "INCREMENTAL FROM LSN: %X/%08X\n", &hi, &lo) > 0)
ereport(FATAL,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("this is an incremental backup, not a data directory"),
@@ -1722,8 +1722,8 @@ PerformWalRecovery(void)
if (record->xl_rmid != RM_XLOG_ID ||
(record->xl_info & ~XLR_INFO_MASK) != XLOG_CHECKPOINT_REDO)
ereport(FATAL,
- (errmsg("unexpected record type found at redo point %X/%X",
- LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))));
+ errmsg("unexpected record type found at redo point %X/%08X",
+ LSN_FORMAT_ARGS(xlogreader->ReadRecPtr)));
}
else
{
@@ -1745,8 +1745,8 @@ PerformWalRecovery(void)
RmgrStartup();
ereport(LOG,
- (errmsg("redo starts at %X/%X",
- LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))));
+ errmsg("redo starts at %X/%08X",
+ LSN_FORMAT_ARGS(xlogreader->ReadRecPtr)));
/* Prepare to report progress of the redo phase. */
if (!StandbyMode)
@@ -1758,7 +1758,7 @@ PerformWalRecovery(void)
do
{
if (!StandbyMode)
- ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
+ ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%08X",
LSN_FORMAT_ARGS(xlogreader->ReadRecPtr));
#ifdef WAL_DEBUG
@@ -1767,7 +1767,7 @@ PerformWalRecovery(void)
StringInfoData buf;
initStringInfo(&buf);
- appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
+ appendStringInfo(&buf, "REDO @ %X/%08X; LSN %X/%08X: ",
LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
xlog_outrec(&buf, xlogreader);
@@ -1880,9 +1880,9 @@ PerformWalRecovery(void)
RmgrCleanup();
ereport(LOG,
- (errmsg("redo done at %X/%X system usage: %s",
- LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
- pg_rusage_show(&ru0))));
+ errmsg("redo done at %X/%08X system usage: %s",
+ LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
+ pg_rusage_show(&ru0)));
xtime = GetLatestXTime();
if (xtime)
ereport(LOG,
@@ -2092,7 +2092,7 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_overwrite_contrecord));
if (xlrec.overwritten_lsn != record->overwrittenRecPtr)
- elog(FATAL, "mismatching overwritten LSN %X/%X -> %X/%X",
+ elog(FATAL, "mismatching overwritten LSN %X/%08X -> %X/%08X",
LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
LSN_FORMAT_ARGS(record->overwrittenRecPtr));
@@ -2101,9 +2101,9 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
missingContrecPtr = InvalidXLogRecPtr;
ereport(LOG,
- (errmsg("successfully skipped missing contrecord at %X/%X, overwritten at %s",
- LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
- timestamptz_to_str(xlrec.overwrite_time))));
+ errmsg("successfully skipped missing contrecord at %X/%08X, overwritten at %s",
+ LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
+ timestamptz_to_str(xlrec.overwrite_time)));
/* Verifying the record should only happen once */
record->overwrittenRecPtr = InvalidXLogRecPtr;
@@ -2129,7 +2129,7 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
backupEndPoint = lsn;
}
else
- elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%X, waiting for %X/%X",
+ elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%08X, waiting for %X/%08X",
LSN_FORMAT_ARGS(startpoint), LSN_FORMAT_ARGS(backupStartPoint));
}
}
@@ -2224,9 +2224,9 @@ CheckRecoveryConsistency(void)
backupEndRequired = false;
ereport(LOG,
- (errmsg("completed backup recovery with redo LSN %X/%X and end LSN %X/%X",
- LSN_FORMAT_ARGS(saveBackupStartPoint),
- LSN_FORMAT_ARGS(saveBackupEndPoint))));
+ errmsg("completed backup recovery with redo LSN %X/%08X and end LSN %X/%08X",
+ LSN_FORMAT_ARGS(saveBackupStartPoint),
+ LSN_FORMAT_ARGS(saveBackupEndPoint)));
}
/*
@@ -2255,8 +2255,8 @@ CheckRecoveryConsistency(void)
reachedConsistency = true;
SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
ereport(LOG,
- (errmsg("consistent recovery state reached at %X/%X",
- LSN_FORMAT_ARGS(lastReplayedEndRecPtr))));
+ errmsg("consistent recovery state reached at %X/%08X",
+ LSN_FORMAT_ARGS(lastReplayedEndRecPtr)));
}
/*
@@ -2293,7 +2293,7 @@ rm_redo_error_callback(void *arg)
xlog_block_info(&buf, record);
/* translator: %s is a WAL record description */
- errcontext("WAL redo at %X/%X for %s",
+ errcontext("WAL redo at %X/%08X for %s",
LSN_FORMAT_ARGS(record->ReadRecPtr),
buf.data);
@@ -2328,7 +2328,7 @@ xlog_outdesc(StringInfo buf, XLogReaderState *record)
static void
xlog_outrec(StringInfo buf, XLogReaderState *record)
{
- appendStringInfo(buf, "prev %X/%X; xid %u",
+ appendStringInfo(buf, "prev %X/%08X; xid %u",
LSN_FORMAT_ARGS(XLogRecGetPrev(record)),
XLogRecGetXid(record));
@@ -2416,10 +2416,10 @@ checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI,
lsn < minRecoveryPoint &&
newTLI > minRecoveryPointTLI)
ereport(PANIC,
- (errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
- newTLI,
- LSN_FORMAT_ARGS(minRecoveryPoint),
- minRecoveryPointTLI)));
+ errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%08X on timeline %u",
+ newTLI,
+ LSN_FORMAT_ARGS(minRecoveryPoint),
+ minRecoveryPointTLI));
/* Looks good */
}
@@ -2621,8 +2621,8 @@ recoveryStopsBefore(XLogReaderState *record)
recoveryStopTime = 0;
recoveryStopName[0] = '\0';
ereport(LOG,
- (errmsg("recovery stopping before WAL location (LSN) \"%X/%X\"",
- LSN_FORMAT_ARGS(recoveryStopLSN))));
+ errmsg("recovery stopping before WAL location (LSN) \"%X/%08X\"",
+ LSN_FORMAT_ARGS(recoveryStopLSN)));
return true;
}
@@ -2789,8 +2789,8 @@ recoveryStopsAfter(XLogReaderState *record)
recoveryStopTime = 0;
recoveryStopName[0] = '\0';
ereport(LOG,
- (errmsg("recovery stopping after WAL location (LSN) \"%X/%X\"",
- LSN_FORMAT_ARGS(recoveryStopLSN))));
+ errmsg("recovery stopping after WAL location (LSN) \"%X/%08X\"",
+ LSN_FORMAT_ARGS(recoveryStopLSN)));
return true;
}
@@ -2910,7 +2910,7 @@ getRecoveryStopReason(void)
timestamptz_to_str(recoveryStopTime));
else if (recoveryTarget == RECOVERY_TARGET_LSN)
snprintf(reason, sizeof(reason),
- "%s LSN %X/%X\n",
+ "%s LSN %X/%08X\n",
recoveryStopAfter ? "after" : "before",
LSN_FORMAT_ARGS(recoveryStopLSN));
else if (recoveryTarget == RECOVERY_TARGET_NAME)
@@ -3213,11 +3213,11 @@ ReadRecord(XLogPrefetcher *xlogprefetcher, int emode,
XLogFileName(fname, xlogreader->seg.ws_tli, segno,
wal_segment_size);
ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
- (errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%X, offset %u",
- xlogreader->latestPageTLI,
- fname,
- LSN_FORMAT_ARGS(xlogreader->latestPagePtr),
- offset)));
+ errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%08X, offset %u",
+ xlogreader->latestPageTLI,
+ fname,
+ LSN_FORMAT_ARGS(xlogreader->latestPagePtr),
+ offset));
record = NULL;
}
@@ -3429,14 +3429,14 @@ retry:
errno = save_errno;
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
(errcode_for_file_access(),
- errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: %m",
+ errmsg("could not read from WAL segment %s, LSN %X/%08X, offset %u: %m",
fname, LSN_FORMAT_ARGS(targetPagePtr),
readOff)));
}
else
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
(errcode(ERRCODE_DATA_CORRUPTED),
- errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: read %d of %zu",
+ errmsg("could not read from WAL segment %s, LSN %X/%08X, offset %u: read %d of %zu",
fname, LSN_FORMAT_ARGS(targetPagePtr),
readOff, r, (Size) XLOG_BLCKSZ)));
goto next_record_is_invalid;
@@ -3718,7 +3718,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
wait_time = wal_retrieve_retry_interval -
TimestampDifferenceMilliseconds(last_fail_time, now);
- elog(LOG, "waiting for WAL to become available at %X/%X",
+ elog(LOG, "waiting for WAL to become available at %X/%08X",
LSN_FORMAT_ARGS(RecPtr));
/* Do background tasks that might benefit us later. */
@@ -3864,7 +3864,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
if (curFileTLI > 0 && tli < curFileTLI)
- elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+ elog(ERROR, "according to history file, WAL location %X/%08X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
LSN_FORMAT_ARGS(tliRecPtr),
tli, curFileTLI);
}
@@ -4177,10 +4177,10 @@ rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN)
if (currentTle->end < replayLSN)
{
ereport(LOG,
- (errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X",
- newtarget,
- replayTLI,
- LSN_FORMAT_ARGS(replayLSN))));
+ errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%08X",
+ newtarget,
+ replayTLI,
+ LSN_FORMAT_ARGS(replayLSN)));
return false;
}
@@ -4994,13 +4994,25 @@ check_recovery_target_timeline(char **newval, void **extra, GucSource source)
rttg = RECOVERY_TARGET_TIMELINE_LATEST;
else
{
+ char *endp;
+ uint64 timeline;
+
rttg = RECOVERY_TARGET_TIMELINE_NUMERIC;
errno = 0;
- strtoul(*newval, NULL, 0);
- if (errno == EINVAL || errno == ERANGE)
+ timeline = strtou64(*newval, &endp, 0);
+
+ if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
+ {
+ GUC_check_errdetail("\"%s\" is not a valid number.",
+ "recovery_target_timeline");
+ return false;
+ }
+
+ if (timeline < 1 || timeline > PG_UINT32_MAX)
{
- GUC_check_errdetail("\"recovery_target_timeline\" is not a valid number.");
+ GUC_check_errdetail("\"%s\" must be between %u and %u.",
+ "recovery_target_timeline", 1, UINT_MAX);
return false;
}
}
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index c389b27f77d..27ea52fdfee 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -795,7 +795,7 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage,
list_free_deep(timelineHistory);
- elog(DEBUG3, "switched to timeline %u valid until %X/%X",
+ elog(DEBUG3, "switched to timeline %u valid until %X/%08X",
state->currTLI,
LSN_FORMAT_ARGS(state->currTLIValidUntil));
}
diff --git a/src/backend/backup/backup_manifest.c b/src/backend/backup/backup_manifest.c
index 22e2be37c95..d05252f383c 100644
--- a/src/backend/backup/backup_manifest.c
+++ b/src/backend/backup/backup_manifest.c
@@ -281,7 +281,7 @@ AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr,
}
AppendToManifest(manifest,
- "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
+ "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%08X\", \"End-LSN\": \"%X/%08X\" }",
first_wal_range ? "" : ",\n",
entry->tli,
LSN_FORMAT_ARGS(tl_beginptr),
diff --git a/src/backend/backup/basebackup_copy.c b/src/backend/backup/basebackup_copy.c
index a284ce318ff..18b0b5a52d3 100644
--- a/src/backend/backup/basebackup_copy.c
+++ b/src/backend/backup/basebackup_copy.c
@@ -361,7 +361,7 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
/* Data row */
- values[0] = CStringGetTextDatum(psprintf("%X/%X", LSN_FORMAT_ARGS(ptr)));
+ values[0] = CStringGetTextDatum(psprintf("%X/%08X", LSN_FORMAT_ARGS(ptr)));
values[1] = Int64GetDatum(tli);
do_tup_output(tstate, values, nulls);
diff --git a/src/backend/backup/basebackup_incremental.c b/src/backend/backup/basebackup_incremental.c
index 28491b1e0ab..a0d48ff0fef 100644
--- a/src/backend/backup/basebackup_incremental.c
+++ b/src/backend/backup/basebackup_incremental.c
@@ -409,7 +409,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
if (range->start_lsn < tlep[i]->begin)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("manifest requires WAL from initial timeline %u starting at %X/%X, but that timeline begins at %X/%X",
+ errmsg("manifest requires WAL from initial timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
range->tli,
LSN_FORMAT_ARGS(range->start_lsn),
LSN_FORMAT_ARGS(tlep[i]->begin))));
@@ -419,7 +419,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
if (range->start_lsn != tlep[i]->begin)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("manifest requires WAL from continuation timeline %u starting at %X/%X, but that timeline begins at %X/%X",
+ errmsg("manifest requires WAL from continuation timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
range->tli,
LSN_FORMAT_ARGS(range->start_lsn),
LSN_FORMAT_ARGS(tlep[i]->begin))));
@@ -430,7 +430,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
if (range->end_lsn > backup_state->startpoint)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("manifest requires WAL from final timeline %u ending at %X/%X, but this backup starts at %X/%X",
+ errmsg("manifest requires WAL from final timeline %u ending at %X/%08X, but this backup starts at %X/%08X",
range->tli,
LSN_FORMAT_ARGS(range->end_lsn),
LSN_FORMAT_ARGS(backup_state->startpoint)),
@@ -441,7 +441,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
if (range->end_lsn != tlep[i]->end)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("manifest requires WAL from non-final timeline %u ending at %X/%X, but this server switched timelines at %X/%X",
+ errmsg("manifest requires WAL from non-final timeline %u ending at %X/%08X, but this server switched timelines at %X/%08X",
range->tli,
LSN_FORMAT_ARGS(range->end_lsn),
LSN_FORMAT_ARGS(tlep[i]->end))));
@@ -522,18 +522,18 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
if (XLogRecPtrIsInvalid(tli_missing_lsn))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but no summaries for that timeline and LSN range exist",
+ errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but no summaries for that timeline and LSN range exist",
tle->tli,
LSN_FORMAT_ARGS(tli_start_lsn),
LSN_FORMAT_ARGS(tli_end_lsn))));
else
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but the summaries for that timeline and LSN range are incomplete",
+ errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but the summaries for that timeline and LSN range are incomplete",
tle->tli,
LSN_FORMAT_ARGS(tli_start_lsn),
LSN_FORMAT_ARGS(tli_end_lsn)),
- errdetail("The first unsummarized LSN in this range is %X/%X.",
+ errdetail("The first unsummarized LSN in this range is %X/%08X.",
LSN_FORMAT_ARGS(tli_missing_lsn))));
}
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 6db864892d0..fc8638c1b61 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -109,6 +109,8 @@ static const struct typinfo TypInfo[] = {
F_REGROLEIN, F_REGROLEOUT},
{"regnamespace", REGNAMESPACEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
F_REGNAMESPACEIN, F_REGNAMESPACEOUT},
+ {"regdatabase", REGDATABASEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
+ F_REGDATABASEIN, F_REGDATABASEOUT},
{"text", TEXTOID, 0, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID,
F_TEXTIN, F_TEXTOUT},
{"oid", OIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 18316a3968b..7dded634eb8 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -1850,6 +1850,17 @@ find_expr_references_walker(Node *node,
errmsg("constant of the type %s cannot be used here",
"regrole")));
break;
+
+ /*
+ * Dependencies for regdatabase should be shared among all
+ * databases, so explicitly inhibit to have dependencies.
+ */
+ case REGDATABASEOID:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constant of the type %s cannot be used here",
+ "regdatabase")));
+ break;
}
}
return false;
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 10f43c51c5a..fd6537567ea 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -665,6 +665,15 @@ CheckAttributeType(const char *attname,
}
/*
+ * For consistency with check_virtual_generated_security().
+ */
+ if ((flags & CHKATYPE_IS_VIRTUAL) && atttypid >= FirstUnpinnedObjectId)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("virtual generated column \"%s\" cannot have a user-defined type", attname),
+ errdetail("Virtual generated columns that make use of user-defined types are not yet supported."));
+
+ /*
* This might not be strictly invalid per SQL standard, but it is pretty
* useless, and it cannot be dumped, so we must disallow it.
*/
@@ -2997,7 +3006,7 @@ AddRelationNotNullConstraints(Relation rel, List *constraints,
if (constr->is_no_inherit)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
- errmsg("cannot define not-null constraint on column \"%s\" with NO INHERIT",
+ errmsg("cannot define not-null constraint with NO INHERIT on column \"%s\"",
strVal(linitial(constr->keys))),
errdetail("The column has an inherited not-null constraint.")));
@@ -3216,6 +3225,86 @@ check_nested_generated(ParseState *pstate, Node *node)
}
/*
+ * Check security of virtual generated column expression.
+ *
+ * Just like selecting from a view is exploitable (CVE-2024-7348), selecting
+ * from a table with virtual generated columns is exploitable. Users who are
+ * concerned about this can avoid selecting from views, but telling them to
+ * avoid selecting from tables is less practical.
+ *
+ * To address this, this restricts generation expressions for virtual
+ * generated columns are restricted to using built-in functions and types. We
+ * assume that built-in functions and types cannot be exploited for this
+ * purpose. Note the overall security also requires that all functions in use
+ * a immutable. (For example, there are some built-in non-immutable functions
+ * that can run arbitrary SQL.) The immutability is checked elsewhere, since
+ * that is a property that needs to hold independent of security
+ * considerations.
+ *
+ * In the future, this could be expanded by some new mechanism to declare
+ * other functions and types as safe or trusted for this purpose, but that is
+ * to be designed.
+ */
+
+/*
+ * Callback for check_functions_in_node() that determines whether a function
+ * is user-defined.
+ */
+static bool
+contains_user_functions_checker(Oid func_id, void *context)
+{
+ return (func_id >= FirstUnpinnedObjectId);
+}
+
+/*
+ * Checks for all the things we don't want in the generation expressions of
+ * virtual generated columns for security reasons. Errors out if it finds
+ * one.
+ */
+static bool
+check_virtual_generated_security_walker(Node *node, void *context)
+{
+ ParseState *pstate = context;
+
+ if (node == NULL)
+ return false;
+
+ if (!IsA(node, List))
+ {
+ if (check_functions_in_node(node, contains_user_functions_checker, NULL))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("generation expression uses user-defined function"),
+ errdetail("Virtual generated columns that make use of user-defined functions are not yet supported."),
+ parser_errposition(pstate, exprLocation(node)));
+
+ /*
+ * check_functions_in_node() doesn't check some node types (see
+ * comment there). We handle CoerceToDomain and MinMaxExpr by
+ * checking for built-in types. The other listed node types cannot
+ * call user-definable SQL-visible functions.
+ *
+ * We furthermore need this type check to handle built-in, immutable
+ * polymorphic functions such as array_eq().
+ */
+ if (exprType(node) >= FirstUnpinnedObjectId)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("generation expression uses user-defined type"),
+ errdetail("Virtual generated columns that make use of user-defined types are not yet supported."),
+ parser_errposition(pstate, exprLocation(node)));
+ }
+
+ return expression_tree_walker(node, check_virtual_generated_security_walker, context);
+}
+
+static void
+check_virtual_generated_security(ParseState *pstate, Node *node)
+{
+ check_virtual_generated_security_walker(node, pstate);
+}
+
+/*
* Take a raw default and convert it to a cooked format ready for
* storage.
*
@@ -3254,6 +3343,10 @@ cookDefault(ParseState *pstate,
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("generation expression is not immutable")));
+
+ /* Check security of expressions for virtual generated column */
+ if (attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
+ check_virtual_generated_security(pstate, expr);
}
else
{
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index aa216683b74..c4029a4f3d3 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -800,11 +800,11 @@ index_create(Relation heapRelation,
errmsg("user-defined indexes on system catalog tables are not supported")));
/*
- * Btree text_pattern_ops uses text_eq as the equality operator, which is
- * fine as long as the collation is deterministic; text_eq then reduces to
+ * Btree text_pattern_ops uses texteq as the equality operator, which is
+ * fine as long as the collation is deterministic; texteq then reduces to
* bitwise equality and so it is semantically compatible with the other
* operators and functions in that opclass. But with a nondeterministic
- * collation, text_eq could yield results that are incompatible with the
+ * collation, texteq could yield results that are incompatible with the
* actual behavior of the index (which is determined by the opclass's
* comparison function). We prevent such problems by refusing creation of
* an index with that opclass and a nondeterministic collation.
@@ -814,7 +814,7 @@ index_create(Relation heapRelation,
* opclasses as incompatible with nondeterminism; but for now, this small
* hack suffices.
*
- * Another solution is to use a special operator, not text_eq, as the
+ * Another solution is to use a special operator, not texteq, as the
* equality opclass member; but that is undesirable because it would
* prevent index usage in many queries that work fine today.
*/
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 08f780a2e63..b2d5332effc 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -666,6 +666,14 @@ GRANT SELECT ON pg_shmem_allocations_numa TO pg_read_all_stats;
REVOKE EXECUTE ON FUNCTION pg_get_shmem_allocations_numa() FROM PUBLIC;
GRANT EXECUTE ON FUNCTION pg_get_shmem_allocations_numa() TO pg_read_all_stats;
+CREATE VIEW pg_dsm_registry_allocations AS
+ SELECT * FROM pg_get_dsm_registry_allocations();
+
+REVOKE ALL ON pg_dsm_registry_allocations FROM PUBLIC;
+GRANT SELECT ON pg_dsm_registry_allocations TO pg_read_all_stats;
+REVOKE EXECUTE ON FUNCTION pg_get_dsm_registry_allocations() FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_dsm_registry_allocations() TO pg_read_all_stats;
+
CREATE VIEW pg_backend_memory_contexts AS
SELECT * FROM pg_get_backend_memory_contexts();
@@ -895,7 +903,7 @@ CREATE VIEW pg_stat_activity AS
S.wait_event,
S.state,
S.backend_xid,
- s.backend_xmin,
+ S.backend_xmin,
S.query_id,
S.query,
S.backend_type
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 4fffb76e557..7111d5d5334 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -76,7 +76,7 @@ static BufferAccessStrategy vac_strategy;
static void do_analyze_rel(Relation onerel,
- VacuumParams *params, List *va_cols,
+ const VacuumParams params, List *va_cols,
AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
bool inh, bool in_outer_xact, int elevel);
static void compute_index_stats(Relation onerel, double totalrows,
@@ -107,7 +107,7 @@ static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
*/
void
analyze_rel(Oid relid, RangeVar *relation,
- VacuumParams *params, List *va_cols, bool in_outer_xact,
+ const VacuumParams params, List *va_cols, bool in_outer_xact,
BufferAccessStrategy bstrategy)
{
Relation onerel;
@@ -116,7 +116,7 @@ analyze_rel(Oid relid, RangeVar *relation,
BlockNumber relpages = 0;
/* Select logging level */
- if (params->options & VACOPT_VERBOSE)
+ if (params.options & VACOPT_VERBOSE)
elevel = INFO;
else
elevel = DEBUG2;
@@ -138,8 +138,8 @@ analyze_rel(Oid relid, RangeVar *relation,
*
* Make sure to generate only logs for ANALYZE in this case.
*/
- onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM),
- params->log_min_duration >= 0,
+ onerel = vacuum_open_relation(relid, relation, params.options & ~(VACOPT_VACUUM),
+ params.log_min_duration >= 0,
ShareUpdateExclusiveLock);
/* leave if relation could not be opened or locked */
@@ -155,7 +155,7 @@ analyze_rel(Oid relid, RangeVar *relation,
*/
if (!vacuum_is_permitted_for_relation(RelationGetRelid(onerel),
onerel->rd_rel,
- params->options & ~VACOPT_VACUUM))
+ params.options & ~VACOPT_VACUUM))
{
relation_close(onerel, ShareUpdateExclusiveLock);
return;
@@ -227,7 +227,7 @@ analyze_rel(Oid relid, RangeVar *relation,
else
{
/* No need for a WARNING if we already complained during VACUUM */
- if (!(params->options & VACOPT_VACUUM))
+ if (!(params.options & VACOPT_VACUUM))
ereport(WARNING,
(errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
RelationGetRelationName(onerel))));
@@ -275,7 +275,7 @@ analyze_rel(Oid relid, RangeVar *relation,
* appropriate acquirefunc for each child table.
*/
static void
-do_analyze_rel(Relation onerel, VacuumParams *params,
+do_analyze_rel(Relation onerel, const VacuumParams params,
List *va_cols, AcquireSampleRowsFunc acquirefunc,
BlockNumber relpages, bool inh, bool in_outer_xact,
int elevel)
@@ -309,9 +309,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
PgStat_Counter startreadtime = 0;
PgStat_Counter startwritetime = 0;
- verbose = (params->options & VACOPT_VERBOSE) != 0;
+ verbose = (params.options & VACOPT_VERBOSE) != 0;
instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
- params->log_min_duration >= 0));
+ params.log_min_duration >= 0));
if (inh)
ereport(elevel,
(errmsg("analyzing \"%s.%s\" inheritance tree",
@@ -706,7 +706,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
* amvacuumcleanup() when called in ANALYZE-only mode. The only exception
* among core index AMs is GIN/ginvacuumcleanup().
*/
- if (!(params->options & VACOPT_VACUUM))
+ if (!(params.options & VACOPT_VACUUM))
{
for (ind = 0; ind < nindexes; ind++)
{
@@ -736,9 +736,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
{
TimestampTz endtime = GetCurrentTimestamp();
- if (verbose || params->log_min_duration == 0 ||
+ if (verbose || params.log_min_duration == 0 ||
TimestampDifferenceExceeds(starttime, endtime,
- params->log_min_duration))
+ params.log_min_duration))
{
long delay_in_ms;
WalUsage walusage;
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 54a08e4102e..b55221d44cd 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -917,7 +917,7 @@ copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verb
* not to be aggressive about this.
*/
memset(&params, 0, sizeof(VacuumParams));
- vacuum_get_cutoffs(OldHeap, &params, &cutoffs);
+ vacuum_get_cutoffs(OldHeap, params, &cutoffs);
/*
* FreezeXid will become the table's new relfrozenxid, and that mustn't go
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 74ae42b19a7..fae9c41db65 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
}
/*
- * Extract a CopyHeaderChoice value from a DefElem. This is like
- * defGetBoolean() but also accepts the special value "match".
+ * Extract the CopyFormatOptions.header_line value from a DefElem.
+ *
+ * Parses the HEADER option for COPY, which can be a boolean, a non-negative
+ * integer (number of lines to skip), or the special value "match".
*/
-static CopyHeaderChoice
-defGetCopyHeaderChoice(DefElem *def, bool is_from)
+static int
+defGetCopyHeaderOption(DefElem *def, bool is_from)
{
/*
* If no parameter value given, assume "true" is meant.
@@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
return COPY_HEADER_TRUE;
/*
- * Allow 0, 1, "true", "false", "on", "off", or "match".
+ * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
+ * "match".
*/
switch (nodeTag(def->arg))
{
case T_Integer:
- switch (intVal(def->arg))
{
- case 0:
- return COPY_HEADER_FALSE;
- case 1:
- return COPY_HEADER_TRUE;
- default:
- /* otherwise, error out below */
- break;
+ int ival = intVal(def->arg);
+
+ if (ival < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("a negative integer value cannot be "
+ "specified for %s", def->defname)));
+
+ if (!is_from && ival > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot use multi-line header in COPY TO")));
+
+ return ival;
}
break;
default:
@@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
}
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("%s requires a Boolean value or \"match\"",
+ errmsg("%s requires a Boolean value, a non-negative integer, "
+ "or the string \"match\"",
def->defname)));
return COPY_HEADER_FALSE; /* keep compiler quiet */
}
@@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
if (header_specified)
errorConflictingDefElem(defel, pstate);
header_specified = true;
- opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
+ opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
}
else if (strcmp(defel->defname, "quote") == 0)
{
@@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
/* Check header */
- if (opts_out->binary && opts_out->header_line)
+ if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index f5fc346e201..b1ae97b833d 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
{
int fldct;
- bool done;
+ bool done = false;
/* only available for text or csv input */
Assert(!cstate->opts.binary);
/* on input check that the header line is correct if needed */
- if (cstate->cur_lineno == 0 && cstate->opts.header_line)
+ if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
{
ListCell *cur;
TupleDesc tupDesc;
+ int lines_to_skip = cstate->opts.header_line;
+
+ /* If set to "match", one header line is skipped */
+ if (cstate->opts.header_line == COPY_HEADER_MATCH)
+ lines_to_skip = 1;
tupDesc = RelationGetDescr(cstate->rel);
- cstate->cur_lineno++;
- done = CopyReadLine(cstate, is_csv);
+ for (int i = 0; i < lines_to_skip; i++)
+ {
+ cstate->cur_lineno++;
+ if ((done = CopyReadLine(cstate, is_csv)))
+ break;
+ }
if (cstate->opts.header_line == COPY_HEADER_MATCH)
{
@@ -1538,7 +1547,7 @@ GetDecimalFromHex(char hex)
if (isdigit((unsigned char) hex))
return hex - '0';
else
- return tolower((unsigned char) hex) - 'a' + 10;
+ return pg_ascii_tolower((unsigned char) hex) - 'a' + 10;
}
/*
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index ea6f18f2c80..67b94b91cae 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
cstate->file_encoding);
/* if a header has been requested send the line */
- if (cstate->opts.header_line)
+ if (cstate->opts.header_line == COPY_HEADER_TRUE)
{
ListCell *cur;
bool hdr_delim = false;
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index c95eb945016..502a45163c8 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -570,8 +570,8 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
* any CREATE DATABASE commands.
*/
if (!IsBinaryUpgrade)
- RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
- CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL);
+ RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE |
+ CHECKPOINT_WAIT | CHECKPOINT_FLUSH_UNLOGGED);
/*
* Iterate through all tablespaces of the template database, and copy each
@@ -673,7 +673,7 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
* strategy that avoids these problems.
*/
if (!IsBinaryUpgrade)
- RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
+ RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE |
CHECKPOINT_WAIT);
}
@@ -1870,7 +1870,7 @@ dropdb(const char *dbname, bool missing_ok, bool force)
* Force a checkpoint to make sure the checkpointer has received the
* message sent by ForgetDatabaseSyncRequests.
*/
- RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+ RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
/* Close all smgr fds in all backends. */
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
@@ -2120,8 +2120,8 @@ movedb(const char *dbname, const char *tblspcname)
* On Windows, this also ensures that background procs don't hold any open
* files, which would cause rmdir() to fail.
*/
- RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
- | CHECKPOINT_FLUSH_ALL);
+ RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE | CHECKPOINT_WAIT
+ | CHECKPOINT_FLUSH_UNLOGGED);
/* Close all smgr fds in all backends. */
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
@@ -2252,7 +2252,7 @@ movedb(const char *dbname, const char *tblspcname)
* any unlogged operations done in the new DB tablespace before the
* next checkpoint.
*/
- RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+ RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
/*
* Force synchronous commit, thus minimizing the window between
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index c3ec2076a52..6f753ab6d7a 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -2469,8 +2469,8 @@ GetOperatorFromCompareType(Oid opclass, Oid rhstype, CompareType cmptype,
cmptype == COMPARE_EQ ? errmsg("could not identify an equality operator for type %s", format_type_be(opcintype)) :
cmptype == COMPARE_OVERLAP ? errmsg("could not identify an overlaps operator for type %s", format_type_be(opcintype)) :
cmptype == COMPARE_CONTAINED_BY ? errmsg("could not identify a contained-by operator for type %s", format_type_be(opcintype)) : 0,
- errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".",
- cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid)));
+ errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".",
+ cmptype, get_opfamily_name(opfamily, false), get_am_name(amid)));
/*
* We parameterize rhstype so foreign keys can ask for a <@ operator
@@ -2592,7 +2592,9 @@ makeObjectName(const char *name1, const char *name2, const char *label)
* constraint names.)
*
* Note: it is theoretically possible to get a collision anyway, if someone
- * else chooses the same name concurrently. This is fairly unlikely to be
+ * else chooses the same name concurrently. We shorten the race condition
+ * window by checking for conflicting relations using SnapshotDirty, but
+ * that doesn't close the window entirely. This is fairly unlikely to be
* a problem in practice, especially if one is holding an exclusive lock on
* the relation identified by name1. However, if choosing multiple names
* within a single command, you'd better create the new object and do
@@ -2608,15 +2610,45 @@ ChooseRelationName(const char *name1, const char *name2,
int pass = 0;
char *relname = NULL;
char modlabel[NAMEDATALEN];
+ SnapshotData SnapshotDirty;
+ Relation pgclassrel;
+
+ /* prepare to search pg_class with a dirty snapshot */
+ InitDirtySnapshot(SnapshotDirty);
+ pgclassrel = table_open(RelationRelationId, AccessShareLock);
/* try the unmodified label first */
strlcpy(modlabel, label, sizeof(modlabel));
for (;;)
{
+ ScanKeyData key[2];
+ SysScanDesc scan;
+ bool collides;
+
relname = makeObjectName(name1, name2, modlabel);
- if (!OidIsValid(get_relname_relid(relname, namespaceid)))
+ /* is there any conflicting relation name? */
+ ScanKeyInit(&key[0],
+ Anum_pg_class_relname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(relname));
+ ScanKeyInit(&key[1],
+ Anum_pg_class_relnamespace,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(namespaceid));
+
+ scan = systable_beginscan(pgclassrel, ClassNameNspIndexId,
+ true /* indexOK */ ,
+ &SnapshotDirty,
+ 2, key);
+
+ collides = HeapTupleIsValid(systable_getnext(scan));
+
+ systable_endscan(scan);
+
+ /* break out of loop if no conflict */
+ if (!collides)
{
if (!isconstraint ||
!ConstraintNameExists(relname, namespaceid))
@@ -2628,6 +2660,8 @@ ChooseRelationName(const char *name1, const char *name2,
snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
}
+ table_close(pgclassrel, AccessShareLock);
+
return relname;
}
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index 27c2cb26ef5..188e26f0e6e 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -835,7 +835,8 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
if (!foundUniqueIndex)
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("could not find suitable unique index on materialized view"));
+ errmsg("could not find suitable unique index on materialized view \"%s\"",
+ RelationGetRelationName(matviewRel)));
appendStringInfoString(&querybuf,
" AND newdata.* OPERATOR(pg_catalog.*=) mv.*) "
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0b23d94c38e..1bf7eaae5b3 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -2130,8 +2130,8 @@ defGetGeneratedColsOption(DefElem *def)
ereport(ERROR,
errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("%s requires a \"none\" or \"stored\" value",
- def->defname));
+ errmsg("invalid value for publication parameter \"%s\": \"%s\"", def->defname, sval),
+ errdetail("Valid values are \"%s\" and \"%s\".", "none", "stored"));
return PUBLISH_GENCOLS_NONE; /* keep compiler quiet */
}
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 4aec73bcc6b..e23b0de7242 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -1267,7 +1267,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
IsSet(opts.specified_opts, SUBOPT_SLOT_NAME))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("slot_name and two_phase cannot be altered at the same time")));
+ errmsg("\"slot_name\" and \"two_phase\" cannot be altered at the same time")));
/*
* Note that workers may still survive even if the
@@ -1283,7 +1283,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
if (logicalrep_workers_find(subid, true, true))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("cannot alter two_phase when logical replication worker is still running"),
+ errmsg("cannot alter \"two_phase\" when logical replication worker is still running"),
errhint("Try again after some time.")));
/*
@@ -1297,7 +1297,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
LookupGXactBySubid(subid))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("cannot disable two_phase when prepared transactions are present"),
+ errmsg("cannot disable \"two_phase\" when prepared transactions exist"),
errhint("Resolve these transactions and try again.")));
/* Change system catalog accordingly */
@@ -1539,7 +1539,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
if (!XLogRecPtrIsInvalid(remote_lsn) && opts.lsn < remote_lsn)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("skip WAL location (LSN %X/%X) must be greater than origin LSN %X/%X",
+ errmsg("skip WAL location (LSN %X/%08X) must be greater than origin LSN %X/%08X",
LSN_FORMAT_ARGS(opts.lsn),
LSN_FORMAT_ARGS(remote_lsn))));
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index ea96947d813..cb811520c29 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -2711,8 +2711,7 @@ MergeAttributes(List *columns, const List *supers, char relpersistence,
RelationGetRelationName(relation))));
/* If existing rel is temp, it must belong to this session */
- if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !relation->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(relation))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg(!is_partition
@@ -7374,7 +7373,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
/* make sure datatype is legal for a column */
CheckAttributeType(NameStr(attribute->attname), attribute->atttypid, attribute->attcollation,
list_make1_oid(rel->rd_rel->reltype),
- 0);
+ (attribute->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? CHKATYPE_IS_VIRTUAL : 0));
InsertPgAttributeTuples(attrdesc, tupdesc, myrelid, NULL, NULL);
@@ -8609,7 +8608,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName,
rel->rd_att->constr && rel->rd_att->constr->num_check > 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables with check constraints"),
+ errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables with check constraints"),
errdetail("Column \"%s\" of relation \"%s\" is a virtual generated column.",
colName, RelationGetRelationName(rel))));
@@ -8627,7 +8626,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName,
GetRelationPublications(RelationGetRelid(rel)) != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables that are part of a publication"),
+ errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables that are part of a publication"),
errdetail("Column \"%s\" of relation \"%s\" is a virtual generated column.",
colName, RelationGetRelationName(rel))));
@@ -10189,7 +10188,7 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
if (pk_has_without_overlaps && !with_period)
ereport(ERROR,
errcode(ERRCODE_INVALID_FOREIGN_KEY),
- errmsg("foreign key must use PERIOD when referencing a primary using WITHOUT OVERLAPS"));
+ errmsg("foreign key must use PERIOD when referencing a primary key using WITHOUT OVERLAPS"));
/*
* Now we can check permissions.
@@ -10330,8 +10329,8 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
for_overlaps
? errmsg("could not identify an overlaps operator for foreign key")
: errmsg("could not identify an equality operator for foreign key"),
- errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".",
- cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid)));
+ errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".",
+ cmptype, get_opfamily_name(opfamily, false), get_am_name(amid)));
/*
* There had better be a primary equality operator for the index.
@@ -12913,8 +12912,9 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
con->contype != CONSTRAINT_NOTNULL)
ereport(ERROR,
errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key, check, or not-null constraint",
- constrName, RelationGetRelationName(rel)));
+ errmsg("cannot validate constraint \"%s\" of relation \"%s\"",
+ constrName, RelationGetRelationName(rel)),
+ errdetail("This operation is not supported for this type of constraint."));
if (!con->conenforced)
ereport(ERROR,
@@ -14426,7 +14426,7 @@ ATPrepAlterColumnType(List **wqueue,
/* make sure datatype is legal for a column */
CheckAttributeType(colName, targettype, targetcollid,
list_make1_oid(rel->rd_rel->reltype),
- 0);
+ (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? CHKATYPE_IS_VIRTUAL : 0));
if (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
{
@@ -14484,6 +14484,9 @@ ATPrepAlterColumnType(List **wqueue,
/* Fix collations after all else */
assign_expr_collations(pstate, transform);
+ /* Expand virtual generated columns in the expr. */
+ transform = expand_generated_columns_in_expr(transform, rel, 1);
+
/* Plan the expr now so we can accurately assess the need to rewrite. */
transform = (Node *) expression_planner((Expr *) transform);
@@ -15411,9 +15414,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
/*
* Re-parse the index and constraint definitions, and attach them to the
* appropriate work queue entries. We do this before dropping because in
- * the case of a FOREIGN KEY constraint, we might not yet have exclusive
- * lock on the table the constraint is attached to, and we need to get
- * that before reparsing/dropping.
+ * the case of a constraint on another table, we might not yet have
+ * exclusive lock on the table the constraint is attached to, and we need
+ * to get that before reparsing/dropping. (That's possible at least for
+ * FOREIGN KEY, CHECK, and EXCLUSION constraints; in non-FK cases it
+ * requires a dependency on the target table's composite type in the other
+ * table's constraint expressions.)
*
* We can't rely on the output of deparsing to tell us which relation to
* operate on, because concurrent activity might have made the name
@@ -15429,7 +15435,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
Form_pg_constraint con;
Oid relid;
Oid confrelid;
- char contype;
bool conislocal;
tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId));
@@ -15446,7 +15451,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
elog(ERROR, "could not identify relation associated with constraint %u", oldId);
}
confrelid = con->confrelid;
- contype = con->contype;
conislocal = con->conislocal;
ReleaseSysCache(tup);
@@ -15464,12 +15468,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
continue;
/*
- * When rebuilding an FK constraint that references the table we're
- * modifying, we might not yet have any lock on the FK's table, so get
- * one now. We'll need AccessExclusiveLock for the DROP CONSTRAINT
- * step, so there's no value in asking for anything weaker.
+ * When rebuilding another table's constraint that references the
+ * table we're modifying, we might not yet have any lock on the other
+ * table, so get one now. We'll need AccessExclusiveLock for the DROP
+ * CONSTRAINT step, so there's no value in asking for anything weaker.
*/
- if (relid != tab->relid && contype == CONSTRAINT_FOREIGN)
+ if (relid != tab->relid)
LockRelationOid(relid, AccessExclusiveLock);
ATPostAlterTypeParse(oldId, relid, confrelid,
@@ -15483,6 +15487,14 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
Oid relid;
relid = IndexGetRelation(oldId, false);
+
+ /*
+ * As above, make sure we have lock on the index's table if it's not
+ * the same table.
+ */
+ if (relid != tab->relid)
+ LockRelationOid(relid, AccessExclusiveLock);
+
ATPostAlterTypeParse(oldId, relid, InvalidOid,
(char *) lfirst(def_item),
wqueue, lockmode, tab->rewrite);
@@ -15499,6 +15511,20 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
Oid relid;
relid = StatisticsGetRelation(oldId, false);
+
+ /*
+ * As above, make sure we have lock on the statistics object's table
+ * if it's not the same table. However, we take
+ * ShareUpdateExclusiveLock here, aligning with the lock level used in
+ * CreateStatistics and RemoveStatisticsById.
+ *
+ * CAUTION: this should be done after all cases that grab
+ * AccessExclusiveLock, else we risk causing deadlock due to needing
+ * to promote our table lock.
+ */
+ if (relid != tab->relid)
+ LockRelationOid(relid, ShareUpdateExclusiveLock);
+
ATPostAlterTypeParse(oldId, relid, InvalidOid,
(char *) lfirst(def_item),
wqueue, lockmode, tab->rewrite);
@@ -15722,7 +15748,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
{
AlterDomainStmt *stmt = (AlterDomainStmt *) stm;
- if (stmt->subtype == 'C') /* ADD CONSTRAINT */
+ if (stmt->subtype == AD_AddConstraint)
{
Constraint *con = castNode(Constraint, stmt->def);
AlterTableCmd *cmd = makeNode(AlterTableCmd);
@@ -17225,15 +17251,13 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
RelationGetRelationName(parent_rel))));
/* If parent rel is temp, it must belong to this session */
- if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !parent_rel->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(parent_rel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot inherit from temporary relation of another session")));
/* Ditto for the child */
- if (child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !child_rel->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(child_rel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot inherit to temporary relation of another session")));
@@ -20304,15 +20328,13 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd,
RelationGetRelationName(rel))));
/* If the parent is temp, it must belong to this session */
- if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !rel->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(rel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot attach as partition of temporary relation of another session")));
/* Ditto for the partition */
- if (attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !attachrel->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(attachrel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot attach temporary relation of another session as partition")));
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index a9005cc7212..df31eace47a 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -500,7 +500,7 @@ DropTableSpace(DropTableSpaceStmt *stmt)
* mustn't delete. So instead, we force a checkpoint which will clean
* out any lingering files, and try again.
*/
- RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+ RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
/*
* On Windows, an unlinked file persists in the directory listing
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 45ae7472ab5..26d985193ae 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -939,11 +939,19 @@ DefineDomain(ParseState *pstate, CreateDomainStmt *stmt)
break;
case CONSTR_NOTNULL:
- if (nullDefined && !typNotNull)
+ if (nullDefined)
+ {
+ if (!typNotNull)
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting NULL/NOT NULL constraints"),
+ parser_errposition(pstate, constr->location));
+
ereport(ERROR,
- errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("conflicting NULL/NOT NULL constraints"),
+ errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("redundant NOT NULL constraint definition"),
parser_errposition(pstate, constr->location));
+ }
if (constr->is_no_inherit)
ereport(ERROR,
errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 33a33bf6b1c..733ef40ae7c 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -56,6 +56,7 @@
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/guc_hooks.h"
+#include "utils/injection_point.h"
#include "utils/memutils.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
@@ -123,7 +124,7 @@ static void vac_truncate_clog(TransactionId frozenXID,
MultiXactId minMulti,
TransactionId lastSaneFrozenXid,
MultiXactId lastSaneMinMulti);
-static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
+static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
BufferAccessStrategy bstrategy);
static double compute_parallel_delay(void);
static VacOptValue get_vacoptval_from_boolean(DefElem *def);
@@ -464,7 +465,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
}
/* Now go through the common routine */
- vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel);
+ vacuum(vacstmt->rels, params, bstrategy, vac_context, isTopLevel);
/* Finally, clean up the vacuum memory context */
MemoryContextDelete(vac_context);
@@ -493,7 +494,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
* memory context that will not disappear at transaction commit.
*/
void
-vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
+vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy,
MemoryContext vac_context, bool isTopLevel)
{
static bool in_vacuum = false;
@@ -502,9 +503,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
volatile bool in_outer_xact,
use_own_xacts;
- Assert(params != NULL);
-
- stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
+ stmttype = (params.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
/*
* We cannot run VACUUM inside a user transaction block; if we were inside
@@ -514,7 +513,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
*
* ANALYZE (without VACUUM) can run either way.
*/
- if (params->options & VACOPT_VACUUM)
+ if (params.options & VACOPT_VACUUM)
{
PreventInTransactionBlock(isTopLevel, stmttype);
in_outer_xact = false;
@@ -537,7 +536,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
* Build list of relation(s) to process, putting any new data in
* vac_context for safekeeping.
*/
- if (params->options & VACOPT_ONLY_DATABASE_STATS)
+ if (params.options & VACOPT_ONLY_DATABASE_STATS)
{
/* We don't process any tables in this case */
Assert(relations == NIL);
@@ -553,7 +552,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
List *sublist;
MemoryContext old_context;
- sublist = expand_vacuum_rel(vrel, vac_context, params->options);
+ sublist = expand_vacuum_rel(vrel, vac_context, params.options);
old_context = MemoryContextSwitchTo(vac_context);
newrels = list_concat(newrels, sublist);
MemoryContextSwitchTo(old_context);
@@ -561,7 +560,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
relations = newrels;
}
else
- relations = get_all_vacuum_rels(vac_context, params->options);
+ relations = get_all_vacuum_rels(vac_context, params.options);
/*
* Decide whether we need to start/commit our own transactions.
@@ -577,11 +576,11 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
* transaction block, and also in an autovacuum worker, use own
* transactions so we can release locks sooner.
*/
- if (params->options & VACOPT_VACUUM)
+ if (params.options & VACOPT_VACUUM)
use_own_xacts = true;
else
{
- Assert(params->options & VACOPT_ANALYZE);
+ Assert(params.options & VACOPT_ANALYZE);
if (AmAutoVacuumWorkerProcess())
use_own_xacts = true;
else if (in_outer_xact)
@@ -632,13 +631,13 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
{
VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
- if (params->options & VACOPT_VACUUM)
+ if (params.options & VACOPT_VACUUM)
{
if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
continue;
}
- if (params->options & VACOPT_ANALYZE)
+ if (params.options & VACOPT_ANALYZE)
{
/*
* If using separate xacts, start one for analyze. Otherwise,
@@ -702,8 +701,8 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
StartTransactionCommand();
}
- if ((params->options & VACOPT_VACUUM) &&
- !(params->options & VACOPT_SKIP_DATABASE_STATS))
+ if ((params.options & VACOPT_VACUUM) &&
+ !(params.options & VACOPT_SKIP_DATABASE_STATS))
{
/*
* Update pg_database.datfrozenxid, and truncate pg_xact if possible.
@@ -1101,7 +1100,7 @@ get_all_vacuum_rels(MemoryContext vac_context, int options)
* minimum).
*/
bool
-vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
+vacuum_get_cutoffs(Relation rel, const VacuumParams params,
struct VacuumCutoffs *cutoffs)
{
int freeze_min_age,
@@ -1117,10 +1116,10 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
aggressiveMXIDCutoff;
/* Use mutable copies of freeze age parameters */
- freeze_min_age = params->freeze_min_age;
- multixact_freeze_min_age = params->multixact_freeze_min_age;
- freeze_table_age = params->freeze_table_age;
- multixact_freeze_table_age = params->multixact_freeze_table_age;
+ freeze_min_age = params.freeze_min_age;
+ multixact_freeze_min_age = params.multixact_freeze_min_age;
+ freeze_table_age = params.freeze_table_age;
+ multixact_freeze_table_age = params.multixact_freeze_table_age;
/* Set pg_class fields in cutoffs */
cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
@@ -1997,7 +1996,7 @@ vac_truncate_clog(TransactionId frozenXID,
* At entry and exit, we are not inside a transaction.
*/
static bool
-vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
+vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
BufferAccessStrategy bstrategy)
{
LOCKMODE lmode;
@@ -2008,13 +2007,18 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
Oid save_userid;
int save_sec_context;
int save_nestlevel;
+ VacuumParams toast_vacuum_params;
- Assert(params != NULL);
+ /*
+ * This function scribbles on the parameters, so make a copy early to
+ * avoid affecting the TOAST table (if we do end up recursing to it).
+ */
+ memcpy(&toast_vacuum_params, &params, sizeof(VacuumParams));
/* Begin a transaction for vacuuming this relation */
StartTransactionCommand();
- if (!(params->options & VACOPT_FULL))
+ if (!(params.options & VACOPT_FULL))
{
/*
* In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
@@ -2040,7 +2044,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
*/
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_IN_VACUUM;
- if (params->is_wraparound)
+ if (params.is_wraparound)
MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
LWLockRelease(ProcArrayLock);
@@ -2064,12 +2068,12 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
* vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
* way, we can be sure that no other backend is vacuuming the same table.
*/
- lmode = (params->options & VACOPT_FULL) ?
+ lmode = (params.options & VACOPT_FULL) ?
AccessExclusiveLock : ShareUpdateExclusiveLock;
/* open the relation and get the appropriate lock on it */
- rel = vacuum_open_relation(relid, relation, params->options,
- params->log_min_duration >= 0, lmode);
+ rel = vacuum_open_relation(relid, relation, params.options,
+ params.log_min_duration >= 0, lmode);
/* leave if relation could not be opened or locked */
if (!rel)
@@ -2084,8 +2088,8 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
* This is only safe to do because we hold a session lock on the main
* relation that prevents concurrent deletion.
*/
- if (OidIsValid(params->toast_parent))
- priv_relid = params->toast_parent;
+ if (OidIsValid(params.toast_parent))
+ priv_relid = params.toast_parent;
else
priv_relid = RelationGetRelid(rel);
@@ -2098,7 +2102,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
*/
if (!vacuum_is_permitted_for_relation(priv_relid,
rel->rd_rel,
- params->options & ~VACOPT_ANALYZE))
+ params.options & ~VACOPT_ANALYZE))
{
relation_close(rel, lmode);
PopActiveSnapshot();
@@ -2169,7 +2173,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
* Set index_cleanup option based on index_cleanup reloption if it wasn't
* specified in VACUUM command, or when running in an autovacuum worker
*/
- if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
+ if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED)
{
StdRdOptIndexCleanup vacuum_index_cleanup;
@@ -2180,56 +2184,74 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
- params->index_cleanup = VACOPTVALUE_AUTO;
+ params.index_cleanup = VACOPTVALUE_AUTO;
else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
- params->index_cleanup = VACOPTVALUE_ENABLED;
+ params.index_cleanup = VACOPTVALUE_ENABLED;
else
{
Assert(vacuum_index_cleanup ==
STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
- params->index_cleanup = VACOPTVALUE_DISABLED;
+ params.index_cleanup = VACOPTVALUE_DISABLED;
}
}
+#ifdef USE_INJECTION_POINTS
+ if (params.index_cleanup == VACOPTVALUE_AUTO)
+ INJECTION_POINT("vacuum-index-cleanup-auto", NULL);
+ else if (params.index_cleanup == VACOPTVALUE_DISABLED)
+ INJECTION_POINT("vacuum-index-cleanup-disabled", NULL);
+ else if (params.index_cleanup == VACOPTVALUE_ENABLED)
+ INJECTION_POINT("vacuum-index-cleanup-enabled", NULL);
+#endif
+
/*
* Check if the vacuum_max_eager_freeze_failure_rate table storage
* parameter was specified. This overrides the GUC value.
*/
if (rel->rd_options != NULL &&
((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
- params->max_eager_freeze_failure_rate =
+ params.max_eager_freeze_failure_rate =
((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
/*
* Set truncate option based on truncate reloption or GUC if it wasn't
* specified in VACUUM command, or when running in an autovacuum worker
*/
- if (params->truncate == VACOPTVALUE_UNSPECIFIED)
+ if (params.truncate == VACOPTVALUE_UNSPECIFIED)
{
StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
if (opts && opts->vacuum_truncate_set)
{
if (opts->vacuum_truncate)
- params->truncate = VACOPTVALUE_ENABLED;
+ params.truncate = VACOPTVALUE_ENABLED;
else
- params->truncate = VACOPTVALUE_DISABLED;
+ params.truncate = VACOPTVALUE_DISABLED;
}
else if (vacuum_truncate)
- params->truncate = VACOPTVALUE_ENABLED;
+ params.truncate = VACOPTVALUE_ENABLED;
else
- params->truncate = VACOPTVALUE_DISABLED;
+ params.truncate = VACOPTVALUE_DISABLED;
}
+#ifdef USE_INJECTION_POINTS
+ if (params.truncate == VACOPTVALUE_AUTO)
+ INJECTION_POINT("vacuum-truncate-auto", NULL);
+ else if (params.truncate == VACOPTVALUE_DISABLED)
+ INJECTION_POINT("vacuum-truncate-disabled", NULL);
+ else if (params.truncate == VACOPTVALUE_ENABLED)
+ INJECTION_POINT("vacuum-truncate-enabled", NULL);
+#endif
+
/*
* Remember the relation's TOAST relation for later, if the caller asked
* us to process it. In VACUUM FULL, though, the toast table is
* automatically rebuilt by cluster_rel so we shouldn't recurse to it,
* unless PROCESS_MAIN is disabled.
*/
- if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
- ((params->options & VACOPT_FULL) == 0 ||
- (params->options & VACOPT_PROCESS_MAIN) == 0))
+ if ((params.options & VACOPT_PROCESS_TOAST) != 0 &&
+ ((params.options & VACOPT_FULL) == 0 ||
+ (params.options & VACOPT_PROCESS_MAIN) == 0))
toast_relid = rel->rd_rel->reltoastrelid;
else
toast_relid = InvalidOid;
@@ -2252,16 +2274,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
* table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
* to be set when we recurse to the TOAST table.
*/
- if (params->options & VACOPT_PROCESS_MAIN)
+ if (params.options & VACOPT_PROCESS_MAIN)
{
/*
* Do the actual work --- either FULL or "lazy" vacuum
*/
- if (params->options & VACOPT_FULL)
+ if (params.options & VACOPT_FULL)
{
ClusterParams cluster_params = {0};
- if ((params->options & VACOPT_VERBOSE) != 0)
+ if ((params.options & VACOPT_VERBOSE) != 0)
cluster_params.options |= CLUOPT_VERBOSE;
/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
@@ -2299,19 +2321,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
*/
if (toast_relid != InvalidOid)
{
- VacuumParams toast_vacuum_params;
-
/*
* Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
* set toast_parent so that the privilege checks are done on the main
* relation. NB: This is only safe to do because we hold a session
* lock on the main relation that prevents concurrent deletion.
*/
- memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
toast_vacuum_params.toast_parent = relid;
- vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy);
+ vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy);
}
/*
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 8a72b5e70a4..1a37737d4a2 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -5228,7 +5228,6 @@ ExecEvalJsonCoercionFinish(ExprState *state, ExprEvalStep *op)
* JsonBehavior expression.
*/
jsestate->escontext.error_occurred = false;
- jsestate->escontext.error_occurred = false;
jsestate->escontext.details_wanted = true;
}
}
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index bdf862b2406..ca33a854278 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -279,7 +279,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* executor is performing an UPDATE that could not use an
* optimization like heapam's HOT (in more general terms a
* call to table_tuple_update() took place and set
- * 'update_indexes' to TUUI_All). Receiving this hint makes
+ * 'update_indexes' to TU_All). Receiving this hint makes
* us consider if we should pass down the 'indexUnchanged'
* hint in turn. That's something that we figure out for
* each index_insert() call iff 'update' is true.
@@ -290,7 +290,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* HOT has been applied and any updated columns are indexed
* only by summarizing indexes (or in more general terms a
* call to table_tuple_update() took place and set
- * 'update_indexes' to TUUI_Summarizing). We can (and must)
+ * 'update_indexes' to TU_Summarizing). We can (and must)
* therefore only update the indexes that have
* 'amsummarizing' = true.
*
diff --git a/src/backend/jit/llvm/meson.build b/src/backend/jit/llvm/meson.build
index c8e06dfbe35..805fbd69006 100644
--- a/src/backend/jit/llvm/meson.build
+++ b/src/backend/jit/llvm/meson.build
@@ -53,7 +53,7 @@ llvm_irgen_args = [
if ccache.found()
llvm_irgen_command = ccache
- llvm_irgen_args = [clang.path()] + llvm_irgen_args
+ llvm_irgen_args = [clang.full_path()] + llvm_irgen_args
else
llvm_irgen_command = clang
endif
diff --git a/src/backend/lib/README b/src/backend/lib/README
index f2fb591237d..c28cbe356f0 100644
--- a/src/backend/lib/README
+++ b/src/backend/lib/README
@@ -1,8 +1,6 @@
This directory contains a general purpose data structures, for use anywhere
in the backend:
-binaryheap.c - a binary heap
-
bipartite_match.c - Hopcroft-Karp maximum cardinality algorithm for bipartite graphs
bloomfilter.c - probabilistic, space-efficient set membership testing
@@ -21,8 +19,6 @@ pairingheap.c - a pairing heap
rbtree.c - a red-black tree
-stringinfo.c - an extensible string type
-
Aside from the inherent characteristics of the data structures, there are a
few practical differences between the binary heap and the pairing heap. The
diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index 64ff3ce3d6a..c8b63ef8249 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -1436,10 +1436,10 @@ initialize_ecdh(SSL_CTX *context, bool isServerStart)
*/
ereport(isServerStart ? FATAL : LOG,
errcode(ERRCODE_CONFIG_FILE_ERROR),
- errmsg("failed to set group names specified in ssl_groups: %s",
+ errmsg("could not set group names specified in ssl_groups: %s",
SSLerrmessageExt(ERR_get_error(),
_("No valid groups found"))),
- errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL"));
+ errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL."));
return false;
}
#endif
diff --git a/src/backend/libpq/hba.c b/src/backend/libpq/hba.c
index 332fad27835..fecee8224d0 100644
--- a/src/backend/libpq/hba.c
+++ b/src/backend/libpq/hba.c
@@ -2873,8 +2873,11 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
!token_has_regexp(identLine->pg_user) &&
(ofs = strstr(identLine->pg_user->string, "\\1")) != NULL)
{
+ const char *repl_str;
+ size_t repl_len;
+ char *old_pg_user;
char *expanded_pg_user;
- int offset;
+ size_t offset;
/* substitution of the first argument requested */
if (matches[1].rm_so < 0)
@@ -2886,18 +2889,33 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
*error_p = true;
return;
}
+ repl_str = system_user + matches[1].rm_so;
+ repl_len = matches[1].rm_eo - matches[1].rm_so;
/*
- * length: original length minus length of \1 plus length of match
- * plus null terminator
+ * It's allowed to have more than one \1 in the string, and we'll
+ * replace them all. But that's pretty unusual so we optimize on
+ * the assumption of only one occurrence, which motivates doing
+ * repeated replacements instead of making two passes over the
+ * string to determine the final length right away.
*/
- expanded_pg_user = palloc0(strlen(identLine->pg_user->string) - 2 + (matches[1].rm_eo - matches[1].rm_so) + 1);
- offset = ofs - identLine->pg_user->string;
- memcpy(expanded_pg_user, identLine->pg_user->string, offset);
- memcpy(expanded_pg_user + offset,
- system_user + matches[1].rm_so,
- matches[1].rm_eo - matches[1].rm_so);
- strcat(expanded_pg_user, ofs + 2);
+ old_pg_user = identLine->pg_user->string;
+ do
+ {
+ /*
+ * length: current length minus length of \1 plus length of
+ * replacement plus null terminator
+ */
+ expanded_pg_user = palloc(strlen(old_pg_user) - 2 + repl_len + 1);
+ /* ofs points into the old_pg_user string at this point */
+ offset = ofs - old_pg_user;
+ memcpy(expanded_pg_user, old_pg_user, offset);
+ memcpy(expanded_pg_user + offset, repl_str, repl_len);
+ strcpy(expanded_pg_user + offset + repl_len, ofs + 2);
+ if (old_pg_user != identLine->pg_user->string)
+ pfree(old_pg_user);
+ old_pg_user = expanded_pg_user;
+ } while ((ofs = strstr(old_pg_user + offset + repl_len, "\\1")) != NULL);
/*
* Mark the token as quoted, so it will only be compared literally
diff --git a/src/backend/libpq/pg_ident.conf.sample b/src/backend/libpq/pg_ident.conf.sample
index f5225f26cdf..8ee6c0ba315 100644
--- a/src/backend/libpq/pg_ident.conf.sample
+++ b/src/backend/libpq/pg_ident.conf.sample
@@ -13,25 +13,25 @@
# user names to their corresponding PostgreSQL user names. Records
# are of the form:
#
-# MAPNAME SYSTEM-USERNAME PG-USERNAME
+# MAPNAME SYSTEM-USERNAME DATABASE-USERNAME
#
# (The uppercase quantities must be replaced by actual values.)
#
# MAPNAME is the (otherwise freely chosen) map name that was used in
# pg_hba.conf. SYSTEM-USERNAME is the detected user name of the
-# client. PG-USERNAME is the requested PostgreSQL user name. The
-# existence of a record specifies that SYSTEM-USERNAME may connect as
-# PG-USERNAME.
+# client. DATABASE-USERNAME is the requested PostgreSQL user name.
+# The existence of a record specifies that SYSTEM-USERNAME may connect
+# as DATABASE-USERNAME.
#
-# If SYSTEM-USERNAME starts with a slash (/), it will be treated as a
-# regular expression. Optionally this can contain a capture (a
-# parenthesized subexpression). The substring matching the capture
-# will be substituted for \1 (backslash-one) if present in
-# PG-USERNAME.
+# If SYSTEM-USERNAME starts with a slash (/), the rest of it will be
+# treated as a regular expression. Optionally this can contain a capture
+# (a parenthesized subexpression). The substring matching the capture
+# will be substituted for \1 (backslash-one) if that appears in
+# DATABASE-USERNAME.
#
-# PG-USERNAME can be "all", a user name, a group name prefixed with "+", or
-# a regular expression (if it starts with a slash (/)). If it is a regular
-# expression, the substring matching with \1 has no effect.
+# DATABASE-USERNAME can be "all", a user name, a group name prefixed with "+",
+# or a regular expression (if it starts with a slash (/)). If it is a regular
+# expression, no substitution for \1 will occur.
#
# Multiple maps may be specified in this file and used by pg_hba.conf.
#
@@ -69,4 +69,4 @@
# Put your actual configuration here
# ----------------------------------
-# MAPNAME SYSTEM-USERNAME PG-USERNAME
+# MAPNAME SYSTEM-USERNAME DATABASE-USERNAME
diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index 7d63cf94a6b..bdcb5e4f261 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -125,13 +125,17 @@ main(int argc, char *argv[])
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("postgres"));
/*
- * In the postmaster, absorb the environment values for LC_COLLATE and
- * LC_CTYPE. Individual backends will change these later to settings
- * taken from pg_database, but the postmaster cannot do that. If we leave
- * these set to "C" then message localization might not work well in the
- * postmaster.
+ * Collation is handled by pg_locale.c, and the behavior is dependent on
+ * the provider. strcoll(), etc., should not be called directly.
+ */
+ init_locale("LC_COLLATE", LC_COLLATE, "C");
+
+ /*
+ * In the postmaster, absorb the environment value for LC_CTYPE.
+ * Individual backends will change it later to pg_database.datctype, but
+ * the postmaster cannot do that. If we leave it set to "C" then message
+ * localization might not work well in the postmaster.
*/
- init_locale("LC_COLLATE", LC_COLLATE, "");
init_locale("LC_CTYPE", LC_CTYPE, "");
/*
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index fb33e6931ad..31f97151977 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -21,6 +21,11 @@
* tree(s) generated from the query. The executor can then use this value
* to blame query costs on the proper queryId.
*
+ * Arrays of two or more constants and PARAM_EXTERN parameters are "squashed"
+ * and contribute only once to the jumble. This has the effect that queries
+ * that differ only on the length of such lists have the same queryId.
+ *
+ *
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -61,11 +66,13 @@ static void AppendJumble(JumbleState *jstate,
const unsigned char *value, Size size);
static void FlushPendingNulls(JumbleState *jstate);
static void RecordConstLocation(JumbleState *jstate,
+ bool extern_param,
int location, int len);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
+static void _jumbleParam(JumbleState *jstate, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
-static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
static void _jumbleRangeTblEntry_eref(JumbleState *jstate,
RangeTblEntry *rte,
@@ -185,6 +192,7 @@ InitJumble(void)
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
jstate->pending_nulls = 0;
+ jstate->has_squashed_lists = false;
#ifdef USE_ASSERT_CHECKING
jstate->total_jumble_len = 0;
#endif
@@ -207,6 +215,10 @@ DoJumble(JumbleState *jstate, Node *node)
if (jstate->pending_nulls > 0)
FlushPendingNulls(jstate);
+ /* Squashed list found, reset highest_extern_param_id */
+ if (jstate->has_squashed_lists)
+ jstate->highest_extern_param_id = 0;
+
/* Process the jumble buffer and produce the hash value */
return DatumGetInt64(hash_any_extended(jstate->jumble,
jstate->jumble_len,
@@ -376,14 +388,14 @@ FlushPendingNulls(JumbleState *jstate)
* Record the location of some kind of constant within a query string.
* These are not only bare constants but also expressions that ultimately
* constitute a constant, such as those inside casts and simple function
- * calls.
+ * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param.
*
* If length is -1, it indicates a single such constant element. If
* it's a positive integer, it indicates the length of a squashable
* list of them.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, int len)
+RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -406,6 +418,7 @@ RecordConstLocation(JumbleState *jstate, int location, int len)
Assert(len > -1 || len == -1);
jstate->clocations[jstate->clocations_count].length = len;
jstate->clocations[jstate->clocations_count].squashed = (len > -1);
+ jstate->clocations[jstate->clocations_count].extern_param = extern_param;
jstate->clocations_count++;
}
}
@@ -414,22 +427,34 @@ RecordConstLocation(JumbleState *jstate, int location, int len)
* Subroutine for _jumbleElements: Verify a few simple cases where we can
* deduce that the expression is a constant:
*
- * - Ignore a possible wrapping RelabelType and CoerceViaIO.
+ * - See through any wrapping RelabelType and CoerceViaIO layers.
* - If it's a FuncExpr, check that the function is a builtin
* cast and its arguments are Const.
- * - Otherwise test if the expression is a simple Const.
+ * - Otherwise test if the expression is a simple Const or a
+ * PARAM_EXTERN param.
*/
static bool
IsSquashableConstant(Node *element)
{
- if (IsA(element, RelabelType))
- element = (Node *) ((RelabelType *) element)->arg;
-
- if (IsA(element, CoerceViaIO))
- element = (Node *) ((CoerceViaIO *) element)->arg;
-
+restart:
switch (nodeTag(element))
{
+ case T_RelabelType:
+ /* Unwrap RelabelType */
+ element = (Node *) ((RelabelType *) element)->arg;
+ goto restart;
+
+ case T_CoerceViaIO:
+ /* Unwrap CoerceViaIO */
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+ goto restart;
+
+ case T_Const:
+ return true;
+
+ case T_Param:
+ return castNode(Param, element)->paramkind == PARAM_EXTERN;
+
case T_FuncExpr:
{
FuncExpr *func = (FuncExpr *) element;
@@ -468,11 +493,8 @@ IsSquashableConstant(Node *element)
}
default:
- if (!IsA(element, Const))
- return false;
+ return false;
}
-
- return true;
}
/*
@@ -482,8 +504,8 @@ IsSquashableConstant(Node *element)
* Return value indicates if squashing is possible.
*
* Note that this function searches only for explicit Const nodes with
- * possibly very simple decorations on top, and does not try to simplify
- * expressions.
+ * possibly very simple decorations on top and PARAM_EXTERN parameters,
+ * and does not try to simplify expressions.
*/
static bool
IsSquashableConstantList(List *elements)
@@ -508,7 +530,7 @@ IsSquashableConstantList(List *elements)
#define JUMBLE_ELEMENTS(list, node) \
_jumbleElements(jstate, (List *) expr->list, node)
#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location, -1)
+ RecordConstLocation(jstate, false, expr->location, -1)
#define JUMBLE_FIELD(item) \
do { \
if (sizeof(expr->item) == 8) \
@@ -535,42 +557,6 @@ do { \
#include "queryjumblefuncs.funcs.c"
-/*
- * We try to jumble lists of expressions as one individual item regardless
- * of how many elements are in the list. This is know as squashing, which
- * results in different queries jumbling to the same query_id, if the only
- * difference is the number of elements in the list.
- *
- * We allow constants to be squashed. To normalize such queries, we use
- * the start and end locations of the list of elements in a list.
- */
-static void
-_jumbleElements(JumbleState *jstate, List *elements, Node *node)
-{
- bool normalize_list = false;
-
- if (IsSquashableConstantList(elements))
- {
- if (IsA(node, ArrayExpr))
- {
- ArrayExpr *aexpr = (ArrayExpr *) node;
-
- if (aexpr->list_start > 0 && aexpr->list_end > 0)
- {
- RecordConstLocation(jstate,
- aexpr->list_start + 1,
- (aexpr->list_end - aexpr->list_start) - 1);
- normalize_list = true;
- }
- }
- }
-
- if (!normalize_list)
- {
- _jumbleNode(jstate, (Node *) elements);
- }
-}
-
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
@@ -612,26 +598,6 @@ _jumbleNode(JumbleState *jstate, Node *node)
break;
}
- /* Special cases to handle outside the automated code */
- switch (nodeTag(expr))
- {
- case T_Param:
- {
- Param *p = (Param *) node;
-
- /*
- * Update the highest Param id seen, in order to start
- * normalization correctly.
- */
- if (p->paramkind == PARAM_EXTERN &&
- p->paramid > jstate->highest_extern_param_id)
- jstate->highest_extern_param_id = p->paramid;
- }
- break;
- default:
- break;
- }
-
/* Ensure we added something to the jumble buffer */
Assert(jstate->total_jumble_len > prev_jumble_len);
}
@@ -667,6 +633,79 @@ _jumbleList(JumbleState *jstate, Node *node)
}
}
+/*
+ * We try to jumble lists of expressions as one individual item regardless
+ * of how many elements are in the list. This is know as squashing, which
+ * results in different queries jumbling to the same query_id, if the only
+ * difference is the number of elements in the list.
+ *
+ * We allow constants and PARAM_EXTERN parameters to be squashed. To normalize
+ * such queries, we use the start and end locations of the list of elements in
+ * a list.
+ */
+static void
+_jumbleElements(JumbleState *jstate, List *elements, Node *node)
+{
+ bool normalize_list = false;
+
+ if (IsSquashableConstantList(elements))
+ {
+ if (IsA(node, ArrayExpr))
+ {
+ ArrayExpr *aexpr = (ArrayExpr *) node;
+
+ if (aexpr->list_start > 0 && aexpr->list_end > 0)
+ {
+ RecordConstLocation(jstate,
+ false,
+ aexpr->list_start + 1,
+ (aexpr->list_end - aexpr->list_start) - 1);
+ normalize_list = true;
+ jstate->has_squashed_lists = true;
+ }
+ }
+ }
+
+ if (!normalize_list)
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
+/*
+ * We store the highest param ID of extern params. This can later be used
+ * to start the numbering of the placeholder for squashed lists.
+ */
+static void
+_jumbleParam(JumbleState *jstate, Node *node)
+{
+ Param *expr = (Param *) node;
+
+ JUMBLE_FIELD(paramkind);
+ JUMBLE_FIELD(paramid);
+ JUMBLE_FIELD(paramtype);
+ /* paramtypmode and paramcollid are ignored */
+
+ if (expr->paramkind == PARAM_EXTERN)
+ {
+ /*
+ * At this point, only external parameter locations outside of
+ * squashable lists will be recorded.
+ */
+ RecordConstLocation(jstate, true, expr->location, -1);
+
+ /*
+ * Update the highest Param id seen, in order to start normalization
+ * correctly.
+ *
+ * Note: This value is reset at the end of jumbling if there exists a
+ * squashable list. See the comment in the definition of JumbleState.
+ */
+ if (expr->paramid > jstate->highest_extern_param_id)
+ jstate->highest_extern_param_id = expr->paramid;
+ }
+}
+
static void
_jumbleA_Const(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 3d44815ed5a..1f04a2c182c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2247,7 +2247,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
* Determines and returns the cost of an Append node.
*/
void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
{
ListCell *l;
@@ -2309,26 +2309,52 @@ cost_append(AppendPath *apath)
foreach(l, apath->subpaths)
{
Path *subpath = (Path *) lfirst(l);
- Path sort_path; /* dummy for result of cost_sort */
+ int presorted_keys;
+ Path sort_path; /* dummy for result of
+ * cost_sort/cost_incremental_sort */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
/*
* We'll need to insert a Sort node, so include costs for
- * that. We can use the parent's LIMIT if any, since we
+ * that. We choose to use incremental sort if it is
+ * enabled and there are presorted keys; otherwise we use
+ * full sort.
+ *
+ * We can use the parent's LIMIT if any, since we
* certainly won't pull more than that many tuples from
* any child.
*/
- cost_sort(&sort_path,
- NULL, /* doesn't currently need root */
- pathkeys,
- subpath->disabled_nodes,
- subpath->total_cost,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- apath->limit_tuples);
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ cost_incremental_sort(&sort_path,
+ root,
+ pathkeys,
+ presorted_keys,
+ subpath->disabled_nodes,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ }
+ else
+ {
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ subpath->disabled_nodes,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ }
+
subpath = &sort_path;
}
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 26f0336f1e4..ebedc5574ca 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -154,13 +154,17 @@ add_paths_to_joinrel(PlannerInfo *root,
/*
* See if the inner relation is provably unique for this outer rel.
*
- * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
- * matter since the executor can make the equivalent optimization anyway;
- * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
- * must be considering a semijoin whose inner side is not provably unique
- * (else reduce_unique_semijoins would've simplified it), so there's no
- * point in calling innerrel_is_unique. However, if the LHS covers all of
- * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+ * We have some special cases: for JOIN_SEMI, it doesn't matter since the
+ * executor can make the equivalent optimization anyway. It also doesn't
+ * help enable use of Memoize, since a semijoin with a provably unique
+ * inner side should have been reduced to an inner join in that case.
+ * Therefore, we need not expend planner cycles on proofs. (For
+ * JOIN_ANTI, although it doesn't help the executor for the same reason,
+ * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be
+ * considering a semijoin whose inner side is not provably unique (else
+ * reduce_unique_semijoins would've simplified it), so there's no point in
+ * calling innerrel_is_unique. However, if the LHS covers all of the
+ * semijoin's min_lefthand, then it's appropriate to set inner_unique
* because the path produced by create_unique_path will be unique relative
* to the LHS. (If we have an LHS that's only part of the min_lefthand,
* that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
@@ -169,12 +173,6 @@ add_paths_to_joinrel(PlannerInfo *root,
switch (jointype)
{
case JOIN_SEMI:
- case JOIN_ANTI:
-
- /*
- * XXX it may be worth proving this to allow a Memoize to be
- * considered for Nested Loop Semi/Anti Joins.
- */
extra.inner_unique = false; /* well, unproven */
break;
case JOIN_UNIQUE_INNER:
@@ -715,16 +713,21 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
return NULL;
/*
- * Currently we don't do this for SEMI and ANTI joins unless they're
- * marked as inner_unique. This is because nested loop SEMI/ANTI joins
- * don't scan the inner node to completion, which will mean memoize cannot
- * mark the cache entry as complete.
- *
- * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
- * = true. Should we? See add_paths_to_joinrel()
+ * Currently we don't do this for SEMI and ANTI joins, because nested loop
+ * SEMI/ANTI joins don't scan the inner node to completion, which means
+ * memoize cannot mark the cache entry as complete. Nor can we mark the
+ * cache entry as complete after fetching the first inner tuple, because
+ * if that tuple and the current outer tuple don't satisfy the join
+ * clauses, a second inner tuple that satisfies the parameters would find
+ * the cache entry already marked as complete. The only exception is when
+ * the inner relation is provably unique, as in that case, there won't be
+ * a second matching tuple and we can safely mark the cache entry as
+ * complete after fetching the first inner tuple. Note that in such
+ * cases, the SEMI join should have been reduced to an inner join by
+ * reduce_unique_semijoins.
*/
- if (!extra->inner_unique && (jointype == JOIN_SEMI ||
- jointype == JOIN_ANTI))
+ if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
+ !extra->inner_unique)
return NULL;
/*
@@ -876,16 +879,13 @@ try_nestloop_path(PlannerInfo *root,
/*
* Check to see if proposed path is still parameterized, and reject if the
* parameterization wouldn't be sensible --- unless allow_star_schema_join
- * says to allow it anyway. Also, we must reject if have_dangerous_phv
- * doesn't like the look of it, which could only happen if the nestloop is
- * still parameterized.
+ * says to allow it anyway.
*/
required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
innerrelids, inner_paramrels);
if (required_outer &&
- ((!bms_overlap(required_outer, extra->param_source_rels) &&
- !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
- have_dangerous_phv(root, outerrelids, inner_paramrels)))
+ !bms_overlap(required_outer, extra->param_source_rels) &&
+ !allow_star_schema_join(root, outerrelids, inner_paramrels))
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 60d65762b5d..aad41b94009 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -565,9 +565,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
* Also, if the lateral reference is only indirect, we should reject
* the join; whatever rel(s) the reference chain goes through must be
* joined to first.
- *
- * Another case that might keep us from building a valid plan is the
- * implementation restriction described by have_dangerous_phv().
*/
lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
@@ -584,9 +581,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
/* check there is a direct reference from rel2 to rel1 */
if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
return false; /* only indirect refs, so reject */
- /* check we won't have a dangerous PHV */
- if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
- return false; /* might be unable to handle required PHV */
}
else if (lateral_rev)
{
@@ -599,9 +593,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
/* check there is a direct reference from rel1 to rel2 */
if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
return false; /* only indirect refs, so reject */
- /* check we won't have a dangerous PHV */
- if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
- return false; /* might be unable to handle required PHV */
}
/*
@@ -1279,57 +1270,6 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
/*
- * There's a pitfall for creating parameterized nestloops: suppose the inner
- * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
- * minimum eval_at set includes the outer rel (B) and some third rel (C).
- * We might think we could create a B/A nestloop join that's parameterized by
- * C. But we would end up with a plan in which the PHV's expression has to be
- * evaluated as a nestloop parameter at the B/A join; and the executor is only
- * set up to handle simple Vars as NestLoopParams. Rather than add complexity
- * and overhead to the executor for such corner cases, it seems better to
- * forbid the join. (Note that we can still make use of A's parameterized
- * path with pre-joined B+C as the outer rel. have_join_order_restriction()
- * ensures that we will consider making such a join even if there are not
- * other reasons to do so.)
- *
- * So we check whether any PHVs used in the query could pose such a hazard.
- * We don't have any simple way of checking whether a risky PHV would actually
- * be used in the inner plan, and the case is so unusual that it doesn't seem
- * worth working very hard on it.
- *
- * This needs to be checked in two places. If the inner rel's minimum
- * parameterization would trigger the restriction, then join_is_legal() should
- * reject the join altogether, because there will be no workable paths for it.
- * But joinpath.c has to check again for every proposed nestloop path, because
- * the inner path might have more than the minimum parameterization, causing
- * some PHV to be dangerous for it that otherwise wouldn't be.
- */
-bool
-have_dangerous_phv(PlannerInfo *root,
- Relids outer_relids, Relids inner_params)
-{
- ListCell *lc;
-
- foreach(lc, root->placeholder_list)
- {
- PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
-
- if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
- continue; /* ignore, could not be a nestloop param */
- if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
- continue; /* ignore, not relevant to this join */
- if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
- continue; /* safe, it can be eval'd within outerrel */
- /* Otherwise, it's potentially unsafe, so reject the join */
- return true;
- }
-
- /* OK to perform the join */
- return false;
-}
-
-
-/*
* is_dummy_rel --- has relation been proven empty?
*/
bool
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 4ad30b7627e..8a9f1d7a943 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1318,6 +1318,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
+ int presorted_keys;
/*
* Compute sort column info, and adjust subplan's tlist as needed.
@@ -1353,14 +1354,38 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- Sort *sort = make_sort(subplan, numsortkeys,
+ Plan *sort_plan;
+
+ /*
+ * We choose to use incremental sort if it is enabled and
+ * there are presorted keys; otherwise we use full sort.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ sort_plan = (Plan *)
+ make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
- label_sort_with_costsize(root, sort, best_path->limit_tuples);
- subplan = (Plan *) sort;
+ label_incrementalsort_with_costsize(root,
+ (IncrementalSort *) sort_plan,
+ pathkeys,
+ best_path->limit_tuples);
+ }
+ else
+ {
+ sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, (Sort *) sort_plan,
+ best_path->limit_tuples);
+ }
+
+ subplan = sort_plan;
}
}
@@ -1491,6 +1516,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
+ int presorted_keys;
/* Build the child plan */
/* Must insist that all children return the same tlist */
@@ -1525,14 +1551,38 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- Sort *sort = make_sort(subplan, numsortkeys,
+ Plan *sort_plan;
+
+ /*
+ * We choose to use incremental sort if it is enabled and there
+ * are presorted keys; otherwise we use full sort.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ sort_plan = (Plan *)
+ make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
- label_sort_with_costsize(root, sort, best_path->limit_tuples);
- subplan = (Plan *) sort;
+ label_incrementalsort_with_costsize(root,
+ (IncrementalSort *) sort_plan,
+ pathkeys,
+ best_path->limit_tuples);
+ }
+ else
+ {
+ sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, (Sort *) sort_plan,
+ best_path->limit_tuples);
+ }
+
+ subplan = sort_plan;
}
subplans = lappend(subplans, subplan);
@@ -4344,13 +4394,16 @@ create_nestloop_plan(PlannerInfo *root,
NestLoop *join_plan;
Plan *outer_plan;
Plan *inner_plan;
+ Relids outerrelids;
List *tlist = build_path_tlist(root, &best_path->jpath.path);
List *joinrestrictclauses = best_path->jpath.joinrestrictinfo;
List *joinclauses;
List *otherclauses;
- Relids outerrelids;
List *nestParams;
+ List *outer_tlist;
+ bool outer_parallel_safe;
Relids saveOuterRels = root->curOuterRels;
+ ListCell *lc;
/*
* If the inner path is parameterized by the topmost parent of the outer
@@ -4372,8 +4425,8 @@ create_nestloop_plan(PlannerInfo *root,
outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0);
/* For a nestloop, include outer relids in curOuterRels for inner side */
- root->curOuterRels = bms_union(root->curOuterRels,
- best_path->jpath.outerjoinpath->parent->relids);
+ outerrelids = best_path->jpath.outerjoinpath->parent->relids;
+ root->curOuterRels = bms_union(root->curOuterRels, outerrelids);
inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, 0);
@@ -4412,9 +4465,66 @@ create_nestloop_plan(PlannerInfo *root,
* Identify any nestloop parameters that should be supplied by this join
* node, and remove them from root->curOuterParams.
*/
- outerrelids = best_path->jpath.outerjoinpath->parent->relids;
- nestParams = identify_current_nestloop_params(root, outerrelids);
+ nestParams = identify_current_nestloop_params(root,
+ outerrelids,
+ PATH_REQ_OUTER((Path *) best_path));
+
+ /*
+ * While nestloop parameters that are Vars had better be available from
+ * the outer_plan already, there are edge cases where nestloop parameters
+ * that are PHVs won't be. In such cases we must add them to the
+ * outer_plan's tlist, since the executor's NestLoopParam machinery
+ * requires the params to be simple outer-Var references to that tlist.
+ * (This is cheating a little bit, because the outer path's required-outer
+ * relids might not be enough to allow evaluating such a PHV. But in
+ * practice, if we could have evaluated the PHV at the nestloop node, we
+ * can do so in the outer plan too.)
+ */
+ outer_tlist = outer_plan->targetlist;
+ outer_parallel_safe = outer_plan->parallel_safe;
+ foreach(lc, nestParams)
+ {
+ NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
+ PlaceHolderVar *phv;
+ TargetEntry *tle;
+
+ if (IsA(nlp->paramval, Var))
+ continue; /* nothing to do for simple Vars */
+ /* Otherwise it must be a PHV */
+ phv = castNode(PlaceHolderVar, nlp->paramval);
+
+ if (tlist_member((Expr *) phv, outer_tlist))
+ continue; /* already available */
+
+ /*
+ * It's possible that nestloop parameter PHVs selected to evaluate
+ * here contain references to surviving root->curOuterParams items
+ * (that is, they reference values that will be supplied by some
+ * higher-level nestloop). Those need to be converted to Params now.
+ * Note: it's safe to do this after the tlist_member() check, because
+ * equal() won't pay attention to phv->phexpr.
+ */
+ phv->phexpr = (Expr *) replace_nestloop_params(root,
+ (Node *) phv->phexpr);
+
+ /* Make a shallow copy of outer_tlist, if we didn't already */
+ if (outer_tlist == outer_plan->targetlist)
+ outer_tlist = list_copy(outer_tlist);
+ /* ... and add the needed expression */
+ tle = makeTargetEntry((Expr *) copyObject(phv),
+ list_length(outer_tlist) + 1,
+ NULL,
+ true);
+ outer_tlist = lappend(outer_tlist, tle);
+ /* ... and track whether tlist is (still) parallel-safe */
+ if (outer_parallel_safe)
+ outer_parallel_safe = is_parallel_safe(root, (Node *) phv);
+ }
+ if (outer_tlist != outer_plan->targetlist)
+ outer_plan = change_plan_targetlist(outer_plan, outer_tlist,
+ outer_parallel_safe);
+ /* And finally, we can build the join plan node */
join_plan = make_nestloop(tlist,
joinclauses,
otherclauses,
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 26a3e050086..f45131c34c5 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -3333,6 +3333,13 @@ eval_const_expressions_mutator(Node *node,
-1,
coalesceexpr->coalescecollid);
+ /*
+ * If there's exactly one surviving argument, we no longer
+ * need COALESCE at all: the result is that argument
+ */
+ if (list_length(newargs) == 1)
+ return (Node *) linitial(newargs);
+
newcoalesce = makeNode(CoalesceExpr);
newcoalesce->coalescetype = coalesceexpr->coalescetype;
newcoalesce->coalescecollid = coalesceexpr->coalescecollid;
diff --git a/src/backend/optimizer/util/paramassign.c b/src/backend/optimizer/util/paramassign.c
index 3bd3ce37c8f..4c13c5931b4 100644
--- a/src/backend/optimizer/util/paramassign.c
+++ b/src/backend/optimizer/util/paramassign.c
@@ -599,38 +599,46 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params)
}
/*
- * Identify any NestLoopParams that should be supplied by a NestLoop plan
- * node with the specified lefthand rels. Remove them from the active
- * root->curOuterParams list and return them as the result list.
+ * Identify any NestLoopParams that should be supplied by a NestLoop
+ * plan node with the specified lefthand rels and required-outer rels.
+ * Remove them from the active root->curOuterParams list and return
+ * them as the result list.
*
- * XXX Here we also hack up the returned Vars and PHVs so that they do not
- * contain nullingrel sets exceeding what is available from the outer side.
- * This is needed if we have applied outer join identity 3,
- * (A leftjoin B on (Pab)) leftjoin C on (Pb*c)
- * = A leftjoin (B leftjoin C on (Pbc)) on (Pab)
- * and C contains lateral references to B. It's still safe to apply the
- * identity, but the parser will have created those references in the form
- * "b*" (i.e., with varnullingrels listing the A/B join), while what we will
- * have available from the nestloop's outer side is just "b". We deal with
- * that here by stripping the nullingrels down to what is available from the
- * outer side according to leftrelids.
- *
- * That fixes matters for the case of forward application of identity 3.
- * If the identity was applied in the reverse direction, we will have
- * parameter Vars containing too few nullingrel bits rather than too many.
- * Currently, that causes no problems because setrefs.c applies only a
- * subset check to nullingrels in NestLoopParams, but we'd have to work
- * harder if we ever want to tighten that check. This is all pretty annoying
- * because it greatly weakens setrefs.c's cross-check, but the alternative
+ * Vars and PHVs appearing in the result list must have nullingrel sets
+ * that could validly appear in the lefthand rel's output. Ordinarily that
+ * would be true already, but if we have applied outer join identity 3,
+ * there could be more or fewer nullingrel bits in the nodes appearing in
+ * curOuterParams than are in the nominal leftrelids. We deal with that by
+ * forcing their nullingrel sets to include exactly the outer-join relids
+ * that appear in leftrelids and can null the respective Var or PHV.
+ * This fix is a bit ad-hoc and intellectually unsatisfactory, because it's
+ * essentially jumping to the conclusion that we've placed evaluation of
+ * the nestloop parameters correctly, and thus it defeats the intent of the
+ * subsequent nullingrel cross-checks in setrefs.c. But the alternative
* seems to be to generate multiple versions of each laterally-parameterized
* subquery, which'd be unduly expensive.
*/
List *
-identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
+identify_current_nestloop_params(PlannerInfo *root,
+ Relids leftrelids,
+ Relids outerrelids)
{
List *result;
+ Relids allleftrelids;
ListCell *cell;
+ /*
+ * We'll be able to evaluate a PHV in the lefthand path if it uses the
+ * lefthand rels plus any available required-outer rels. But don't do so
+ * if it uses *only* required-outer rels; in that case it should be
+ * evaluated higher in the tree. For Vars, no such hair-splitting is
+ * necessary since they depend on only one relid.
+ */
+ if (outerrelids)
+ allleftrelids = bms_union(leftrelids, outerrelids);
+ else
+ allleftrelids = leftrelids;
+
result = NIL;
foreach(cell, root->curOuterParams)
{
@@ -646,25 +654,60 @@ identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
bms_is_member(nlp->paramval->varno, leftrelids))
{
Var *var = (Var *) nlp->paramval;
+ RelOptInfo *rel = root->simple_rel_array[var->varno];
root->curOuterParams = foreach_delete_current(root->curOuterParams,
cell);
- var->varnullingrels = bms_intersect(var->varnullingrels,
+ var->varnullingrels = bms_intersect(rel->nulling_relids,
leftrelids);
result = lappend(result, nlp);
}
- else if (IsA(nlp->paramval, PlaceHolderVar) &&
- bms_is_subset(find_placeholder_info(root,
- (PlaceHolderVar *) nlp->paramval)->ph_eval_at,
- leftrelids))
+ else if (IsA(nlp->paramval, PlaceHolderVar))
{
PlaceHolderVar *phv = (PlaceHolderVar *) nlp->paramval;
+ PlaceHolderInfo *phinfo = find_placeholder_info(root, phv);
+ Relids eval_at = phinfo->ph_eval_at;
- root->curOuterParams = foreach_delete_current(root->curOuterParams,
- cell);
- phv->phnullingrels = bms_intersect(phv->phnullingrels,
- leftrelids);
- result = lappend(result, nlp);
+ if (bms_is_subset(eval_at, allleftrelids) &&
+ bms_overlap(eval_at, leftrelids))
+ {
+ root->curOuterParams = foreach_delete_current(root->curOuterParams,
+ cell);
+
+ /*
+ * Deal with an edge case: if the PHV was pulled up out of a
+ * subquery and it contains a subquery that was originally
+ * pushed down from this query level, then that will still be
+ * represented as a SubLink, because SS_process_sublinks won't
+ * recurse into outer PHVs, so it didn't get transformed
+ * during expression preprocessing in the subquery. We need a
+ * version of the PHV that has a SubPlan, which we can get
+ * from the current query level's placeholder_list. This is
+ * quite grotty of course, but dealing with it earlier in the
+ * handling of subplan params would be just as grotty, and it
+ * might end up being a waste of cycles if we don't decide to
+ * treat the PHV as a NestLoopParam. (Perhaps that whole
+ * mechanism should be redesigned someday, but today is not
+ * that day.)
+ */
+ if (root->parse->hasSubLinks)
+ {
+ phv = copyObject(phinfo->ph_var);
+
+ /*
+ * The ph_var will have empty nullingrels, but that
+ * doesn't matter since we're about to overwrite
+ * phv->phnullingrels. Other fields should be OK already.
+ */
+ nlp->paramval = (Var *) phv;
+ }
+
+ phv->phnullingrels =
+ bms_intersect(get_placeholder_nulling_relids(root, phinfo),
+ leftrelids);
+
+ result = lappend(result, nlp);
+ }
}
}
return result;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e0192d4a491..9cc602788ea 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1404,12 +1404,12 @@ create_append_path(PlannerInfo *root,
pathnode->path.total_cost = child->total_cost;
}
else
- cost_append(pathnode);
+ cost_append(pathnode, root);
/* Must do this last, else cost_append complains */
pathnode->path.pathkeys = child->pathkeys;
}
else
- cost_append(pathnode);
+ cost_append(pathnode, root);
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)
@@ -1515,6 +1515,9 @@ create_merge_append_path(PlannerInfo *root,
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
+ int presorted_keys;
+ Path sort_path; /* dummy for result of
+ * cost_sort/cost_incremental_sort */
/* All child paths should be unparameterized */
Assert(bms_is_empty(PATH_REQ_OUTER(subpath)));
@@ -1523,32 +1526,52 @@ create_merge_append_path(PlannerInfo *root,
pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
subpath->parallel_safe;
- if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- /* Subpath is adequately ordered, we won't need to sort it */
- input_disabled_nodes += subpath->disabled_nodes;
- input_startup_cost += subpath->startup_cost;
- input_total_cost += subpath->total_cost;
- }
- else
- {
- /* We'll need to insert a Sort node, so include cost for that */
- Path sort_path; /* dummy for result of cost_sort */
+ /*
+ * We'll need to insert a Sort node, so include costs for that. We
+ * choose to use incremental sort if it is enabled and there are
+ * presorted keys; otherwise we use full sort.
+ *
+ * We can use the parent's LIMIT if any, since we certainly won't
+ * pull more than that many tuples from any child.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ cost_incremental_sort(&sort_path,
+ root,
+ pathkeys,
+ presorted_keys,
+ subpath->disabled_nodes,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ pathnode->limit_tuples);
+ }
+ else
+ {
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ subpath->disabled_nodes,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ pathnode->limit_tuples);
+ }
- cost_sort(&sort_path,
- root,
- pathkeys,
- subpath->disabled_nodes,
- subpath->total_cost,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- pathnode->limit_tuples);
- input_disabled_nodes += sort_path.disabled_nodes;
- input_startup_cost += sort_path.startup_cost;
- input_total_cost += sort_path.total_cost;
+ subpath = &sort_path;
}
+
+ input_disabled_nodes += subpath->disabled_nodes;
+ input_startup_cost += subpath->startup_cost;
+ input_total_cost += subpath->total_cost;
}
/*
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index 41a4c81e94a..e1cd00a72fb 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -545,3 +545,43 @@ contain_placeholder_references_walker(Node *node,
return expression_tree_walker(node, contain_placeholder_references_walker,
context);
}
+
+/*
+ * Compute the set of outer-join relids that can null a placeholder.
+ *
+ * This is analogous to RelOptInfo.nulling_relids for Vars, but we compute it
+ * on-the-fly rather than saving it somewhere. Currently the value is needed
+ * at most once per query, so there's little value in doing otherwise. If it
+ * ever gains more widespread use, perhaps we should cache the result in
+ * PlaceHolderInfo.
+ */
+Relids
+get_placeholder_nulling_relids(PlannerInfo *root, PlaceHolderInfo *phinfo)
+{
+ Relids result = NULL;
+ int relid = -1;
+
+ /*
+ * Form the union of all potential nulling OJs for each baserel included
+ * in ph_eval_at.
+ */
+ while ((relid = bms_next_member(phinfo->ph_eval_at, relid)) > 0)
+ {
+ RelOptInfo *rel = root->simple_rel_array[relid];
+
+ /* ignore the RTE_GROUP RTE */
+ if (relid == root->group_rtindex)
+ continue;
+
+ if (rel == NULL) /* must be an outer join */
+ {
+ Assert(bms_is_member(relid, root->outer_join_rels));
+ continue;
+ }
+ result = bms_add_members(result, rel->nulling_relids);
+ }
+
+ /* Now remove any OJs already included in ph_eval_at, and we're done. */
+ result = bms_del_members(result, phinfo->ph_eval_at);
+ return result;
+}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 50f53159d58..73345bb3c70 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -2034,6 +2034,13 @@ CheckPointStmt:
$$ = (Node *) n;
}
+ | CHECKPOINT '(' utility_option_list ')'
+ {
+ CheckPointStmt *n = makeNode(CheckPointStmt);
+
+ $$ = (Node *) n;
+ n->options = $3;
+ }
;
@@ -2668,6 +2675,12 @@ alter_table_cmd:
c->alterDeferrability = true;
if ($4 & CAS_NO_INHERIT)
c->alterInheritability = true;
+ /* handle unsupported case with specific error message */
+ if ($4 & CAS_NOT_VALID)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constraints cannot be altered to be NOT VALID"),
+ parser_errposition(@4));
processCASbits($4, @4, "FOREIGN KEY",
&c->deferrable,
&c->initdeferred,
@@ -6035,6 +6048,26 @@ CreateTrigStmt:
EXECUTE FUNCTION_or_PROCEDURE func_name '(' TriggerFuncArgs ')'
{
CreateTrigStmt *n = makeNode(CreateTrigStmt);
+ bool dummy;
+
+ if (($11 & CAS_NOT_VALID) != 0)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constraint triggers cannot be marked %s",
+ "NOT VALID"),
+ parser_errposition(@11));
+ if (($11 & CAS_NO_INHERIT) != 0)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constraint triggers cannot be marked %s",
+ "NO INHERIT"),
+ parser_errposition(@11));
+ if (($11 & CAS_NOT_ENFORCED) != 0)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constraint triggers cannot be marked %s",
+ "NOT ENFORCED"),
+ parser_errposition(@11));
n->replace = $2;
if (n->replace) /* not supported, see CreateTrigger */
@@ -6054,7 +6087,7 @@ CreateTrigStmt:
n->whenClause = $15;
n->transitionRels = NIL;
processCASbits($11, @11, "TRIGGER",
- &n->deferrable, &n->initdeferred, NULL,
+ &n->deferrable, &n->initdeferred, &dummy,
NULL, NULL, yyscanner);
n->constrrel = $10;
$$ = (Node *) n;
@@ -7477,6 +7510,8 @@ fetch_args: cursor_name
n->portalname = $1;
n->direction = FETCH_FORWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_NONE;
$$ = (Node *) n;
}
| from_in cursor_name
@@ -7486,6 +7521,19 @@ fetch_args: cursor_name
n->portalname = $2;
n->direction = FETCH_FORWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_NONE;
+ $$ = (Node *) n;
+ }
+ | SignedIconst opt_from_in cursor_name
+ {
+ FetchStmt *n = makeNode(FetchStmt);
+
+ n->portalname = $3;
+ n->direction = FETCH_FORWARD;
+ n->howMany = $1;
+ n->location = @1;
+ n->direction_keyword = FETCH_KEYWORD_NONE;
$$ = (Node *) n;
}
| NEXT opt_from_in cursor_name
@@ -7495,6 +7543,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_FORWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_NEXT;
$$ = (Node *) n;
}
| PRIOR opt_from_in cursor_name
@@ -7504,6 +7554,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_BACKWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_PRIOR;
$$ = (Node *) n;
}
| FIRST_P opt_from_in cursor_name
@@ -7513,6 +7565,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_ABSOLUTE;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_FIRST;
$$ = (Node *) n;
}
| LAST_P opt_from_in cursor_name
@@ -7522,6 +7576,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_ABSOLUTE;
n->howMany = -1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_LAST;
$$ = (Node *) n;
}
| ABSOLUTE_P SignedIconst opt_from_in cursor_name
@@ -7531,6 +7587,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_ABSOLUTE;
n->howMany = $2;
+ n->location = @2;
+ n->direction_keyword = FETCH_KEYWORD_ABSOLUTE;
$$ = (Node *) n;
}
| RELATIVE_P SignedIconst opt_from_in cursor_name
@@ -7540,15 +7598,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_RELATIVE;
n->howMany = $2;
- $$ = (Node *) n;
- }
- | SignedIconst opt_from_in cursor_name
- {
- FetchStmt *n = makeNode(FetchStmt);
-
- n->portalname = $3;
- n->direction = FETCH_FORWARD;
- n->howMany = $1;
+ n->location = @2;
+ n->direction_keyword = FETCH_KEYWORD_RELATIVE;
$$ = (Node *) n;
}
| ALL opt_from_in cursor_name
@@ -7558,6 +7609,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_FORWARD;
n->howMany = FETCH_ALL;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_ALL;
$$ = (Node *) n;
}
| FORWARD opt_from_in cursor_name
@@ -7567,6 +7620,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_FORWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_FORWARD;
$$ = (Node *) n;
}
| FORWARD SignedIconst opt_from_in cursor_name
@@ -7576,6 +7631,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_FORWARD;
n->howMany = $2;
+ n->location = @2;
+ n->direction_keyword = FETCH_KEYWORD_FORWARD;
$$ = (Node *) n;
}
| FORWARD ALL opt_from_in cursor_name
@@ -7585,6 +7642,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_FORWARD;
n->howMany = FETCH_ALL;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_FORWARD_ALL;
$$ = (Node *) n;
}
| BACKWARD opt_from_in cursor_name
@@ -7594,6 +7653,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_BACKWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_BACKWARD;
$$ = (Node *) n;
}
| BACKWARD SignedIconst opt_from_in cursor_name
@@ -7603,6 +7664,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_BACKWARD;
n->howMany = $2;
+ n->location = @2;
+ n->direction_keyword = FETCH_KEYWORD_BACKWARD;
$$ = (Node *) n;
}
| BACKWARD ALL opt_from_in cursor_name
@@ -7612,6 +7675,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_BACKWARD;
n->howMany = FETCH_ALL;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_BACKWARD_ALL;
$$ = (Node *) n;
}
;
@@ -11627,7 +11692,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'T';
+ n->subtype = AD_AlterDefault;
n->typeName = $3;
n->def = $4;
$$ = (Node *) n;
@@ -11637,7 +11702,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'N';
+ n->subtype = AD_DropNotNull;
n->typeName = $3;
$$ = (Node *) n;
}
@@ -11646,7 +11711,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'O';
+ n->subtype = AD_SetNotNull;
n->typeName = $3;
$$ = (Node *) n;
}
@@ -11655,7 +11720,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'C';
+ n->subtype = AD_AddConstraint;
n->typeName = $3;
n->def = $5;
$$ = (Node *) n;
@@ -11665,7 +11730,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'X';
+ n->subtype = AD_DropConstraint;
n->typeName = $3;
n->name = $6;
n->behavior = $7;
@@ -11677,7 +11742,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'X';
+ n->subtype = AD_DropConstraint;
n->typeName = $3;
n->name = $8;
n->behavior = $9;
@@ -11689,7 +11754,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'V';
+ n->subtype = AD_ValidateConstraint;
n->typeName = $3;
n->name = $6;
$$ = (Node *) n;
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 62015431fdf..afcf54169c3 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -1279,6 +1279,28 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla
lst = RelationGetNotNullConstraints(RelationGetRelid(relation), false,
true);
cxt->nnconstraints = list_concat(cxt->nnconstraints, lst);
+
+ /* Copy comments on not-null constraints */
+ if (table_like_clause->options & CREATE_TABLE_LIKE_COMMENTS)
+ {
+ foreach_node(Constraint, nnconstr, lst)
+ {
+ if ((comment = GetComment(get_relation_constraint_oid(RelationGetRelid(relation),
+ nnconstr->conname, false),
+ ConstraintRelationId,
+ 0)) != NULL)
+ {
+ CommentStmt *stmt = makeNode(CommentStmt);
+
+ stmt->objtype = OBJECT_TABCONSTRAINT;
+ stmt->object = (Node *) list_make3(makeString(cxt->relation->schemaname),
+ makeString(cxt->relation->relname),
+ makeString(nnconstr->conname));
+ stmt->comment = comment;
+ cxt->alist = lappend(cxt->alist, stmt);
+ }
+ }
+ }
}
/*
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 451fb90a610..9474095f271 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -3190,7 +3190,7 @@ autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy)
rel_list = list_make1(rel);
MemoryContextSwitchTo(old_context);
- vacuum(rel_list, &tab->at_params, bstrategy, vac_context, true);
+ vacuum(rel_list, tab->at_params, bstrategy, vac_context, true);
MemoryContextDelete(vac_context);
}
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index fda91ffd1ce..2809e298a44 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -42,6 +42,8 @@
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xlogrecovery.h"
+#include "catalog/pg_authid.h"
+#include "commands/defrem.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
@@ -61,6 +63,7 @@
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "storage/spin.h"
+#include "utils/acl.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/resowner.h"
@@ -161,7 +164,7 @@ static pg_time_t last_xlog_switch_time;
static void ProcessCheckpointerInterrupts(void);
static void CheckArchiveTimeout(void);
static bool IsCheckpointOnSchedule(double progress);
-static bool ImmediateCheckpointRequested(void);
+static bool FastCheckpointRequested(void);
static bool CompactCheckpointerRequestQueue(void);
static void UpdateSharedMemoryConfig(void);
@@ -734,12 +737,12 @@ CheckArchiveTimeout(void)
}
/*
- * Returns true if an immediate checkpoint request is pending. (Note that
- * this does not check the *current* checkpoint's IMMEDIATE flag, but whether
- * there is one pending behind it.)
+ * Returns true if a fast checkpoint request is pending. (Note that this does
+ * not check the *current* checkpoint's FAST flag, but whether there is one
+ * pending behind it.)
*/
static bool
-ImmediateCheckpointRequested(void)
+FastCheckpointRequested(void)
{
volatile CheckpointerShmemStruct *cps = CheckpointerShmem;
@@ -747,7 +750,7 @@ ImmediateCheckpointRequested(void)
* We don't need to acquire the ckpt_lck in this case because we're only
* looking at a single flag bit.
*/
- if (cps->ckpt_flags & CHECKPOINT_IMMEDIATE)
+ if (cps->ckpt_flags & CHECKPOINT_FAST)
return true;
return false;
}
@@ -760,7 +763,7 @@ ImmediateCheckpointRequested(void)
* checkpoint_completion_target.
*
* The checkpoint request flags should be passed in; currently the only one
- * examined is CHECKPOINT_IMMEDIATE, which disables delays between writes.
+ * examined is CHECKPOINT_FAST, which disables delays between writes.
*
* 'progress' is an estimate of how much of the work has been done, as a
* fraction between 0.0 meaning none, and 1.0 meaning all done.
@@ -778,10 +781,10 @@ CheckpointWriteDelay(int flags, double progress)
* Perform the usual duties and take a nap, unless we're behind schedule,
* in which case we just try to catch up as quickly as possible.
*/
- if (!(flags & CHECKPOINT_IMMEDIATE) &&
+ if (!(flags & CHECKPOINT_FAST) &&
!ShutdownXLOGPending &&
!ShutdownRequestPending &&
- !ImmediateCheckpointRequested() &&
+ !FastCheckpointRequested() &&
IsCheckpointOnSchedule(progress))
{
if (ConfigReloadPending)
@@ -977,17 +980,67 @@ CheckpointerShmemInit(void)
}
/*
+ * ExecCheckpoint
+ * Primary entry point for manual CHECKPOINT commands
+ *
+ * This is mainly a wrapper for RequestCheckpoint().
+ */
+void
+ExecCheckpoint(ParseState *pstate, CheckPointStmt *stmt)
+{
+ bool fast = true;
+ bool unlogged = false;
+
+ foreach_ptr(DefElem, opt, stmt->options)
+ {
+ if (strcmp(opt->defname, "mode") == 0)
+ {
+ char *mode = defGetString(opt);
+
+ if (strcmp(mode, "spread") == 0)
+ fast = false;
+ else if (strcmp(mode, "fast") != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized MODE option \"%s\"", mode),
+ parser_errposition(pstate, opt->location)));
+ }
+ else if (strcmp(opt->defname, "flush_unlogged") == 0)
+ unlogged = defGetBoolean(opt);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized CHECKPOINT option \"%s\"", opt->defname),
+ parser_errposition(pstate, opt->location)));
+ }
+
+ if (!has_privs_of_role(GetUserId(), ROLE_PG_CHECKPOINT))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ /* translator: %s is name of an SQL command (e.g., CHECKPOINT) */
+ errmsg("permission denied to execute %s command",
+ "CHECKPOINT"),
+ errdetail("Only roles with privileges of the \"%s\" role may execute this command.",
+ "pg_checkpoint")));
+
+ RequestCheckpoint(CHECKPOINT_WAIT |
+ (fast ? CHECKPOINT_FAST : 0) |
+ (unlogged ? CHECKPOINT_FLUSH_UNLOGGED : 0) |
+ (RecoveryInProgress() ? 0 : CHECKPOINT_FORCE));
+}
+
+/*
* RequestCheckpoint
* Called in backend processes to request a checkpoint
*
* flags is a bitwise OR of the following:
* CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
* CHECKPOINT_END_OF_RECOVERY: checkpoint is for end of WAL recovery.
- * CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
+ * CHECKPOINT_FAST: finish the checkpoint ASAP,
* ignoring checkpoint_completion_target parameter.
* CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occurred
* since the last one (implied by CHECKPOINT_IS_SHUTDOWN or
- * CHECKPOINT_END_OF_RECOVERY).
+ * CHECKPOINT_END_OF_RECOVERY, and the CHECKPOINT command).
* CHECKPOINT_WAIT: wait for completion before returning (otherwise,
* just signal checkpointer to do it, and return).
* CHECKPOINT_CAUSE_XLOG: checkpoint is requested due to xlog filling.
@@ -1009,7 +1062,7 @@ RequestCheckpoint(int flags)
* There's no point in doing slow checkpoints in a standalone backend,
* because there's no other backends the checkpoint could disrupt.
*/
- CreateCheckPoint(flags | CHECKPOINT_IMMEDIATE);
+ CreateCheckPoint(flags | CHECKPOINT_FAST);
/* Free all smgr objects, as CheckpointerMain() normally would. */
smgrdestroyall();
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 7e622ae4bd2..78e39e5f866 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -718,15 +718,15 @@ pgarch_readyXlog(char *xlog)
/*
* Store the file in our max-heap if it has a high enough priority.
*/
- if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN)
+ if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN)
{
/* If the heap isn't full yet, quickly add it. */
- arch_file = arch_files->arch_filenames[arch_files->arch_heap->bh_size];
+ arch_file = arch_files->arch_filenames[binaryheap_size(arch_files->arch_heap)];
strcpy(arch_file, basename);
binaryheap_add_unordered(arch_files->arch_heap, CStringGetDatum(arch_file));
/* If we just filled the heap, make it a valid one. */
- if (arch_files->arch_heap->bh_size == NUM_FILES_PER_DIRECTORY_SCAN)
+ if (binaryheap_size(arch_files->arch_heap) == NUM_FILES_PER_DIRECTORY_SCAN)
binaryheap_build(arch_files->arch_heap);
}
else if (ready_file_comparator(binaryheap_first(arch_files->arch_heap),
@@ -744,21 +744,21 @@ pgarch_readyXlog(char *xlog)
FreeDir(rldir);
/* If no files were found, simply return. */
- if (arch_files->arch_heap->bh_size == 0)
+ if (binaryheap_empty(arch_files->arch_heap))
return false;
/*
* If we didn't fill the heap, we didn't make it a valid one. Do that
* now.
*/
- if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN)
+ if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN)
binaryheap_build(arch_files->arch_heap);
/*
* Fill arch_files array with the files to archive in ascending order of
* priority.
*/
- arch_files->arch_files_size = arch_files->arch_heap->bh_size;
+ arch_files->arch_files_size = binaryheap_size(arch_files->arch_heap);
for (int i = 0; i < arch_files->arch_files_size; i++)
arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 490f7ce3664..cca9b946e53 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -4337,15 +4337,15 @@ maybe_start_bgworkers(void)
static bool
maybe_reap_io_worker(int pid)
{
- for (int id = 0; id < MAX_IO_WORKERS; ++id)
+ for (int i = 0; i < MAX_IO_WORKERS; ++i)
{
- if (io_worker_children[id] &&
- io_worker_children[id]->pid == pid)
+ if (io_worker_children[i] &&
+ io_worker_children[i]->pid == pid)
{
- ReleasePostmasterChildSlot(io_worker_children[id]);
+ ReleasePostmasterChildSlot(io_worker_children[i]);
--io_worker_count;
- io_worker_children[id] = NULL;
+ io_worker_children[i] = NULL;
return true;
}
}
@@ -4389,22 +4389,22 @@ maybe_adjust_io_workers(void)
while (io_worker_count < io_workers)
{
PMChild *child;
- int id;
+ int i;
/* find unused entry in io_worker_children array */
- for (id = 0; id < MAX_IO_WORKERS; ++id)
+ for (i = 0; i < MAX_IO_WORKERS; ++i)
{
- if (io_worker_children[id] == NULL)
+ if (io_worker_children[i] == NULL)
break;
}
- if (id == MAX_IO_WORKERS)
- elog(ERROR, "could not find a free IO worker ID");
+ if (i == MAX_IO_WORKERS)
+ elog(ERROR, "could not find a free IO worker slot");
/* Try to launch one. */
child = StartChildProcess(B_IO_WORKER);
if (child != NULL)
{
- io_worker_children[id] = child;
+ io_worker_children[i] = child;
++io_worker_count;
}
else
@@ -4415,11 +4415,11 @@ maybe_adjust_io_workers(void)
if (io_worker_count > io_workers)
{
/* ask the IO worker in the highest slot to exit */
- for (int id = MAX_IO_WORKERS - 1; id >= 0; --id)
+ for (int i = MAX_IO_WORKERS - 1; i >= 0; --i)
{
- if (io_worker_children[id] != NULL)
+ if (io_worker_children[i] != NULL)
{
- kill(io_worker_children[id]->pid, SIGUSR2);
+ kill(io_worker_children[i]->pid, SIGUSR2);
break;
}
}
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index 0fec4f1f871..777c9a8d555 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -385,7 +385,7 @@ WalSummarizerMain(const void *startup_data, size_t startup_data_len)
switch_lsn = tliSwitchPoint(current_tli, tles, &switch_tli);
ereport(DEBUG1,
- errmsg_internal("switch point from TLI %u to TLI %u is at %X/%X",
+ errmsg_internal("switch point from TLI %u to TLI %u is at %X/%08X",
current_tli, switch_tli, LSN_FORMAT_ARGS(switch_lsn)));
}
@@ -741,7 +741,7 @@ WaitForWalSummarization(XLogRecPtr lsn)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("WAL summarization is not progressing"),
- errdetail("Summarization is needed through %X/%X, but is stuck at %X/%X on disk and %X/%X in memory.",
+ errdetail("Summarization is needed through %X/%08X, but is stuck at %X/%08X on disk and %X/%08X in memory.",
LSN_FORMAT_ARGS(lsn),
LSN_FORMAT_ARGS(summarized_lsn),
LSN_FORMAT_ARGS(pending_lsn))));
@@ -755,12 +755,12 @@ WaitForWalSummarization(XLogRecPtr lsn)
current_time) / 1000;
ereport(WARNING,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg_plural("still waiting for WAL summarization through %X/%X after %ld second",
- "still waiting for WAL summarization through %X/%X after %ld seconds",
+ errmsg_plural("still waiting for WAL summarization through %X/%08X after %ld second",
+ "still waiting for WAL summarization through %X/%08X after %ld seconds",
elapsed_seconds,
LSN_FORMAT_ARGS(lsn),
elapsed_seconds),
- errdetail("Summarization has reached %X/%X on disk and %X/%X in memory.",
+ errdetail("Summarization has reached %X/%08X on disk and %X/%08X in memory.",
LSN_FORMAT_ARGS(summarized_lsn),
LSN_FORMAT_ARGS(pending_lsn))));
}
@@ -981,7 +981,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
if (private_data->end_of_wal)
{
ereport(DEBUG1,
- errmsg_internal("could not read WAL from timeline %u at %X/%X: end of WAL at %X/%X",
+ errmsg_internal("could not read WAL from timeline %u at %X/%08X: end of WAL at %X/%08X",
tli,
LSN_FORMAT_ARGS(start_lsn),
LSN_FORMAT_ARGS(private_data->read_upto)));
@@ -1000,8 +1000,8 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
}
else
ereport(ERROR,
- (errmsg("could not find a valid record after %X/%X",
- LSN_FORMAT_ARGS(start_lsn))));
+ errmsg("could not find a valid record after %X/%08X",
+ LSN_FORMAT_ARGS(start_lsn)));
}
/* We shouldn't go backward. */
@@ -1034,7 +1034,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
* able to read a complete record.
*/
ereport(DEBUG1,
- errmsg_internal("could not read WAL from timeline %u at %X/%X: end of WAL at %X/%X",
+ errmsg_internal("could not read WAL from timeline %u at %X/%08X: end of WAL at %X/%08X",
tli,
LSN_FORMAT_ARGS(xlogreader->EndRecPtr),
LSN_FORMAT_ARGS(private_data->read_upto)));
@@ -1045,13 +1045,13 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
if (errormsg)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not read WAL from timeline %u at %X/%X: %s",
+ errmsg("could not read WAL from timeline %u at %X/%08X: %s",
tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr),
errormsg)));
else
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not read WAL from timeline %u at %X/%X",
+ errmsg("could not read WAL from timeline %u at %X/%08X",
tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr))));
}
@@ -1222,7 +1222,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
/* Tell the user what we did. */
ereport(DEBUG1,
- errmsg_internal("summarized WAL on TLI %u from %X/%X to %X/%X",
+ errmsg_internal("summarized WAL on TLI %u from %X/%08X to %X/%08X",
tli,
LSN_FORMAT_ARGS(summary_start_lsn),
LSN_FORMAT_ARGS(summary_end_lsn)));
@@ -1234,7 +1234,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
/* If we skipped a non-zero amount of WAL, log a debug message. */
if (summary_end_lsn > summary_start_lsn && fast_forward)
ereport(DEBUG1,
- errmsg_internal("skipped summarizing WAL on TLI %u from %X/%X to %X/%X",
+ errmsg_internal("skipped summarizing WAL on TLI %u from %X/%08X to %X/%08X",
tli,
LSN_FORMAT_ARGS(summary_start_lsn),
LSN_FORMAT_ARGS(summary_end_lsn)));
@@ -1580,7 +1580,7 @@ summarizer_read_local_xlog_page(XLogReaderState *state,
/* Debugging output. */
ereport(DEBUG1,
- errmsg_internal("timeline %u became historic, can read up to %X/%X",
+ errmsg_internal("timeline %u became historic, can read up to %X/%08X",
private_data->tli, LSN_FORMAT_ARGS(private_data->read_upto)));
}
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 78193cfb964..d9eab5357bc 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -20,58 +20,13 @@
#include "common/unicode_category.h"
#include "utils/pg_locale.h"
-/*
- * For the libc provider, to provide as much functionality as possible on a
- * variety of platforms without going so far as to implement everything from
- * scratch, we use several implementation strategies depending on the
- * situation:
- *
- * 1. In C/POSIX collations, we use hard-wired code. We can't depend on
- * the <ctype.h> functions since those will obey LC_CTYPE. Note that these
- * collations don't give a fig about multibyte characters.
- *
- * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
- * This assumes that every platform uses Unicode codepoints directly
- * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
- * even for non-UTF8 encodings, which may be a problem.) On some platforms
- * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
- *
- * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
- * values up to 255, and punt for values above that. This is 100% correct
- * only in single-byte encodings such as LATINn. However, non-Unicode
- * multibyte encodings are mostly Far Eastern character sets for which the
- * properties being tested here aren't very relevant for higher code values
- * anyway. The difficulty with using the <wctype.h> functions with
- * non-Unicode multibyte encodings is that we can have no certainty that
- * the platform's wchar_t representation matches what we do in pg_wchar
- * conversions.
- *
- * As a special case, in the "default" collation, (2) and (3) force ASCII
- * letters to follow ASCII upcase/downcase rules, while in a non-default
- * collation we just let the library functions do what they will. The case
- * where this matters is treatment of I/i in Turkish, and the behavior is
- * meant to match the upper()/lower() SQL functions.
- *
- * We store the active collation setting in static variables. In principle
- * it could be passed down to here via the regex library's "struct vars" data
- * structure; but that would require somewhat invasive changes in the regex
- * library, and right now there's no real benefit to be gained from that.
- *
- * NB: the coding here assumes pg_wchar is an unsigned type.
- */
-
-typedef enum
-{
- PG_REGEX_STRATEGY_C, /* C locale (encoding independent) */
- PG_REGEX_STRATEGY_BUILTIN, /* built-in Unicode semantics */
- PG_REGEX_STRATEGY_LIBC_WIDE, /* Use locale_t <wctype.h> functions */
- PG_REGEX_STRATEGY_LIBC_1BYTE, /* Use locale_t <ctype.h> functions */
- PG_REGEX_STRATEGY_ICU, /* Use ICU uchar.h functions */
-} PG_Locale_Strategy;
-
-static PG_Locale_Strategy pg_regex_strategy;
static pg_locale_t pg_regex_locale;
+static struct pg_locale_struct dummy_c_locale = {
+ .collate_is_c = true,
+ .ctype_is_c = true,
+};
+
/*
* Hard-wired character properties for C locale
*/
@@ -228,7 +183,6 @@ void
pg_set_regex_collation(Oid collation)
{
pg_locale_t locale = 0;
- PG_Locale_Strategy strategy;
if (!OidIsValid(collation))
{
@@ -249,8 +203,7 @@ pg_set_regex_collation(Oid collation)
* catalog access is available, so we can't call
* pg_newlocale_from_collation().
*/
- strategy = PG_REGEX_STRATEGY_C;
- locale = 0;
+ locale = &dummy_c_locale;
}
else
{
@@ -267,113 +220,41 @@ pg_set_regex_collation(Oid collation)
* C/POSIX collations use this path regardless of database
* encoding
*/
- strategy = PG_REGEX_STRATEGY_C;
- locale = 0;
- }
- else if (locale->provider == COLLPROVIDER_BUILTIN)
- {
- Assert(GetDatabaseEncoding() == PG_UTF8);
- strategy = PG_REGEX_STRATEGY_BUILTIN;
- }
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- {
- strategy = PG_REGEX_STRATEGY_ICU;
- }
-#endif
- else
- {
- Assert(locale->provider == COLLPROVIDER_LIBC);
- if (GetDatabaseEncoding() == PG_UTF8)
- strategy = PG_REGEX_STRATEGY_LIBC_WIDE;
- else
- strategy = PG_REGEX_STRATEGY_LIBC_1BYTE;
+ locale = &dummy_c_locale;
}
}
- pg_regex_strategy = strategy;
pg_regex_locale = locale;
}
static int
pg_wc_isdigit(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISDIGIT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isdigit(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISDIGIT));
+ else
+ return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale);
}
static int
pg_wc_isalpha(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISALPHA));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isalpha(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isalpha(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISALPHA));
+ else
+ return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale);
}
static int
pg_wc_isalnum(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISALNUM));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isalnum(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISALNUM));
+ else
+ return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale);
}
static int
@@ -388,231 +269,87 @@ pg_wc_isword(pg_wchar c)
static int
pg_wc_isupper(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISUPPER));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isupper(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isupper_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isupper(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISUPPER));
+ else
+ return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale);
}
static int
pg_wc_islower(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISLOWER));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_islower(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- islower_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_islower(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISLOWER));
+ else
+ return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale);
}
static int
pg_wc_isgraph(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISGRAPH));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isgraph(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isgraph(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISGRAPH));
+ else
+ return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale);
}
static int
pg_wc_isprint(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISPRINT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isprint(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isprint_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isprint(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISPRINT));
+ else
+ return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale);
}
static int
pg_wc_ispunct(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISPUNCT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_ispunct(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISPUNCT));
+ else
+ return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale);
}
static int
pg_wc_isspace(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISSPACE));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isspace(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isspace_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isspace(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISSPACE));
+ else
+ return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale);
}
static pg_wchar
pg_wc_toupper(pg_wchar c)
{
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
- if (c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- return c;
- case PG_REGEX_STRATEGY_BUILTIN:
- return unicode_uppercase_simple(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towupper_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
- return c;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_toupper(c);
-#endif
- break;
+ if (c <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) c);
+ return c;
}
- return 0; /* can't get here, but keep compiler quiet */
+ else
+ return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
}
static pg_wchar
pg_wc_tolower(pg_wchar c)
{
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
- if (c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- return c;
- case PG_REGEX_STRATEGY_BUILTIN:
- return unicode_lowercase_simple(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towlower_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
- return c;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_tolower(c);
-#endif
- break;
+ if (c <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) c);
+ return c;
}
- return 0; /* can't get here, but keep compiler quiet */
+ else
+ return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
}
@@ -738,37 +475,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
* would always be true for production values of MAX_SIMPLE_CHR, but it's
* useful to allow it to be small for testing purposes.)
*/
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
#if MAX_SIMPLE_CHR >= 127
- max_chr = (pg_wchar) 127;
- pcc->cv.cclasscode = -1;
+ max_chr = (pg_wchar) 127;
+ pcc->cv.cclasscode = -1;
#else
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+ max_chr = (pg_wchar) MAX_SIMPLE_CHR;
#endif
- break;
- case PG_REGEX_STRATEGY_BUILTIN:
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
-#if MAX_SIMPLE_CHR >= UCHAR_MAX
- max_chr = (pg_wchar) UCHAR_MAX;
+ }
+ else
+ {
+ if (pg_regex_locale->ctype->max_chr != 0 &&
+ pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
+ {
+ max_chr = pg_regex_locale->ctype->max_chr;
pcc->cv.cclasscode = -1;
-#else
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-#endif
- break;
- case PG_REGEX_STRATEGY_ICU:
+ }
+ else
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- default:
- Assert(false);
- max_chr = 0; /* can't get here, but keep compiler quiet */
- break;
}
/*
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 7b4ddf7a8f5..f7b5d093681 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -534,7 +534,7 @@ libpqrcv_startstreaming(WalReceiverConn *conn,
if (options->logical)
appendStringInfoString(&cmd, " LOGICAL");
- appendStringInfo(&cmd, " %X/%X", LSN_FORMAT_ARGS(options->startpoint));
+ appendStringInfo(&cmd, " %X/%08X", LSN_FORMAT_ARGS(options->startpoint));
/*
* Additional options are different depending on if we are doing logical
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index 1c3c051403d..4aed0dfcebb 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -175,12 +175,14 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
uint16 generation,
BackgroundWorkerHandle *handle)
{
- BgwHandleStatus status;
- int rc;
+ bool result = false;
+ bool dropped_latch = false;
for (;;)
{
+ BgwHandleStatus status;
pid_t pid;
+ int rc;
CHECK_FOR_INTERRUPTS();
@@ -189,8 +191,9 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
/* Worker either died or has started. Return false if died. */
if (!worker->in_use || worker->proc)
{
+ result = worker->in_use;
LWLockRelease(LogicalRepWorkerLock);
- return worker->in_use;
+ break;
}
LWLockRelease(LogicalRepWorkerLock);
@@ -205,7 +208,7 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
if (generation == worker->generation)
logicalrep_worker_cleanup(worker);
LWLockRelease(LogicalRepWorkerLock);
- return false;
+ break; /* result is already false */
}
/*
@@ -220,8 +223,18 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
{
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
+ dropped_latch = true;
}
}
+
+ /*
+ * If we had to clear a latch event in order to wait, be sure to restore
+ * it before exiting. Otherwise caller may miss events.
+ */
+ if (dropped_latch)
+ SetLatch(MyLatch);
+
+ return result;
}
/*
@@ -328,7 +341,7 @@ logicalrep_worker_launch(LogicalRepWorkerType wtype,
if (max_active_replication_origins == 0)
ereport(ERROR,
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
- errmsg("cannot start logical replication workers when \"max_active_replication_origins\"=0")));
+ errmsg("cannot start logical replication workers when \"max_active_replication_origins\" is 0")));
/*
* We need to do the modification of the shared memory under lock so that
@@ -1194,10 +1207,21 @@ ApplyLauncherMain(Datum main_arg)
(elapsed = TimestampDifferenceMilliseconds(last_start, now)) >= wal_retrieve_retry_interval)
{
ApplyLauncherSetWorkerStartTime(sub->oid, now);
- logicalrep_worker_launch(WORKERTYPE_APPLY,
- sub->dbid, sub->oid, sub->name,
- sub->owner, InvalidOid,
- DSM_HANDLE_INVALID);
+ if (!logicalrep_worker_launch(WORKERTYPE_APPLY,
+ sub->dbid, sub->oid, sub->name,
+ sub->owner, InvalidOid,
+ DSM_HANDLE_INVALID))
+ {
+ /*
+ * We get here either if we failed to launch a worker
+ * (perhaps for resource-exhaustion reasons) or if we
+ * launched one but it immediately quit. Either way, it
+ * seems appropriate to try again after
+ * wal_retrieve_retry_interval.
+ */
+ wait_time = Min(wait_time,
+ wal_retrieve_retry_interval);
+ }
}
else
{
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index f1eb798f3e9..7e363a7c05b 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -567,7 +567,7 @@ CreateDecodingContext(XLogRecPtr start_lsn,
* kinds of client errors; so the client may wish to check that
* confirmed_flush_lsn matches its expectations.
*/
- elog(LOG, "%X/%X has been already streamed, forwarding to %X/%X",
+ elog(LOG, "%X/%08X has been already streamed, forwarding to %X/%08X",
LSN_FORMAT_ARGS(start_lsn),
LSN_FORMAT_ARGS(slot->data.confirmed_flush));
@@ -610,7 +610,7 @@ CreateDecodingContext(XLogRecPtr start_lsn,
ereport(LOG,
(errmsg("starting logical decoding for slot \"%s\"",
NameStr(slot->data.name)),
- errdetail("Streaming transactions committing after %X/%X, reading WAL from %X/%X.",
+ errdetail("Streaming transactions committing after %X/%08X, reading WAL from %X/%08X.",
LSN_FORMAT_ARGS(slot->data.confirmed_flush),
LSN_FORMAT_ARGS(slot->data.restart_lsn))));
@@ -637,7 +637,7 @@ DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
/* Initialize from where to start reading WAL. */
XLogBeginRead(ctx->reader, slot->data.restart_lsn);
- elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
+ elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%08X",
LSN_FORMAT_ARGS(slot->data.restart_lsn));
/* Wait for a consistent starting point */
@@ -758,7 +758,7 @@ output_plugin_error_callback(void *arg)
/* not all callbacks have an associated LSN */
if (state->report_location != InvalidXLogRecPtr)
- errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
+ errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%08X",
NameStr(state->ctx->slot->data.name),
NameStr(state->ctx->slot->data.plugin),
state->callback_name,
@@ -1725,7 +1725,7 @@ LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
SpinLockRelease(&slot->mutex);
if (got_new_xmin)
- elog(DEBUG1, "got new catalog xmin %u at %X/%X", xmin,
+ elog(DEBUG1, "got new catalog xmin %u at %X/%08X", xmin,
LSN_FORMAT_ARGS(current_lsn));
/* candidate already valid with the current flush position, apply */
@@ -1785,7 +1785,7 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart
slot->candidate_restart_lsn = restart_lsn;
SpinLockRelease(&slot->mutex);
- elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
+ elog(DEBUG1, "got new restart lsn %X/%08X at %X/%08X",
LSN_FORMAT_ARGS(restart_lsn),
LSN_FORMAT_ARGS(current_lsn));
}
@@ -1800,7 +1800,7 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart
confirmed_flush = slot->data.confirmed_flush;
SpinLockRelease(&slot->mutex);
- elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
+ elog(DEBUG1, "failed to increase restart lsn: proposed %X/%08X, after %X/%08X, current candidate %X/%08X, current after %X/%08X, flushed up to %X/%08X",
LSN_FORMAT_ARGS(restart_lsn),
LSN_FORMAT_ARGS(current_lsn),
LSN_FORMAT_ARGS(candidate_restart_lsn),
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index a17bacf88e7..87f10e50dcc 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -826,9 +826,9 @@ StartupReplicationOrigin(void)
last_state++;
ereport(LOG,
- (errmsg("recovered replication state of node %d to %X/%X",
- disk_state.roident,
- LSN_FORMAT_ARGS(disk_state.remote_lsn))));
+ errmsg("recovered replication state of node %d to %X/%08X",
+ disk_state.roident,
+ LSN_FORMAT_ARGS(disk_state.remote_lsn)));
}
/* now check checksum */
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index c4299c76fb1..7b4e8629553 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -1415,7 +1415,7 @@ ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
int32 off;
/* nothing there anymore */
- if (state->heap->bh_size == 0)
+ if (binaryheap_empty(state->heap))
return NULL;
off = DatumGetInt32(binaryheap_first(state->heap));
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 656e66e0ae0..2f0c08b8fbd 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -211,9 +211,9 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
* impact the users, so we used DEBUG1 level to log the message.
*/
ereport(slot->data.persistency == RS_TEMPORARY ? LOG : DEBUG1,
- errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot",
+ errmsg("could not synchronize replication slot \"%s\"",
remote_slot->name),
- errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.",
+ errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
LSN_FORMAT_ARGS(remote_slot->restart_lsn),
remote_slot->catalog_xmin,
LSN_FORMAT_ARGS(slot->data.restart_lsn),
@@ -275,7 +275,7 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
ereport(ERROR,
errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
remote_slot->name),
- errdetail_internal("Remote slot has LSN %X/%X but local slot has LSN %X/%X.",
+ errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
LSN_FORMAT_ARGS(slot->data.confirmed_flush)));
}
@@ -593,7 +593,7 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
{
ereport(LOG,
errmsg("could not synchronize replication slot \"%s\"", remote_slot->name),
- errdetail("Logical decoding could not find consistent point from local slot's LSN %X/%X.",
+ errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
LSN_FORMAT_ARGS(slot->data.restart_lsn)));
return false;
@@ -642,7 +642,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
ereport(AmLogicalSlotSyncWorkerProcess() ? LOG : ERROR,
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("skipping slot synchronization because the received slot sync"
- " LSN %X/%X for slot \"%s\" is ahead of the standby position %X/%X",
+ " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
remote_slot->name,
LSN_FORMAT_ARGS(latestFlushPtr)));
@@ -733,7 +733,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
ereport(ERROR,
errmsg_internal("cannot synchronize local slot \"%s\"",
remote_slot->name),
- errdetail_internal("Local slot's start streaming location LSN(%X/%X) is ahead of remote slot's LSN(%X/%X).",
+ errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
LSN_FORMAT_ARGS(slot->data.confirmed_flush),
LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index adf18c397db..8532bfd27e5 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -774,7 +774,7 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact
if (rbtxn_is_prepared(txn))
continue;
- elog(DEBUG2, "adding a new snapshot and invalidations to %u at %X/%X",
+ elog(DEBUG2, "adding a new snapshot and invalidations to %u at %X/%08X",
txn->xid, LSN_FORMAT_ARGS(lsn));
/*
@@ -1271,10 +1271,10 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
builder->initial_xmin_horizon))
{
ereport(DEBUG1,
- (errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low",
- LSN_FORMAT_ARGS(lsn)),
- errdetail_internal("initial xmin horizon of %u vs the snapshot's %u",
- builder->initial_xmin_horizon, running->oldestRunningXid)));
+ errmsg_internal("skipping snapshot at %X/%08X while building logical decoding snapshot, xmin horizon too low",
+ LSN_FORMAT_ARGS(lsn)),
+ errdetail_internal("initial xmin horizon of %u vs the snapshot's %u",
+ builder->initial_xmin_horizon, running->oldestRunningXid));
SnapBuildWaitSnapshot(running, builder->initial_xmin_horizon);
@@ -1310,9 +1310,9 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
builder->next_phase_at = InvalidTransactionId;
ereport(LOG,
- (errmsg("logical decoding found consistent point at %X/%X",
- LSN_FORMAT_ARGS(lsn)),
- errdetail("There are no running transactions.")));
+ errmsg("logical decoding found consistent point at %X/%08X",
+ LSN_FORMAT_ARGS(lsn)),
+ errdetail("There are no running transactions."));
return false;
}
@@ -1359,10 +1359,10 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
Assert(TransactionIdIsNormal(builder->xmax));
ereport(LOG,
- (errmsg("logical decoding found initial starting point at %X/%X",
- LSN_FORMAT_ARGS(lsn)),
- errdetail("Waiting for transactions (approximately %d) older than %u to end.",
- running->xcnt, running->nextXid)));
+ errmsg("logical decoding found initial starting point at %X/%08X",
+ LSN_FORMAT_ARGS(lsn)),
+ errdetail("Waiting for transactions (approximately %d) older than %u to end.",
+ running->xcnt, running->nextXid));
SnapBuildWaitSnapshot(running, running->nextXid);
}
@@ -1383,10 +1383,10 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
builder->next_phase_at = running->nextXid;
ereport(LOG,
- (errmsg("logical decoding found initial consistent point at %X/%X",
- LSN_FORMAT_ARGS(lsn)),
- errdetail("Waiting for transactions (approximately %d) older than %u to end.",
- running->xcnt, running->nextXid)));
+ errmsg("logical decoding found initial consistent point at %X/%08X",
+ LSN_FORMAT_ARGS(lsn)),
+ errdetail("Waiting for transactions (approximately %d) older than %u to end.",
+ running->xcnt, running->nextXid));
SnapBuildWaitSnapshot(running, running->nextXid);
}
@@ -1407,9 +1407,9 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
builder->next_phase_at = InvalidTransactionId;
ereport(LOG,
- (errmsg("logical decoding found consistent point at %X/%X",
- LSN_FORMAT_ARGS(lsn)),
- errdetail("There are no old transactions anymore.")));
+ errmsg("logical decoding found consistent point at %X/%08X",
+ LSN_FORMAT_ARGS(lsn)),
+ errdetail("There are no old transactions anymore."));
}
/*
@@ -1913,9 +1913,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
Assert(builder->state == SNAPBUILD_CONSISTENT);
ereport(LOG,
- (errmsg("logical decoding found consistent point at %X/%X",
- LSN_FORMAT_ARGS(lsn)),
- errdetail("Logical decoding will begin using saved snapshot.")));
+ errmsg("logical decoding found consistent point at %X/%08X",
+ LSN_FORMAT_ARGS(lsn)),
+ errdetail("Logical decoding will begin using saved snapshot."));
return true;
snapshot_not_interesting:
@@ -2061,7 +2061,7 @@ SnapBuildSnapshotExists(XLogRecPtr lsn)
int ret;
struct stat stat_buf;
- sprintf(path, "%s/%X-%X.snap",
+ sprintf(path, "%s/%08X-%08X.snap",
PG_LOGICAL_SNAPSHOTS_DIR,
LSN_FORMAT_ARGS(lsn));
diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index 8e1e8762f62..e4fd6347fd1 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -603,14 +603,19 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
TimestampDifferenceExceeds(hentry->last_start_time, now,
wal_retrieve_retry_interval))
{
- logicalrep_worker_launch(WORKERTYPE_TABLESYNC,
- MyLogicalRepWorker->dbid,
- MySubscription->oid,
- MySubscription->name,
- MyLogicalRepWorker->userid,
- rstate->relid,
- DSM_HANDLE_INVALID);
+ /*
+ * Set the last_start_time even if we fail to start
+ * the worker, so that we won't retry until
+ * wal_retrieve_retry_interval has elapsed.
+ */
hentry->last_start_time = now;
+ (void) logicalrep_worker_launch(WORKERTYPE_TABLESYNC,
+ MyLogicalRepWorker->dbid,
+ MySubscription->oid,
+ MySubscription->name,
+ MyLogicalRepWorker->userid,
+ rstate->relid,
+ DSM_HANDLE_INVALID);
}
}
}
@@ -1548,7 +1553,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
copy_table_done:
elog(DEBUG1,
- "LogicalRepSyncTableStart: '%s' origin_startpos lsn %X/%X",
+ "LogicalRepSyncTableStart: '%s' origin_startpos lsn %X/%08X",
originname, LSN_FORMAT_ARGS(*origin_startpos));
/*
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index a23262957ac..c5fb627aa56 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -109,13 +109,6 @@
* If ever a user needs to be aware of the tri-state value, they can fetch it
* from the pg_subscription catalog (see column subtwophasestate).
*
- * We don't allow to toggle two_phase option of a subscription because it can
- * lead to an inconsistent replica. Consider, initially, it was on and we have
- * received some prepare then we turn it off, now at commit time the server
- * will send the entire transaction data along with the commit. With some more
- * analysis, we can allow changing this option from off to on but not sure if
- * that alone would be useful.
- *
* Finally, to avoid problems mentioned in previous paragraphs from any
* subsequent (not READY) tablesyncs (need to toggle two_phase option from 'on'
* to 'off' and then again back to 'on') there is a restriction for
@@ -1023,7 +1016,7 @@ apply_handle_commit(StringInfo s)
if (commit_data.commit_lsn != remote_final_lsn)
ereport(ERROR,
(errcode(ERRCODE_PROTOCOL_VIOLATION),
- errmsg_internal("incorrect commit LSN %X/%X in commit message (expected %X/%X)",
+ errmsg_internal("incorrect commit LSN %X/%08X in commit message (expected %X/%08X)",
LSN_FORMAT_ARGS(commit_data.commit_lsn),
LSN_FORMAT_ARGS(remote_final_lsn))));
@@ -1115,7 +1108,7 @@ apply_handle_prepare(StringInfo s)
if (prepare_data.prepare_lsn != remote_final_lsn)
ereport(ERROR,
(errcode(ERRCODE_PROTOCOL_VIOLATION),
- errmsg_internal("incorrect prepare LSN %X/%X in prepare message (expected %X/%X)",
+ errmsg_internal("incorrect prepare LSN %X/%08X in prepare message (expected %X/%08X)",
LSN_FORMAT_ARGS(prepare_data.prepare_lsn),
LSN_FORMAT_ARGS(remote_final_lsn))));
@@ -3910,7 +3903,7 @@ send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
pq_sendint64(reply_message, now); /* sendTime */
pq_sendbyte(reply_message, requestReply); /* replyRequested */
- elog(DEBUG2, "sending feedback (force %d) to recv %X/%X, write %X/%X, flush %X/%X",
+ elog(DEBUG2, "sending feedback (force %d) to recv %X/%08X, write %X/%08X, flush %X/%08X",
force,
LSN_FORMAT_ARGS(recvpos),
LSN_FORMAT_ARGS(writepos),
@@ -4916,7 +4909,7 @@ maybe_start_skipping_changes(XLogRecPtr finish_lsn)
skip_xact_finish_lsn = finish_lsn;
ereport(LOG,
- errmsg("logical replication starts skipping transaction at LSN %X/%X",
+ errmsg("logical replication starts skipping transaction at LSN %X/%08X",
LSN_FORMAT_ARGS(skip_xact_finish_lsn)));
}
@@ -4930,8 +4923,8 @@ stop_skipping_changes(void)
return;
ereport(LOG,
- (errmsg("logical replication completed skipping transaction at LSN %X/%X",
- LSN_FORMAT_ARGS(skip_xact_finish_lsn))));
+ errmsg("logical replication completed skipping transaction at LSN %X/%08X",
+ LSN_FORMAT_ARGS(skip_xact_finish_lsn)));
/* Stop skipping changes */
skip_xact_finish_lsn = InvalidXLogRecPtr;
@@ -5019,7 +5012,7 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
if (myskiplsn != finish_lsn)
ereport(WARNING,
errmsg("skip-LSN of subscription \"%s\" cleared", MySubscription->name),
- errdetail("Remote transaction's finish WAL location (LSN) %X/%X did not match skip-LSN %X/%X.",
+ errdetail("Remote transaction's finish WAL location (LSN) %X/%08X did not match skip-LSN %X/%08X.",
LSN_FORMAT_ARGS(finish_lsn),
LSN_FORMAT_ARGS(myskiplsn)));
}
@@ -5056,7 +5049,7 @@ apply_error_callback(void *arg)
logicalrep_message_type(errarg->command),
errarg->remote_xid);
else
- errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" in transaction %u, finished at %X/%X",
+ errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" in transaction %u, finished at %X/%08X",
errarg->origin_name,
logicalrep_message_type(errarg->command),
errarg->remote_xid,
@@ -5074,7 +5067,7 @@ apply_error_callback(void *arg)
errarg->rel->remoterel.relname,
errarg->remote_xid);
else
- errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" in transaction %u, finished at %X/%X",
+ errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" in transaction %u, finished at %X/%08X",
errarg->origin_name,
logicalrep_message_type(errarg->command),
errarg->rel->remoterel.nspname,
@@ -5093,7 +5086,7 @@ apply_error_callback(void *arg)
errarg->rel->remoterel.attnames[errarg->remote_attnum],
errarg->remote_xid);
else
- errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" column \"%s\" in transaction %u, finished at %X/%X",
+ errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" column \"%s\" in transaction %u, finished at %X/%08X",
errarg->origin_name,
logicalrep_message_type(errarg->command),
errarg->rel->remoterel.nspname,
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 693a766e6d7..f4c977262c5 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -297,10 +297,12 @@ parse_output_parameters(List *options, PGOutputData *data)
bool two_phase_option_given = false;
bool origin_option_given = false;
+ /* Initialize optional parameters to defaults */
data->binary = false;
data->streaming = LOGICALREP_STREAM_OFF;
data->messages = false;
data->two_phase = false;
+ data->publish_no_origin = false;
foreach(lc, options)
{
@@ -1789,7 +1791,7 @@ LoadPublications(List *pubnames)
else
ereport(WARNING,
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("skipped loading publication: %s", pubname),
+ errmsg("skipped loading publication \"%s\"", pubname),
errdetail("The publication does not exist at this point in the WAL."),
errhint("Create the publication if it does not exist."));
}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 7440aae5a1a..8a649199ec6 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -279,7 +279,7 @@ alter_replication_slot:
;
/*
- * START_REPLICATION [SLOT slot] [PHYSICAL] %X/%X [TIMELINE %u]
+ * START_REPLICATION [SLOT slot] [PHYSICAL] %X/%08X [TIMELINE %u]
*/
start_replication:
K_START_REPLICATION opt_slot opt_physical RECPTR opt_timeline
@@ -295,7 +295,7 @@ start_replication:
}
;
-/* START_REPLICATION SLOT slot LOGICAL %X/%X options */
+/* START_REPLICATION SLOT slot LOGICAL %X/%08X options */
start_logical_replication:
K_START_REPLICATION K_SLOT IDENT K_LOGICAL RECPTR plugin_options
{
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 014ea8d25c6..b6930e28659 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -155,7 +155,7 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
{hexdigit}+\/{hexdigit}+ {
uint32 hi,
lo;
- if (sscanf(yytext, "%X/%X", &hi, &lo) != 2)
+ if (sscanf(yytext, "%X/%08X", &hi, &lo) != 2)
replication_yyerror(NULL, yyscanner, "invalid streaming start location");
yylval->recptr = ((uint64) hi) << 32 | lo;
return RECPTR;
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index c64f020742f..e44ad576bc7 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -154,7 +154,7 @@ int max_replication_slots = 10; /* the maximum number of replication
* Invalidate replication slots that have remained idle longer than this
* duration; '0' disables it.
*/
-int idle_replication_slot_timeout_mins = 0;
+int idle_replication_slot_timeout_secs = 0;
/*
* This GUC lists streaming replication standby server slot names that
@@ -1591,8 +1591,8 @@ ReportSlotInvalidation(ReplicationSlotInvalidationCause cause,
uint64 ex = oldestLSN - restart_lsn;
appendStringInfo(&err_detail,
- ngettext("The slot's restart_lsn %X/%X exceeds the limit by %" PRIu64 " byte.",
- "The slot's restart_lsn %X/%X exceeds the limit by %" PRIu64 " bytes.",
+ ngettext("The slot's restart_lsn %X/%08X exceeds the limit by %" PRIu64 " byte.",
+ "The slot's restart_lsn %X/%08X exceeds the limit by %" PRIu64 " bytes.",
ex),
LSN_FORMAT_ARGS(restart_lsn),
ex);
@@ -1612,13 +1612,10 @@ ReportSlotInvalidation(ReplicationSlotInvalidationCause cause,
case RS_INVAL_IDLE_TIMEOUT:
{
- int minutes = slot_idle_seconds / SECS_PER_MINUTE;
- int secs = slot_idle_seconds % SECS_PER_MINUTE;
-
/* translator: %s is a GUC variable name */
- appendStringInfo(&err_detail, _("The slot's idle time of %dmin %02ds exceeds the configured \"%s\" duration of %dmin."),
- minutes, secs, "idle_replication_slot_timeout",
- idle_replication_slot_timeout_mins);
+ appendStringInfo(&err_detail, _("The slot's idle time of %lds exceeds the configured \"%s\" duration of %ds."),
+ slot_idle_seconds, "idle_replication_slot_timeout",
+ idle_replication_slot_timeout_secs);
/* translator: %s is a GUC variable name */
appendStringInfo(&err_hint, _("You might need to increase \"%s\"."),
"idle_replication_slot_timeout");
@@ -1656,7 +1653,7 @@ ReportSlotInvalidation(ReplicationSlotInvalidationCause cause,
static inline bool
CanInvalidateIdleSlot(ReplicationSlot *s)
{
- return (idle_replication_slot_timeout_mins != 0 &&
+ return (idle_replication_slot_timeout_secs != 0 &&
!XLogRecPtrIsInvalid(s->data.restart_lsn) &&
s->inactive_since > 0 &&
!(RecoveryInProgress() && s->data.synced));
@@ -1717,9 +1714,9 @@ DetermineSlotInvalidationCause(uint32 possible_causes, ReplicationSlot *s,
if (CanInvalidateIdleSlot(s))
{
/*
- * We simulate the invalidation due to idle_timeout as the minimum
- * time idle time is one minute which makes tests take a long
- * time.
+ * Simulate the invalidation due to idle_timeout to test the
+ * timeout behavior promptly, without waiting for it to trigger
+ * naturally.
*/
#ifdef USE_INJECTION_POINTS
if (IS_INJECTION_POINT_ATTACHED("slot-timeout-inval"))
@@ -1734,7 +1731,7 @@ DetermineSlotInvalidationCause(uint32 possible_causes, ReplicationSlot *s,
* idle_replication_slot_timeout GUC.
*/
if (TimestampDifferenceExceedsSeconds(s->inactive_since, now,
- idle_replication_slot_timeout_mins * SECS_PER_MINUTE))
+ idle_replication_slot_timeout_secs))
{
*inactive_since = s->inactive_since;
return RS_INVAL_IDLE_TIMEOUT;
@@ -1810,8 +1807,6 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
*/
SpinLockAcquire(&s->mutex);
- Assert(s->data.restart_lsn >= s->last_saved_restart_lsn);
-
restart_lsn = s->data.restart_lsn;
/* we do nothing if the slot is already invalid */
@@ -1893,15 +1888,6 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
SpinLockRelease(&s->mutex);
/*
- * The logical replication slots shouldn't be invalidated as GUC
- * max_slot_wal_keep_size is set to -1 and
- * idle_replication_slot_timeout is set to 0 during the binary
- * upgrade. See check_old_cluster_for_valid_slots() where we ensure
- * that no invalidated before the upgrade.
- */
- Assert(!(*invalidated && SlotIsLogical(s) && IsBinaryUpgrade));
-
- /*
* Calculate the idle time duration of the slot if slot is marked
* invalidated with RS_INVAL_IDLE_TIMEOUT.
*/
@@ -2047,6 +2033,10 @@ restart:
if (!s->in_use)
continue;
+ /* Prevent invalidation of logical slots during binary upgrade */
+ if (SlotIsLogical(s) && IsBinaryUpgrade)
+ continue;
+
if (InvalidatePossiblyObsoleteSlot(possible_causes, s, oldestLSN, dboid,
snapshotConflictHorizon,
&invalidated))
@@ -2081,6 +2071,7 @@ void
CheckPointReplicationSlots(bool is_shutdown)
{
int i;
+ bool last_saved_restart_lsn_updated = false;
elog(DEBUG1, "performing replication slot checkpoint");
@@ -2125,15 +2116,23 @@ CheckPointReplicationSlots(bool is_shutdown)
SpinLockRelease(&s->mutex);
}
+ /*
+ * Track if we're going to update slot's last_saved_restart_lsn. We
+ * need this to know if we need to recompute the required LSN.
+ */
+ if (s->last_saved_restart_lsn != s->data.restart_lsn)
+ last_saved_restart_lsn_updated = true;
+
SaveSlotToPath(s, path, LOG);
}
LWLockRelease(ReplicationSlotAllocationLock);
/*
- * Recompute the required LSN as SaveSlotToPath() updated
- * last_saved_restart_lsn for slots.
+ * Recompute the required LSN if SaveSlotToPath() updated
+ * last_saved_restart_lsn for any slot.
*/
- ReplicationSlotsComputeRequiredLSN();
+ if (last_saved_restart_lsn_updated)
+ ReplicationSlotsComputeRequiredLSN();
}
/*
@@ -3050,22 +3049,3 @@ WaitForStandbyConfirmation(XLogRecPtr wait_for_lsn)
ConditionVariableCancelSleep();
}
-
-/*
- * GUC check_hook for idle_replication_slot_timeout
- *
- * The value of idle_replication_slot_timeout must be set to 0 during
- * a binary upgrade. See start_postmaster() in pg_upgrade for more details.
- */
-bool
-check_idle_replication_slot_timeout(int *newval, void **extra, GucSource source)
-{
- if (IsBinaryUpgrade && *newval != 0)
- {
- GUC_check_errdetail("\"%s\" must be set to 0 during binary upgrade mode.",
- "idle_replication_slot_timeout");
- return false;
- }
-
- return true;
-}
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c
index 36cc2ed4e44..69f4c6157c5 100644
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -566,7 +566,7 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS)
if (moveto < minlsn)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("cannot advance replication slot to %X/%X, minimum is %X/%X",
+ errmsg("cannot advance replication slot to %X/%08X, minimum is %X/%08X",
LSN_FORMAT_ARGS(moveto), LSN_FORMAT_ARGS(minlsn))));
/* Do the actual slot update, depending on the slot type */
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index cc35984ad00..32cf3a48b89 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -258,7 +258,7 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
{
char buffer[32];
- sprintf(buffer, "waiting for %X/%X", LSN_FORMAT_ARGS(lsn));
+ sprintf(buffer, "waiting for %X/%08X", LSN_FORMAT_ARGS(lsn));
set_ps_display_suffix(buffer);
}
@@ -566,7 +566,7 @@ SyncRepReleaseWaiters(void)
LWLockRelease(SyncRepLock);
- elog(DEBUG3, "released %d procs up to write %X/%X, %d procs up to flush %X/%X, %d procs up to apply %X/%X",
+ elog(DEBUG3, "released %d procs up to write %X/%08X, %d procs up to flush %X/%08X, %d procs up to apply %X/%08X",
numwrite, LSN_FORMAT_ARGS(writePtr),
numflush, LSN_FORMAT_ARGS(flushPtr),
numapply, LSN_FORMAT_ARGS(applyPtr));
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 8c4d0fd9aed..b6281101711 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -386,12 +386,12 @@ WalReceiverMain(const void *startup_data, size_t startup_data_len)
{
if (first_stream)
ereport(LOG,
- (errmsg("started streaming WAL from primary at %X/%X on timeline %u",
- LSN_FORMAT_ARGS(startpoint), startpointTLI)));
+ errmsg("started streaming WAL from primary at %X/%08X on timeline %u",
+ LSN_FORMAT_ARGS(startpoint), startpointTLI));
else
ereport(LOG,
- (errmsg("restarted WAL streaming at %X/%X on timeline %u",
- LSN_FORMAT_ARGS(startpoint), startpointTLI)));
+ errmsg("restarted WAL streaming at %X/%08X on timeline %u",
+ LSN_FORMAT_ARGS(startpoint), startpointTLI));
first_stream = false;
/* Initialize LogstreamResult and buffers for processing messages */
@@ -470,7 +470,7 @@ WalReceiverMain(const void *startup_data, size_t startup_data_len)
{
ereport(LOG,
(errmsg("replication terminated by primary server"),
- errdetail("End of WAL reached on timeline %u at %X/%X.",
+ errdetail("End of WAL reached on timeline %u at %X/%08X.",
startpointTLI,
LSN_FORMAT_ARGS(LogstreamResult.Write))));
endofwal = true;
@@ -711,7 +711,7 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
{
char activitymsg[50];
- snprintf(activitymsg, sizeof(activitymsg), "restarting at %X/%X",
+ snprintf(activitymsg, sizeof(activitymsg), "restarting at %X/%08X",
LSN_FORMAT_ARGS(*startpoint));
set_ps_display(activitymsg);
}
@@ -1014,7 +1014,7 @@ XLogWalRcvFlush(bool dying, TimeLineID tli)
{
char activitymsg[50];
- snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
+ snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%08X",
LSN_FORMAT_ARGS(LogstreamResult.Write));
set_ps_display(activitymsg);
}
@@ -1138,7 +1138,7 @@ XLogWalRcvSendReply(bool force, bool requestReply)
pq_sendbyte(&reply_message, requestReply ? 1 : 0);
/* Send it */
- elog(DEBUG2, "sending write %X/%X flush %X/%X apply %X/%X%s",
+ elog(DEBUG2, "sending write %X/%08X flush %X/%08X apply %X/%08X%s",
LSN_FORMAT_ARGS(writePtr),
LSN_FORMAT_ARGS(flushPtr),
LSN_FORMAT_ARGS(applyPtr),
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index f2c33250e8b..28b8591efa5 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -408,7 +408,7 @@ IdentifySystem(void)
else
logptr = GetFlushRecPtr(&currTLI);
- snprintf(xloc, sizeof(xloc), "%X/%X", LSN_FORMAT_ARGS(logptr));
+ snprintf(xloc, sizeof(xloc), "%X/%08X", LSN_FORMAT_ARGS(logptr));
if (MyDatabaseId != InvalidOid)
{
@@ -515,7 +515,7 @@ ReadReplicationSlot(ReadReplicationSlotCmd *cmd)
{
char xloc[64];
- snprintf(xloc, sizeof(xloc), "%X/%X",
+ snprintf(xloc, sizeof(xloc), "%X/%08X",
LSN_FORMAT_ARGS(slot_contents.data.restart_lsn));
values[i] = CStringGetTextDatum(xloc);
nulls[i] = false;
@@ -892,12 +892,12 @@ StartReplication(StartReplicationCmd *cmd)
switchpoint < cmd->startpoint)
{
ereport(ERROR,
- (errmsg("requested starting point %X/%X on timeline %u is not in this server's history",
- LSN_FORMAT_ARGS(cmd->startpoint),
- cmd->timeline),
- errdetail("This server's history forked from timeline %u at %X/%X.",
- cmd->timeline,
- LSN_FORMAT_ARGS(switchpoint))));
+ errmsg("requested starting point %X/%08X on timeline %u is not in this server's history",
+ LSN_FORMAT_ARGS(cmd->startpoint),
+ cmd->timeline),
+ errdetail("This server's history forked from timeline %u at %X/%08X.",
+ cmd->timeline,
+ LSN_FORMAT_ARGS(switchpoint)));
}
sendTimeLineValidUpto = switchpoint;
}
@@ -939,9 +939,9 @@ StartReplication(StartReplicationCmd *cmd)
if (FlushPtr < cmd->startpoint)
{
ereport(ERROR,
- (errmsg("requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X",
- LSN_FORMAT_ARGS(cmd->startpoint),
- LSN_FORMAT_ARGS(FlushPtr))));
+ errmsg("requested starting point %X/%08X is ahead of the WAL flush position of this server %X/%08X",
+ LSN_FORMAT_ARGS(cmd->startpoint),
+ LSN_FORMAT_ARGS(FlushPtr)));
}
/* Start streaming from the requested point */
@@ -983,7 +983,7 @@ StartReplication(StartReplicationCmd *cmd)
Datum values[2];
bool nulls[2] = {0};
- snprintf(startpos_str, sizeof(startpos_str), "%X/%X",
+ snprintf(startpos_str, sizeof(startpos_str), "%X/%08X",
LSN_FORMAT_ARGS(sendTimeLineValidUpto));
dest = CreateDestReceiver(DestRemoteSimple);
@@ -1324,7 +1324,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
ReplicationSlotPersist();
}
- snprintf(xloc, sizeof(xloc), "%X/%X",
+ snprintf(xloc, sizeof(xloc), "%X/%08X",
LSN_FORMAT_ARGS(MyReplicationSlot->data.confirmed_flush));
dest = CreateDestReceiver(DestRemoteSimple);
@@ -2429,7 +2429,7 @@ ProcessStandbyReplyMessage(void)
/* Copy because timestamptz_to_str returns a static buffer */
replyTimeStr = pstrdup(timestamptz_to_str(replyTime));
- elog(DEBUG2, "write %X/%X flush %X/%X apply %X/%X%s reply_time %s",
+ elog(DEBUG2, "write %X/%08X flush %X/%08X apply %X/%08X%s reply_time %s",
LSN_FORMAT_ARGS(writePtr),
LSN_FORMAT_ARGS(flushPtr),
LSN_FORMAT_ARGS(applyPtr),
@@ -3251,7 +3251,7 @@ XLogSendPhysical(void)
WalSndCaughtUp = true;
- elog(DEBUG1, "walsender reached end of timeline at %X/%X (sent up to %X/%X)",
+ elog(DEBUG1, "walsender reached end of timeline at %X/%08X (sent up to %X/%08X)",
LSN_FORMAT_ARGS(sendTimeLineValidUpto),
LSN_FORMAT_ARGS(sentPtr));
return;
@@ -3392,7 +3392,7 @@ retry:
{
char activitymsg[50];
- snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
+ snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%08X",
LSN_FORMAT_ARGS(sentPtr));
set_ps_display(activitymsg);
}
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 2ef0e7fbf3a..adc9e7600e1 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -923,8 +923,9 @@ rewriteTargetListIU(List *targetList,
apply_default = true;
/*
- * Can only insert DEFAULT into generated columns, regardless of
- * any OVERRIDING clauses.
+ * Can only insert DEFAULT into generated columns. (The
+ * OVERRIDING clause does not apply to generated columns, so we
+ * don't consider it here.)
*/
if (att_tup->attgenerated && !apply_default)
{
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index b78048328e1..0a8c054162f 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -29,6 +29,9 @@
#ifdef IOMETHOD_IO_URING_ENABLED
+#include <sys/mman.h>
+#include <unistd.h>
+
#include <liburing.h>
#include "miscadmin.h"
@@ -94,12 +97,32 @@ PgAioUringContext
struct io_uring io_uring_ring;
} PgAioUringContext;
+/*
+ * Information about the capabilities that io_uring has.
+ *
+ * Depending on liburing and kernel version different features are
+ * supported. At least for the kernel a kernel version check does not suffice
+ * as various vendors do backport features to older kernels :(.
+ */
+typedef struct PgAioUringCaps
+{
+ bool checked;
+ /* -1 if io_uring_queue_init_mem() is unsupported */
+ int mem_init_size;
+} PgAioUringCaps;
+
+
/* PgAioUringContexts for all backends */
static PgAioUringContext *pgaio_uring_contexts;
/* the current backend's context */
static PgAioUringContext *pgaio_my_uring_context;
+static PgAioUringCaps pgaio_uring_caps =
+{
+ .checked = false,
+ .mem_init_size = -1,
+};
static uint32
pgaio_uring_procs(void)
@@ -111,16 +134,145 @@ pgaio_uring_procs(void)
return MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
}
-static Size
+/*
+ * Initializes pgaio_uring_caps, unless that's already done.
+ */
+static void
+pgaio_uring_check_capabilities(void)
+{
+ if (pgaio_uring_caps.checked)
+ return;
+
+ /*
+ * By default io_uring creates a shared memory mapping for each io_uring
+ * instance, leading to a large number of memory mappings. Unfortunately a
+ * large number of memory mappings slows things down, backend exit is
+ * particularly affected. To address that, newer kernels (6.5) support
+ * using user-provided memory for the memory, by putting the relevant
+ * memory into shared memory we don't need any additional mappings.
+ *
+ * To know whether this is supported, we unfortunately need to probe the
+ * kernel by trying to create a ring with userspace-provided memory. This
+ * also has a secondary benefit: We can determine precisely how much
+ * memory we need for each io_uring instance.
+ */
+#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
+ {
+ struct io_uring test_ring;
+ size_t ring_size;
+ void *ring_ptr;
+ struct io_uring_params p = {0};
+ int ret;
+
+ /*
+ * Liburing does not yet provide an API to query how much memory a
+ * ring will need. So we over-estimate it here. As the memory is freed
+ * just below that's small temporary waste of memory.
+ *
+ * 1MB is more than enough for rings within io_max_concurrency's
+ * range.
+ */
+ ring_size = 1024 * 1024;
+
+ /*
+ * Hard to believe a system exists where 1MB would not be a multiple
+ * of the page size. But it's cheap to ensure...
+ */
+ ring_size -= ring_size % sysconf(_SC_PAGESIZE);
+
+ ring_ptr = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (ring_ptr == MAP_FAILED)
+ elog(ERROR,
+ "mmap(%zu) to determine io_uring_queue_init_mem() support failed: %m",
+ ring_size);
+
+ ret = io_uring_queue_init_mem(io_max_concurrency, &test_ring, &p, ring_ptr, ring_size);
+ if (ret > 0)
+ {
+ pgaio_uring_caps.mem_init_size = ret;
+
+ elog(DEBUG1,
+ "can use combined memory mapping for io_uring, each ring needs %d bytes",
+ ret);
+
+ /* clean up the created ring, it was just for a test */
+ io_uring_queue_exit(&test_ring);
+ }
+ else
+ {
+ /*
+ * There are different reasons for ring creation to fail, but it's
+ * ok to treat that just as io_uring_queue_init_mem() not being
+ * supported. We'll report a more detailed error in
+ * pgaio_uring_shmem_init().
+ */
+ errno = -ret;
+ elog(DEBUG1,
+ "cannot use combined memory mapping for io_uring, ring creation failed: %m");
+
+ }
+
+ if (munmap(ring_ptr, ring_size) != 0)
+ elog(ERROR, "munmap() failed: %m");
+ }
+#else
+ {
+ elog(DEBUG1,
+ "can't use combined memory mapping for io_uring, kernel or liburing too old");
+ }
+#endif
+
+ pgaio_uring_caps.checked = true;
+}
+
+/*
+ * Memory for all PgAioUringContext instances
+ */
+static size_t
pgaio_uring_context_shmem_size(void)
{
return mul_size(pgaio_uring_procs(), sizeof(PgAioUringContext));
}
+/*
+ * Memory for the combined memory used by io_uring instances. Returns 0 if
+ * that is not supported by kernel/liburing.
+ */
+static size_t
+pgaio_uring_ring_shmem_size(void)
+{
+ size_t sz = 0;
+
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ /*
+ * Memory for rings needs to be allocated to the page boundary,
+ * reserve space. Luckily it does not need to be aligned to hugepage
+ * boundaries, even if huge pages are used.
+ */
+ sz = add_size(sz, sysconf(_SC_PAGESIZE));
+ sz = add_size(sz, mul_size(pgaio_uring_procs(),
+ pgaio_uring_caps.mem_init_size));
+ }
+
+ return sz;
+}
+
static size_t
pgaio_uring_shmem_size(void)
{
- return pgaio_uring_context_shmem_size();
+ size_t sz;
+
+ /*
+ * Kernel and liburing support for various features influences how much
+ * shmem we need, perform the necessary checks.
+ */
+ pgaio_uring_check_capabilities();
+
+ sz = pgaio_uring_context_shmem_size();
+ sz = add_size(sz, pgaio_uring_ring_shmem_size());
+
+ return sz;
}
static void
@@ -128,13 +280,38 @@ pgaio_uring_shmem_init(bool first_time)
{
int TotalProcs = pgaio_uring_procs();
bool found;
+ char *shmem;
+ size_t ring_mem_remain = 0;
+ char *ring_mem_next = 0;
- pgaio_uring_contexts = (PgAioUringContext *)
- ShmemInitStruct("AioUring", pgaio_uring_shmem_size(), &found);
-
+ /*
+ * We allocate memory for all PgAioUringContext instances and, if
+ * supported, the memory required for each of the io_uring instances, in
+ * one ShmemInitStruct().
+ */
+ shmem = ShmemInitStruct("AioUringContext", pgaio_uring_shmem_size(), &found);
if (found)
return;
+ pgaio_uring_contexts = (PgAioUringContext *) shmem;
+ shmem += pgaio_uring_context_shmem_size();
+
+ /* if supported, handle memory alignment / sizing for io_uring memory */
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ ring_mem_remain = pgaio_uring_ring_shmem_size();
+ ring_mem_next = (char *) shmem;
+
+ /* align to page boundary, see also pgaio_uring_ring_shmem_size() */
+ ring_mem_next = (char *) TYPEALIGN(sysconf(_SC_PAGESIZE), ring_mem_next);
+
+ /* account for alignment */
+ ring_mem_remain -= ring_mem_next - shmem;
+ shmem += ring_mem_next - shmem;
+
+ shmem += ring_mem_remain;
+ }
+
for (int contextno = 0; contextno < TotalProcs; contextno++)
{
PgAioUringContext *context = &pgaio_uring_contexts[contextno];
@@ -158,7 +335,28 @@ pgaio_uring_shmem_init(bool first_time)
* be worth using that - also need to evaluate if that causes
* noticeable additional contention?
*/
- ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+
+ /*
+ * If supported (c.f. pgaio_uring_check_capabilities()), create ring
+ * with its data in shared memory. Otherwise fall back io_uring
+ * creating a memory mapping for each ring.
+ */
+#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ struct io_uring_params p = {0};
+
+ ret = io_uring_queue_init_mem(io_max_concurrency, &context->io_uring_ring, &p, ring_mem_next, ring_mem_remain);
+
+ ring_mem_remain -= ret;
+ ring_mem_next += ret;
+ }
+ else
+#endif
+ {
+ ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+ }
+
if (ret < 0)
{
char *hint = NULL;
diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c
index 36be179678d..bf8f77e6ff6 100644
--- a/src/backend/storage/aio/method_worker.c
+++ b/src/backend/storage/aio/method_worker.c
@@ -52,26 +52,26 @@
#define IO_WORKER_WAKEUP_FANOUT 2
-typedef struct AioWorkerSubmissionQueue
+typedef struct PgAioWorkerSubmissionQueue
{
uint32 size;
uint32 mask;
uint32 head;
uint32 tail;
- uint32 ios[FLEXIBLE_ARRAY_MEMBER];
-} AioWorkerSubmissionQueue;
+ uint32 sqes[FLEXIBLE_ARRAY_MEMBER];
+} PgAioWorkerSubmissionQueue;
-typedef struct AioWorkerSlot
+typedef struct PgAioWorkerSlot
{
Latch *latch;
bool in_use;
-} AioWorkerSlot;
+} PgAioWorkerSlot;
-typedef struct AioWorkerControl
+typedef struct PgAioWorkerControl
{
uint64 idle_worker_mask;
- AioWorkerSlot workers[FLEXIBLE_ARRAY_MEMBER];
-} AioWorkerControl;
+ PgAioWorkerSlot workers[FLEXIBLE_ARRAY_MEMBER];
+} PgAioWorkerControl;
static size_t pgaio_worker_shmem_size(void);
@@ -96,8 +96,8 @@ int io_workers = 3;
static int io_worker_queue_size = 64;
static int MyIoWorkerId;
-static AioWorkerSubmissionQueue *io_worker_submission_queue;
-static AioWorkerControl *io_worker_control;
+static PgAioWorkerSubmissionQueue *io_worker_submission_queue;
+static PgAioWorkerControl *io_worker_control;
static size_t
@@ -106,15 +106,15 @@ pgaio_worker_queue_shmem_size(int *queue_size)
/* Round size up to next power of two so we can make a mask. */
*queue_size = pg_nextpower2_32(io_worker_queue_size);
- return offsetof(AioWorkerSubmissionQueue, ios) +
+ return offsetof(PgAioWorkerSubmissionQueue, sqes) +
sizeof(uint32) * *queue_size;
}
static size_t
pgaio_worker_control_shmem_size(void)
{
- return offsetof(AioWorkerControl, workers) +
- sizeof(AioWorkerSlot) * MAX_IO_WORKERS;
+ return offsetof(PgAioWorkerControl, workers) +
+ sizeof(PgAioWorkerSlot) * MAX_IO_WORKERS;
}
static size_t
@@ -162,7 +162,7 @@ pgaio_worker_shmem_init(bool first_time)
}
static int
-pgaio_choose_idle_worker(void)
+pgaio_worker_choose_idle(void)
{
int worker;
@@ -172,6 +172,7 @@ pgaio_choose_idle_worker(void)
/* Find the lowest bit position, and clear it. */
worker = pg_rightmost_one_pos64(io_worker_control->idle_worker_mask);
io_worker_control->idle_worker_mask &= ~(UINT64_C(1) << worker);
+ Assert(io_worker_control->workers[worker].in_use);
return worker;
}
@@ -179,7 +180,7 @@ pgaio_choose_idle_worker(void)
static bool
pgaio_worker_submission_queue_insert(PgAioHandle *ioh)
{
- AioWorkerSubmissionQueue *queue;
+ PgAioWorkerSubmissionQueue *queue;
uint32 new_head;
queue = io_worker_submission_queue;
@@ -191,7 +192,7 @@ pgaio_worker_submission_queue_insert(PgAioHandle *ioh)
return false; /* full */
}
- queue->ios[queue->head] = pgaio_io_get_id(ioh);
+ queue->sqes[queue->head] = pgaio_io_get_id(ioh);
queue->head = new_head;
return true;
@@ -200,14 +201,14 @@ pgaio_worker_submission_queue_insert(PgAioHandle *ioh)
static uint32
pgaio_worker_submission_queue_consume(void)
{
- AioWorkerSubmissionQueue *queue;
+ PgAioWorkerSubmissionQueue *queue;
uint32 result;
queue = io_worker_submission_queue;
if (queue->tail == queue->head)
return UINT32_MAX; /* empty */
- result = queue->ios[queue->tail];
+ result = queue->sqes[queue->tail];
queue->tail = (queue->tail + 1) & (queue->size - 1);
return result;
@@ -240,37 +241,37 @@ pgaio_worker_needs_synchronous_execution(PgAioHandle *ioh)
}
static void
-pgaio_worker_submit_internal(int nios, PgAioHandle *ios[])
+pgaio_worker_submit_internal(int num_staged_ios, PgAioHandle **staged_ios)
{
PgAioHandle *synchronous_ios[PGAIO_SUBMIT_BATCH_SIZE];
int nsync = 0;
Latch *wakeup = NULL;
int worker;
- Assert(nios <= PGAIO_SUBMIT_BATCH_SIZE);
+ Assert(num_staged_ios <= PGAIO_SUBMIT_BATCH_SIZE);
LWLockAcquire(AioWorkerSubmissionQueueLock, LW_EXCLUSIVE);
- for (int i = 0; i < nios; ++i)
+ for (int i = 0; i < num_staged_ios; ++i)
{
- Assert(!pgaio_worker_needs_synchronous_execution(ios[i]));
- if (!pgaio_worker_submission_queue_insert(ios[i]))
+ Assert(!pgaio_worker_needs_synchronous_execution(staged_ios[i]));
+ if (!pgaio_worker_submission_queue_insert(staged_ios[i]))
{
/*
* We'll do it synchronously, but only after we've sent as many as
* we can to workers, to maximize concurrency.
*/
- synchronous_ios[nsync++] = ios[i];
+ synchronous_ios[nsync++] = staged_ios[i];
continue;
}
if (wakeup == NULL)
{
/* Choose an idle worker to wake up if we haven't already. */
- worker = pgaio_choose_idle_worker();
+ worker = pgaio_worker_choose_idle();
if (worker >= 0)
wakeup = io_worker_control->workers[worker].latch;
- pgaio_debug_io(DEBUG4, ios[i],
+ pgaio_debug_io(DEBUG4, staged_ios[i],
"choosing worker %d",
worker);
}
@@ -316,6 +317,7 @@ pgaio_worker_die(int code, Datum arg)
Assert(io_worker_control->workers[MyIoWorkerId].in_use);
Assert(io_worker_control->workers[MyIoWorkerId].latch == MyLatch);
+ io_worker_control->idle_worker_mask &= ~(UINT64_C(1) << MyIoWorkerId);
io_worker_control->workers[MyIoWorkerId].in_use = false;
io_worker_control->workers[MyIoWorkerId].latch = NULL;
LWLockRelease(AioWorkerSubmissionQueueLock);
@@ -488,7 +490,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
IO_WORKER_WAKEUP_FANOUT);
for (int i = 0; i < nwakeups; ++i)
{
- if ((worker = pgaio_choose_idle_worker()) < 0)
+ if ((worker = pgaio_worker_choose_idle()) < 0)
break;
latches[nlatches++] = io_worker_control->workers[worker].latch;
}
@@ -573,6 +575,12 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
}
CHECK_FOR_INTERRUPTS();
+
+ if (ConfigReloadPending)
+ {
+ ConfigReloadPending = false;
+ ProcessConfigFile(PGC_SIGHUP);
+ }
}
error_context_stack = errcallback.previous;
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 667aa0c0c78..6afdd28dba6 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3339,10 +3339,10 @@ UnpinBufferNoOwner(BufferDesc *buf)
* BufferSync -- Write out all dirty buffers in the pool.
*
* This is called at checkpoint time to write out all dirty shared buffers.
- * The checkpoint request flags should be passed in. If CHECKPOINT_IMMEDIATE
- * is set, we disable delays between writes; if CHECKPOINT_IS_SHUTDOWN,
- * CHECKPOINT_END_OF_RECOVERY or CHECKPOINT_FLUSH_ALL is set, we write even
- * unlogged buffers, which are otherwise skipped. The remaining flags
+ * The checkpoint request flags should be passed in. If CHECKPOINT_FAST is
+ * set, we disable delays between writes; if CHECKPOINT_IS_SHUTDOWN,
+ * CHECKPOINT_END_OF_RECOVERY or CHECKPOINT_FLUSH_UNLOGGED is set, we write
+ * even unlogged buffers, which are otherwise skipped. The remaining flags
* currently have no effect here.
*/
static void
@@ -3367,7 +3367,7 @@ BufferSync(int flags)
* recovery, we write all dirty buffers.
*/
if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
- CHECKPOINT_FLUSH_ALL))))
+ CHECKPOINT_FLUSH_UNLOGGED))))
mask |= BM_PERMANENT;
/*
@@ -4550,11 +4550,9 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
if (RelFileLocatorBackendIsTemp(rlocator))
{
if (rlocator.backend == MyProcNumber)
- {
- for (j = 0; j < nforks; j++)
- DropRelationLocalBuffers(rlocator.locator, forkNum[j],
- firstDelBlock[j]);
- }
+ DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
+ firstDelBlock);
+
return;
}
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index ba26627f7b0..3da9c41ee1d 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -660,10 +660,11 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
* See DropRelationBuffers in bufmgr.c for more notes.
*/
void
-DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum,
- BlockNumber firstDelBlock)
+DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum,
+ int nforks, BlockNumber *firstDelBlock)
{
int i;
+ int j;
for (i = 0; i < NLocBuffer; i++)
{
@@ -672,12 +673,18 @@ DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum,
buf_state = pg_atomic_read_u32(&bufHdr->state);
- if ((buf_state & BM_TAG_VALID) &&
- BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
- BufTagGetForkNum(&bufHdr->tag) == forkNum &&
- bufHdr->tag.blockNum >= firstDelBlock)
+ if (!(buf_state & BM_TAG_VALID) ||
+ !BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator))
+ continue;
+
+ for (j = 0; j < nforks; j++)
{
- InvalidateLocalBuffer(bufHdr, true);
+ if (BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
+ bufHdr->tag.blockNum >= firstDelBlock[j])
+ {
+ InvalidateLocalBuffer(bufHdr, true);
+ break;
+ }
}
}
}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 0e8299dd556..a4ec7959f31 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -400,25 +400,22 @@ pg_fsync(int fd)
* portable, even if it runs ok on the current system.
*
* We assert here that a descriptor for a file was opened with write
- * permissions (either O_RDWR or O_WRONLY) and for a directory without
- * write permissions (O_RDONLY).
+ * permissions (i.e., not O_RDONLY) and for a directory without write
+ * permissions (O_RDONLY). Notice that the assertion check is made even
+ * if fsync() is disabled.
*
- * Ignore any fstat errors and let the follow-up fsync() do its work.
- * Doing this sanity check here counts for the case where fsync() is
- * disabled.
+ * If fstat() fails, ignore it and let the follow-up fsync() complain.
*/
if (fstat(fd, &st) == 0)
{
int desc_flags = fcntl(fd, F_GETFL);
- /*
- * O_RDONLY is historically 0, so just make sure that for directories
- * no write flags are used.
- */
+ desc_flags &= O_ACCMODE;
+
if (S_ISDIR(st.st_mode))
- Assert((desc_flags & (O_RDWR | O_WRONLY)) == 0);
+ Assert(desc_flags == O_RDONLY);
else
- Assert((desc_flags & (O_RDWR | O_WRONLY)) != 0);
+ Assert(desc_flags != O_RDONLY);
}
errno = 0;
#endif
diff --git a/src/backend/storage/ipc/dsm_registry.c b/src/backend/storage/ipc/dsm_registry.c
index 1d4fd31ffed..1682cc6d34c 100644
--- a/src/backend/storage/ipc/dsm_registry.c
+++ b/src/backend/storage/ipc/dsm_registry.c
@@ -15,6 +15,20 @@
* current backend. This function guarantees that only one backend
* initializes the segment and that all other backends just attach it.
*
+ * A DSA can be created in or retrieved from the registry by calling
+ * GetNamedDSA(). As with GetNamedDSMSegment(), if a DSA with the provided
+ * name does not yet exist, it is created. Otherwise, GetNamedDSA()
+ * ensures the DSA is attached to the current backend. This function
+ * guarantees that only one backend initializes the DSA and that all other
+ * backends just attach it.
+ *
+ * A dshash table can be created in or retrieved from the registry by
+ * calling GetNamedDSHash(). As with GetNamedDSMSegment(), if a hash
+ * table with the provided name does not yet exist, it is created.
+ * Otherwise, GetNamedDSHash() ensures the hash table is attached to the
+ * current backend. This function guarantees that only one backend
+ * initializes the table and that all other backends just attach it.
+ *
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -26,12 +40,20 @@
#include "postgres.h"
+#include "funcapi.h"
#include "lib/dshash.h"
#include "storage/dsm_registry.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
+#include "utils/builtins.h"
#include "utils/memutils.h"
+#define DSMR_NAME_LEN 128
+
+#define DSMR_DSA_TRANCHE_SUFFIX " DSA"
+#define DSMR_DSA_TRANCHE_SUFFIX_LEN (sizeof(DSMR_DSA_TRANCHE_SUFFIX) - 1)
+#define DSMR_DSA_TRANCHE_NAME_LEN (DSMR_NAME_LEN + DSMR_DSA_TRANCHE_SUFFIX_LEN)
+
typedef struct DSMRegistryCtxStruct
{
dsa_handle dsah;
@@ -40,15 +62,55 @@ typedef struct DSMRegistryCtxStruct
static DSMRegistryCtxStruct *DSMRegistryCtx;
-typedef struct DSMRegistryEntry
+typedef struct NamedDSMState
{
- char name[64];
dsm_handle handle;
size_t size;
+} NamedDSMState;
+
+typedef struct NamedDSAState
+{
+ dsa_handle handle;
+ int tranche;
+ char tranche_name[DSMR_DSA_TRANCHE_NAME_LEN];
+} NamedDSAState;
+
+typedef struct NamedDSHState
+{
+ NamedDSAState dsa;
+ dshash_table_handle handle;
+ int tranche;
+ char tranche_name[DSMR_NAME_LEN];
+} NamedDSHState;
+
+typedef enum DSMREntryType
+{
+ DSMR_ENTRY_TYPE_DSM,
+ DSMR_ENTRY_TYPE_DSA,
+ DSMR_ENTRY_TYPE_DSH,
+} DSMREntryType;
+
+static const char *const DSMREntryTypeNames[] =
+{
+ [DSMR_ENTRY_TYPE_DSM] = "segment",
+ [DSMR_ENTRY_TYPE_DSA] = "area",
+ [DSMR_ENTRY_TYPE_DSH] = "hash",
+};
+
+typedef struct DSMRegistryEntry
+{
+ char name[DSMR_NAME_LEN];
+ DSMREntryType type;
+ union
+ {
+ NamedDSMState dsm;
+ NamedDSAState dsa;
+ NamedDSHState dsh;
+ } data;
} DSMRegistryEntry;
static const dshash_parameters dsh_params = {
- offsetof(DSMRegistryEntry, handle),
+ offsetof(DSMRegistryEntry, type),
sizeof(DSMRegistryEntry),
dshash_strcmp,
dshash_strhash,
@@ -141,7 +203,7 @@ GetNamedDSMSegment(const char *name, size_t size,
ereport(ERROR,
(errmsg("DSM segment name cannot be empty")));
- if (strlen(name) >= offsetof(DSMRegistryEntry, handle))
+ if (strlen(name) >= offsetof(DSMRegistryEntry, type))
ereport(ERROR,
(errmsg("DSM segment name too long")));
@@ -158,32 +220,39 @@ GetNamedDSMSegment(const char *name, size_t size,
entry = dshash_find_or_insert(dsm_registry_table, name, found);
if (!(*found))
{
+ NamedDSMState *state = &entry->data.dsm;
+ dsm_segment *seg;
+
+ entry->type = DSMR_ENTRY_TYPE_DSM;
+
/* Initialize the segment. */
- dsm_segment *seg = dsm_create(size, 0);
+ seg = dsm_create(size, 0);
dsm_pin_segment(seg);
dsm_pin_mapping(seg);
- entry->handle = dsm_segment_handle(seg);
- entry->size = size;
+ state->handle = dsm_segment_handle(seg);
+ state->size = size;
ret = dsm_segment_address(seg);
if (init_callback)
(*init_callback) (ret);
}
- else if (entry->size != size)
- {
+ else if (entry->type != DSMR_ENTRY_TYPE_DSM)
ereport(ERROR,
- (errmsg("requested DSM segment size does not match size of "
- "existing segment")));
- }
+ (errmsg("requested DSM segment does not match type of existing entry")));
+ else if (entry->data.dsm.size != size)
+ ereport(ERROR,
+ (errmsg("requested DSM segment size does not match size of existing segment")));
else
{
- dsm_segment *seg = dsm_find_mapping(entry->handle);
+ NamedDSMState *state = &entry->data.dsm;
+ dsm_segment *seg;
/* If the existing segment is not already attached, attach it now. */
+ seg = dsm_find_mapping(state->handle);
if (seg == NULL)
{
- seg = dsm_attach(entry->handle);
+ seg = dsm_attach(state->handle);
if (seg == NULL)
elog(ERROR, "could not map dynamic shared memory segment");
@@ -198,3 +267,220 @@ GetNamedDSMSegment(const char *name, size_t size,
return ret;
}
+
+/*
+ * Initialize or attach a named DSA.
+ *
+ * This routine returns a pointer to the DSA. A new LWLock tranche ID will be
+ * generated if needed. Note that the lock tranche will be registered with the
+ * provided name. Also note that this should be called at most once for a
+ * given DSA in each backend.
+ */
+dsa_area *
+GetNamedDSA(const char *name, bool *found)
+{
+ DSMRegistryEntry *entry;
+ MemoryContext oldcontext;
+ dsa_area *ret;
+
+ Assert(found);
+
+ if (!name || *name == '\0')
+ ereport(ERROR,
+ (errmsg("DSA name cannot be empty")));
+
+ if (strlen(name) >= offsetof(DSMRegistryEntry, type))
+ ereport(ERROR,
+ (errmsg("DSA name too long")));
+
+ /* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+ /* Connect to the registry. */
+ init_dsm_registry();
+
+ entry = dshash_find_or_insert(dsm_registry_table, name, found);
+ if (!(*found))
+ {
+ NamedDSAState *state = &entry->data.dsa;
+
+ entry->type = DSMR_ENTRY_TYPE_DSA;
+
+ /* Initialize the LWLock tranche for the DSA. */
+ state->tranche = LWLockNewTrancheId();
+ strcpy(state->tranche_name, name);
+ LWLockRegisterTranche(state->tranche, state->tranche_name);
+
+ /* Initialize the DSA. */
+ ret = dsa_create(state->tranche);
+ dsa_pin(ret);
+ dsa_pin_mapping(ret);
+
+ /* Store handle for other backends to use. */
+ state->handle = dsa_get_handle(ret);
+ }
+ else if (entry->type != DSMR_ENTRY_TYPE_DSA)
+ ereport(ERROR,
+ (errmsg("requested DSA does not match type of existing entry")));
+ else
+ {
+ NamedDSAState *state = &entry->data.dsa;
+
+ if (dsa_is_attached(state->handle))
+ ereport(ERROR,
+ (errmsg("requested DSA already attached to current process")));
+
+ /* Initialize existing LWLock tranche for the DSA. */
+ LWLockRegisterTranche(state->tranche, state->tranche_name);
+
+ /* Attach to existing DSA. */
+ ret = dsa_attach(state->handle);
+ dsa_pin_mapping(ret);
+ }
+
+ dshash_release_lock(dsm_registry_table, entry);
+ MemoryContextSwitchTo(oldcontext);
+
+ return ret;
+}
+
+/*
+ * Initialize or attach a named dshash table.
+ *
+ * This routine returns the address of the table. The tranche_id member of
+ * params is ignored; new tranche IDs will be generated if needed. Note that
+ * the DSA lock tranche will be registered with the provided name with " DSA"
+ * appended. The dshash lock tranche will be registered with the provided
+ * name. Also note that this should be called at most once for a given table
+ * in each backend.
+ */
+dshash_table *
+GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found)
+{
+ DSMRegistryEntry *entry;
+ MemoryContext oldcontext;
+ dshash_table *ret;
+
+ Assert(params);
+ Assert(found);
+
+ if (!name || *name == '\0')
+ ereport(ERROR,
+ (errmsg("DSHash name cannot be empty")));
+
+ if (strlen(name) >= offsetof(DSMRegistryEntry, type))
+ ereport(ERROR,
+ (errmsg("DSHash name too long")));
+
+ /* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+ /* Connect to the registry. */
+ init_dsm_registry();
+
+ entry = dshash_find_or_insert(dsm_registry_table, name, found);
+ if (!(*found))
+ {
+ NamedDSAState *dsa_state = &entry->data.dsh.dsa;
+ NamedDSHState *dsh_state = &entry->data.dsh;
+ dshash_parameters params_copy;
+ dsa_area *dsa;
+
+ entry->type = DSMR_ENTRY_TYPE_DSH;
+
+ /* Initialize the LWLock tranche for the DSA. */
+ dsa_state->tranche = LWLockNewTrancheId();
+ sprintf(dsa_state->tranche_name, "%s%s", name, DSMR_DSA_TRANCHE_SUFFIX);
+ LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name);
+
+ /* Initialize the LWLock tranche for the dshash table. */
+ dsh_state->tranche = LWLockNewTrancheId();
+ strcpy(dsh_state->tranche_name, name);
+ LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name);
+
+ /* Initialize the DSA for the hash table. */
+ dsa = dsa_create(dsa_state->tranche);
+ dsa_pin(dsa);
+ dsa_pin_mapping(dsa);
+
+ /* Initialize the dshash table. */
+ memcpy(&params_copy, params, sizeof(dshash_parameters));
+ params_copy.tranche_id = dsh_state->tranche;
+ ret = dshash_create(dsa, &params_copy, NULL);
+
+ /* Store handles for other backends to use. */
+ dsa_state->handle = dsa_get_handle(dsa);
+ dsh_state->handle = dshash_get_hash_table_handle(ret);
+ }
+ else if (entry->type != DSMR_ENTRY_TYPE_DSH)
+ ereport(ERROR,
+ (errmsg("requested DSHash does not match type of existing entry")));
+ else
+ {
+ NamedDSAState *dsa_state = &entry->data.dsh.dsa;
+ NamedDSHState *dsh_state = &entry->data.dsh;
+ dsa_area *dsa;
+
+ /* XXX: Should we verify params matches what table was created with? */
+
+ if (dsa_is_attached(dsa_state->handle))
+ ereport(ERROR,
+ (errmsg("requested DSHash already attached to current process")));
+
+ /* Initialize existing LWLock tranches for the DSA and dshash table. */
+ LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name);
+ LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name);
+
+ /* Attach to existing DSA for the hash table. */
+ dsa = dsa_attach(dsa_state->handle);
+ dsa_pin_mapping(dsa);
+
+ /* Attach to existing dshash table. */
+ ret = dshash_attach(dsa, params, dsh_state->handle, NULL);
+ }
+
+ dshash_release_lock(dsm_registry_table, entry);
+ MemoryContextSwitchTo(oldcontext);
+
+ return ret;
+}
+
+Datum
+pg_get_dsm_registry_allocations(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ DSMRegistryEntry *entry;
+ MemoryContext oldcontext;
+ dshash_seq_status status;
+
+ InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
+
+ /* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+ init_dsm_registry();
+ MemoryContextSwitchTo(oldcontext);
+
+ dshash_seq_init(&status, dsm_registry_table, false);
+ while ((entry = dshash_seq_next(&status)) != NULL)
+ {
+ Datum vals[3];
+ bool nulls[3] = {0};
+
+ vals[0] = CStringGetTextDatum(entry->name);
+ vals[1] = CStringGetTextDatum(DSMREntryTypeNames[entry->type]);
+
+ /*
+ * Since we can't know the size of DSA/dshash entries without first
+ * attaching to them, return NULL for those.
+ */
+ if (entry->type == DSMR_ENTRY_TYPE_DSM)
+ vals[2] = Int64GetDatum(entry->data.dsm.size);
+ else
+ nulls[2] = true;
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, vals, nulls);
+ }
+ dshash_seq_term(&status);
+
+ return (Datum) 0;
+}
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index e5b945a9ee3..2418967def6 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -1622,58 +1622,6 @@ TransactionIdIsInProgress(TransactionId xid)
return false;
}
-/*
- * TransactionIdIsActive -- is xid the top-level XID of an active backend?
- *
- * This differs from TransactionIdIsInProgress in that it ignores prepared
- * transactions, as well as transactions running on the primary if we're in
- * hot standby. Also, we ignore subtransactions since that's not needed
- * for current uses.
- */
-bool
-TransactionIdIsActive(TransactionId xid)
-{
- bool result = false;
- ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
- int i;
-
- /*
- * Don't bother checking a transaction older than RecentXmin; it could not
- * possibly still be running.
- */
- if (TransactionIdPrecedes(xid, RecentXmin))
- return false;
-
- LWLockAcquire(ProcArrayLock, LW_SHARED);
-
- for (i = 0; i < arrayP->numProcs; i++)
- {
- int pgprocno = arrayP->pgprocnos[i];
- PGPROC *proc = &allProcs[pgprocno];
- TransactionId pxid;
-
- /* Fetch xid just once - see GetNewTransactionId */
- pxid = UINT32_ACCESS_ONCE(other_xids[i]);
-
- if (!TransactionIdIsValid(pxid))
- continue;
-
- if (proc->pid == 0)
- continue; /* ignore prepared transactions */
-
- if (TransactionIdEquals(pxid, xid))
- {
- result = true;
- break;
- }
- }
-
- LWLockRelease(ProcArrayLock);
-
- return result;
-}
-
/*
* Determine XID horizons.
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index c9ae3b45b76..ca3656fc76f 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -679,12 +679,10 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
*/
for (i = 0; i < shm_ent_page_count; i++)
{
- volatile uint64 touch pg_attribute_unused();
-
page_ptrs[i] = startptr + (i * os_page_size);
if (firstNumaTouch)
- pg_numa_touch_mem_if_required(touch, page_ptrs[i]);
+ pg_numa_touch_mem_if_required(page_ptrs[i]);
CHECK_FOR_INTERRUPTS();
}
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 7fa8d9247e0..4222bdab078 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -1376,7 +1376,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
if (xlrec.subxid_overflow)
elog(DEBUG2,
- "snapshot of %d running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
+ "snapshot of %d running transactions overflowed (lsn %X/%08X oldest xid %u latest complete %u next xid %u)",
CurrRunningXacts->xcnt,
LSN_FORMAT_ARGS(recptr),
CurrRunningXacts->oldestRunningXid,
@@ -1384,7 +1384,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
CurrRunningXacts->nextXid);
else
elog(DEBUG2,
- "snapshot of %d+%d running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
+ "snapshot of %d+%d running transaction ids (lsn %X/%08X oldest xid %u latest complete %u next xid %u)",
CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt,
LSN_FORMAT_ARGS(recptr),
CurrRunningXacts->oldestRunningXid,
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 2776ceb295b..62f3471448e 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -3539,9 +3539,9 @@ AtPrepare_Locks(void)
* but that probably costs more cycles.
*/
void
-PostPrepare_Locks(TransactionId xid)
+PostPrepare_Locks(FullTransactionId fxid)
{
- PGPROC *newproc = TwoPhaseGetDummyProc(xid, false);
+ PGPROC *newproc = TwoPhaseGetDummyProc(fxid, false);
HASH_SEQ_STATUS status;
LOCALLOCK *locallock;
LOCK *lock;
@@ -4324,11 +4324,11 @@ DumpAllLocks(void)
* and PANIC anyway.
*/
void
-lock_twophase_recover(TransactionId xid, uint16 info,
+lock_twophase_recover(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
- PGPROC *proc = TwoPhaseGetDummyProc(xid, false);
+ PGPROC *proc = TwoPhaseGetDummyProc(fxid, false);
LOCKTAG *locktag;
LOCKMODE lockmode;
LOCKMETHODID lockmethodid;
@@ -4505,7 +4505,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
* starting up into hot standby mode.
*/
void
-lock_twophase_standby_recover(TransactionId xid, uint16 info,
+lock_twophase_standby_recover(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
@@ -4524,7 +4524,7 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info,
if (lockmode == AccessExclusiveLock &&
locktag->locktag_type == LOCKTAG_RELATION)
{
- StandbyAcquireAccessExclusiveLock(xid,
+ StandbyAcquireAccessExclusiveLock(XidFromFullTransactionId(fxid),
locktag->locktag_field1 /* dboid */ ,
locktag->locktag_field2 /* reloid */ );
}
@@ -4537,11 +4537,11 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info,
* Find and release the lock indicated by the 2PC record.
*/
void
-lock_twophase_postcommit(TransactionId xid, uint16 info,
+lock_twophase_postcommit(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
- PGPROC *proc = TwoPhaseGetDummyProc(xid, true);
+ PGPROC *proc = TwoPhaseGetDummyProc(fxid, true);
LOCKTAG *locktag;
LOCKMETHODID lockmethodid;
LockMethod lockMethodTable;
@@ -4563,10 +4563,10 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
* This is actually just the same as the COMMIT case.
*/
void
-lock_twophase_postabort(TransactionId xid, uint16 info,
+lock_twophase_postabort(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
- lock_twophase_postcommit(xid, info, recdata, len);
+ lock_twophase_postcommit(fxid, info, recdata, len);
}
/*
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 46f44bc4511..2d43bf2cc13 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -170,8 +170,8 @@ static const char *const BuiltinTrancheNames[] = {
[LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
[LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
[LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU",
- [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
- [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
+ [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultiXactOffsetSLRU",
+ [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultiXactMemberSLRU",
[LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
[LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
[LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index d82114ffca1..c07fb588355 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -191,7 +191,7 @@
* AtPrepare_PredicateLocks(void);
* PostPrepare_PredicateLocks(TransactionId xid);
* PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit);
- * predicatelock_twophase_recover(TransactionId xid, uint16 info,
+ * predicatelock_twophase_recover(FullTransactionId fxid, uint16 info,
* void *recdata, uint32 len);
*/
@@ -4856,7 +4856,7 @@ AtPrepare_PredicateLocks(void)
* anyway. We only need to clean up our local state.
*/
void
-PostPrepare_PredicateLocks(TransactionId xid)
+PostPrepare_PredicateLocks(FullTransactionId fxid)
{
if (MySerializableXact == InvalidSerializableXact)
return;
@@ -4879,12 +4879,12 @@ PostPrepare_PredicateLocks(TransactionId xid)
* commits or aborts.
*/
void
-PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit)
+PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit)
{
SERIALIZABLEXID *sxid;
SERIALIZABLEXIDTAG sxidtag;
- sxidtag.xid = xid;
+ sxidtag.xid = XidFromFullTransactionId(fxid);
LWLockAcquire(SerializableXactHashLock, LW_SHARED);
sxid = (SERIALIZABLEXID *)
@@ -4906,10 +4906,11 @@ PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit)
* Re-acquire a predicate lock belonging to a transaction that was prepared.
*/
void
-predicatelock_twophase_recover(TransactionId xid, uint16 info,
+predicatelock_twophase_recover(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
TwoPhasePredicateRecord *record;
+ TransactionId xid = XidFromFullTransactionId(fxid);
Assert(len == sizeof(TwoPhasePredicateRecord));
diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c
index a7d1fec981f..ad0af5edc1f 100644
--- a/src/backend/tcop/backend_startup.c
+++ b/src/backend/tcop/backend_startup.c
@@ -881,7 +881,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen)
{
ereport(COMMERROR,
(errcode(ERRCODE_PROTOCOL_VIOLATION),
- errmsg("invalid length of query cancel packet")));
+ errmsg("invalid length of cancel request packet")));
return;
}
len = pktlen - offsetof(CancelRequestPacket, cancelAuthCode);
@@ -889,7 +889,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen)
{
ereport(COMMERROR,
(errcode(ERRCODE_PROTOCOL_VIOLATION),
- errmsg("invalid length of query cancel key")));
+ errmsg("invalid length of cancel key in cancel request packet")));
return;
}
@@ -1077,7 +1077,7 @@ check_log_connections(char **newval, void **extra, GucSource source)
if (!SplitIdentifierString(rawstring, ',', &elemlist))
{
- GUC_check_errdetail("Invalid list syntax in parameter \"log_connections\".");
+ GUC_check_errdetail("Invalid list syntax in parameter \"%s\".", "log_connections");
pfree(rawstring);
list_free(elemlist);
return false;
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 25fe3d58016..4c1faf5575c 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -943,17 +943,7 @@ standard_ProcessUtility(PlannedStmt *pstmt,
break;
case T_CheckPointStmt:
- if (!has_privs_of_role(GetUserId(), ROLE_PG_CHECKPOINT))
- ereport(ERROR,
- (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
- /* translator: %s is name of a SQL command, eg CHECKPOINT */
- errmsg("permission denied to execute %s command",
- "CHECKPOINT"),
- errdetail("Only roles with privileges of the \"%s\" role may execute this command.",
- "pg_checkpoint")));
-
- RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_WAIT |
- (RecoveryInProgress() ? 0 : CHECKPOINT_FORCE));
+ ExecCheckpoint(pstate, (CheckPointStmt *) parsetree);
break;
/*
@@ -1343,7 +1333,7 @@ ProcessUtilitySlow(ParseState *pstate,
*/
switch (stmt->subtype)
{
- case 'T': /* ALTER DOMAIN DEFAULT */
+ case AD_AlterDefault:
/*
* Recursively alter column default for table and,
@@ -1353,30 +1343,30 @@ ProcessUtilitySlow(ParseState *pstate,
AlterDomainDefault(stmt->typeName,
stmt->def);
break;
- case 'N': /* ALTER DOMAIN DROP NOT NULL */
+ case AD_DropNotNull:
address =
AlterDomainNotNull(stmt->typeName,
false);
break;
- case 'O': /* ALTER DOMAIN SET NOT NULL */
+ case AD_SetNotNull:
address =
AlterDomainNotNull(stmt->typeName,
true);
break;
- case 'C': /* ADD CONSTRAINT */
+ case AD_AddConstraint:
address =
AlterDomainAddConstraint(stmt->typeName,
stmt->def,
&secondaryObject);
break;
- case 'X': /* DROP CONSTRAINT */
+ case AD_DropConstraint:
address =
AlterDomainDropConstraint(stmt->typeName,
stmt->name,
stmt->behavior,
stmt->missing_ok);
break;
- case 'V': /* VALIDATE CONSTRAINT */
+ case AD_ValidateConstraint:
address =
AlterDomainValidateConstraint(stmt->typeName,
stmt->name);
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index b77d8c23d36..4801fe90089 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -36,7 +36,7 @@ t_isalpha(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
+ locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isalpha(TOUCHAR(ptr));
@@ -51,7 +51,7 @@ t_isalnum(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
+ locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isalnum(TOUCHAR(ptr));
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 79bcd32a063..e2dd3da3aa3 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -299,7 +299,7 @@ TParserInit(char *str, int len)
*/
if (prs->charmaxlen > 1)
{
- pg_locale_t mylocale = 0; /* TODO */
+ locale_t mylocale = 0; /* TODO */
prs->usewide = true;
if (database_ctype_is_c)
diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c
index 28587e2916b..69df741cbf6 100644
--- a/src/backend/utils/activity/pgstat_relation.c
+++ b/src/backend/utils/activity/pgstat_relation.c
@@ -744,7 +744,7 @@ PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state)
* Load the saved counts into our local pgstats state.
*/
void
-pgstat_twophase_postcommit(TransactionId xid, uint16 info,
+pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
@@ -780,7 +780,7 @@ pgstat_twophase_postcommit(TransactionId xid, uint16 info,
* as aborted.
*/
void
-pgstat_twophase_postabort(TransactionId xid, uint16 info,
+pgstat_twophase_postabort(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len)
{
TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 4a233b63c32..ffeacf2b819 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -23,6 +23,7 @@ OBJS = \
arrayutils.o \
ascii.o \
bool.o \
+ bytea.o \
cash.o \
char.o \
cryptohashfuncs.o \
diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c
index ca3c5ee3df3..1213f9106d5 100644
--- a/src/backend/utils/adt/acl.c
+++ b/src/backend/utils/adt/acl.c
@@ -135,6 +135,22 @@ static void RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue
/*
+ * Test whether an identifier char can be left unquoted in ACLs.
+ *
+ * Formerly, we used isalnum() even on non-ASCII characters, resulting in
+ * unportable behavior. To ensure dump compatibility with old versions,
+ * we now treat high-bit-set characters as always requiring quoting during
+ * putid(), but getid() will always accept them without quotes.
+ */
+static inline bool
+is_safe_acl_char(unsigned char c, bool is_getid)
+{
+ if (IS_HIGHBIT_SET(c))
+ return is_getid;
+ return isalnum(c) || c == '_';
+}
+
+/*
* getid
* Consumes the first alphanumeric string (identifier) found in string
* 's', ignoring any leading white space. If it finds a double quote
@@ -159,21 +175,22 @@ getid(const char *s, char *n, Node *escontext)
while (isspace((unsigned char) *s))
s++;
- /* This code had better match what putid() does, below */
for (;
*s != '\0' &&
- (isalnum((unsigned char) *s) ||
- *s == '_' ||
- *s == '"' ||
- in_quotes);
+ (in_quotes || *s == '"' || is_safe_acl_char(*s, true));
s++)
{
if (*s == '"')
{
+ if (!in_quotes)
+ {
+ in_quotes = true;
+ continue;
+ }
/* safe to look at next char (could be '\0' though) */
if (*(s + 1) != '"')
{
- in_quotes = !in_quotes;
+ in_quotes = false;
continue;
}
/* it's an escaped double quote; skip the escaping char */
@@ -207,10 +224,10 @@ putid(char *p, const char *s)
const char *src;
bool safe = true;
+ /* Detect whether we need to use double quotes */
for (src = s; *src; src++)
{
- /* This test had better match what getid() does, above */
- if (!isalnum((unsigned char) *src) && *src != '_')
+ if (!is_safe_acl_char(*src, false))
{
safe = false;
break;
diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c
new file mode 100644
index 00000000000..2e539c2504e
--- /dev/null
+++ b/src/backend/utils/adt/bytea.c
@@ -0,0 +1,1143 @@
+/*-------------------------------------------------------------------------
+ *
+ * bytea.c
+ * Functions for the bytea type.
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/bytea.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "catalog/pg_collation_d.h"
+#include "catalog/pg_type_d.h"
+#include "common/int.h"
+#include "fmgr.h"
+#include "libpq/pqformat.h"
+#include "port/pg_bitutils.h"
+#include "utils/builtins.h"
+#include "utils/bytea.h"
+#include "utils/fmgrprotos.h"
+#include "utils/memutils.h"
+#include "utils/sortsupport.h"
+#include "utils/varlena.h"
+#include "varatt.h"
+
+/* GUC variable */
+int bytea_output = BYTEA_OUTPUT_HEX;
+
+static bytea *bytea_catenate(bytea *t1, bytea *t2);
+static bytea *bytea_substring(Datum str, int S, int L,
+ bool length_not_specified);
+static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
+
+/*
+ * bytea_catenate
+ * Guts of byteacat(), broken out so it can be used by other functions
+ *
+ * Arguments can be in short-header form, but not compressed or out-of-line
+ */
+static bytea *
+bytea_catenate(bytea *t1, bytea *t2)
+{
+ bytea *result;
+ int len1,
+ len2,
+ len;
+ char *ptr;
+
+ len1 = VARSIZE_ANY_EXHDR(t1);
+ len2 = VARSIZE_ANY_EXHDR(t2);
+
+ /* paranoia ... probably should throw error instead? */
+ if (len1 < 0)
+ len1 = 0;
+ if (len2 < 0)
+ len2 = 0;
+
+ len = len1 + len2 + VARHDRSZ;
+ result = (bytea *) palloc(len);
+
+ /* Set size of result string... */
+ SET_VARSIZE(result, len);
+
+ /* Fill data field of result string... */
+ ptr = VARDATA(result);
+ if (len1 > 0)
+ memcpy(ptr, VARDATA_ANY(t1), len1);
+ if (len2 > 0)
+ memcpy(ptr + len1, VARDATA_ANY(t2), len2);
+
+ return result;
+}
+
+#define PG_STR_GET_BYTEA(str_) \
+ DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
+
+static bytea *
+bytea_substring(Datum str,
+ int S,
+ int L,
+ bool length_not_specified)
+{
+ int32 S1; /* adjusted start position */
+ int32 L1; /* adjusted substring length */
+ int32 E; /* end position */
+
+ /*
+ * The logic here should generally match text_substring().
+ */
+ S1 = Max(S, 1);
+
+ if (length_not_specified)
+ {
+ /*
+ * Not passed a length - DatumGetByteaPSlice() grabs everything to the
+ * end of the string if we pass it a negative value for length.
+ */
+ L1 = -1;
+ }
+ else if (L < 0)
+ {
+ /* SQL99 says to throw an error for E < S, i.e., negative length */
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ L1 = -1; /* silence stupider compilers */
+ }
+ else if (pg_add_s32_overflow(S, L, &E))
+ {
+ /*
+ * L could be large enough for S + L to overflow, in which case the
+ * substring must run to end of string.
+ */
+ L1 = -1;
+ }
+ else
+ {
+ /*
+ * A zero or negative value for the end position can happen if the
+ * start was negative or one. SQL99 says to return a zero-length
+ * string.
+ */
+ if (E < 1)
+ return PG_STR_GET_BYTEA("");
+
+ L1 = E - S1;
+ }
+
+ /*
+ * If the start position is past the end of the string, SQL99 says to
+ * return a zero-length string -- DatumGetByteaPSlice() will do that for
+ * us. We need only convert S1 to zero-based starting position.
+ */
+ return DatumGetByteaPSlice(str, S1 - 1, L1);
+}
+
+static bytea *
+bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
+{
+ bytea *result;
+ bytea *s1;
+ bytea *s2;
+ int sp_pl_sl;
+
+ /*
+ * Check for possible integer-overflow cases. For negative sp, throw a
+ * "substring length" error because that's what should be expected
+ * according to the spec's definition of OVERLAY().
+ */
+ if (sp <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
+ s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
+ result = bytea_catenate(s1, t2);
+ result = bytea_catenate(result, s2);
+
+ return result;
+}
+
+/*****************************************************************************
+ * USER I/O ROUTINES *
+ *****************************************************************************/
+
+#define VAL(CH) ((CH) - '0')
+#define DIG(VAL) ((VAL) + '0')
+
+/*
+ * byteain - converts from printable representation of byte array
+ *
+ * Non-printable characters must be passed as '\nnn' (octal) and are
+ * converted to internal form. '\' must be passed as '\\'.
+ * ereport(ERROR, ...) if bad form.
+ *
+ * BUGS:
+ * The input is scanned twice.
+ * The error checking of input is minimal.
+ */
+Datum
+byteain(PG_FUNCTION_ARGS)
+{
+ char *inputText = PG_GETARG_CSTRING(0);
+ Node *escontext = fcinfo->context;
+ char *tp;
+ char *rp;
+ int bc;
+ bytea *result;
+
+ /* Recognize hex input */
+ if (inputText[0] == '\\' && inputText[1] == 'x')
+ {
+ size_t len = strlen(inputText);
+
+ bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
+ result = palloc(bc);
+ bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
+ escontext);
+ SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
+
+ PG_RETURN_BYTEA_P(result);
+ }
+
+ /* Else, it's the traditional escaped style */
+ for (bc = 0, tp = inputText; *tp != '\0'; bc++)
+ {
+ if (tp[0] != '\\')
+ tp++;
+ else if ((tp[0] == '\\') &&
+ (tp[1] >= '0' && tp[1] <= '3') &&
+ (tp[2] >= '0' && tp[2] <= '7') &&
+ (tp[3] >= '0' && tp[3] <= '7'))
+ tp += 4;
+ else if ((tp[0] == '\\') &&
+ (tp[1] == '\\'))
+ tp += 2;
+ else
+ {
+ /*
+ * one backslash, not followed by another or ### valid octal
+ */
+ ereturn(escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "bytea")));
+ }
+ }
+
+ bc += VARHDRSZ;
+
+ result = (bytea *) palloc(bc);
+ SET_VARSIZE(result, bc);
+
+ tp = inputText;
+ rp = VARDATA(result);
+ while (*tp != '\0')
+ {
+ if (tp[0] != '\\')
+ *rp++ = *tp++;
+ else if ((tp[0] == '\\') &&
+ (tp[1] >= '0' && tp[1] <= '3') &&
+ (tp[2] >= '0' && tp[2] <= '7') &&
+ (tp[3] >= '0' && tp[3] <= '7'))
+ {
+ bc = VAL(tp[1]);
+ bc <<= 3;
+ bc += VAL(tp[2]);
+ bc <<= 3;
+ *rp++ = bc + VAL(tp[3]);
+
+ tp += 4;
+ }
+ else if ((tp[0] == '\\') &&
+ (tp[1] == '\\'))
+ {
+ *rp++ = '\\';
+ tp += 2;
+ }
+ else
+ {
+ /*
+ * We should never get here. The first pass should not allow it.
+ */
+ ereturn(escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "bytea")));
+ }
+ }
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * byteaout - converts to printable representation of byte array
+ *
+ * In the traditional escaped format, non-printable characters are
+ * printed as '\nnn' (octal) and '\' as '\\'.
+ */
+Datum
+byteaout(PG_FUNCTION_ARGS)
+{
+ bytea *vlena = PG_GETARG_BYTEA_PP(0);
+ char *result;
+ char *rp;
+
+ if (bytea_output == BYTEA_OUTPUT_HEX)
+ {
+ /* Print hex format */
+ rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
+ *rp++ = '\\';
+ *rp++ = 'x';
+ rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
+ }
+ else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
+ {
+ /* Print traditional escaped format */
+ char *vp;
+ uint64 len;
+ int i;
+
+ len = 1; /* empty string has 1 char */
+ vp = VARDATA_ANY(vlena);
+ for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+ {
+ if (*vp == '\\')
+ len += 2;
+ else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+ len += 4;
+ else
+ len++;
+ }
+
+ /*
+ * In principle len can't overflow uint32 if the input fit in 1GB, but
+ * for safety let's check rather than relying on palloc's internal
+ * check.
+ */
+ if (len > MaxAllocSize)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg_internal("result of bytea output conversion is too large")));
+ rp = result = (char *) palloc(len);
+
+ vp = VARDATA_ANY(vlena);
+ for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+ {
+ if (*vp == '\\')
+ {
+ *rp++ = '\\';
+ *rp++ = '\\';
+ }
+ else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+ {
+ int val; /* holds unprintable chars */
+
+ val = *vp;
+ rp[0] = '\\';
+ rp[3] = DIG(val & 07);
+ val >>= 3;
+ rp[2] = DIG(val & 07);
+ val >>= 3;
+ rp[1] = DIG(val & 03);
+ rp += 4;
+ }
+ else
+ *rp++ = *vp;
+ }
+ }
+ else
+ {
+ elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
+ bytea_output);
+ rp = result = NULL; /* keep compiler quiet */
+ }
+ *rp = '\0';
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * bytearecv - converts external binary format to bytea
+ */
+Datum
+bytearecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ bytea *result;
+ int nbytes;
+
+ nbytes = buf->len - buf->cursor;
+ result = (bytea *) palloc(nbytes + VARHDRSZ);
+ SET_VARSIZE(result, nbytes + VARHDRSZ);
+ pq_copymsgbytes(buf, VARDATA(result), nbytes);
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * byteasend - converts bytea to binary format
+ *
+ * This is a special case: just copy the input...
+ */
+Datum
+byteasend(PG_FUNCTION_ARGS)
+{
+ bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
+
+ PG_RETURN_BYTEA_P(vlena);
+}
+
+Datum
+bytea_string_agg_transfn(PG_FUNCTION_ARGS)
+{
+ StringInfo state;
+
+ state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+ /* Append the value unless null, preceding it with the delimiter. */
+ if (!PG_ARGISNULL(1))
+ {
+ bytea *value = PG_GETARG_BYTEA_PP(1);
+ bool isfirst = false;
+
+ /*
+ * You might think we can just throw away the first delimiter, however
+ * we must keep it as we may be a parallel worker doing partial
+ * aggregation building a state to send to the main process. We need
+ * to keep the delimiter of every aggregation so that the combine
+ * function can properly join up the strings of two separately
+ * partially aggregated results. The first delimiter is only stripped
+ * off in the final function. To know how much to strip off the front
+ * of the string, we store the length of the first delimiter in the
+ * StringInfo's cursor field, which we don't otherwise need here.
+ */
+ if (state == NULL)
+ {
+ MemoryContext aggcontext;
+ MemoryContext oldcontext;
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context");
+ }
+
+ /*
+ * Create state in aggregate context. It'll stay there across
+ * subsequent calls.
+ */
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+ state = makeStringInfo();
+ MemoryContextSwitchTo(oldcontext);
+
+ isfirst = true;
+ }
+
+ if (!PG_ARGISNULL(2))
+ {
+ bytea *delim = PG_GETARG_BYTEA_PP(2);
+
+ appendBinaryStringInfo(state, VARDATA_ANY(delim),
+ VARSIZE_ANY_EXHDR(delim));
+ if (isfirst)
+ state->cursor = VARSIZE_ANY_EXHDR(delim);
+ }
+
+ appendBinaryStringInfo(state, VARDATA_ANY(value),
+ VARSIZE_ANY_EXHDR(value));
+ }
+
+ /*
+ * The transition type for string_agg() is declared to be "internal",
+ * which is a pass-by-value type the same size as a pointer.
+ */
+ if (state)
+ PG_RETURN_POINTER(state);
+ PG_RETURN_NULL();
+}
+
+Datum
+bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ StringInfo state;
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+ if (state != NULL)
+ {
+ /* As per comment in transfn, strip data before the cursor position */
+ bytea *result;
+ int strippedlen = state->len - state->cursor;
+
+ result = (bytea *) palloc(strippedlen + VARHDRSZ);
+ SET_VARSIZE(result, strippedlen + VARHDRSZ);
+ memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
+ PG_RETURN_BYTEA_P(result);
+ }
+ else
+ PG_RETURN_NULL();
+}
+
+/*-------------------------------------------------------------
+ * byteaoctetlen
+ *
+ * get the number of bytes contained in an instance of type 'bytea'
+ *-------------------------------------------------------------
+ */
+Datum
+byteaoctetlen(PG_FUNCTION_ARGS)
+{
+ Datum str = PG_GETARG_DATUM(0);
+
+ /* We need not detoast the input at all */
+ PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
+}
+
+/*
+ * byteacat -
+ * takes two bytea* and returns a bytea* that is the concatenation of
+ * the two.
+ *
+ * Cloned from textcat and modified as required.
+ */
+Datum
+byteacat(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+
+ PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
+}
+
+/*
+ * byteaoverlay
+ * Replace specified substring of first string with second
+ *
+ * The SQL standard defines OVERLAY() in terms of substring and concatenation.
+ * This code is a direct implementation of what the standard says.
+ */
+Datum
+byteaoverlay(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl = PG_GETARG_INT32(3); /* substring length */
+
+ PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+
+Datum
+byteaoverlay_no_len(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl;
+
+ sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
+ PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+
+/*
+ * bytea_substr()
+ * Return a substring starting at the specified position.
+ * Cloned from text_substr and modified as required.
+ *
+ * Input:
+ * - string
+ * - starting position (is one-based)
+ * - string length (optional)
+ *
+ * If the starting position is zero or less, then return from the start of the string
+ * adjusting the length to be consistent with the "negative start" per SQL.
+ * If the length is less than zero, an ERROR is thrown. If no third argument
+ * (length) is provided, the length to the end of the string is assumed.
+ */
+Datum
+bytea_substr(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+ PG_GETARG_INT32(1),
+ PG_GETARG_INT32(2),
+ false));
+}
+
+/*
+ * bytea_substr_no_len -
+ * Wrapper to avoid opr_sanity failure due to
+ * one function accepting a different number of args.
+ */
+Datum
+bytea_substr_no_len(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+ PG_GETARG_INT32(1),
+ -1,
+ true));
+}
+
+/*
+ * bit_count
+ */
+Datum
+bytea_bit_count(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+
+ PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
+}
+
+/*
+ * byteapos -
+ * Return the position of the specified substring.
+ * Implements the SQL POSITION() function.
+ * Cloned from textpos and modified as required.
+ */
+Datum
+byteapos(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+ int pos;
+ int px,
+ p;
+ int len1,
+ len2;
+ char *p1,
+ *p2;
+
+ len1 = VARSIZE_ANY_EXHDR(t1);
+ len2 = VARSIZE_ANY_EXHDR(t2);
+
+ if (len2 <= 0)
+ PG_RETURN_INT32(1); /* result for empty pattern */
+
+ p1 = VARDATA_ANY(t1);
+ p2 = VARDATA_ANY(t2);
+
+ pos = 0;
+ px = (len1 - len2);
+ for (p = 0; p <= px; p++)
+ {
+ if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
+ {
+ pos = p + 1;
+ break;
+ };
+ p1++;
+ };
+
+ PG_RETURN_INT32(pos);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetByte
+ *
+ * this routine treats "bytea" as an array of bytes.
+ * It returns the Nth byte (a number between 0 and 255).
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetByte(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int32 n = PG_GETARG_INT32(1);
+ int len;
+ int byte;
+
+ len = VARSIZE_ANY_EXHDR(v);
+
+ if (n < 0 || n >= len)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %d out of valid range, 0..%d",
+ n, len - 1)));
+
+ byte = ((unsigned char *) VARDATA_ANY(v))[n];
+
+ PG_RETURN_INT32(byte);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetBit
+ *
+ * This routine treats a "bytea" type like an array of bits.
+ * It returns the value of the Nth bit (0 or 1).
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetBit(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int64 n = PG_GETARG_INT64(1);
+ int byteNo,
+ bitNo;
+ int len;
+ int byte;
+
+ len = VARSIZE_ANY_EXHDR(v);
+
+ if (n < 0 || n >= (int64) len * 8)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
+ n, (int64) len * 8 - 1)));
+
+ /* n/8 is now known < len, so safe to cast to int */
+ byteNo = (int) (n / 8);
+ bitNo = (int) (n % 8);
+
+ byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
+
+ if (byte & (1 << bitNo))
+ PG_RETURN_INT32(1);
+ else
+ PG_RETURN_INT32(0);
+}
+
+/*-------------------------------------------------------------
+ * byteaSetByte
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth byte set to the given value.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetByte(PG_FUNCTION_ARGS)
+{
+ bytea *res = PG_GETARG_BYTEA_P_COPY(0);
+ int32 n = PG_GETARG_INT32(1);
+ int32 newByte = PG_GETARG_INT32(2);
+ int len;
+
+ len = VARSIZE(res) - VARHDRSZ;
+
+ if (n < 0 || n >= len)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %d out of valid range, 0..%d",
+ n, len - 1)));
+
+ /*
+ * Now set the byte.
+ */
+ ((unsigned char *) VARDATA(res))[n] = newByte;
+
+ PG_RETURN_BYTEA_P(res);
+}
+
+/*-------------------------------------------------------------
+ * byteaSetBit
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth bit set to the given value.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetBit(PG_FUNCTION_ARGS)
+{
+ bytea *res = PG_GETARG_BYTEA_P_COPY(0);
+ int64 n = PG_GETARG_INT64(1);
+ int32 newBit = PG_GETARG_INT32(2);
+ int len;
+ int oldByte,
+ newByte;
+ int byteNo,
+ bitNo;
+
+ len = VARSIZE(res) - VARHDRSZ;
+
+ if (n < 0 || n >= (int64) len * 8)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
+ n, (int64) len * 8 - 1)));
+
+ /* n/8 is now known < len, so safe to cast to int */
+ byteNo = (int) (n / 8);
+ bitNo = (int) (n % 8);
+
+ /*
+ * sanity check!
+ */
+ if (newBit != 0 && newBit != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("new bit must be 0 or 1")));
+
+ /*
+ * Update the byte.
+ */
+ oldByte = ((unsigned char *) VARDATA(res))[byteNo];
+
+ if (newBit == 0)
+ newByte = oldByte & (~(1 << bitNo));
+ else
+ newByte = oldByte | (1 << bitNo);
+
+ ((unsigned char *) VARDATA(res))[byteNo] = newByte;
+
+ PG_RETURN_BYTEA_P(res);
+}
+
+/*
+ * Return reversed bytea
+ */
+Datum
+bytea_reverse(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ const char *p = VARDATA_ANY(v);
+ int len = VARSIZE_ANY_EXHDR(v);
+ const char *endp = p + len;
+ bytea *result = palloc(len + VARHDRSZ);
+ char *dst = (char *) VARDATA(result) + len;
+
+ SET_VARSIZE(result, len + VARHDRSZ);
+
+ while (p < endp)
+ *(--dst) = *p++;
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+
+/*****************************************************************************
+ * Comparison Functions used for bytea
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums. Most places don't
+ * need to be so careful.
+ *****************************************************************************/
+
+Datum
+byteaeq(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ /*
+ * We can use a fast path for unequal lengths, which might save us from
+ * having to detoast one or both values.
+ */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = false;
+ else
+ {
+ bytea *barg1 = DatumGetByteaPP(arg1);
+ bytea *barg2 = DatumGetByteaPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
+ len1 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(barg1, 0);
+ PG_FREE_IF_COPY(barg2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+byteane(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ /*
+ * We can use a fast path for unequal lengths, which might save us from
+ * having to detoast one or both values.
+ */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = true;
+ else
+ {
+ bytea *barg1 = DatumGetByteaPP(arg1);
+ bytea *barg2 = DatumGetByteaPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
+ len1 - VARHDRSZ) != 0);
+
+ PG_FREE_IF_COPY(barg1, 0);
+ PG_FREE_IF_COPY(barg2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bytealt(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
+}
+
+Datum
+byteale(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
+}
+
+Datum
+byteagt(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
+}
+
+Datum
+byteage(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
+}
+
+Datum
+byteacmp(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ if ((cmp == 0) && (len1 != len2))
+ cmp = (len1 < len2) ? -1 : 1;
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(cmp);
+}
+
+Datum
+bytea_larger(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_smaller(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport, forcing "C" collation */
+ varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
+/* Cast bytea -> int2 */
+Datum
+bytea_int2(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int len = VARSIZE_ANY_EXHDR(v);
+ uint16 result;
+
+ /* Check that the byte array is not too long */
+ if (len > sizeof(result))
+ ereport(ERROR,
+ errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range"));
+
+ /* Convert it to an integer; most significant bytes come first */
+ result = 0;
+ for (int i = 0; i < len; i++)
+ {
+ result <<= BITS_PER_BYTE;
+ result |= ((unsigned char *) VARDATA_ANY(v))[i];
+ }
+
+ PG_RETURN_INT16(result);
+}
+
+/* Cast bytea -> int4 */
+Datum
+bytea_int4(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int len = VARSIZE_ANY_EXHDR(v);
+ uint32 result;
+
+ /* Check that the byte array is not too long */
+ if (len > sizeof(result))
+ ereport(ERROR,
+ errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range"));
+
+ /* Convert it to an integer; most significant bytes come first */
+ result = 0;
+ for (int i = 0; i < len; i++)
+ {
+ result <<= BITS_PER_BYTE;
+ result |= ((unsigned char *) VARDATA_ANY(v))[i];
+ }
+
+ PG_RETURN_INT32(result);
+}
+
+/* Cast bytea -> int8 */
+Datum
+bytea_int8(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int len = VARSIZE_ANY_EXHDR(v);
+ uint64 result;
+
+ /* Check that the byte array is not too long */
+ if (len > sizeof(result))
+ ereport(ERROR,
+ errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range"));
+
+ /* Convert it to an integer; most significant bytes come first */
+ result = 0;
+ for (int i = 0; i < len; i++)
+ {
+ result <<= BITS_PER_BYTE;
+ result |= ((unsigned char *) VARDATA_ANY(v))[i];
+ }
+
+ PG_RETURN_INT64(result);
+}
+
+/* Cast int2 -> bytea; can just use int2send() */
+Datum
+int2_bytea(PG_FUNCTION_ARGS)
+{
+ return int2send(fcinfo);
+}
+
+/* Cast int4 -> bytea; can just use int4send() */
+Datum
+int4_bytea(PG_FUNCTION_ARGS)
+{
+ return int4send(fcinfo);
+}
+
+/* Cast int8 -> bytea; can just use int8send() */
+Datum
+int8_bytea(PG_FUNCTION_ARGS)
+{
+ return int8send(fcinfo);
+}
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 4227ab1a72b..344f58b92f7 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -1363,10 +1363,35 @@ timestamp_date(PG_FUNCTION_ARGS)
{
Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
DateADT result;
+
+ result = timestamp2date_opt_overflow(timestamp, NULL);
+ PG_RETURN_DATEADT(result);
+}
+
+/*
+ * Convert timestamp to date.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamp is finite but out of the valid range for date, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate date infinity.
+ *
+ * Note: given the ranges of the types, overflow is only possible at
+ * the minimum end of the range, but we don't assume that in this code.
+ */
+DateADT
+timestamp2date_opt_overflow(Timestamp timestamp, int *overflow)
+{
+ DateADT result;
struct pg_tm tt,
*tm = &tt;
fsec_t fsec;
+ if (overflow)
+ *overflow = 0;
+
if (TIMESTAMP_IS_NOBEGIN(timestamp))
DATE_NOBEGIN(result);
else if (TIMESTAMP_IS_NOEND(timestamp))
@@ -1374,14 +1399,30 @@ timestamp_date(PG_FUNCTION_ARGS)
else
{
if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ {
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ DATE_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1; /* not actually reachable */
+ DATE_NOEND(result);
+ }
+ return result;
+ }
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
+ }
result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
}
- PG_RETURN_DATEADT(result);
+ return result;
}
@@ -1408,11 +1449,36 @@ timestamptz_date(PG_FUNCTION_ARGS)
{
TimestampTz timestamp = PG_GETARG_TIMESTAMP(0);
DateADT result;
+
+ result = timestamptz2date_opt_overflow(timestamp, NULL);
+ PG_RETURN_DATEADT(result);
+}
+
+/*
+ * Convert timestamptz to date.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamptz is finite but out of the valid range for date, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate date infinity.
+ *
+ * Note: given the ranges of the types, overflow is only possible at
+ * the minimum end of the range, but we don't assume that in this code.
+ */
+DateADT
+timestamptz2date_opt_overflow(TimestampTz timestamp, int *overflow)
+{
+ DateADT result;
struct pg_tm tt,
*tm = &tt;
fsec_t fsec;
int tz;
+ if (overflow)
+ *overflow = 0;
+
if (TIMESTAMP_IS_NOBEGIN(timestamp))
DATE_NOBEGIN(result);
else if (TIMESTAMP_IS_NOEND(timestamp))
@@ -1420,14 +1486,30 @@ timestamptz_date(PG_FUNCTION_ARGS)
else
{
if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ {
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ DATE_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1; /* not actually reachable */
+ DATE_NOEND(result);
+ }
+ return result;
+ }
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
+ }
result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
}
- PG_RETURN_DATEADT(result);
+ return result;
}
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index 6d20ae07ae7..7b97d2be6ca 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4065,10 +4065,11 @@ float84ge(PG_FUNCTION_ARGS)
* in the histogram. width_bucket() returns an integer indicating the
* bucket number that 'operand' belongs to in an equiwidth histogram
* with the specified characteristics. An operand smaller than the
- * lower bound is assigned to bucket 0. An operand greater than the
- * upper bound is assigned to an additional bucket (with number
- * count+1). We don't allow "NaN" for any of the float8 inputs, and we
- * don't allow either of the histogram bounds to be +/- infinity.
+ * lower bound is assigned to bucket 0. An operand greater than or equal
+ * to the upper bound is assigned to an additional bucket (with number
+ * count+1). We don't allow the histogram bounds to be NaN or +/- infinity,
+ * but we do allow those values for the operand (taking NaN to be larger
+ * than any other value, as we do in comparisons).
*/
Datum
width_bucket_float8(PG_FUNCTION_ARGS)
@@ -4084,12 +4085,11 @@ width_bucket_float8(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("count must be greater than zero")));
- if (isnan(operand) || isnan(bound1) || isnan(bound2))
+ if (isnan(bound1) || isnan(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
- errmsg("operand, lower bound, and upper bound cannot be NaN")));
+ errmsg("lower and upper bounds cannot be NaN")));
- /* Note that we allow "operand" to be infinite */
if (isinf(bound1) || isinf(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
@@ -4097,15 +4097,15 @@ width_bucket_float8(PG_FUNCTION_ARGS)
if (bound1 < bound2)
{
- if (operand < bound1)
- result = 0;
- else if (operand >= bound2)
+ if (isnan(operand) || operand >= bound2)
{
if (pg_add_s32_overflow(count, 1, &result))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("integer out of range")));
}
+ else if (operand < bound1)
+ result = 0;
else
{
if (!isinf(bound2 - bound1))
@@ -4135,7 +4135,7 @@ width_bucket_float8(PG_FUNCTION_ARGS)
}
else if (bound1 > bound2)
{
- if (operand > bound1)
+ if (isnan(operand) || operand > bound1)
result = 0;
else if (operand <= bound2)
{
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 5bd1e01f7e4..1d05481181d 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -3590,14 +3590,15 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
if (matched < 2)
ereturn(escontext,,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
- errmsg("invalid input string for \"Y,YYY\"")));
+ errmsg("invalid value \"%s\" for \"%s\"",
+ s, "Y,YYY")));
/* years += (millennia * 1000); */
if (pg_mul_s32_overflow(millennia, 1000, &millennia) ||
pg_add_s32_overflow(years, millennia, &years))
ereturn(escontext,,
(errcode(ERRCODE_DATETIME_FIELD_OVERFLOW),
- errmsg("value for \"Y,YYY\" in source string is out of range")));
+ errmsg("value for \"%s\" in source string is out of range", "Y,YYY")));
if (!from_char_set_int(&out->year, years, n, escontext))
return;
diff --git a/src/backend/utils/adt/inet_net_pton.c b/src/backend/utils/adt/inet_net_pton.c
index ef2236d9f04..3b0db2a3799 100644
--- a/src/backend/utils/adt/inet_net_pton.c
+++ b/src/backend/utils/adt/inet_net_pton.c
@@ -115,8 +115,7 @@ inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size)
src++; /* skip x or X. */
while ((ch = *src++) != '\0' && isxdigit((unsigned char) ch))
{
- if (isupper((unsigned char) ch))
- ch = tolower((unsigned char) ch);
+ ch = pg_ascii_tolower((unsigned char) ch);
n = strchr(xdigits, ch) - xdigits;
assert(n >= 0 && n <= 15);
if (dirty == 0)
diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c
index c8b6c15e059..82b807d067a 100644
--- a/src/backend/utils/adt/jsonb_util.c
+++ b/src/backend/utils/adt/jsonb_util.c
@@ -277,22 +277,16 @@ compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
else
{
/*
- * It's safe to assume that the types differed, and that the va
- * and vb values passed were set.
- *
- * If the two values were of the same container type, then there'd
- * have been a chance to observe the variation in the number of
- * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
- * either two heterogeneously-typed containers, or a container and
- * some scalar type.
- *
- * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
- * cases here, because we would have seen the corresponding
- * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
- * concluded that they don't match.
+ * It's not possible for one iterator to report end of array or
+ * object while the other one reports something else, because we
+ * would have detected a length mismatch when we processed the
+ * container-start tokens above. Likewise we can't see WJB_DONE
+ * from one but not the other. So we have two different-type
+ * containers, or a container and some scalar type, or two
+ * different scalar types. Sort on the basis of the type code.
*/
- Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
- Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
+ Assert(ra != WJB_DONE && ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
+ Assert(rb != WJB_DONE && rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
Assert(va.type != vb.type);
Assert(va.type != jbvBinary);
@@ -852,15 +846,20 @@ JsonbIteratorInit(JsonbContainer *container)
* It is our job to expand the jbvBinary representation without bothering them
* with it. However, clients should not take it upon themselves to touch array
* or Object element/pair buffers, since their element/pair pointers are
- * garbage. Also, *val will not be set when returning WJB_END_ARRAY or
- * WJB_END_OBJECT, on the assumption that it's only useful to access values
- * when recursing in.
+ * garbage.
+ *
+ * *val is not meaningful when the result is WJB_DONE, WJB_END_ARRAY or
+ * WJB_END_OBJECT. However, we set val->type = jbvNull in those cases,
+ * so that callers may assume that val->type is always well-defined.
*/
JsonbIteratorToken
JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
{
if (*it == NULL)
+ {
+ val->type = jbvNull;
return WJB_DONE;
+ }
/*
* When stepping into a nested container, we jump back here to start
@@ -898,6 +897,7 @@ recurse:
* nesting).
*/
*it = freeAndGetParent(*it);
+ val->type = jbvNull;
return WJB_END_ARRAY;
}
@@ -951,6 +951,7 @@ recurse:
* of nesting).
*/
*it = freeAndGetParent(*it);
+ val->type = jbvNull;
return WJB_END_OBJECT;
}
else
@@ -995,8 +996,10 @@ recurse:
return WJB_VALUE;
}
- elog(ERROR, "invalid iterator state");
- return -1;
+ elog(ERROR, "invalid jsonb iterator state");
+ /* satisfy compilers that don't know that elog(ERROR) doesn't return */
+ val->type = jbvNull;
+ return WJB_DONE;
}
/*
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 7f4cf614585..4216ac17f43 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
else if (locale->is_default)
return pg_tolower(c);
else
- return tolower_l(c, locale->info.lt);
+ return char_tolower(c, locale);
}
@@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
* way.
*/
- if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
+ if (locale->ctype_is_c ||
+ (char_tolower_enabled(locale) &&
+ pg_database_encoding_max_length() == 1))
+ {
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ return SB_IMatchText(s, slen, p, plen, locale);
+ }
+ else
{
pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
PointerGetDatum(pat)));
@@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
else
return MB_MatchText(s, slen, p, plen, 0);
}
- else
- {
- p = VARDATA_ANY(pat);
- plen = VARSIZE_ANY_EXHDR(pat);
- s = VARDATA_ANY(str);
- slen = VARSIZE_ANY_EXHDR(str);
- return SB_IMatchText(s, slen, p, plen, locale);
- }
}
/*
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 8fdc677371f..999f23f86d5 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
{
if (locale->ctype_is_c)
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
- else if (is_multibyte && IS_HIGHBIT_SET(c))
- return true;
- else if (locale->provider != COLLPROVIDER_LIBC)
- return IS_HIGHBIT_SET(c) ||
- (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
else
- return isalpha_l((unsigned char) c, locale->info.lt);
+ return char_is_cased(c, locale);
}
diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build
index 244f48f4fd7..ed9bbd7b926 100644
--- a/src/backend/utils/adt/meson.build
+++ b/src/backend/utils/adt/meson.build
@@ -12,6 +12,7 @@ backend_sources += files(
'arrayutils.c',
'ascii.c',
'bool.c',
+ 'bytea.c',
'cash.c',
'char.c',
'cryptohashfuncs.c',
diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c
index f03fcc1147b..9fd211b2d45 100644
--- a/src/backend/utils/adt/network.c
+++ b/src/backend/utils/adt/network.c
@@ -12,8 +12,6 @@
#include <netinet/in.h>
#include <arpa/inet.h>
-#include "access/stratnum.h"
-#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
#include "common/hashfn.h"
#include "common/ip.h"
diff --git a/src/backend/utils/adt/network_spgist.c b/src/backend/utils/adt/network_spgist.c
index a84747d9275..602276a35c3 100644
--- a/src/backend/utils/adt/network_spgist.c
+++ b/src/backend/utils/adt/network_spgist.c
@@ -37,7 +37,6 @@
#include "catalog/pg_type.h"
#include "utils/fmgrprotos.h"
#include "utils/inet.h"
-#include "varatt.h"
static int inet_spg_node_number(const inet *val, int commonbits);
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 40dcbc7b671..c9233565d57 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -1958,9 +1958,11 @@ generate_series_numeric_support(PG_FUNCTION_ARGS)
* in the histogram. width_bucket() returns an integer indicating the
* bucket number that 'operand' belongs to in an equiwidth histogram
* with the specified characteristics. An operand smaller than the
- * lower bound is assigned to bucket 0. An operand greater than the
- * upper bound is assigned to an additional bucket (with number
- * count+1). We don't allow "NaN" for any of the numeric arguments.
+ * lower bound is assigned to bucket 0. An operand greater than or equal
+ * to the upper bound is assigned to an additional bucket (with number
+ * count+1). We don't allow the histogram bounds to be NaN or +/- infinity,
+ * but we do allow those values for the operand (taking NaN to be larger
+ * than any other value, as we do in comparisons).
*/
Datum
width_bucket_numeric(PG_FUNCTION_ARGS)
@@ -1978,17 +1980,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("count must be greater than zero")));
- if (NUMERIC_IS_SPECIAL(operand) ||
- NUMERIC_IS_SPECIAL(bound1) ||
- NUMERIC_IS_SPECIAL(bound2))
+ if (NUMERIC_IS_SPECIAL(bound1) || NUMERIC_IS_SPECIAL(bound2))
{
- if (NUMERIC_IS_NAN(operand) ||
- NUMERIC_IS_NAN(bound1) ||
- NUMERIC_IS_NAN(bound2))
+ if (NUMERIC_IS_NAN(bound1) || NUMERIC_IS_NAN(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
- errmsg("operand, lower bound, and upper bound cannot be NaN")));
- /* We allow "operand" to be infinite; cmp_numerics will cope */
+ errmsg("lower and upper bounds cannot be NaN")));
+
if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index f5e31c433a0..97c2ac1faf9 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -41,7 +41,6 @@
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
-#include "utils/formatting.h"
#include "utils/guc_hooks.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -80,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
extern char *get_collation_actual_version_libc(const char *collcollate);
-extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
/* GUC settings */
char *locale_messages;
char *locale_monetary;
@@ -1093,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context)
Assert((result->collate_is_c && result->collate == NULL) ||
(!result->collate_is_c && result->collate != NULL));
+ Assert((result->ctype_is_c && result->ctype == NULL) ||
+ (!result->ctype_is_c && result->ctype != NULL));
+
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
&isnull);
if (!isnull)
@@ -1257,77 +1234,31 @@ size_t
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strlower_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strlower_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strlower_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
size_t
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strtitle_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strtitle_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strtitle_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
}
size_t
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strupper_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strupper_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strupper_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
}
size_t
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strfold_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strfold_icu(dst, dstsize, src, srclen, locale);
-#endif
- /* for libc, just use strlower */
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strlower_libc(dst, dstsize, src, srclen, locale);
+ if (locale->ctype->strfold)
+ return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
/*
@@ -1465,6 +1396,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
}
/*
+ * char_is_cased()
+ *
+ * Fuzzy test of whether the given char is case-varying or not. The argument
+ * is a single byte, so in a multibyte encoding, just assume any non-ASCII
+ * char is case-varying.
+ */
+bool
+char_is_cased(char ch, pg_locale_t locale)
+{
+ return locale->ctype->char_is_cased(ch, locale);
+}
+
+/*
+ * char_tolower_enabled()
+ *
+ * Does the provider support char_tolower()?
+ */
+bool
+char_tolower_enabled(pg_locale_t locale)
+{
+ return (locale->ctype->char_tolower != NULL);
+}
+
+/*
+ * char_tolower()
+ *
+ * Convert char (single-byte encoding) to lowercase.
+ */
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
+{
+ return locale->ctype->char_tolower(ch, locale);
+}
+
+/*
* Return required encoding ID for the given locale, or -1 if any encoding is
* valid for the locale.
*/
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index f51768830cd..0c9fbdb40f2 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -18,22 +18,12 @@
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
-#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/syscache.h"
extern pg_locale_t create_pg_locale_builtin(Oid collid,
MemoryContext context);
extern char *get_collation_actual_version_builtin(const char *collcollate);
-extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
struct WordBoundaryState
{
@@ -77,7 +67,7 @@ initcap_wbnext(void *state)
return wbstate->len;
}
-size_t
+static size_t
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -85,7 +75,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
locale->info.builtin.casemap_full);
}
-size_t
+static size_t
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -103,7 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
initcap_wbnext, &wbstate);
}
-size_t
+static size_t
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -111,7 +101,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
locale->info.builtin.casemap_full);
}
-size_t
+static size_t
strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -119,6 +109,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
locale->info.builtin.casemap_full);
}
+static bool
+wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isupper(wc);
+}
+
+static bool
+wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_islower(wc);
+}
+
+static bool
+wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isgraph(wc);
+}
+
+static bool
+wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isprint(wc);
+}
+
+static bool
+wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isspace(wc);
+}
+
+static bool
+char_is_cased_builtin(char ch, pg_locale_t locale)
+{
+ return IS_HIGHBIT_SET(ch) ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return unicode_uppercase_simple(wc);
+}
+
+static pg_wchar
+wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return unicode_lowercase_simple(wc);
+}
+
+static const struct ctype_methods ctype_methods_builtin = {
+ .strlower = strlower_builtin,
+ .strtitle = strtitle_builtin,
+ .strupper = strupper_builtin,
+ .strfold = strfold_builtin,
+ .wc_isdigit = wc_isdigit_builtin,
+ .wc_isalpha = wc_isalpha_builtin,
+ .wc_isalnum = wc_isalnum_builtin,
+ .wc_isupper = wc_isupper_builtin,
+ .wc_islower = wc_islower_builtin,
+ .wc_isgraph = wc_isgraph_builtin,
+ .wc_isprint = wc_isprint_builtin,
+ .wc_ispunct = wc_ispunct_builtin,
+ .wc_isspace = wc_isspace_builtin,
+ .char_is_cased = char_is_cased_builtin,
+ .wc_tolower = wc_tolower_builtin,
+ .wc_toupper = wc_toupper_builtin,
+};
+
pg_locale_t
create_pg_locale_builtin(Oid collid, MemoryContext context)
{
@@ -158,10 +240,11 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
result->info.builtin.locale = MemoryContextStrdup(context, locstr);
result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
- result->provider = COLLPROVIDER_BUILTIN;
result->deterministic = true;
result->collate_is_c = true;
result->ctype_is_c = (strcmp(locstr, "C") == 0);
+ if (!result->ctype_is_c)
+ result->ctype = &ctype_methods_builtin;
return result;
}
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index a32c32a0744..96741e08269 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -48,19 +48,22 @@
#define TEXTBUFLEN 1024
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
-extern size_t strlower_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
#ifdef USE_ICU
extern UCollator *pg_ucol_open(const char *loc_str);
+static size_t strlower_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strupper_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strfold_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static int strncoll_icu(const char *arg1, ssize_t len1,
+ const char *arg2, ssize_t len2,
+ pg_locale_t locale);
static size_t strnxfrm_icu(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
@@ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
const char *locale,
UErrorCode *pErrorCode);
+static bool
+char_is_cased_icu(char ch, pg_locale_t locale)
+{
+ return IS_HIGHBIT_SET(ch) ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+toupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_toupper(wc);
+}
+
+static pg_wchar
+tolower_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_tolower(wc);
+}
+
static const struct collate_methods collate_methods_icu = {
.strncoll = strncoll_icu,
.strnxfrm = strnxfrm_icu,
@@ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = {
.strxfrm_is_safe = true,
};
+static bool
+wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isdigit(wc);
+}
+
+static bool
+wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isalnum(wc);
+}
+
+static bool
+wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isupper(wc);
+}
+
+static bool
+wc_islower_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_islower(wc);
+}
+
+static bool
+wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isgraph(wc);
+}
+
+static bool
+wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isprint(wc);
+}
+
+static bool
+wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_ispunct(wc);
+}
+
+static bool
+wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isspace(wc);
+}
+
+static const struct ctype_methods ctype_methods_icu = {
+ .strlower = strlower_icu,
+ .strtitle = strtitle_icu,
+ .strupper = strupper_icu,
+ .strfold = strfold_icu,
+ .wc_isdigit = wc_isdigit_icu,
+ .wc_isalpha = wc_isalpha_icu,
+ .wc_isalnum = wc_isalnum_icu,
+ .wc_isupper = wc_isupper_icu,
+ .wc_islower = wc_islower_icu,
+ .wc_isgraph = wc_isgraph_icu,
+ .wc_isprint = wc_isprint_icu,
+ .wc_ispunct = wc_ispunct_icu,
+ .wc_isspace = wc_isspace_icu,
+ .char_is_cased = char_is_cased_icu,
+ .wc_toupper = toupper_icu,
+ .wc_tolower = tolower_icu,
+};
#endif
pg_locale_t
@@ -198,7 +292,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
result->info.icu.locale = MemoryContextStrdup(context, iculocstr);
result->info.icu.ucol = collator;
- result->provider = COLLPROVIDER_ICU;
result->deterministic = deterministic;
result->collate_is_c = false;
result->ctype_is_c = false;
@@ -206,6 +299,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
result->collate = &collate_methods_icu_utf8;
else
result->collate = &collate_methods_icu;
+ result->ctype = &ctype_methods_icu;
return result;
#else
@@ -379,7 +473,7 @@ make_icu_collator(const char *iculocstr, const char *icurules)
}
}
-size_t
+static size_t
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -399,7 +493,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
return result_len;
}
-size_t
+static size_t
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -419,7 +513,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
return result_len;
}
-size_t
+static size_t
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -439,7 +533,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
return result_len;
}
-size_t
+static size_t
strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -474,8 +568,6 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2
int result;
UErrorCode status;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
Assert(GetDatabaseEncoding() == PG_UTF8);
status = U_ZERO_ERROR;
@@ -503,8 +595,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
size_t uchar_bsize;
Size result_bsize;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
init_icu_converter();
ulen = uchar_length(icu_converter, src, srclen);
@@ -549,8 +639,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
uint32_t state[2];
UErrorCode status;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
Assert(GetDatabaseEncoding() == PG_UTF8);
uiter_setUTF8(&iter, src, srclen);
@@ -749,8 +837,6 @@ strncoll_icu(const char *arg1, ssize_t len1,
*uchar2;
int result;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
/* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
#ifdef HAVE_UCOL_STRCOLLUTF8
Assert(GetDatabaseEncoding() != PG_UTF8);
@@ -799,8 +885,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
size_t uchar_bsize;
Size result_bsize;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
/* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
Assert(GetDatabaseEncoding() != PG_UTF8);
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 199857e22db..8d88b53c375 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -34,6 +34,46 @@
#endif
/*
+ * For the libc provider, to provide as much functionality as possible on a
+ * variety of platforms without going so far as to implement everything from
+ * scratch, we use several implementation strategies depending on the
+ * situation:
+ *
+ * 1. In C/POSIX collations, we use hard-wired code. We can't depend on
+ * the <ctype.h> functions since those will obey LC_CTYPE. Note that these
+ * collations don't give a fig about multibyte characters.
+ *
+ * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
+ * This assumes that every platform uses Unicode codepoints directly
+ * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
+ * even for non-UTF8 encodings, which may be a problem.) On some platforms
+ * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
+ *
+ * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
+ * values up to 255, and punt for values above that. This is 100% correct
+ * only in single-byte encodings such as LATINn. However, non-Unicode
+ * multibyte encodings are mostly Far Eastern character sets for which the
+ * properties being tested here aren't very relevant for higher code values
+ * anyway. The difficulty with using the <wctype.h> functions with
+ * non-Unicode multibyte encodings is that we can have no certainty that
+ * the platform's wchar_t representation matches what we do in pg_wchar
+ * conversions.
+ *
+ * As a special case, in the "default" collation, (2) and (3) force ASCII
+ * letters to follow ASCII upcase/downcase rules, while in a non-default
+ * collation we just let the library functions do what they will. The case
+ * where this matters is treatment of I/i in Turkish, and the behavior is
+ * meant to match the upper()/lower() SQL functions.
+ *
+ * We store the active collation setting in static variables. In principle
+ * it could be passed down to here via the regex library's "struct vars" data
+ * structure; but that would require somewhat invasive changes in the regex
+ * library, and right now there's no real benefit to be gained from that.
+ *
+ * NB: the coding here assumes pg_wchar is an unsigned type.
+ */
+
+/*
* Size of stack buffer to use for string transformations, used to avoid heap
* allocations in typical cases. This should be large enough that most strings
* will fit, but small enough that we feel comfortable putting it on the
@@ -43,13 +83,6 @@
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
static int strncoll_libc(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
@@ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
+static bool
+wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isdigit_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isalpha_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isalnum_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isupper_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return islower_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isgraph_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isprint_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return ispunct_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isspace_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswdigit_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswalpha_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswalnum_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswupper_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswlower_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswgraph_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswprint_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswpunct_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswspace_l((wint_t) wc, locale->info.lt);
+}
+
+static char
+char_tolower_libc(unsigned char ch, pg_locale_t locale)
+{
+ Assert(pg_database_encoding_max_length() == 1);
+ return tolower_l(ch, locale->info.lt);
+}
+
+static bool
+char_is_cased_libc(char ch, pg_locale_t locale)
+{
+ bool is_multibyte = pg_database_encoding_max_length() > 1;
+
+ if (is_multibyte && IS_HIGHBIT_SET(ch))
+ return true;
+ else
+ return isalpha_l((unsigned char) ch, locale->info.lt);
+}
+
+static pg_wchar
+toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() != PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) wc);
+ if (wc <= (pg_wchar) UCHAR_MAX)
+ return toupper_l((unsigned char) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) wc);
+ if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+ return towupper_l((wint_t) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() != PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) wc);
+ if (wc <= (pg_wchar) UCHAR_MAX)
+ return tolower_l((unsigned char) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) wc);
+ if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+ return towlower_l((wint_t) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static const struct ctype_methods ctype_methods_libc_sb = {
+ .strlower = strlower_libc_sb,
+ .strtitle = strtitle_libc_sb,
+ .strupper = strupper_libc_sb,
+ .wc_isdigit = wc_isdigit_libc_sb,
+ .wc_isalpha = wc_isalpha_libc_sb,
+ .wc_isalnum = wc_isalnum_libc_sb,
+ .wc_isupper = wc_isupper_libc_sb,
+ .wc_islower = wc_islower_libc_sb,
+ .wc_isgraph = wc_isgraph_libc_sb,
+ .wc_isprint = wc_isprint_libc_sb,
+ .wc_ispunct = wc_ispunct_libc_sb,
+ .wc_isspace = wc_isspace_libc_sb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_sb,
+ .wc_tolower = tolower_libc_sb,
+ .max_chr = UCHAR_MAX,
+};
+
+/*
+ * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
+ * single-byte semantics for pattern matching.
+ */
+static const struct ctype_methods ctype_methods_libc_other_mb = {
+ .strlower = strlower_libc_mb,
+ .strtitle = strtitle_libc_mb,
+ .strupper = strupper_libc_mb,
+ .wc_isdigit = wc_isdigit_libc_sb,
+ .wc_isalpha = wc_isalpha_libc_sb,
+ .wc_isalnum = wc_isalnum_libc_sb,
+ .wc_isupper = wc_isupper_libc_sb,
+ .wc_islower = wc_islower_libc_sb,
+ .wc_isgraph = wc_isgraph_libc_sb,
+ .wc_isprint = wc_isprint_libc_sb,
+ .wc_ispunct = wc_ispunct_libc_sb,
+ .wc_isspace = wc_isspace_libc_sb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_sb,
+ .wc_tolower = tolower_libc_sb,
+ .max_chr = UCHAR_MAX,
+};
+
+static const struct ctype_methods ctype_methods_libc_utf8 = {
+ .strlower = strlower_libc_mb,
+ .strtitle = strtitle_libc_mb,
+ .strupper = strupper_libc_mb,
+ .wc_isdigit = wc_isdigit_libc_mb,
+ .wc_isalpha = wc_isalpha_libc_mb,
+ .wc_isalnum = wc_isalnum_libc_mb,
+ .wc_isupper = wc_isupper_libc_mb,
+ .wc_islower = wc_islower_libc_mb,
+ .wc_isgraph = wc_isgraph_libc_mb,
+ .wc_isprint = wc_isprint_libc_mb,
+ .wc_ispunct = wc_ispunct_libc_mb,
+ .wc_isspace = wc_isspace_libc_mb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_mb,
+ .wc_tolower = tolower_libc_mb,
+};
+
static const struct collate_methods collate_methods_libc = {
.strncoll = strncoll_libc,
.strnxfrm = strnxfrm_libc,
@@ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = {
};
#endif
-size_t
-strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strlower_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strlower_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strupper_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strupper_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
@@ -209,7 +457,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
- char2wchar(workspace, srclen + 1, src, srclen, locale);
+ char2wchar(workspace, srclen + 1, src, srclen, loc);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
workspace[curr_char] = towlower_l(workspace[curr_char], loc);
@@ -220,7 +468,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
max_size = curr_char * pg_database_encoding_max_length();
result = palloc(max_size + 1);
- result_size = wchar2char(result, workspace, max_size + 1, locale);
+ result_size = wchar2char(result, workspace, max_size + 1, loc);
if (result_size + 1 > destsize)
return result_size;
@@ -304,7 +552,7 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
- char2wchar(workspace, srclen + 1, src, srclen, locale);
+ char2wchar(workspace, srclen + 1, src, srclen, loc);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
@@ -321,7 +569,7 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
max_size = curr_char * pg_database_encoding_max_length();
result = palloc(max_size + 1);
- result_size = wchar2char(result, workspace, max_size + 1, locale);
+ result_size = wchar2char(result, workspace, max_size + 1, loc);
if (result_size + 1 > destsize)
return result_size;
@@ -392,7 +640,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
- char2wchar(workspace, srclen + 1, src, srclen, locale);
+ char2wchar(workspace, srclen + 1, src, srclen, loc);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
workspace[curr_char] = towupper_l(workspace[curr_char], loc);
@@ -403,7 +651,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
max_size = curr_char * pg_database_encoding_max_length();
result = palloc(max_size + 1);
- result_size = wchar2char(result, workspace, max_size + 1, locale);
+ result_size = wchar2char(result, workspace, max_size + 1, loc);
if (result_size + 1 > destsize)
return result_size;
@@ -465,7 +713,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
loc = make_libc_collator(collate, ctype);
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
- result->provider = COLLPROVIDER_LIBC;
result->deterministic = true;
result->collate_is_c = (strcmp(collate, "C") == 0) ||
(strcmp(collate, "POSIX") == 0);
@@ -481,6 +728,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
#endif
result->collate = &collate_methods_libc;
}
+ if (!result->ctype_is_c)
+ {
+ if (GetDatabaseEncoding() == PG_UTF8)
+ result->ctype = &ctype_methods_libc_utf8;
+ else if (pg_database_encoding_max_length() > 1)
+ result->ctype = &ctype_methods_libc_other_mb;
+ else
+ result->ctype = &ctype_methods_libc_sb;
+ }
return result;
}
@@ -576,8 +832,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
const char *arg2n;
int result;
- Assert(locale->provider == COLLPROVIDER_LIBC);
-
if (bufsize1 + bufsize2 > TEXTBUFLEN)
buf = palloc(bufsize1 + bufsize2);
@@ -632,8 +886,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
size_t bufsize = srclen + 1;
size_t result;
- Assert(locale->provider == COLLPROVIDER_LIBC);
-
if (srclen == -1)
return strxfrm_l(dest, src, destsize, locale->info.lt);
@@ -742,7 +994,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
int r;
int result;
- Assert(locale->provider == COLLPROVIDER_LIBC);
Assert(GetDatabaseEncoding() == PG_UTF8);
if (len1 == -1)
@@ -879,7 +1130,7 @@ wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
* zero-terminated. The output will be zero-terminated iff there is room.
*/
size_t
-wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
+wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
{
size_t result;
@@ -909,7 +1160,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
}
else
#endif /* WIN32 */
- if (locale == (pg_locale_t) 0)
+ if (loc == (locale_t) 0)
{
/* Use wcstombs directly for the default locale */
result = wcstombs(to, from, tolen);
@@ -917,7 +1168,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
else
{
/* Use wcstombs_l for nondefault locales */
- result = wcstombs_l(to, from, tolen, locale->info.lt);
+ result = wcstombs_l(to, from, tolen, loc);
}
return result;
@@ -934,7 +1185,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
*/
size_t
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
- pg_locale_t locale)
+ locale_t loc)
{
size_t result;
@@ -969,7 +1220,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
/* mbstowcs requires ending '\0' */
char *str = pnstrdup(from, fromlen);
- if (locale == (pg_locale_t) 0)
+ if (loc == (locale_t) 0)
{
/* Use mbstowcs directly for the default locale */
result = mbstowcs(to, str, tolen);
@@ -977,7 +1228,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
else
{
/* Use mbstowcs_l for nondefault locales */
- result = mbstowcs_l(to, str, tolen, locale->info.lt);
+ result = mbstowcs_l(to, str, tolen, loc);
}
pfree(str);
diff --git a/src/backend/utils/adt/pg_lsn.c b/src/backend/utils/adt/pg_lsn.c
index 16311590a14..12de2446f5b 100644
--- a/src/backend/utils/adt/pg_lsn.c
+++ b/src/backend/utils/adt/pg_lsn.c
@@ -83,7 +83,7 @@ pg_lsn_out(PG_FUNCTION_ARGS)
char buf[MAXPG_LSNLEN + 1];
char *result;
- snprintf(buf, sizeof buf, "%X/%X", LSN_FORMAT_ARGS(lsn));
+ snprintf(buf, sizeof buf, "%X/%08X", LSN_FORMAT_ARGS(lsn));
result = pstrdup(buf);
PG_RETURN_CSTRING(result);
}
diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c
index 5ee608a2b39..b8bbe95e82e 100644
--- a/src/backend/utils/adt/regproc.c
+++ b/src/backend/utils/adt/regproc.c
@@ -30,6 +30,7 @@
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
#include "lib/stringinfo.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -1764,6 +1765,123 @@ regnamespacesend(PG_FUNCTION_ARGS)
}
/*
+ * regdatabasein - converts database name to database OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_database entry.
+ */
+Datum
+regdatabasein(PG_FUNCTION_ARGS)
+{
+ char *db_name_or_oid = PG_GETARG_CSTRING(0);
+ Node *escontext = fcinfo->context;
+ Oid result;
+ List *names;
+
+ /* Handle "-" or numeric OID */
+ if (parseDashOrOid(db_name_or_oid, &result, escontext))
+ PG_RETURN_OID(result);
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regdatabase values must be OIDs in bootstrap mode");
+
+ /* Normal case: see if the name matches any pg_database entry. */
+ names = stringToQualifiedNameList(db_name_or_oid, escontext);
+ if (names == NIL)
+ PG_RETURN_NULL();
+
+ if (list_length(names) != 1)
+ ereturn(escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ result = get_database_oid(strVal(linitial(names)), true);
+
+ if (!OidIsValid(result))
+ ereturn(escontext, (Datum) 0,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("database \"%s\" does not exist",
+ strVal(linitial(names)))));
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regdatabase - converts database name to database OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regdatabase(PG_FUNCTION_ARGS)
+{
+ char *db_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ Datum result;
+ ErrorSaveContext escontext = {T_ErrorSaveContext};
+
+ if (!DirectInputFunctionCallSafe(regdatabasein, db_name,
+ InvalidOid, -1,
+ (Node *) &escontext,
+ &result))
+ PG_RETURN_NULL();
+ PG_RETURN_DATUM(result);
+}
+
+/*
+ * regdatabaseout - converts database OID to database name
+ */
+Datum
+regdatabaseout(PG_FUNCTION_ARGS)
+{
+ Oid dboid = PG_GETARG_OID(0);
+ char *result;
+
+ if (dboid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ result = get_database_name(dboid);
+
+ if (result)
+ {
+ /* pstrdup is not really necessary, but it avoids a compiler warning */
+ result = pstrdup(quote_identifier(result));
+ }
+ else
+ {
+ /* If OID doesn't match any database, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", dboid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regdatabaserecv - converts external binary format to regdatabase
+ */
+Datum
+regdatabaserecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regdatabasesend - converts regdatabase to binary format
+ */
+Datum
+regdatabasesend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+/*
* text_regclass: convert text to regclass
*
* This could be replaced by CoerceViaIO, except that we need to treat
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 6239900fa28..059fc5ebf60 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -30,7 +30,6 @@
#include "access/xact.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_constraint.h"
-#include "catalog/pg_proc.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "executor/spi.h"
@@ -46,7 +45,6 @@
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
-#include "utils/rangetypes.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index a96b1b9c0bc..ce6a626eba2 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -103,7 +103,6 @@
#include "access/table.h"
#include "access/tableam.h"
#include "access/visibilitymap.h"
-#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
@@ -4620,6 +4619,7 @@ convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
case REGDICTIONARYOID:
case REGROLEOID:
case REGNAMESPACEOID:
+ case REGDATABASEOID:
*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
&failure);
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
@@ -4752,6 +4752,7 @@ convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
case REGDICTIONARYOID:
case REGROLEOID:
case REGNAMESPACEOID:
+ case REGDATABASEOID:
/* we can treat OIDs as integers... */
return (double) DatumGetObjectId(value);
}
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 347089b7626..25cff56c3d0 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -5312,10 +5312,10 @@ isoweekdate2date(int isoweek, int wday, int *year, int *mon, int *mday)
int
date2isoweek(int year, int mon, int mday)
{
- float8 result;
int day0,
day4,
- dayn;
+ dayn,
+ week;
/* current day */
dayn = date2j(year, mon, mday);
@@ -5338,13 +5338,13 @@ date2isoweek(int year, int mon, int mday)
day0 = j2day(day4 - 1);
}
- result = (dayn - (day4 - day0)) / 7 + 1;
+ week = (dayn - (day4 - day0)) / 7 + 1;
/*
* Sometimes the last few days in a year will fall into the first week of
* the next year, so check for this.
*/
- if (result >= 52)
+ if (week >= 52)
{
day4 = date2j(year + 1, 1, 4);
@@ -5352,10 +5352,10 @@ date2isoweek(int year, int mon, int mday)
day0 = j2day(day4 - 1);
if (dayn >= day4 - day0)
- result = (dayn - (day4 - day0)) / 7 + 1;
+ week = (dayn - (day4 - day0)) / 7 + 1;
}
- return (int) result;
+ return week;
}
@@ -5367,10 +5367,10 @@ date2isoweek(int year, int mon, int mday)
int
date2isoyear(int year, int mon, int mday)
{
- float8 result;
int day0,
day4,
- dayn;
+ dayn,
+ week;
/* current day */
dayn = date2j(year, mon, mday);
@@ -5395,13 +5395,13 @@ date2isoyear(int year, int mon, int mday)
year--;
}
- result = (dayn - (day4 - day0)) / 7 + 1;
+ week = (dayn - (day4 - day0)) / 7 + 1;
/*
* Sometimes the last few days in a year will fall into the first week of
* the next year, so check for this.
*/
- if (result >= 52)
+ if (week >= 52)
{
day4 = date2j(year + 1, 1, 4);
@@ -6477,7 +6477,7 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow)
if (TIMESTAMP_NOT_FINITE(timestamp))
return timestamp;
- /* We don't expect this to fail, but check it pro forma */
+ /* timestamp2tm should not fail on valid timestamps, but cope */
if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0)
{
tz = DetermineTimeZoneOffset(tm, session_timezone);
@@ -6485,23 +6485,22 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow)
result = dt2local(timestamp, -tz);
if (IS_VALID_TIMESTAMP(result))
- {
return result;
+ }
+
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ TIMESTAMP_NOBEGIN(result);
}
- else if (overflow)
+ else
{
- if (result < MIN_TIMESTAMP)
- {
- *overflow = -1;
- TIMESTAMP_NOBEGIN(result);
- }
- else
- {
- *overflow = 1;
- TIMESTAMP_NOEND(result);
- }
- return result;
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
}
+ return result;
}
ereport(ERROR,
@@ -6531,27 +6530,81 @@ timestamptz_timestamp(PG_FUNCTION_ARGS)
PG_RETURN_TIMESTAMP(timestamptz2timestamp(timestamp));
}
+/*
+ * Convert timestamptz to timestamp, throwing error for overflow.
+ */
static Timestamp
timestamptz2timestamp(TimestampTz timestamp)
{
+ return timestamptz2timestamp_opt_overflow(timestamp, NULL);
+}
+
+/*
+ * Convert timestamp with time zone to timestamp.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamptz is finite but out of the valid range for timestamp, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate timestamp infinity.
+ */
+Timestamp
+timestamptz2timestamp_opt_overflow(TimestampTz timestamp, int *overflow)
+{
Timestamp result;
struct pg_tm tt,
*tm = &tt;
fsec_t fsec;
int tz;
+ if (overflow)
+ *overflow = 0;
+
if (TIMESTAMP_NOT_FINITE(timestamp))
result = timestamp;
else
{
if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ {
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ TIMESTAMP_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
+ }
+ return result;
+ }
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
+ }
if (tm2timestamp(tm, fsec, NULL, &result) != 0)
+ {
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ TIMESTAMP_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
+ }
+ return result;
+ }
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
+ }
}
return result;
}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 3e4d5568bde..ffae8c23abf 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -35,7 +35,6 @@
#include "port/pg_bswap.h"
#include "regex/regex.h"
#include "utils/builtins.h"
-#include "utils/bytea.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -43,10 +42,6 @@
#include "utils/sortsupport.h"
#include "utils/varlena.h"
-
-/* GUC variable */
-int bytea_output = BYTEA_OUTPUT_HEX;
-
typedef struct varlena VarString;
/*
@@ -148,12 +143,6 @@ static int text_position_get_match_pos(TextPositionState *state);
static void text_position_cleanup(TextPositionState *state);
static void check_collation_set(Oid collid);
static int text_cmp(text *arg1, text *arg2, Oid collid);
-static bytea *bytea_catenate(bytea *t1, bytea *t2);
-static bytea *bytea_substring(Datum str,
- int S,
- int L,
- bool length_not_specified);
-static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static void appendStringInfoText(StringInfo str, const text *t);
static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
static void split_text_accum_result(SplitTextOutputData *tstate,
@@ -279,307 +268,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
* USER I/O ROUTINES *
*****************************************************************************/
-
-#define VAL(CH) ((CH) - '0')
-#define DIG(VAL) ((VAL) + '0')
-
-/*
- * byteain - converts from printable representation of byte array
- *
- * Non-printable characters must be passed as '\nnn' (octal) and are
- * converted to internal form. '\' must be passed as '\\'.
- * ereport(ERROR, ...) if bad form.
- *
- * BUGS:
- * The input is scanned twice.
- * The error checking of input is minimal.
- */
-Datum
-byteain(PG_FUNCTION_ARGS)
-{
- char *inputText = PG_GETARG_CSTRING(0);
- Node *escontext = fcinfo->context;
- char *tp;
- char *rp;
- int bc;
- bytea *result;
-
- /* Recognize hex input */
- if (inputText[0] == '\\' && inputText[1] == 'x')
- {
- size_t len = strlen(inputText);
-
- bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
- result = palloc(bc);
- bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
- escontext);
- SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
-
- PG_RETURN_BYTEA_P(result);
- }
-
- /* Else, it's the traditional escaped style */
- for (bc = 0, tp = inputText; *tp != '\0'; bc++)
- {
- if (tp[0] != '\\')
- tp++;
- else if ((tp[0] == '\\') &&
- (tp[1] >= '0' && tp[1] <= '3') &&
- (tp[2] >= '0' && tp[2] <= '7') &&
- (tp[3] >= '0' && tp[3] <= '7'))
- tp += 4;
- else if ((tp[0] == '\\') &&
- (tp[1] == '\\'))
- tp += 2;
- else
- {
- /*
- * one backslash, not followed by another or ### valid octal
- */
- ereturn(escontext, (Datum) 0,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s", "bytea")));
- }
- }
-
- bc += VARHDRSZ;
-
- result = (bytea *) palloc(bc);
- SET_VARSIZE(result, bc);
-
- tp = inputText;
- rp = VARDATA(result);
- while (*tp != '\0')
- {
- if (tp[0] != '\\')
- *rp++ = *tp++;
- else if ((tp[0] == '\\') &&
- (tp[1] >= '0' && tp[1] <= '3') &&
- (tp[2] >= '0' && tp[2] <= '7') &&
- (tp[3] >= '0' && tp[3] <= '7'))
- {
- bc = VAL(tp[1]);
- bc <<= 3;
- bc += VAL(tp[2]);
- bc <<= 3;
- *rp++ = bc + VAL(tp[3]);
-
- tp += 4;
- }
- else if ((tp[0] == '\\') &&
- (tp[1] == '\\'))
- {
- *rp++ = '\\';
- tp += 2;
- }
- else
- {
- /*
- * We should never get here. The first pass should not allow it.
- */
- ereturn(escontext, (Datum) 0,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s", "bytea")));
- }
- }
-
- PG_RETURN_BYTEA_P(result);
-}
-
-/*
- * byteaout - converts to printable representation of byte array
- *
- * In the traditional escaped format, non-printable characters are
- * printed as '\nnn' (octal) and '\' as '\\'.
- */
-Datum
-byteaout(PG_FUNCTION_ARGS)
-{
- bytea *vlena = PG_GETARG_BYTEA_PP(0);
- char *result;
- char *rp;
-
- if (bytea_output == BYTEA_OUTPUT_HEX)
- {
- /* Print hex format */
- rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
- *rp++ = '\\';
- *rp++ = 'x';
- rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
- }
- else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
- {
- /* Print traditional escaped format */
- char *vp;
- uint64 len;
- int i;
-
- len = 1; /* empty string has 1 char */
- vp = VARDATA_ANY(vlena);
- for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
- {
- if (*vp == '\\')
- len += 2;
- else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
- len += 4;
- else
- len++;
- }
-
- /*
- * In principle len can't overflow uint32 if the input fit in 1GB, but
- * for safety let's check rather than relying on palloc's internal
- * check.
- */
- if (len > MaxAllocSize)
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg_internal("result of bytea output conversion is too large")));
- rp = result = (char *) palloc(len);
-
- vp = VARDATA_ANY(vlena);
- for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
- {
- if (*vp == '\\')
- {
- *rp++ = '\\';
- *rp++ = '\\';
- }
- else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
- {
- int val; /* holds unprintable chars */
-
- val = *vp;
- rp[0] = '\\';
- rp[3] = DIG(val & 07);
- val >>= 3;
- rp[2] = DIG(val & 07);
- val >>= 3;
- rp[1] = DIG(val & 03);
- rp += 4;
- }
- else
- *rp++ = *vp;
- }
- }
- else
- {
- elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
- bytea_output);
- rp = result = NULL; /* keep compiler quiet */
- }
- *rp = '\0';
- PG_RETURN_CSTRING(result);
-}
-
-/*
- * bytearecv - converts external binary format to bytea
- */
-Datum
-bytearecv(PG_FUNCTION_ARGS)
-{
- StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
- bytea *result;
- int nbytes;
-
- nbytes = buf->len - buf->cursor;
- result = (bytea *) palloc(nbytes + VARHDRSZ);
- SET_VARSIZE(result, nbytes + VARHDRSZ);
- pq_copymsgbytes(buf, VARDATA(result), nbytes);
- PG_RETURN_BYTEA_P(result);
-}
-
-/*
- * byteasend - converts bytea to binary format
- *
- * This is a special case: just copy the input...
- */
-Datum
-byteasend(PG_FUNCTION_ARGS)
-{
- bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
-
- PG_RETURN_BYTEA_P(vlena);
-}
-
-Datum
-bytea_string_agg_transfn(PG_FUNCTION_ARGS)
-{
- StringInfo state;
-
- state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
-
- /* Append the value unless null, preceding it with the delimiter. */
- if (!PG_ARGISNULL(1))
- {
- bytea *value = PG_GETARG_BYTEA_PP(1);
- bool isfirst = false;
-
- /*
- * You might think we can just throw away the first delimiter, however
- * we must keep it as we may be a parallel worker doing partial
- * aggregation building a state to send to the main process. We need
- * to keep the delimiter of every aggregation so that the combine
- * function can properly join up the strings of two separately
- * partially aggregated results. The first delimiter is only stripped
- * off in the final function. To know how much to strip off the front
- * of the string, we store the length of the first delimiter in the
- * StringInfo's cursor field, which we don't otherwise need here.
- */
- if (state == NULL)
- {
- state = makeStringAggState(fcinfo);
- isfirst = true;
- }
-
- if (!PG_ARGISNULL(2))
- {
- bytea *delim = PG_GETARG_BYTEA_PP(2);
-
- appendBinaryStringInfo(state, VARDATA_ANY(delim),
- VARSIZE_ANY_EXHDR(delim));
- if (isfirst)
- state->cursor = VARSIZE_ANY_EXHDR(delim);
- }
-
- appendBinaryStringInfo(state, VARDATA_ANY(value),
- VARSIZE_ANY_EXHDR(value));
- }
-
- /*
- * The transition type for string_agg() is declared to be "internal",
- * which is a pass-by-value type the same size as a pointer.
- */
- if (state)
- PG_RETURN_POINTER(state);
- PG_RETURN_NULL();
-}
-
-Datum
-bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
-{
- StringInfo state;
-
- /* cannot be called directly because of internal-type argument */
- Assert(AggCheckCallContext(fcinfo, NULL));
-
- state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
-
- if (state != NULL)
- {
- /* As per comment in transfn, strip data before the cursor position */
- bytea *result;
- int strippedlen = state->len - state->cursor;
-
- result = (bytea *) palloc(strippedlen + VARHDRSZ);
- SET_VARSIZE(result, strippedlen + VARHDRSZ);
- memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
- PG_RETURN_BYTEA_P(result);
- }
- else
- PG_RETURN_NULL();
-}
-
/*
* textin - converts cstring to internal representation
*/
@@ -2959,467 +2647,6 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
}
-/*-------------------------------------------------------------
- * byteaoctetlen
- *
- * get the number of bytes contained in an instance of type 'bytea'
- *-------------------------------------------------------------
- */
-Datum
-byteaoctetlen(PG_FUNCTION_ARGS)
-{
- Datum str = PG_GETARG_DATUM(0);
-
- /* We need not detoast the input at all */
- PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
-}
-
-/*
- * byteacat -
- * takes two bytea* and returns a bytea* that is the concatenation of
- * the two.
- *
- * Cloned from textcat and modified as required.
- */
-Datum
-byteacat(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
- bytea *t2 = PG_GETARG_BYTEA_PP(1);
-
- PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
-}
-
-/*
- * bytea_catenate
- * Guts of byteacat(), broken out so it can be used by other functions
- *
- * Arguments can be in short-header form, but not compressed or out-of-line
- */
-static bytea *
-bytea_catenate(bytea *t1, bytea *t2)
-{
- bytea *result;
- int len1,
- len2,
- len;
- char *ptr;
-
- len1 = VARSIZE_ANY_EXHDR(t1);
- len2 = VARSIZE_ANY_EXHDR(t2);
-
- /* paranoia ... probably should throw error instead? */
- if (len1 < 0)
- len1 = 0;
- if (len2 < 0)
- len2 = 0;
-
- len = len1 + len2 + VARHDRSZ;
- result = (bytea *) palloc(len);
-
- /* Set size of result string... */
- SET_VARSIZE(result, len);
-
- /* Fill data field of result string... */
- ptr = VARDATA(result);
- if (len1 > 0)
- memcpy(ptr, VARDATA_ANY(t1), len1);
- if (len2 > 0)
- memcpy(ptr + len1, VARDATA_ANY(t2), len2);
-
- return result;
-}
-
-#define PG_STR_GET_BYTEA(str_) \
- DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
-
-/*
- * bytea_substr()
- * Return a substring starting at the specified position.
- * Cloned from text_substr and modified as required.
- *
- * Input:
- * - string
- * - starting position (is one-based)
- * - string length (optional)
- *
- * If the starting position is zero or less, then return from the start of the string
- * adjusting the length to be consistent with the "negative start" per SQL.
- * If the length is less than zero, an ERROR is thrown. If no third argument
- * (length) is provided, the length to the end of the string is assumed.
- */
-Datum
-bytea_substr(PG_FUNCTION_ARGS)
-{
- PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
- PG_GETARG_INT32(1),
- PG_GETARG_INT32(2),
- false));
-}
-
-/*
- * bytea_substr_no_len -
- * Wrapper to avoid opr_sanity failure due to
- * one function accepting a different number of args.
- */
-Datum
-bytea_substr_no_len(PG_FUNCTION_ARGS)
-{
- PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
- PG_GETARG_INT32(1),
- -1,
- true));
-}
-
-static bytea *
-bytea_substring(Datum str,
- int S,
- int L,
- bool length_not_specified)
-{
- int32 S1; /* adjusted start position */
- int32 L1; /* adjusted substring length */
- int32 E; /* end position */
-
- /*
- * The logic here should generally match text_substring().
- */
- S1 = Max(S, 1);
-
- if (length_not_specified)
- {
- /*
- * Not passed a length - DatumGetByteaPSlice() grabs everything to the
- * end of the string if we pass it a negative value for length.
- */
- L1 = -1;
- }
- else if (L < 0)
- {
- /* SQL99 says to throw an error for E < S, i.e., negative length */
- ereport(ERROR,
- (errcode(ERRCODE_SUBSTRING_ERROR),
- errmsg("negative substring length not allowed")));
- L1 = -1; /* silence stupider compilers */
- }
- else if (pg_add_s32_overflow(S, L, &E))
- {
- /*
- * L could be large enough for S + L to overflow, in which case the
- * substring must run to end of string.
- */
- L1 = -1;
- }
- else
- {
- /*
- * A zero or negative value for the end position can happen if the
- * start was negative or one. SQL99 says to return a zero-length
- * string.
- */
- if (E < 1)
- return PG_STR_GET_BYTEA("");
-
- L1 = E - S1;
- }
-
- /*
- * If the start position is past the end of the string, SQL99 says to
- * return a zero-length string -- DatumGetByteaPSlice() will do that for
- * us. We need only convert S1 to zero-based starting position.
- */
- return DatumGetByteaPSlice(str, S1 - 1, L1);
-}
-
-/*
- * byteaoverlay
- * Replace specified substring of first string with second
- *
- * The SQL standard defines OVERLAY() in terms of substring and concatenation.
- * This code is a direct implementation of what the standard says.
- */
-Datum
-byteaoverlay(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
- bytea *t2 = PG_GETARG_BYTEA_PP(1);
- int sp = PG_GETARG_INT32(2); /* substring start position */
- int sl = PG_GETARG_INT32(3); /* substring length */
-
- PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
-}
-
-Datum
-byteaoverlay_no_len(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
- bytea *t2 = PG_GETARG_BYTEA_PP(1);
- int sp = PG_GETARG_INT32(2); /* substring start position */
- int sl;
-
- sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
- PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
-}
-
-static bytea *
-bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
-{
- bytea *result;
- bytea *s1;
- bytea *s2;
- int sp_pl_sl;
-
- /*
- * Check for possible integer-overflow cases. For negative sp, throw a
- * "substring length" error because that's what should be expected
- * according to the spec's definition of OVERLAY().
- */
- if (sp <= 0)
- ereport(ERROR,
- (errcode(ERRCODE_SUBSTRING_ERROR),
- errmsg("negative substring length not allowed")));
- if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("integer out of range")));
-
- s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
- s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
- result = bytea_catenate(s1, t2);
- result = bytea_catenate(result, s2);
-
- return result;
-}
-
-/*
- * bit_count
- */
-Datum
-bytea_bit_count(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
-
- PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
-}
-
-/*
- * byteapos -
- * Return the position of the specified substring.
- * Implements the SQL POSITION() function.
- * Cloned from textpos and modified as required.
- */
-Datum
-byteapos(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
- bytea *t2 = PG_GETARG_BYTEA_PP(1);
- int pos;
- int px,
- p;
- int len1,
- len2;
- char *p1,
- *p2;
-
- len1 = VARSIZE_ANY_EXHDR(t1);
- len2 = VARSIZE_ANY_EXHDR(t2);
-
- if (len2 <= 0)
- PG_RETURN_INT32(1); /* result for empty pattern */
-
- p1 = VARDATA_ANY(t1);
- p2 = VARDATA_ANY(t2);
-
- pos = 0;
- px = (len1 - len2);
- for (p = 0; p <= px; p++)
- {
- if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
- {
- pos = p + 1;
- break;
- };
- p1++;
- };
-
- PG_RETURN_INT32(pos);
-}
-
-/*-------------------------------------------------------------
- * byteaGetByte
- *
- * this routine treats "bytea" as an array of bytes.
- * It returns the Nth byte (a number between 0 and 255).
- *-------------------------------------------------------------
- */
-Datum
-byteaGetByte(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int32 n = PG_GETARG_INT32(1);
- int len;
- int byte;
-
- len = VARSIZE_ANY_EXHDR(v);
-
- if (n < 0 || n >= len)
- ereport(ERROR,
- (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %d out of valid range, 0..%d",
- n, len - 1)));
-
- byte = ((unsigned char *) VARDATA_ANY(v))[n];
-
- PG_RETURN_INT32(byte);
-}
-
-/*-------------------------------------------------------------
- * byteaGetBit
- *
- * This routine treats a "bytea" type like an array of bits.
- * It returns the value of the Nth bit (0 or 1).
- *
- *-------------------------------------------------------------
- */
-Datum
-byteaGetBit(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int64 n = PG_GETARG_INT64(1);
- int byteNo,
- bitNo;
- int len;
- int byte;
-
- len = VARSIZE_ANY_EXHDR(v);
-
- if (n < 0 || n >= (int64) len * 8)
- ereport(ERROR,
- (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
- n, (int64) len * 8 - 1)));
-
- /* n/8 is now known < len, so safe to cast to int */
- byteNo = (int) (n / 8);
- bitNo = (int) (n % 8);
-
- byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
-
- if (byte & (1 << bitNo))
- PG_RETURN_INT32(1);
- else
- PG_RETURN_INT32(0);
-}
-
-/*-------------------------------------------------------------
- * byteaSetByte
- *
- * Given an instance of type 'bytea' creates a new one with
- * the Nth byte set to the given value.
- *
- *-------------------------------------------------------------
- */
-Datum
-byteaSetByte(PG_FUNCTION_ARGS)
-{
- bytea *res = PG_GETARG_BYTEA_P_COPY(0);
- int32 n = PG_GETARG_INT32(1);
- int32 newByte = PG_GETARG_INT32(2);
- int len;
-
- len = VARSIZE(res) - VARHDRSZ;
-
- if (n < 0 || n >= len)
- ereport(ERROR,
- (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %d out of valid range, 0..%d",
- n, len - 1)));
-
- /*
- * Now set the byte.
- */
- ((unsigned char *) VARDATA(res))[n] = newByte;
-
- PG_RETURN_BYTEA_P(res);
-}
-
-/*-------------------------------------------------------------
- * byteaSetBit
- *
- * Given an instance of type 'bytea' creates a new one with
- * the Nth bit set to the given value.
- *
- *-------------------------------------------------------------
- */
-Datum
-byteaSetBit(PG_FUNCTION_ARGS)
-{
- bytea *res = PG_GETARG_BYTEA_P_COPY(0);
- int64 n = PG_GETARG_INT64(1);
- int32 newBit = PG_GETARG_INT32(2);
- int len;
- int oldByte,
- newByte;
- int byteNo,
- bitNo;
-
- len = VARSIZE(res) - VARHDRSZ;
-
- if (n < 0 || n >= (int64) len * 8)
- ereport(ERROR,
- (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
- n, (int64) len * 8 - 1)));
-
- /* n/8 is now known < len, so safe to cast to int */
- byteNo = (int) (n / 8);
- bitNo = (int) (n % 8);
-
- /*
- * sanity check!
- */
- if (newBit != 0 && newBit != 1)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("new bit must be 0 or 1")));
-
- /*
- * Update the byte.
- */
- oldByte = ((unsigned char *) VARDATA(res))[byteNo];
-
- if (newBit == 0)
- newByte = oldByte & (~(1 << bitNo));
- else
- newByte = oldByte | (1 << bitNo);
-
- ((unsigned char *) VARDATA(res))[byteNo] = newByte;
-
- PG_RETURN_BYTEA_P(res);
-}
-
-/*
- * Return reversed bytea
- */
-Datum
-bytea_reverse(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- const char *p = VARDATA_ANY(v);
- int len = VARSIZE_ANY_EXHDR(v);
- const char *endp = p + len;
- bytea *result = palloc(len + VARHDRSZ);
- char *dst = (char *) VARDATA(result) + len;
-
- SET_VARSIZE(result, len + VARHDRSZ);
-
- while (p < endp)
- *(--dst) = *p++;
-
- PG_RETURN_BYTEA_P(result);
-}
-
-
/* text_name()
* Converts a text type to a Name type.
*/
@@ -3849,331 +3076,6 @@ SplitGUCList(char *rawstring, char separator,
return true;
}
-
-/*****************************************************************************
- * Comparison Functions used for bytea
- *
- * Note: btree indexes need these routines not to leak memory; therefore,
- * be careful to free working copies of toasted datums. Most places don't
- * need to be so careful.
- *****************************************************************************/
-
-Datum
-byteaeq(PG_FUNCTION_ARGS)
-{
- Datum arg1 = PG_GETARG_DATUM(0);
- Datum arg2 = PG_GETARG_DATUM(1);
- bool result;
- Size len1,
- len2;
-
- /*
- * We can use a fast path for unequal lengths, which might save us from
- * having to detoast one or both values.
- */
- len1 = toast_raw_datum_size(arg1);
- len2 = toast_raw_datum_size(arg2);
- if (len1 != len2)
- result = false;
- else
- {
- bytea *barg1 = DatumGetByteaPP(arg1);
- bytea *barg2 = DatumGetByteaPP(arg2);
-
- result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
- len1 - VARHDRSZ) == 0);
-
- PG_FREE_IF_COPY(barg1, 0);
- PG_FREE_IF_COPY(barg2, 1);
- }
-
- PG_RETURN_BOOL(result);
-}
-
-Datum
-byteane(PG_FUNCTION_ARGS)
-{
- Datum arg1 = PG_GETARG_DATUM(0);
- Datum arg2 = PG_GETARG_DATUM(1);
- bool result;
- Size len1,
- len2;
-
- /*
- * We can use a fast path for unequal lengths, which might save us from
- * having to detoast one or both values.
- */
- len1 = toast_raw_datum_size(arg1);
- len2 = toast_raw_datum_size(arg2);
- if (len1 != len2)
- result = true;
- else
- {
- bytea *barg1 = DatumGetByteaPP(arg1);
- bytea *barg2 = DatumGetByteaPP(arg2);
-
- result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
- len1 - VARHDRSZ) != 0);
-
- PG_FREE_IF_COPY(barg1, 0);
- PG_FREE_IF_COPY(barg2, 1);
- }
-
- PG_RETURN_BOOL(result);
-}
-
-Datum
-bytealt(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
-}
-
-Datum
-byteale(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
-}
-
-Datum
-byteagt(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
-}
-
-Datum
-byteage(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
-}
-
-Datum
-byteacmp(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
- if ((cmp == 0) && (len1 != len2))
- cmp = (len1 < len2) ? -1 : 1;
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_INT32(cmp);
-}
-
-Datum
-bytea_larger(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- bytea *result;
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
- result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
-
- PG_RETURN_BYTEA_P(result);
-}
-
-Datum
-bytea_smaller(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- bytea *result;
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
- result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
-
- PG_RETURN_BYTEA_P(result);
-}
-
-Datum
-bytea_sortsupport(PG_FUNCTION_ARGS)
-{
- SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
- MemoryContext oldcontext;
-
- oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
-
- /* Use generic string SortSupport, forcing "C" collation */
- varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
-
- MemoryContextSwitchTo(oldcontext);
-
- PG_RETURN_VOID();
-}
-
-/* Cast bytea -> int2 */
-Datum
-bytea_int2(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int len = VARSIZE_ANY_EXHDR(v);
- uint16 result;
-
- /* Check that the byte array is not too long */
- if (len > sizeof(result))
- ereport(ERROR,
- errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("smallint out of range"));
-
- /* Convert it to an integer; most significant bytes come first */
- result = 0;
- for (int i = 0; i < len; i++)
- {
- result <<= BITS_PER_BYTE;
- result |= ((unsigned char *) VARDATA_ANY(v))[i];
- }
-
- PG_RETURN_INT16(result);
-}
-
-/* Cast bytea -> int4 */
-Datum
-bytea_int4(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int len = VARSIZE_ANY_EXHDR(v);
- uint32 result;
-
- /* Check that the byte array is not too long */
- if (len > sizeof(result))
- ereport(ERROR,
- errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("integer out of range"));
-
- /* Convert it to an integer; most significant bytes come first */
- result = 0;
- for (int i = 0; i < len; i++)
- {
- result <<= BITS_PER_BYTE;
- result |= ((unsigned char *) VARDATA_ANY(v))[i];
- }
-
- PG_RETURN_INT32(result);
-}
-
-/* Cast bytea -> int8 */
-Datum
-bytea_int8(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int len = VARSIZE_ANY_EXHDR(v);
- uint64 result;
-
- /* Check that the byte array is not too long */
- if (len > sizeof(result))
- ereport(ERROR,
- errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("bigint out of range"));
-
- /* Convert it to an integer; most significant bytes come first */
- result = 0;
- for (int i = 0; i < len; i++)
- {
- result <<= BITS_PER_BYTE;
- result |= ((unsigned char *) VARDATA_ANY(v))[i];
- }
-
- PG_RETURN_INT64(result);
-}
-
-/* Cast int2 -> bytea; can just use int2send() */
-Datum
-int2_bytea(PG_FUNCTION_ARGS)
-{
- return int2send(fcinfo);
-}
-
-/* Cast int4 -> bytea; can just use int4send() */
-Datum
-int4_bytea(PG_FUNCTION_ARGS)
-{
- return int4send(fcinfo);
-}
-
-/* Cast int8 -> bytea; can just use int8send() */
-Datum
-int8_bytea(PG_FUNCTION_ARGS)
-{
- return int8send(fcinfo);
-}
-
/*
* appendStringInfoText
*
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index a4150bff2ea..f7b731825fc 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -529,14 +529,36 @@ xmltext(PG_FUNCTION_ARGS)
#ifdef USE_LIBXML
text *arg = PG_GETARG_TEXT_PP(0);
text *result;
- xmlChar *xmlbuf = NULL;
+ volatile xmlChar *xmlbuf = NULL;
+ PgXmlErrorContext *xmlerrcxt;
+
+ /* First we gotta spin up some error handling. */
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+ PG_TRY();
+ {
+ xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
- xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
+ if (xmlbuf == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlChar");
- Assert(xmlbuf);
+ result = cstring_to_text_with_len((const char *) xmlbuf,
+ xmlStrlen((const xmlChar *) xmlbuf));
+ }
+ PG_CATCH();
+ {
+ if (xmlbuf)
+ xmlFree((xmlChar *) xmlbuf);
+
+ pg_xml_done(xmlerrcxt, true);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xmlFree((xmlChar *) xmlbuf);
+ pg_xml_done(xmlerrcxt, false);
- result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf));
- xmlFree(xmlbuf);
PG_RETURN_XML_P(result);
#else
NO_XML_SUPPORT();
@@ -663,7 +685,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
volatile xmlBufferPtr buf = NULL;
volatile xmlSaveCtxtPtr ctxt = NULL;
ErrorSaveContext escontext = {T_ErrorSaveContext};
- PgXmlErrorContext *xmlerrcxt;
+ PgXmlErrorContext *volatile xmlerrcxt = NULL;
#endif
if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
@@ -704,13 +726,18 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
return (text *) data;
}
- /* Otherwise, we gotta spin up some error handling. */
- xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
-
+ /*
+ * Otherwise, we gotta spin up some error handling. Unlike most other
+ * routines in this module, we already have a libxml "doc" structure to
+ * free, so we need to call pg_xml_init() inside the PG_TRY and be
+ * prepared for it to fail (typically due to palloc OOM).
+ */
PG_TRY();
{
size_t decl_len = 0;
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
/* The serialized data will go into this buffer. */
buf = xmlBufferCreate();
@@ -770,7 +797,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
if (oldroot != NULL)
xmlFreeNode(oldroot);
- xmlAddChildList(root, content_nodes);
+ if (xmlAddChildList(root, content_nodes) == NULL ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not append xml node list");
/*
* We use this node to insert newlines in the dump. Note: in at
@@ -838,10 +868,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
xmlSaveClose(ctxt);
if (buf)
xmlBufferFree(buf);
- if (doc)
- xmlFreeDoc(doc);
+ xmlFreeDoc(doc);
- pg_xml_done(xmlerrcxt, true);
+ if (xmlerrcxt)
+ pg_xml_done(xmlerrcxt, true);
PG_RE_THROW();
}
@@ -931,7 +961,10 @@ xmlelement(XmlExpr *xexpr,
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xmlTextWriter");
- xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
+ if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not start xml element");
forboth(arg, named_arg_strings, narg, xexpr->arg_names)
{
@@ -939,19 +972,30 @@ xmlelement(XmlExpr *xexpr,
char *argname = strVal(lfirst(narg));
if (str)
- xmlTextWriterWriteAttribute(writer,
- (xmlChar *) argname,
- (xmlChar *) str);
+ {
+ if (xmlTextWriterWriteAttribute(writer,
+ (xmlChar *) argname,
+ (xmlChar *) str) < 0 ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not write xml attribute");
+ }
}
foreach(arg, arg_strings)
{
char *str = (char *) lfirst(arg);
- xmlTextWriterWriteRaw(writer, (xmlChar *) str);
+ if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not write raw xml text");
}
- xmlTextWriterEndElement(writer);
+ if (xmlTextWriterEndElement(writer) < 0 ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not end xml element");
/* we MUST do this now to flush data out to the buffer ... */
xmlFreeTextWriter(writer);
@@ -4220,20 +4264,27 @@ xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
}
else
{
- xmlChar *str;
+ volatile xmlChar *str = NULL;
- str = xmlXPathCastNodeToString(cur);
PG_TRY();
{
+ char *escaped;
+
+ str = xmlXPathCastNodeToString(cur);
+ if (str == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlChar");
+
/* Here we rely on XML having the same representation as TEXT */
- char *escaped = escape_xml((char *) str);
+ escaped = escape_xml((char *) str);
result = (xmltype *) cstring_to_text(escaped);
pfree(escaped);
}
PG_FINALLY();
{
- xmlFree(str);
+ if (str)
+ xmlFree((xmlChar *) str);
}
PG_END_TRY();
}
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 657648996c2..d1b25214376 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -317,6 +317,7 @@ GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEq
case REGDICTIONARYOID:
case REGROLEOID:
case REGNAMESPACEOID:
+ case REGDATABASEOID:
*hashfunc = int4hashfast;
*fasteqfunc = int4eqfast;
*eqfunc = F_OIDEQ;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index c86ceefda94..641e535a73c 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -417,12 +417,11 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
ctype = TextDatumGetCString(datum);
- if (pg_perm_setlocale(LC_COLLATE, collate) == NULL)
- ereport(FATAL,
- (errmsg("database locale is incompatible with operating system"),
- errdetail("The database was initialized with LC_COLLATE \"%s\", "
- " which is not recognized by setlocale().", collate),
- errhint("Recreate the database with another locale or install the missing locale.")));
+ /*
+ * Historcally, we set LC_COLLATE from datcollate, as well. That's no
+ * longer necessary because all collation behavior is handled through
+ * pg_locale_t.
+ */
if (pg_perm_setlocale(LC_CTYPE, ctype) == NULL)
ereport(FATAL,
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index f04bfedb2fd..d14b1678e7f 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1028,7 +1028,7 @@ struct config_bool ConfigureNamesBool[] =
},
{
{"enable_distinct_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
- gettext_noop("Enables reordering of DISTINCT pathkeys."),
+ gettext_noop("Enables reordering of DISTINCT keys."),
NULL,
GUC_EXPLAIN
},
@@ -3081,7 +3081,7 @@ struct config_int ConfigureNamesInt[] =
},
&max_slot_wal_keep_size_mb,
-1, -1, MAX_KILOBYTES,
- check_max_slot_wal_keep_size, NULL, NULL
+ NULL, NULL, NULL
},
{
@@ -3100,11 +3100,11 @@ struct config_int ConfigureNamesInt[] =
gettext_noop("Sets the duration a replication slot can remain idle before "
"it is invalidated."),
NULL,
- GUC_UNIT_MIN
+ GUC_UNIT_S
},
- &idle_replication_slot_timeout_mins,
- 0, 0, INT_MAX / SECS_PER_MINUTE,
- check_idle_replication_slot_timeout, NULL, NULL
+ &idle_replication_slot_timeout_secs,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
},
{
@@ -4837,7 +4837,7 @@ struct config_string ConfigureNamesString[] =
{
{"ssl_groups", PGC_SIGHUP, CONN_AUTH_SSL,
gettext_noop("Sets the group(s) to use for Diffie-Hellman key exchange."),
- gettext_noop("Multiple groups can be specified using colon-separated list."),
+ gettext_noop("Multiple groups can be specified using a colon-separated list."),
GUC_SUPERUSER_ONLY
},
&SSLECDHCurve,
diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c
index f58ebc8ee52..83b887b6978 100644
--- a/src/backend/utils/misc/injection_point.c
+++ b/src/backend/utils/misc/injection_point.c
@@ -584,3 +584,49 @@ IsInjectionPointAttached(const char *name)
return false; /* silence compiler */
#endif
}
+
+/*
+ * Retrieve a list of all the injection points currently attached.
+ *
+ * This list is palloc'd in the current memory context.
+ */
+List *
+InjectionPointList(void)
+{
+#ifdef USE_INJECTION_POINTS
+ List *inj_points = NIL;
+ uint32 max_inuse;
+
+ LWLockAcquire(InjectionPointLock, LW_SHARED);
+
+ max_inuse = pg_atomic_read_u32(&ActiveInjectionPoints->max_inuse);
+
+ for (uint32 idx = 0; idx < max_inuse; idx++)
+ {
+ InjectionPointEntry *entry;
+ InjectionPointData *inj_point;
+ uint64 generation;
+
+ entry = &ActiveInjectionPoints->entries[idx];
+ generation = pg_atomic_read_u64(&entry->generation);
+
+ /* skip free slots */
+ if (generation % 2 == 0)
+ continue;
+
+ inj_point = (InjectionPointData *) palloc0(sizeof(InjectionPointData));
+ inj_point->name = pstrdup(entry->name);
+ inj_point->library = pstrdup(entry->library);
+ inj_point->function = pstrdup(entry->function);
+ inj_points = lappend(inj_points, inj_point);
+ }
+
+ LWLockRelease(InjectionPointLock);
+
+ return inj_points;
+
+#else
+ elog(ERROR, "Injection points are not supported by this build");
+ return NIL; /* keep compiler quiet */
+#endif
+}
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 341f88adc87..a9d8293474a 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -342,7 +342,7 @@
# (change requires restart)
#wal_keep_size = 0 # in megabytes; 0 disables
#max_slot_wal_keep_size = -1 # in megabytes; -1 disables
-#idle_replication_slot_timeout = 0 # in minutes; 0 disables
+#idle_replication_slot_timeout = 0 # in seconds; 0 disables
#wal_sender_timeout = 60s # in milliseconds; 0 disables
#track_commit_timestamp = off # collect timestamp of transaction commit
# (change requires restart)
diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c
index 17d4f7a7a06..be43e9351c3 100644
--- a/src/backend/utils/mmgr/dsa.c
+++ b/src/backend/utils/mmgr/dsa.c
@@ -532,6 +532,21 @@ dsa_attach(dsa_handle handle)
}
/*
+ * Returns whether the area with the given handle was already attached by the
+ * current process. The area must have been created with dsa_create (not
+ * dsa_create_in_place).
+ */
+bool
+dsa_is_attached(dsa_handle handle)
+{
+ /*
+ * An area handle is really a DSM segment handle for the first segment, so
+ * we can just search for that.
+ */
+ return dsm_find_mapping(handle) != NULL;
+}
+
+/*
* Attach to an area that was created with dsa_create_in_place. The caller
* must somehow know the location in memory that was used when the area was
* created, though it may be mapped at a different virtual address in this