-rw-r--r--  contrib/amcheck/Makefile             2
-rw-r--r--  contrib/amcheck/t/002_cic.pl        78
-rw-r--r--  src/backend/utils/cache/inval.c     12
-rw-r--r--  src/backend/utils/cache/relcache.c 105
-rw-r--r--  src/include/utils/inval.h            1
-rw-r--r--  src/include/utils/relcache.h         2
-rw-r--r--  src/test/perl/PostgresNode.pm      135
-rw-r--r--  src/tools/pgindent/typedefs.list     1
8 files changed, 331 insertions(+), 5 deletions(-)
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index dcec3b85203..9154d2874bc 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -9,6 +9,8 @@ PGFILEDESC = "amcheck - function for verifying relation integrity"
REGRESS = check check_btree
+TAP_TESTS = 1
+
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/contrib/amcheck/t/002_cic.pl b/contrib/amcheck/t/002_cic.pl
new file mode 100644
index 00000000000..26b56054b13
--- /dev/null
+++ b/contrib/amcheck/t/002_cic.pl
@@ -0,0 +1,78 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test CREATE INDEX CONCURRENTLY with concurrent modifications
+use strict;
+use warnings;
+
+use Config;
+use PostgresNode;
+use TestLib;
+
+use Test::More tests => 4;
+
+my ($node, $result);
+
+#
+# Test set-up
+#
+$node = get_new_node('CIC_test');
+$node->init;
+$node->append_conf('postgresql.conf', 'lock_timeout = 180000');
+$node->start;
+$node->safe_psql('postgres', q(CREATE EXTENSION amcheck));
+$node->safe_psql('postgres', q(CREATE TABLE tbl(i int)));
+$node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i)));
+
+#
+# Stress CIC with pgbench
+#
+
+# Run a background pgbench that performs CIC. We cannot mix this script into
+# a single pgbench run: CIC will occasionally deadlock with itself.
+my $pgbench_out = '';
+my $pgbench_timer = IPC::Run::timeout(180);
+my $pgbench_h = $node->background_pgbench(
+	'--no-vacuum --client=1 --transactions=200',
+	{
+		'002_pgbench_concurrent_cic' => q(
+			DROP INDEX CONCURRENTLY idx;
+			CREATE INDEX CONCURRENTLY idx ON tbl(i);
+			SELECT bt_index_check('idx',true);
+		)
+	},
+	\$pgbench_out,
+	$pgbench_timer);
+
+# Run pgbench.
+$node->pgbench(
+	'--no-vacuum --client=5 --transactions=200',
+	0,
+	[qr{actually processed}],
+	[qr{^$}],
+	'concurrent INSERTs',
+	{
+		'002_pgbench_concurrent_transaction' => q(
+			BEGIN;
+			INSERT INTO tbl VALUES(0);
+			COMMIT;
+		),
+		'002_pgbench_concurrent_transaction_savepoints' => q(
+			BEGIN;
+			SAVEPOINT s1;
+			INSERT INTO tbl VALUES(0);
+			COMMIT;
+		)
+	});
+
+$pgbench_h->pump_nb;
+$pgbench_h->finish();
+$result =
+ ($Config{osname} eq "MSWin32")
+ ? ($pgbench_h->full_results)[0]
+ : $pgbench_h->result(0);
+is($result, 0, "pgbench with CIC works");
+
+# done
+$node->stop;
+done_testing();
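
A note on the exit-status check at the end of 002_cic.pl above: on Windows the
status is taken from ($pgbench_h->full_results)[0], elsewhere from
$pgbench_h->result(0). A minimal sketch of factoring that platform check into a
helper follows; the helper name harness_exit_status is ours for illustration
and is not part of the patch or of PostgresNode.

use strict;
use warnings;
use Config;

# Illustrative helper (not in the patch): return the status of the first
# command in an IPC::Run harness, mirroring the MSWin32 branch used above.
sub harness_exit_status
{
	my ($harness) = @_;
	return ($Config{osname} eq "MSWin32")
	  ? ($harness->full_results)[0]
	  : $harness->result(0);
}

With such a helper, the final check would read:
is(harness_exit_status($pgbench_h), 0, "pgbench with CIC works");
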
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 3ccf7ccb2ae..44eaf7b0e23 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -584,7 +584,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
int i;
if (msg->rc.relId == InvalidOid)
- RelationCacheInvalidate();
+ RelationCacheInvalidate(false);
else
RelationCacheInvalidateEntry(msg->rc.relId);
@@ -642,11 +642,17 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
void
InvalidateSystemCaches(void)
{
+ InvalidateSystemCachesExtended(false);
+}
+
+void
+InvalidateSystemCachesExtended(bool debug_discard)
+{
int i;
InvalidateCatalogSnapshot();
ResetCatalogCaches();
- RelationCacheInvalidate(); /* gets smgr and relmap too */
+ RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */
for (i = 0; i < syscache_callback_count; i++)
{
@@ -717,7 +723,7 @@ AcceptInvalidationMessages(void)
if (recursion_depth < 3)
{
recursion_depth++;
- InvalidateSystemCaches();
+ InvalidateSystemCachesExtended(true);
recursion_depth--;
}
}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 3680e071b1c..87651111ee3 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -155,6 +155,24 @@ bool criticalSharedRelcachesBuilt = false;
static long relcacheInvalsReceived = 0L;
/*
+ * in_progress_list is a stack of ongoing RelationBuildDesc() calls. CREATE
+ * INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
+ * It critically relies on each backend absorbing those changes no later than
+ * next transaction start. Hence, RelationBuildDesc() loops until it finishes
+ * without accepting a relevant invalidation. (Most invalidation consumers
+ * don't do this.)
+ */
+typedef struct inprogressent
+{
+	Oid			reloid;			/* OID of relation being built */
+	bool		invalidated;	/* whether an invalidation arrived for it */
+} InProgressEnt;
+
+static InProgressEnt *in_progress_list;
+static int in_progress_list_len;
+static int in_progress_list_maxlen;
+
+/*
* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
* cleanup work. This list intentionally has limited size; if it overflows,
* we fall back to scanning the whole hashtable. There is no value in a very
@@ -1042,6 +1060,7 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
static Relation
RelationBuildDesc(Oid targetRelId, bool insertIt)
{
+ int in_progress_offset;
Relation relation;
Oid relid;
HeapTuple pg_class_tuple;
@@ -1069,6 +1088,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
oldcxt = MemoryContextSwitchTo(tmpcxt);
#endif
+	/* Register to catch invalidation messages */
+	if (in_progress_list_len >= in_progress_list_maxlen)
+	{
+		int			allocsize;
+
+		allocsize = in_progress_list_maxlen * 2;
+		in_progress_list = repalloc(in_progress_list,
+									allocsize * sizeof(*in_progress_list));
+		in_progress_list_maxlen = allocsize;
+	}
+	in_progress_offset = in_progress_list_len++;
+	in_progress_list[in_progress_offset].reloid = targetRelId;
+retry:
+	in_progress_list[in_progress_offset].invalidated = false;
+
/*
* find the tuple in pg_class corresponding to the given relation id
*/
@@ -1084,6 +1118,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(tmpcxt);
#endif
+ Assert(in_progress_offset + 1 == in_progress_list_len);
+ in_progress_list_len--;
return NULL;
}
@@ -1252,6 +1288,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
heap_freetuple(pg_class_tuple);
/*
+	 * If an invalidation arrived mid-build, start over.  Between here and the
+	 * end of this function, don't add code that does or reasonably could read
+	 * system catalogs.  That range must be free from invalidation processing
+	 * for the !insertIt case.  For the insertIt case, RelationCacheInsert()
+	 * will enroll this relation in ordinary relcache invalidation processing.
+	 */
+	if (in_progress_list[in_progress_offset].invalidated)
+	{
+		RelationDestroyRelation(relation, false);
+		goto retry;
+	}
+	Assert(in_progress_offset + 1 == in_progress_list_len);
+	in_progress_list_len--;
+
+ /*
* Insert newly created relation into relcache hash table, if requested.
*
* There is one scenario in which we might find a hashtable entry already
@@ -2554,6 +2605,14 @@ RelationClearRelation(Relation relation, bool rebuild)
/* Build temporary entry, but don't link it into hashtable */
newrel = RelationBuildDesc(save_relid, false);
+
+		/*
+		 * Between here and the end of the swap, don't add code that does or
+		 * reasonably could read system catalogs.  That range must be free
+		 * from invalidation processing.  See RelationBuildDesc() manipulation
+		 * of in_progress_list.
+		 */
+
if (newrel == NULL)
{
/*
@@ -2765,6 +2824,14 @@ RelationCacheInvalidateEntry(Oid relationId)
relcacheInvalsReceived++;
RelationFlushRelation(relation);
}
+	else
+	{
+		int			i;
+
+		for (i = 0; i < in_progress_list_len; i++)
+			if (in_progress_list[i].reloid == relationId)
+				in_progress_list[i].invalidated = true;
+	}
}
/*
@@ -2796,9 +2863,14 @@ RelationCacheInvalidateEntry(Oid relationId)
* second pass processes nailed-in-cache items before other nondeletable
* items. This should ensure that system catalogs are up to date before
* we attempt to use them to reload information about other open relations.
+ *
+ * After those two phases of work, which have immediate effects, we normally
+ * signal any RelationBuildDesc() on the stack to start over.  However, we
+ * don't do this if called as part of debug_discard_caches.  Otherwise,
+ * RelationBuildDesc() would become an infinite loop.
*/
void
-RelationCacheInvalidate(void)
+RelationCacheInvalidate(bool debug_discard)
{
HASH_SEQ_STATUS status;
RelIdCacheEnt *idhentry;
@@ -2806,6 +2878,7 @@ RelationCacheInvalidate(void)
List *rebuildFirstList = NIL;
List *rebuildList = NIL;
ListCell *l;
+ int i;
/*
* Reload relation mapping data before starting to reconstruct cache.
@@ -2892,6 +2965,11 @@ RelationCacheInvalidate(void)
RelationClearRelation(relation, true);
}
list_free(rebuildList);
+
+	if (!debug_discard)
+		/* Any RelationBuildDesc() on the stack must start over. */
+		for (i = 0; i < in_progress_list_len; i++)
+			in_progress_list[i].invalidated = true;
}
/*
@@ -2965,6 +3043,13 @@ AtEOXact_RelationCache(bool isCommit)
int i;
/*
+ * Forget in_progress_list. This is relevant when we're aborting due to
+ * an error during RelationBuildDesc().
+ */
+ Assert(in_progress_list_len == 0 || !isCommit);
+ in_progress_list_len = 0;
+
+ /*
* Unless the eoxact_list[] overflowed, we only need to examine the rels
* listed in it. Otherwise fall back on a hash_seq_search scan.
*
@@ -3101,6 +3186,14 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
int i;
/*
+ * Forget in_progress_list. This is relevant when we're aborting due to
+ * an error during RelationBuildDesc(). We don't commit subtransactions
+ * during RelationBuildDesc().
+ */
+ Assert(in_progress_list_len == 0 || !isCommit);
+ in_progress_list_len = 0;
+
+ /*
* Unless the eoxact_list[] overflowed, we only need to examine the rels
* listed in it. Otherwise fall back on a hash_seq_search scan. Same
* logic as in AtEOXact_RelationCache.
@@ -3601,6 +3694,7 @@ void
RelationCacheInitialize(void)
{
HASHCTL ctl;
+ int allocsize;
/*
* make sure cache memory context exists
@@ -3618,6 +3712,15 @@ RelationCacheInitialize(void)
&ctl, HASH_ELEM | HASH_BLOBS);
/*
+ * reserve enough in_progress_list slots for many cases
+ */
+ allocsize = 4;
+ in_progress_list =
+ MemoryContextAlloc(CacheMemoryContext,
+ allocsize * sizeof(*in_progress_list));
+ in_progress_list_maxlen = allocsize;
+
+ /*
* relation mapper needs to be initialized too
*/
RelationMapInitialize();
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 940d53f9006..bafdc6ee3a5 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -61,4 +61,5 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
extern void InvalidateSystemCaches(void);
+extern void InvalidateSystemCachesExtended(bool debug_discard);
#endif /* INVAL_H */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 06ea5bc8179..08a47430af4 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -117,7 +117,7 @@ extern void RelationForgetRelation(Oid rid);
extern void RelationCacheInvalidateEntry(Oid relationId);
-extern void RelationCacheInvalidate(void);
+extern void RelationCacheInvalidate(bool debug_discard);
extern void RelationCloseSmgrByOid(Oid relationId);
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 055a9f0b834..16e559771ab 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -1530,6 +1530,141 @@ sub psql
}
}
+# Common sub of the pgbench-invoking interfaces. Creates any requested script
+# files and returns the pgbench command-line options that select those files.
+sub _pgbench_make_files
+{
+	my ($self, $files) = @_;
+	my @file_opts;
+
+	if (defined $files)
+	{
+
+		# note: files are ordered for determinism
+		for my $fn (sort keys %$files)
+		{
+			my $filename = $self->basedir . '/' . $fn;
+			push @file_opts, '-f', $filename;
+
+			# strip off any trailing pgbench script weight (@N)
+			$filename =~ s/\@\d+$//;
+
+			#push @filenames, $filename;
+			# filenames are expected to be unique within a test
+			if (-e $filename)
+			{
+				ok(0, "$filename must not already exist");
+				unlink $filename or die "cannot unlink $filename: $!";
+			}
+			TestLib::append_to_file($filename, $$files{$fn});
+		}
+	}
+
+	return @file_opts;
+}
+
+=pod
+
+=item $node->pgbench($opts, $stat, $out, $err, $name, $files, @args)
+
+Invoke B<pgbench>, with parameters and files.
+
+=over
+
+=item $opts
+
+Options as a string to be split on spaces.
+
+=item $stat
+
+Expected exit status.
+
+=item $out
+
+Reference to a regexp list that must match stdout.
+
+=item $err
+
+Reference to a regexp list that must match stderr.
+
+=item $name
+
+Name of test for error messages.
+
+=item $files
+
+Reference to filename/contents dictionary.
+
+=item @args
+
+Further raw options or arguments.
+
+=back
+
+=cut
+
+sub pgbench
+{
+	local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+	my ($self, $opts, $stat, $out, $err, $name, $files, @args) = @_;
+	my @cmd = (
+		'pgbench',
+		split(/\s+/, $opts),
+		$self->_pgbench_make_files($files), @args);
+
+	$self->command_checks_all(\@cmd, $stat, $out, $err, $name);
+}
+
+=pod
+
+=item $node->background_pgbench($opts, $files, \$stdout, $timer) => harness
+
+Invoke B<pgbench> and return an IPC::Run harness object. The process's stdin
+is empty, and its stdout and stderr go to the $stdout scalar reference. This
+allows the caller to act on other parts of the system while B<pgbench> is
+running. Errors from B<pgbench> are the caller's problem.
+
+The specified timer object is attached to the harness, as well. It's the
+caller's responsibility to select the timeout length, and to restart the
+timer after each command if the timeout is per-command.
+
+Be sure to "finish" the harness when done with it.
+
+=over
+
+=item $opts
+
+Options as a string to be split on spaces.
+
+=item $files
+
+Reference to filename/contents dictionary.
+
+=back
+
+=cut
+
+sub background_pgbench
+{
+	my ($self, $opts, $files, $stdout, $timer) = @_;
+
+	my @cmd =
+	  ('pgbench', split(/\s+/, $opts), $self->_pgbench_make_files($files));
+
+	local $ENV{PGHOST} = $self->host;
+	local $ENV{PGPORT} = $self->port;
+
+	my $stdin = "";
+	# IPC::Run would otherwise append to existing contents:
+	$$stdout = "" if ref($stdout);
+
+	my $harness = IPC::Run::start \@cmd, '<', \$stdin, '>', $stdout, '2>&1',
+	  $timer;
+
+	return $harness;
+}
+
=pod
=item $node->poll_query_until($dbname, $query [, $expected ])
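
For reference, a short usage sketch of the two methods added above, modeled on
the POD and on contrib/amcheck/t/002_cic.pl. The node name, script names,
table name, and transaction counts are illustrative only; the sketch assumes
the usual TAP-test environment with PostgresNode and TestLib available.

use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More;
use IPC::Run;

my $node = get_new_node('pgbench_demo');
$node->init;
$node->start;
$node->safe_psql('postgres', 'CREATE TABLE t(i int)');

# Foreground pgbench: exit status, stdout and stderr are checked against the
# given patterns; the script file is created from the supplied dictionary.
$node->pgbench(
	'--no-vacuum --client=2 --transactions=10',
	0,
	[qr{actually processed}],
	[qr{^$}],
	'simple INSERT workload',
	{
		'demo_insert' => q(
			INSERT INTO t VALUES(0);
		)
	});

# Background pgbench: returns an IPC::Run harness. Do other work here, then
# finish the harness and, if desired, check its status as 002_cic.pl does.
my $out   = '';
my $timer = IPC::Run::timeout(60);
my $h     = $node->background_pgbench(
	'--no-vacuum --client=1 --transactions=10',
	{ 'demo_select' => q(SELECT count(*) FROM t;) },
	\$out,
	$timer);
$h->finish();

$node->stop;
done_testing();
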
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 7f8cb3cae13..daebb773870 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1020,6 +1020,7 @@ ImportForeignSchemaStmt
ImportForeignSchemaType
ImportForeignSchema_function
ImportQual
+InProgressEnt
IncludeWal
InclusionOpaque
IncrementVarSublevelsUp_context