aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/catalog/heap.c20
-rw-r--r--src/backend/catalog/index.c4
-rw-r--r--src/backend/catalog/partition.c38
-rw-r--r--src/backend/catalog/pg_inherits.c114
-rw-r--r--src/backend/commands/indexcmds.c6
-rw-r--r--src/backend/commands/tablecmds.c550
-rw-r--r--src/backend/commands/trigger.c5
-rw-r--r--src/backend/executor/execPartition.c29
-rw-r--r--src/backend/nodes/copyfuncs.c1
-rw-r--r--src/backend/nodes/equalfuncs.c1
-rw-r--r--src/backend/optimizer/util/plancat.c8
-rw-r--r--src/backend/parser/gram.y23
-rw-r--r--src/backend/partitioning/partbounds.c6
-rw-r--r--src/backend/partitioning/partdesc.c21
-rw-r--r--src/backend/tcop/utility.c19
-rw-r--r--src/backend/utils/adt/ri_triggers.c6
-rw-r--r--src/backend/utils/cache/partcache.c12
17 files changed, 714 insertions, 149 deletions
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index d0ec44bb40e..9f6303266f9 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1923,7 +1923,12 @@ heap_drop_with_catalog(Oid relid)
elog(ERROR, "cache lookup failed for relation %u", relid);
if (((Form_pg_class) GETSTRUCT(tuple))->relispartition)
{
- parentOid = get_partition_parent(relid);
+ /*
+ * We have to lock the parent if the partition is being detached,
+ * because it's possible that some query still has a partition
+ * descriptor that includes this partition.
+ */
+ parentOid = get_partition_parent(relid, true);
LockRelationOid(parentOid, AccessExclusiveLock);
/*
@@ -2559,10 +2564,12 @@ StoreConstraints(Relation rel, List *cooked_constraints, bool is_internal)
* Returns a list of CookedConstraint nodes that shows the cooked form of
* the default and constraint expressions added to the relation.
*
- * NB: caller should have opened rel with AccessExclusiveLock, and should
- * hold that lock till end of transaction. Also, we assume the caller has
- * done a CommandCounterIncrement if necessary to make the relation's catalog
- * tuples visible.
+ * NB: caller should have opened rel with some self-conflicting lock mode,
+ * and should hold that lock till end of transaction; for normal cases that'll
+ * be AccessExclusiveLock, but if caller knows that the constraint is already
+ * enforced by some other means, it can be ShareUpdateExclusiveLock. Also, we
+ * assume the caller has done a CommandCounterIncrement if necessary to make
+ * the relation's catalog tuples visible.
*/
List *
AddRelationNewConstraints(Relation rel,
@@ -3831,7 +3838,8 @@ StorePartitionBound(Relation rel, Relation parent, PartitionBoundSpec *bound)
* relcache entry for that partition every time a partition is added or
* removed.
*/
- defaultPartOid = get_default_oid_from_partdesc(RelationGetPartitionDesc(parent));
+ defaultPartOid =
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(parent, false));
if (OidIsValid(defaultPartOid))
CacheInvalidateRelcacheByRelid(defaultPartOid);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 397d70d2269..af30840c044 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1837,7 +1837,7 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
List *ancestors = get_partition_ancestors(oldIndexId);
Oid parentIndexRelid = linitial_oid(ancestors);
- DeleteInheritsTuple(oldIndexId, parentIndexRelid);
+ DeleteInheritsTuple(oldIndexId, parentIndexRelid, false, NULL);
StoreSingleInheritance(newIndexId, parentIndexRelid, 1);
list_free(ancestors);
@@ -2487,7 +2487,7 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
/*
* fix INHERITS relation
*/
- DeleteInheritsTuple(indexId, InvalidOid);
+ DeleteInheritsTuple(indexId, InvalidOid, false, NULL);
/*
* We are presently too lazy to attempt to compute the new correct value
diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index af7754d6ab7..790f4ccb927 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -32,7 +32,8 @@
#include "utils/rel.h"
#include "utils/syscache.h"
-static Oid get_partition_parent_worker(Relation inhRel, Oid relid);
+static Oid get_partition_parent_worker(Relation inhRel, Oid relid,
+ bool *detach_pending);
static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
List **ancestors);
@@ -42,23 +43,32 @@ static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
*
* Returns inheritance parent of a partition by scanning pg_inherits
*
+ * If the partition is in the process of being detached, an error is thrown,
+ * unless even_if_detached is passed as true.
+ *
* Note: Because this function assumes that the relation whose OID is passed
* as an argument will have precisely one parent, it should only be called
* when it is known that the relation is a partition.
*/
Oid
-get_partition_parent(Oid relid)
+get_partition_parent(Oid relid, bool even_if_detached)
{
Relation catalogRelation;
Oid result;
+ bool detach_pending;
catalogRelation = table_open(InheritsRelationId, AccessShareLock);
- result = get_partition_parent_worker(catalogRelation, relid);
+ result = get_partition_parent_worker(catalogRelation, relid,
+ &detach_pending);
if (!OidIsValid(result))
elog(ERROR, "could not find tuple for parent of relation %u", relid);
+ if (detach_pending && !even_if_detached)
+ elog(ERROR, "relation %u has no parent because it's being detached",
+ relid);
+
table_close(catalogRelation, AccessShareLock);
return result;
@@ -68,15 +78,20 @@ get_partition_parent(Oid relid)
* get_partition_parent_worker
* Scan the pg_inherits relation to return the OID of the parent of the
* given relation
+ *
+ * If the partition is being detached, *detach_pending is set true (but the
+ * original parent is still returned.)
*/
static Oid
-get_partition_parent_worker(Relation inhRel, Oid relid)
+get_partition_parent_worker(Relation inhRel, Oid relid, bool *detach_pending)
{
SysScanDesc scan;
ScanKeyData key[2];
Oid result = InvalidOid;
HeapTuple tuple;
+ *detach_pending = false;
+
ScanKeyInit(&key[0],
Anum_pg_inherits_inhrelid,
BTEqualStrategyNumber, F_OIDEQ,
@@ -93,6 +108,9 @@ get_partition_parent_worker(Relation inhRel, Oid relid)
{
Form_pg_inherits form = (Form_pg_inherits) GETSTRUCT(tuple);
+ /* Let caller know of partition being detached */
+ if (form->inhdetachpending)
+ *detach_pending = true;
result = form->inhparent;
}
@@ -134,10 +152,14 @@ static void
get_partition_ancestors_worker(Relation inhRel, Oid relid, List **ancestors)
{
Oid parentOid;
+ bool detach_pending;
- /* Recursion ends at the topmost level, ie., when there's no parent */
- parentOid = get_partition_parent_worker(inhRel, relid);
- if (parentOid == InvalidOid)
+ /*
+ * Recursion ends at the topmost level, ie., when there's no parent; also
+ * when the partition is being detached.
+ */
+ parentOid = get_partition_parent_worker(inhRel, relid, &detach_pending);
+ if (parentOid == InvalidOid || detach_pending)
return;
*ancestors = lappend_oid(*ancestors, parentOid);
@@ -170,7 +192,7 @@ index_get_partition(Relation partition, Oid indexId)
ReleaseSysCache(tup);
if (!ispartition)
continue;
- if (get_partition_parent(partIdx) == indexId)
+ if (get_partition_parent(partIdx, false) == indexId)
{
list_free(idxlist);
return partIdx;
diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c
index 5ab79028274..bedee069be2 100644
--- a/src/backend/catalog/pg_inherits.c
+++ b/src/backend/catalog/pg_inherits.c
@@ -29,6 +29,7 @@
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/memutils.h"
+#include "utils/snapmgr.h"
#include "utils/syscache.h"
/*
@@ -50,9 +51,14 @@ typedef struct SeenRelsEntry
* given rel; caller should already have locked it). If lockmode is NoLock
* then no locks are acquired, but caller must beware of race conditions
* against possible DROPs of child relations.
+ *
+ * include_detached says to include all partitions, even if they're marked
+ * detached. Passing it as false means they might or might not be included,
+ * depending on the visibility of the pg_inherits row for the active snapshot.
*/
List *
-find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
+find_inheritance_children(Oid parentrelId, bool include_detached,
+ LOCKMODE lockmode)
{
List *list = NIL;
Relation relation;
@@ -91,6 +97,30 @@ find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
while ((inheritsTuple = systable_getnext(scan)) != NULL)
{
+ /*
+ * Cope with partitions concurrently being detached. When we see a
+ * partition marked "detach pending", we only include it in the set of
+ * visible partitions if caller requested all detached partitions, or
+ * if its pg_inherits tuple's xmin is still visible to the active
+ * snapshot.
+ *
+ * The reason for this check is that we want to avoid seeing the
+ * partition as alive in RI queries during REPEATABLE READ or
+ * SERIALIZABLE transactions.
+ */
+ if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhdetachpending &&
+ !include_detached)
+ {
+ TransactionId xmin;
+ Snapshot snap;
+
+ xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data);
+ snap = GetActiveSnapshot();
+
+ if (!XidInMVCCSnapshot(xmin, snap))
+ continue;
+ }
+
inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid;
if (numoids >= maxoids)
{
@@ -160,6 +190,9 @@ find_inheritance_children(Oid parentrelId, LOCKMODE lockmode)
* given rel; caller should already have locked it). If lockmode is NoLock
* then no locks are acquired, but caller must beware of race conditions
* against possible DROPs of child relations.
+ *
+ * NB - No current callers of this routine are interested in children being
+ * concurrently detached, so there's no provision to include them.
*/
List *
find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
@@ -199,7 +232,8 @@ find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
ListCell *lc;
/* Get the direct children of this rel */
- currentchildren = find_inheritance_children(currentrel, lockmode);
+ currentchildren = find_inheritance_children(currentrel, false,
+ lockmode);
/*
* Add to the queue only those children not already seen. This avoids
@@ -430,6 +464,7 @@ StoreSingleInheritance(Oid relationId, Oid parentOid, int32 seqNumber)
values[Anum_pg_inherits_inhrelid - 1] = ObjectIdGetDatum(relationId);
values[Anum_pg_inherits_inhparent - 1] = ObjectIdGetDatum(parentOid);
values[Anum_pg_inherits_inhseqno - 1] = Int32GetDatum(seqNumber);
+ values[Anum_pg_inherits_inhdetachpending - 1] = BoolGetDatum(false);
memset(nulls, 0, sizeof(nulls));
@@ -449,10 +484,17 @@ StoreSingleInheritance(Oid relationId, Oid parentOid, int32 seqNumber)
* as InvalidOid, in which case all tuples matching inhrelid are deleted;
* otherwise only delete tuples with the specified inhparent.
*
+ * expect_detach_pending is the expected state of the inhdetachpending flag.
+ * If the catalog row does not match that state, an error is raised.
+ *
+ * childname is the partition name, if a table; pass NULL for regular
+ * inheritance or when working with other relation kinds.
+ *
* Returns whether at least one row was deleted.
*/
bool
-DeleteInheritsTuple(Oid inhrelid, Oid inhparent)
+DeleteInheritsTuple(Oid inhrelid, Oid inhparent, bool expect_detach_pending,
+ const char *childname)
{
bool found = false;
Relation catalogRelation;
@@ -479,6 +521,29 @@ DeleteInheritsTuple(Oid inhrelid, Oid inhparent)
parent = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent;
if (!OidIsValid(inhparent) || parent == inhparent)
{
+ bool detach_pending;
+
+ detach_pending =
+ ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhdetachpending;
+
+ /*
+ * Raise error depending on state. This should only happen for
+ * partitions, but we have no way to cross-check.
+ */
+ if (detach_pending && !expect_detach_pending)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot detach partition \"%s\"",
+ childname ? childname : "unknown relation"),
+ errdetail("The partition is being detached concurrently or has an unfinished detach."),
+ errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation")));
+ if (!detach_pending && expect_detach_pending)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot complete detaching partition \"%s\"",
+ childname ? childname : "unknown relation"),
+ errdetail("There's no pending concurrent detach.")));
+
CatalogTupleDelete(catalogRelation, &inheritsTuple->t_self);
found = true;
}
@@ -490,3 +555,46 @@ DeleteInheritsTuple(Oid inhrelid, Oid inhparent)
return found;
}
+
+/*
+ * Return whether the pg_inherits tuple for a partition has the "detach
+ * pending" flag set.
+ */
+bool
+PartitionHasPendingDetach(Oid partoid)
+{
+ Relation catalogRelation;
+ ScanKeyData key;
+ SysScanDesc scan;
+ HeapTuple inheritsTuple;
+
+ /* We don't have a good way to verify it is in fact a partition */
+
+ /*
+ * Find the pg_inherits entry by inhrelid. (There should only be one.)
+ */
+ catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
+ ScanKeyInit(&key,
+ Anum_pg_inherits_inhrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(partoid));
+ scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
+ true, NULL, 1, &key);
+
+ while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
+ {
+ bool detached;
+
+ detached =
+ ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhdetachpending;
+
+ /* Done */
+ systable_endscan(scan);
+ table_close(catalogRelation, RowExclusiveLock);
+
+ return detached;
+ }
+
+ elog(ERROR, "relation %u is not a partition", partoid);
+ return false; /* keep compiler quiet */
+}
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index e4e1bbb7e00..166374cc0c9 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -410,7 +410,7 @@ CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts)
* GetCurrentVirtualXIDs. If, during any iteration, a particular vxid
* doesn't show up in the output, we know we can forget about it.
*/
-static void
+void
WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
{
int n_old_snapshots;
@@ -1123,7 +1123,7 @@ DefineIndex(Oid relationId,
*/
if (partitioned && stmt->relation && !stmt->relation->inh)
{
- PartitionDesc pd = RelationGetPartitionDesc(rel);
+ PartitionDesc pd = RelationGetPartitionDesc(rel, false);
if (pd->nparts != 0)
flags |= INDEX_CREATE_INVALID;
@@ -1180,7 +1180,7 @@ DefineIndex(Oid relationId,
*
* If we're called internally (no stmt->relation), recurse always.
*/
- partdesc = RelationGetPartitionDesc(rel);
+ partdesc = RelationGetPartitionDesc(rel, false);
if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
{
int nparts = partdesc->nparts;
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 550b84681e4..efac06f72c7 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -552,7 +552,8 @@ static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partsp
static void ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs,
List **partexprs, Oid *partopclass, Oid *partcollation, char strategy);
static void CreateInheritance(Relation child_rel, Relation parent_rel);
-static void RemoveInheritance(Relation child_rel, Relation parent_rel);
+static void RemoveInheritance(Relation child_rel, Relation parent_rel,
+ bool allow_detached);
static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel,
PartitionCmd *cmd,
AlterTableUtilityContext *context);
@@ -561,8 +562,14 @@ static void QueuePartitionConstraintValidation(List **wqueue, Relation scanrel,
List *partConstraint,
bool validate_default);
static void CloneRowTriggersToPartition(Relation parent, Relation partition);
+static void DetachAddConstraintIfNeeded(List **wqueue, Relation partRel);
static void DropClonedTriggersFromPartition(Oid partitionId);
-static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name);
+static ObjectAddress ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab,
+ Relation rel, RangeVar *name,
+ bool concurrent);
+static void DetachPartitionFinalize(Relation rel, Relation partRel,
+ bool concurrent, Oid defaultPartOid);
+static ObjectAddress ATExecDetachPartitionFinalize(Relation rel, RangeVar *name);
static ObjectAddress ATExecAttachPartitionIdx(List **wqueue, Relation rel,
RangeVar *name);
static void validatePartitionedIndex(Relation partedIdx, Relation partedTbl);
@@ -1010,7 +1017,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
* lock the partition so as to avoid a deadlock.
*/
defaultPartOid =
- get_default_oid_from_partdesc(RelationGetPartitionDesc(parent));
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(parent,
+ false));
if (OidIsValid(defaultPartOid))
defaultRel = table_open(defaultPartOid, AccessExclusiveLock);
@@ -1563,7 +1571,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
*/
if (is_partition && relOid != oldRelOid)
{
- state->partParentOid = get_partition_parent(relOid);
+ state->partParentOid = get_partition_parent(relOid, true);
if (OidIsValid(state->partParentOid))
LockRelationOid(state->partParentOid, AccessExclusiveLock);
}
@@ -3323,7 +3331,7 @@ renameatt_internal(Oid myrelid,
* expected_parents will only be 0 if we are not already recursing.
*/
if (expected_parents == 0 &&
- find_inheritance_children(myrelid, NoLock) != NIL)
+ find_inheritance_children(myrelid, false, NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited column \"%s\" must be renamed in child tables too",
@@ -3522,7 +3530,7 @@ rename_constraint_internal(Oid myrelid,
else
{
if (expected_parents == 0 &&
- find_inheritance_children(myrelid, NoLock) != NIL)
+ find_inheritance_children(myrelid, false, NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited constraint \"%s\" must be renamed in child tables too",
@@ -4142,7 +4150,14 @@ AlterTableGetLockLevel(List *cmds)
break;
case AT_DetachPartition:
- cmd_lockmode = AccessExclusiveLock;
+ if (((PartitionCmd *) cmd->def)->concurrent)
+ cmd_lockmode = ShareUpdateExclusiveLock;
+ else
+ cmd_lockmode = AccessExclusiveLock;
+ break;
+
+ case AT_DetachPartitionFinalize:
+ cmd_lockmode = ShareUpdateExclusiveLock;
break;
case AT_CheckNotNull:
@@ -4227,6 +4242,19 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
tab = ATGetQueueEntry(wqueue, rel);
/*
+ * Disallow any ALTER TABLE other than ALTER TABLE DETACH FINALIZE on
+ * partitions that are pending detach.
+ */
+ if (rel->rd_rel->relispartition &&
+ cmd->subtype != AT_DetachPartitionFinalize &&
+ PartitionHasPendingDetach(RelationGetRelid(rel)))
+ ereport(ERROR,
+ errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot alter partition \"%s\" with an incomplete detach",
+ RelationGetRelationName(rel)),
+ errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation."));
+
+ /*
* Copy the original subcommand for each table. This avoids conflicts
* when different child tables need to make different parse
* transformations (for example, the same column may have different column
@@ -4539,6 +4567,11 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
/* No command-specific prep needed */
pass = AT_PASS_MISC;
break;
+ case AT_DetachPartitionFinalize:
+ ATSimplePermissions(rel, ATT_TABLE);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
@@ -4930,7 +4963,12 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab,
Assert(cmd != NULL);
/* ATPrepCmd ensures it must be a table */
Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
- ATExecDetachPartition(rel, ((PartitionCmd *) cmd->def)->name);
+ ATExecDetachPartition(wqueue, tab, rel,
+ ((PartitionCmd *) cmd->def)->name,
+ ((PartitionCmd *) cmd->def)->concurrent);
+ break;
+ case AT_DetachPartitionFinalize:
+ ATExecDetachPartitionFinalize(rel, ((PartitionCmd *) cmd->def)->name);
break;
case AT_AlterCollationRefreshVersion:
/* ATPrepCmd ensured it must be an index */
@@ -6362,7 +6400,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
*/
if (colDef->identity &&
recurse &&
- find_inheritance_children(myrelid, NoLock) != NIL)
+ find_inheritance_children(myrelid, false, NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("cannot recursively add identity column to table that has child tables")));
@@ -6607,7 +6645,8 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
* routines, we have to do this one level of recursion at a time; we can't
* use find_all_inheritors to do it in one pass.
*/
- children = find_inheritance_children(RelationGetRelid(rel), lockmode);
+ children =
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode);
/*
* If we are told not to recurse, there had better not be any child
@@ -6761,7 +6800,7 @@ ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
*/
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc partdesc = RelationGetPartitionDesc(rel);
+ PartitionDesc partdesc = RelationGetPartitionDesc(rel, false);
Assert(partdesc != NULL);
if (partdesc->nparts > 0 && !recurse && !recursing)
@@ -6860,7 +6899,7 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
/* If rel is partition, shouldn't drop NOT NULL if parent has the same */
if (rel->rd_rel->relispartition)
{
- Oid parentId = get_partition_parent(RelationGetRelid(rel));
+ Oid parentId = get_partition_parent(RelationGetRelid(rel), false);
Relation parent = table_open(parentId, AccessShareLock);
TupleDesc tupDesc = RelationGetDescr(parent);
AttrNumber parent_attnum;
@@ -7470,7 +7509,7 @@ ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recurs
* resulting state can be properly dumped and restored.
*/
if (!recurse &&
- find_inheritance_children(RelationGetRelid(rel), lockmode))
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too")));
@@ -8077,7 +8116,8 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
* routines, we have to do this one level of recursion at a time; we can't
* use find_all_inheritors to do it in one pass.
*/
- children = find_inheritance_children(RelationGetRelid(rel), lockmode);
+ children =
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode);
if (children)
{
@@ -8541,7 +8581,8 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
* routines, we have to do this one level of recursion at a time; we can't
* use find_all_inheritors to do it in one pass.
*/
- children = find_inheritance_children(RelationGetRelid(rel), lockmode);
+ children =
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode);
/*
* Check if ONLY was specified with ALTER TABLE. If so, allow the
@@ -9156,7 +9197,7 @@ addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint, Relation rel,
*/
if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc pd = RelationGetPartitionDesc(pkrel);
+ PartitionDesc pd = RelationGetPartitionDesc(pkrel, false);
for (int i = 0; i < pd->nparts; i++)
{
@@ -9290,7 +9331,7 @@ addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel,
}
else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc pd = RelationGetPartitionDesc(rel);
+ PartitionDesc pd = RelationGetPartitionDesc(rel, false);
/*
* Recurse to take appropriate action on each partition; either we
@@ -11074,7 +11115,8 @@ ATExecDropConstraint(Relation rel, const char *constrName,
* use find_all_inheritors to do it in one pass.
*/
if (!is_no_inherit_constraint)
- children = find_inheritance_children(RelationGetRelid(rel), lockmode);
+ children =
+ find_inheritance_children(RelationGetRelid(rel), false, lockmode);
else
children = NIL;
@@ -11458,7 +11500,8 @@ ATPrepAlterColumnType(List **wqueue,
}
}
else if (!recursing &&
- find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL)
+ find_inheritance_children(RelationGetRelid(rel), false,
+ NoLock) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("type of inherited column \"%s\" must be changed in child tables too",
@@ -14296,7 +14339,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
*/
/* Off to RemoveInheritance() where most of the work happens */
- RemoveInheritance(rel, parent_rel);
+ RemoveInheritance(rel, parent_rel, false);
ObjectAddressSet(address, RelationRelationId,
RelationGetRelid(parent_rel));
@@ -14308,11 +14351,71 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
}
/*
+ * MarkInheritDetached
+ *
+ * Set inhdetachpending for a partition, for ATExecDetachPartition
+ * in concurrent mode.
+ */
+static void
+MarkInheritDetached(Relation child_rel, Relation parent_rel)
+{
+ Relation catalogRelation;
+ SysScanDesc scan;
+ ScanKeyData key;
+ HeapTuple inheritsTuple;
+ bool found = false;
+
+ Assert(child_rel->rd_rel->relkind == RELKIND_RELATION ||
+ child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+ Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+ /*
+ * Find pg_inherits entries by inhrelid.
+ */
+ catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
+ ScanKeyInit(&key,
+ Anum_pg_inherits_inhrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(child_rel)));
+ scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
+ true, NULL, 1, &key);
+
+ while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
+ {
+ HeapTuple newtup;
+
+ if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent !=
+ RelationGetRelid(parent_rel))
+ elog(ERROR, "bad parent tuple found for partition %u",
+ RelationGetRelid(child_rel));
+
+ newtup = heap_copytuple(inheritsTuple);
+ ((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true;
+
+ CatalogTupleUpdate(catalogRelation,
+ &inheritsTuple->t_self,
+ newtup);
+ found = true;
+ }
+
+ /* Done */
+ systable_endscan(scan);
+ table_close(catalogRelation, RowExclusiveLock);
+
+ if (!found)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("relation \"%s\" is not a partition of relation \"%s\"",
+ RelationGetRelationName(child_rel),
+ RelationGetRelationName(parent_rel))));
+}
+
+/*
* RemoveInheritance
*
* Drop a parent from the child's parents. This just adjusts the attinhcount
* and attislocal of the columns and removes the pg_inherit and pg_depend
- * entries.
+ * entries. expect_detached is passed down to DeleteInheritsTuple, q.v..
*
* If attinhcount goes to 0 then attislocal gets set to true. If it goes back
* up attislocal stays true, which means if a child is ever removed from a
@@ -14326,7 +14429,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
* Common to ATExecDropInherit() and ATExecDetachPartition().
*/
static void
-RemoveInheritance(Relation child_rel, Relation parent_rel)
+RemoveInheritance(Relation child_rel, Relation parent_rel, bool expect_detached)
{
Relation catalogRelation;
SysScanDesc scan;
@@ -14342,7 +14445,9 @@ RemoveInheritance(Relation child_rel, Relation parent_rel)
child_is_partition = true;
found = DeleteInheritsTuple(RelationGetRelid(child_rel),
- RelationGetRelid(parent_rel));
+ RelationGetRelid(parent_rel),
+ expect_detached,
+ RelationGetRelationName(child_rel));
if (!found)
{
if (child_is_partition)
@@ -16508,7 +16613,7 @@ QueuePartitionConstraintValidation(List **wqueue, Relation scanrel,
}
else if (scanrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc partdesc = RelationGetPartitionDesc(scanrel);
+ PartitionDesc partdesc = RelationGetPartitionDesc(scanrel, false);
int i;
for (i = 0; i < partdesc->nparts; i++)
@@ -16568,7 +16673,7 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd,
* new partition will change its partition constraint.
*/
defaultPartOid =
- get_default_oid_from_partdesc(RelationGetPartitionDesc(rel));
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, false));
if (OidIsValid(defaultPartOid))
LockRelationOid(defaultPartOid, AccessExclusiveLock);
@@ -17157,105 +17262,213 @@ CloneRowTriggersToPartition(Relation parent, Relation partition)
* ALTER TABLE DETACH PARTITION
*
* Return the address of the relation that is no longer a partition of rel.
+ *
+ * If concurrent mode is requested, we run in two transactions. A side-
+ * effect is that this command cannot run in a multi-part ALTER TABLE.
+ * Currently, that's enforced by the grammar.
+ *
+ * The strategy for concurrency is to first modify the partition's
+ * pg_inherit catalog row to make it visible to everyone that the
+ * partition is detached, lock the partition against writes, and commit
+ * the transaction; anyone who requests the partition descriptor from
+ * that point onwards has to ignore such a partition. In a second
+ * transaction, we wait until all transactions that could have seen the
+ * partition as attached are gone, then we remove the rest of partition
+ * metadata (pg_inherits and pg_class.relpartbounds).
*/
static ObjectAddress
-ATExecDetachPartition(Relation rel, RangeVar *name)
+ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
+ RangeVar *name, bool concurrent)
{
- Relation partRel,
- classRel;
- HeapTuple tuple,
- newtuple;
- Datum new_val[Natts_pg_class];
- bool new_null[Natts_pg_class],
- new_repl[Natts_pg_class];
+ Relation partRel;
ObjectAddress address;
Oid defaultPartOid;
- List *indexes;
- List *fks;
- ListCell *cell;
/*
* We must lock the default partition, because detaching this partition
* will change its partition constraint.
*/
defaultPartOid =
- get_default_oid_from_partdesc(RelationGetPartitionDesc(rel));
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, false));
if (OidIsValid(defaultPartOid))
+ {
+ /*
+ * Concurrent detaching when a default partition exists is not
+ * supported. The main problem is that the default partition
+ * constraint would change. And there's a definitional problem: what
+ * should happen to the tuples that are being inserted that belong to
+ * the partition being detached? Putting them on the partition being
+ * detached would be wrong, since they'd become "lost" after the but
+ * we cannot put them in the default partition either until we alter
+ * its partition constraint.
+ *
+ * I think we could solve this problem if we effected the constraint
+ * change before committing the first transaction. But the lock would
+ * have to remain AEL and it would cause concurrent query planning to
+ * be blocked, so changing it that way would be even worse.
+ */
+ if (concurrent)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot detach partitions concurrently when a default partition exists")));
LockRelationOid(defaultPartOid, AccessExclusiveLock);
+ }
- partRel = table_openrv(name, ShareUpdateExclusiveLock);
+ /*
+ * In concurrent mode, the partition is locked with share-update-exclusive
+ * in the first transaction. This allows concurrent transactions to be
+ * doing DML to the partition.
+ */
+ partRel = table_openrv(name, concurrent ? ShareUpdateExclusiveLock :
+ AccessExclusiveLock);
- /* Ensure that foreign keys still hold after this detach */
+ /*
+ * Check inheritance conditions and either delete the pg_inherits row
+ * (in non-concurrent mode) or just set the inhdetachpending flag.
+ */
+ if (!concurrent)
+ RemoveInheritance(partRel, rel, false);
+ else
+ MarkInheritDetached(partRel, rel);
+
+ /*
+ * Ensure that foreign keys still hold after this detach. This keeps
+ * locks on the referencing tables, which prevents concurrent transactions
+ * from adding rows that we wouldn't see. For this to work in concurrent
+ * mode, it is critical that the partition appears as no longer attached
+ * for the RI queries as soon as the first transaction commits.
+ */
ATDetachCheckNoForeignKeyRefs(partRel);
- /* All inheritance related checks are performed within the function */
- RemoveInheritance(partRel, rel);
+ /*
+ * Concurrent mode has to work harder; first we add a new constraint to
+ * the partition that matches the partition constraint. Then we close our
+ * existing transaction, and in a new one wait for all processes to catch
+ * up on the catalog updates we've done so far; at that point we can
+ * complete the operation.
+ */
+ if (concurrent)
+ {
+ Oid partrelid,
+ parentrelid;
+ LOCKTAG tag;
+ char *parentrelname;
+ char *partrelname;
- /* Update pg_class tuple */
- classRel = table_open(RelationRelationId, RowExclusiveLock);
- tuple = SearchSysCacheCopy1(RELOID,
- ObjectIdGetDatum(RelationGetRelid(partRel)));
- if (!HeapTupleIsValid(tuple))
- elog(ERROR, "cache lookup failed for relation %u",
- RelationGetRelid(partRel));
- Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition);
+ /*
+ * Add a new constraint to the partition being detached, which
+ * supplants the partition constraint (unless there is one already).
+ */
+ DetachAddConstraintIfNeeded(wqueue, partRel);
- /* Clear relpartbound and reset relispartition */
- memset(new_val, 0, sizeof(new_val));
- memset(new_null, false, sizeof(new_null));
- memset(new_repl, false, sizeof(new_repl));
- new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0;
- new_null[Anum_pg_class_relpartbound - 1] = true;
- new_repl[Anum_pg_class_relpartbound - 1] = true;
- newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
- new_val, new_null, new_repl);
+ /*
+ * We're almost done now; the only traces that remain are the
+ * pg_inherits tuple and the partition's relpartbounds. Before we can
+ * remove those, we need to wait until all transactions that know that
+ * this is a partition are gone.
+ */
- ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false;
- CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple);
- heap_freetuple(newtuple);
+ /*
+ * Remember relation OIDs to re-acquire them later; and relation names
+ * too, for error messages if something is dropped in between.
+ */
+ partrelid = RelationGetRelid(partRel);
+ parentrelid = RelationGetRelid(rel);
+ parentrelname = MemoryContextStrdup(PortalContext,
+ RelationGetRelationName(rel));
+ partrelname = MemoryContextStrdup(PortalContext,
+ RelationGetRelationName(partRel));
+
+ /* Invalidate relcache entries for the parent -- must be before close */
+ CacheInvalidateRelcache(rel);
+
+ table_close(partRel, NoLock);
+ table_close(rel, NoLock);
+ tab->rel = NULL;
+
+ /* Make updated catalog entry visible */
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+
+ StartTransactionCommand();
- if (OidIsValid(defaultPartOid))
- {
/*
- * If the relation being detached is the default partition itself,
- * remove it from the parent's pg_partitioned_table entry.
+ * Now wait. This ensures that all queries that were planned including
+ * the partition are finished before we remove the rest of catalog
+ * entries. We don't need or indeed want to acquire this lock, though
+ * -- that would block later queries.
*
- * If not, we must invalidate default partition's relcache entry, as
- * in StorePartitionBound: its partition constraint depends on every
- * other partition's partition constraint.
+ * We don't need to concern ourselves with waiting for a lock on the
+ * partition itself, since we will acquire AccessExclusiveLock below.
*/
- if (RelationGetRelid(partRel) == defaultPartOid)
- update_default_partition_oid(RelationGetRelid(rel), InvalidOid);
- else
- CacheInvalidateRelcacheByRelid(defaultPartOid);
+ SET_LOCKTAG_RELATION(tag, MyDatabaseId, parentrelid);
+ WaitForLockersMultiple(list_make1(&tag), AccessExclusiveLock, false);
+
+ /*
+ * Now acquire locks in both relations again. Note they may have been
+ * removed in the meantime, so care is required.
+ */
+ rel = try_relation_open(parentrelid, ShareUpdateExclusiveLock);
+ partRel = try_relation_open(partrelid, AccessExclusiveLock);
+
+ /* If the relations aren't there, something bad happened; bail out */
+ if (rel == NULL)
+ {
+ if (partRel != NULL) /* shouldn't happen */
+ elog(WARNING, "dangling partition \"%s\" remains, can't fix",
+ partrelname);
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("partitioned table \"%s\" was removed concurrently",
+ parentrelname)));
+ }
+ if (partRel == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("partition \"%s\" was removed concurrently", partrelname)));
+
+ tab->rel = rel;
}
- /* detach indexes too */
- indexes = RelationGetIndexList(partRel);
- foreach(cell, indexes)
- {
- Oid idxid = lfirst_oid(cell);
- Relation idx;
- Oid constrOid;
+ /* Do the final part of detaching */
+ DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);
- if (!has_superclass(idxid))
- continue;
+ ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
- Assert((IndexGetRelation(get_partition_parent(idxid), false) ==
- RelationGetRelid(rel)));
+ /* keep our lock until commit */
+ table_close(partRel, NoLock);
- idx = index_open(idxid, AccessExclusiveLock);
- IndexSetParentIndex(idx, InvalidOid);
+ return address;
+}
- /* If there's a constraint associated with the index, detach it too */
- constrOid = get_relation_idx_constraint_oid(RelationGetRelid(partRel),
- idxid);
- if (OidIsValid(constrOid))
- ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid);
+/*
+ * Second part of ALTER TABLE .. DETACH.
+ *
+ * This is separate so that it can be run independently when the second
+ * transaction of the concurrent algorithm fails (crash or abort).
+ */
+static void
+DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent,
+ Oid defaultPartOid)
+{
+ Relation classRel;
+ List *fks;
+ ListCell *cell;
+ List *indexes;
+ Datum new_val[Natts_pg_class];
+ bool new_null[Natts_pg_class],
+ new_repl[Natts_pg_class];
+ HeapTuple tuple,
+ newtuple;
- index_close(idx, NoLock);
+ if (concurrent)
+ {
+ /*
+ * We can remove the pg_inherits row now. (In the non-concurrent case,
+ * this was already done).
+ */
+ RemoveInheritance(partRel, rel, true);
}
- table_close(classRel, RowExclusiveLock);
/* Drop any triggers that were cloned on creation/attach. */
DropClonedTriggersFromPartition(RelationGetRelid(partRel));
@@ -17328,23 +17541,162 @@ ATExecDetachPartition(Relation rel, RangeVar *name)
ObjectAddressSet(constraint, ConstraintRelationId, constrOid);
performDeletion(&constraint, DROP_RESTRICT, 0);
}
- CommandCounterIncrement();
+
+ /* Now we can detach indexes */
+ indexes = RelationGetIndexList(partRel);
+ foreach(cell, indexes)
+ {
+ Oid idxid = lfirst_oid(cell);
+ Relation idx;
+ Oid constrOid;
+
+ if (!has_superclass(idxid))
+ continue;
+
+ Assert((IndexGetRelation(get_partition_parent(idxid, false), false) ==
+ RelationGetRelid(rel)));
+
+ idx = index_open(idxid, AccessExclusiveLock);
+ IndexSetParentIndex(idx, InvalidOid);
+
+ /* If there's a constraint associated with the index, detach it too */
+ constrOid = get_relation_idx_constraint_oid(RelationGetRelid(partRel),
+ idxid);
+ if (OidIsValid(constrOid))
+ ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid);
+
+ index_close(idx, NoLock);
+ }
+
+ /* Update pg_class tuple */
+ classRel = table_open(RelationRelationId, RowExclusiveLock);
+ tuple = SearchSysCacheCopy1(RELOID,
+ ObjectIdGetDatum(RelationGetRelid(partRel)));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u",
+ RelationGetRelid(partRel));
+ Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition);
+
+ /* Clear relpartbound and reset relispartition */
+ memset(new_val, 0, sizeof(new_val));
+ memset(new_null, false, sizeof(new_null));
+ memset(new_repl, false, sizeof(new_repl));
+ new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0;
+ new_null[Anum_pg_class_relpartbound - 1] = true;
+ new_repl[Anum_pg_class_relpartbound - 1] = true;
+ newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+ new_val, new_null, new_repl);
+
+ ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false;
+ CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple);
+ heap_freetuple(newtuple);
+ table_close(classRel, RowExclusiveLock);
+
+ if (OidIsValid(defaultPartOid))
+ {
+ /*
+ * If the relation being detached is the default partition itself,
+ * remove it from the parent's pg_partitioned_table entry.
+ *
+ * If not, we must invalidate default partition's relcache entry, as
+ * in StorePartitionBound: its partition constraint depends on every
+ * other partition's partition constraint.
+ */
+ if (RelationGetRelid(partRel) == defaultPartOid)
+ update_default_partition_oid(RelationGetRelid(rel), InvalidOid);
+ else
+ CacheInvalidateRelcacheByRelid(defaultPartOid);
+ }
/*
* Invalidate the parent's relcache so that the partition is no longer
* included in its partition descriptor.
*/
CacheInvalidateRelcache(rel);
+}
+
+/*
+ * ALTER TABLE ... DETACH PARTITION ... FINALIZE
+ *
+ * To use when a DETACH PARTITION command previously did not run to
+ * completion; this completes the detaching process.
+ */
+static ObjectAddress
+ATExecDetachPartitionFinalize(Relation rel, RangeVar *name)
+{
+ Relation partRel;
+ ObjectAddress address;
+ Snapshot snap = GetActiveSnapshot();
+
+ partRel = table_openrv(name, AccessExclusiveLock);
+
+ /*
+ * Wait until existing snapshots are gone. This is important if the
+ * second transaction of DETACH PARTITION CONCURRENTLY is canceled: the
+ * user could immediately run DETACH FINALIZE without actually waiting for
+ * existing transactions. We must not complete the detach action until
+ * all such queries are complete (otherwise we would present them with an
+ * inconsistent view of catalogs).
+ */
+ WaitForOlderSnapshots(snap->xmin, false);
+
+ DetachPartitionFinalize(rel, partRel, true, InvalidOid);
ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
- /* keep our lock until commit */
table_close(partRel, NoLock);
return address;
}
/*
+ * DetachAddConstraintIfNeeded
+ * Subroutine for ATExecDetachPartition. Create a constraint that
+ * takes the place of the partition constraint, but avoid creating
+ * a dupe if an equivalent constraint already exists.
+ */
+static void
+DetachAddConstraintIfNeeded(List **wqueue, Relation partRel)
+{
+ AlteredTableInfo *tab;
+ Expr *constraintExpr;
+ TupleDesc td = RelationGetDescr(partRel);
+ Constraint *n;
+
+ constraintExpr = make_ands_explicit(RelationGetPartitionQual(partRel));
+
+ /* If an identical constraint exists, we don't need to create one */
+ if (td->constr && td->constr->num_check > 0)
+ {
+ for (int i = 0; i < td->constr->num_check; i++)
+ {
+ Node *thisconstr;
+
+ thisconstr = stringToNode(td->constr->check[i].ccbin);
+
+ if (equal(constraintExpr, thisconstr))
+ return;
+ }
+ }
+
+ tab = ATGetQueueEntry(wqueue, partRel);
+
+ /* Add constraint on partition, equivalent to the partition constraint */
+ n = makeNode(Constraint);
+ n->contype = CONSTR_CHECK;
+ n->conname = NULL;
+ n->location = -1;
+ n->is_no_inherit = false;
+ n->raw_expr = NULL;
+ n->cooked_expr = nodeToString(constraintExpr);
+ n->initially_valid = true;
+ n->skip_validation = true;
+ /* It's a re-add, since it nominally already exists */
+ ATAddCheckConstraint(wqueue, tab, partRel, n,
+ true, false, true, ShareUpdateExclusiveLock);
+}
+
+/*
* DropClonedTriggersFromPartition
* subroutine for ATExecDetachPartition to remove any triggers that were
* cloned to the partition when it was created-as-partition or attached.
@@ -17511,7 +17863,7 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name)
/* Silently do nothing if already in the right state */
currParent = partIdx->rd_rel->relispartition ?
- get_partition_parent(partIdxId) : InvalidOid;
+ get_partition_parent(partIdxId, false) : InvalidOid;
if (currParent != RelationGetRelid(parentIdx))
{
IndexInfo *childInfo;
@@ -17539,7 +17891,7 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name)
RelationGetRelationName(partIdx))));
/* Make sure it indexes a partition of the other index's table */
- partDesc = RelationGetPartitionDesc(parentTbl);
+ partDesc = RelationGetPartitionDesc(parentTbl, false);
found = false;
for (i = 0; i < partDesc->nparts; i++)
{
@@ -17693,7 +18045,7 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
* If we found as many inherited indexes as the partitioned table has
* partitions, we're good; update pg_index to set indisvalid.
*/
- if (tuples == RelationGetPartitionDesc(partedTbl)->nparts)
+ if (tuples == RelationGetPartitionDesc(partedTbl, false)->nparts)
{
Relation idxRel;
HeapTuple newtup;
@@ -17723,8 +18075,8 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
/* make sure we see the validation we just did */
CommandCounterIncrement();
- parentIdxId = get_partition_parent(RelationGetRelid(partedIdx));
- parentTblId = get_partition_parent(RelationGetRelid(partedTbl));
+ parentIdxId = get_partition_parent(RelationGetRelid(partedIdx), false);
+ parentTblId = get_partition_parent(RelationGetRelid(partedTbl), false);
parentIdx = relation_open(parentIdxId, AccessExclusiveLock);
parentTbl = relation_open(parentTblId, AccessExclusiveLock);
Assert(!parentIdx->rd_index->indisvalid);
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 4e4e05844c5..7383d5994eb 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -1119,7 +1119,7 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
*/
if (partition_recurse)
{
- PartitionDesc partdesc = RelationGetPartitionDesc(rel);
+ PartitionDesc partdesc = RelationGetPartitionDesc(rel, false);
List *idxs = NIL;
List *childTbls = NIL;
ListCell *l;
@@ -1141,7 +1141,8 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
ListCell *l;
List *idxs = NIL;
- idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock);
+ idxs = find_inheritance_children(indexOid, false,
+ ShareRowExclusiveLock);
foreach(l, idxs)
childTbls = lappend_oid(childTbls,
IndexGetRelation(lfirst_oid(l),
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index b8da4c5967d..619aaffae43 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -569,6 +569,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
int partidx)
{
ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+ Oid partOid = dispatch->partdesc->oids[partidx];
Relation partrel;
int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
@@ -579,7 +580,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
oldcxt = MemoryContextSwitchTo(proute->memcxt);
- partrel = table_open(dispatch->partdesc->oids[partidx], RowExclusiveLock);
+ partrel = table_open(partOid, RowExclusiveLock);
leaf_part_rri = makeNode(ResultRelInfo);
InitResultRelInfo(leaf_part_rri,
@@ -1065,9 +1066,21 @@ ExecInitPartitionDispatchInfo(EState *estate,
int dispatchidx;
MemoryContext oldcxt;
+ /*
+ * For data modification, it is better that executor does not include
+ * partitions being detached, except in snapshot-isolation mode. This
+ * means that a read-committed transaction immediately gets a "no
+ * partition for tuple" error when a tuple is inserted into a partition
+ * that's being detached concurrently, but a transaction in repeatable-
+ * read mode can still use the partition. Note that because partition
+ * detach uses ShareLock on the partition (which conflicts with DML),
+ * we're certain that the detach won't be able to complete until any
+ * inserting transaction is done.
+ */
if (estate->es_partition_directory == NULL)
estate->es_partition_directory =
- CreatePartitionDirectory(estate->es_query_cxt);
+ CreatePartitionDirectory(estate->es_query_cxt,
+ IsolationUsesXactSnapshot());
oldcxt = MemoryContextSwitchTo(proute->memcxt);
@@ -1645,9 +1658,10 @@ ExecCreatePartitionPruneState(PlanState *planstate,
ListCell *lc;
int i;
+ /* Executor must always include detached partitions */
if (estate->es_partition_directory == NULL)
estate->es_partition_directory =
- CreatePartitionDirectory(estate->es_query_cxt);
+ CreatePartitionDirectory(estate->es_query_cxt, true);
n_part_hierarchies = list_length(partitionpruneinfo->prune_infos);
Assert(n_part_hierarchies > 0);
@@ -1713,9 +1727,12 @@ ExecCreatePartitionPruneState(PlanState *planstate,
partrel);
/*
- * Initialize the subplan_map and subpart_map. Since detaching a
- * partition requires AccessExclusiveLock, no partitions can have
- * disappeared, nor can the bounds for any partition have changed.
+ * Initialize the subplan_map and subpart_map.
+ *
+ * Because we request detached partitions to be included, and
+ * detaching waits for old transactions, it is safe to assume that
+ * no partitions have disappeared since this query was planned.
+ *
* However, new partitions may have been added.
*/
Assert(partdesc->nparts >= pinfo->nparts);
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 82d7cce5d57..38b56231b7d 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -4737,6 +4737,7 @@ _copyPartitionCmd(const PartitionCmd *from)
COPY_NODE_FIELD(name);
COPY_NODE_FIELD(bound);
+ COPY_SCALAR_FIELD(concurrent);
return newnode;
}
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 3e980c457c5..3292dda3424 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2975,6 +2975,7 @@ _equalPartitionCmd(const PartitionCmd *a, const PartitionCmd *b)
{
COMPARE_NODE_FIELD(name);
COMPARE_NODE_FIELD(bound);
+ COMPARE_SCALAR_FIELD(concurrent);
return true;
}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 7f2e40ae39e..6c39bf893f8 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2141,10 +2141,14 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
{
PartitionDesc partdesc;
- /* Create the PartitionDirectory infrastructure if we didn't already */
+ /*
+ * Create the PartitionDirectory infrastructure if we didn't already.
+ */
if (root->glob->partition_directory == NULL)
+ {
root->glob->partition_directory =
- CreatePartitionDirectory(CurrentMemoryContext);
+ CreatePartitionDirectory(CurrentMemoryContext, false);
+ }
partdesc = PartitionDirectoryLookup(root->glob->partition_directory,
relation);
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 4843ff99e2d..2132cf4d828 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -658,7 +658,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXPRESSION
EXTENSION EXTERNAL EXTRACT
- FALSE_P FAMILY FETCH FILTER FIRST_P FLOAT_P FOLLOWING FOR
+ FALSE_P FAMILY FETCH FILTER FINALIZE FIRST_P FLOAT_P FOLLOWING FOR
FORCE FOREIGN FORWARD FREEZE FROM FULL FUNCTION FUNCTIONS
GENERATED GLOBAL GRANT GRANTED GREATEST GROUP_P GROUPING GROUPS
@@ -2108,12 +2108,13 @@ partition_cmd:
n->subtype = AT_AttachPartition;
cmd->name = $3;
cmd->bound = $4;
+ cmd->concurrent = false;
n->def = (Node *) cmd;
$$ = (Node *) n;
}
- /* ALTER TABLE <name> DETACH PARTITION <partition_name> */
- | DETACH PARTITION qualified_name
+ /* ALTER TABLE <name> DETACH PARTITION <partition_name> [CONCURRENTLY] */
+ | DETACH PARTITION qualified_name opt_concurrently
{
AlterTableCmd *n = makeNode(AlterTableCmd);
PartitionCmd *cmd = makeNode(PartitionCmd);
@@ -2121,10 +2122,23 @@ partition_cmd:
n->subtype = AT_DetachPartition;
cmd->name = $3;
cmd->bound = NULL;
+ cmd->concurrent = $4;
n->def = (Node *) cmd;
$$ = (Node *) n;
}
+ | DETACH PARTITION qualified_name FINALIZE
+ {
+ AlterTableCmd *n = makeNode(AlterTableCmd);
+ PartitionCmd *cmd = makeNode(PartitionCmd);
+
+ n->subtype = AT_DetachPartitionFinalize;
+ cmd->name = $3;
+ cmd->bound = NULL;
+ cmd->concurrent = false;
+ n->def = (Node *) cmd;
+ $$ = (Node *) n;
+ }
;
index_partition_cmd:
@@ -2137,6 +2151,7 @@ index_partition_cmd:
n->subtype = AT_AttachPartition;
cmd->name = $3;
cmd->bound = NULL;
+ cmd->concurrent = false;
n->def = (Node *) cmd;
$$ = (Node *) n;
@@ -15395,6 +15410,7 @@ unreserved_keyword:
| EXTERNAL
| FAMILY
| FILTER
+ | FINALIZE
| FIRST_P
| FOLLOWING
| FORCE
@@ -15936,6 +15952,7 @@ bare_label_keyword:
| EXTRACT
| FALSE_P
| FAMILY
+ | FINALIZE
| FIRST_P
| FLOAT_P
| FOLLOWING
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index e5f3482d52e..2b2b1dc1ad7 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -2798,7 +2798,7 @@ check_new_partition_bound(char *relname, Relation parent,
PartitionBoundSpec *spec, ParseState *pstate)
{
PartitionKey key = RelationGetPartitionKey(parent);
- PartitionDesc partdesc = RelationGetPartitionDesc(parent);
+ PartitionDesc partdesc = RelationGetPartitionDesc(parent, true);
PartitionBoundInfo boundinfo = partdesc->boundinfo;
int with = -1;
bool overlap = false;
@@ -3990,7 +3990,7 @@ get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
{
int i;
int ndatums = 0;
- PartitionDesc pdesc = RelationGetPartitionDesc(parent);
+ PartitionDesc pdesc = RelationGetPartitionDesc(parent, true); /* XXX correct? */
PartitionBoundInfo boundinfo = pdesc->boundinfo;
if (boundinfo)
@@ -4190,7 +4190,7 @@ get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
if (spec->is_default)
{
List *or_expr_args = NIL;
- PartitionDesc pdesc = RelationGetPartitionDesc(parent);
+ PartitionDesc pdesc = RelationGetPartitionDesc(parent, true); /* XXX correct? */
Oid *inhoids = pdesc->oids;
int nparts = pdesc->nparts,
i;
diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c
index f852b6e99de..58570fecfdc 100644
--- a/src/backend/partitioning/partdesc.c
+++ b/src/backend/partitioning/partdesc.c
@@ -37,6 +37,7 @@ typedef struct PartitionDirectoryData
{
MemoryContext pdir_mcxt;
HTAB *pdir_hash;
+ bool include_detached;
} PartitionDirectoryData;
typedef struct PartitionDirectoryEntry
@@ -46,7 +47,7 @@ typedef struct PartitionDirectoryEntry
PartitionDesc pd;
} PartitionDirectoryEntry;
-static void RelationBuildPartitionDesc(Relation rel);
+static void RelationBuildPartitionDesc(Relation rel, bool include_detached);
/*
@@ -61,13 +62,14 @@ static void RelationBuildPartitionDesc(Relation rel);
* that the data doesn't become stale.
*/
PartitionDesc
-RelationGetPartitionDesc(Relation rel)
+RelationGetPartitionDesc(Relation rel, bool include_detached)
{
if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
return NULL;
- if (unlikely(rel->rd_partdesc == NULL))
- RelationBuildPartitionDesc(rel);
+ if (unlikely(rel->rd_partdesc == NULL ||
+ rel->rd_partdesc->includes_detached != include_detached))
+ RelationBuildPartitionDesc(rel, include_detached);
return rel->rd_partdesc;
}
@@ -88,7 +90,7 @@ RelationGetPartitionDesc(Relation rel)
* permanently.
*/
static void
-RelationBuildPartitionDesc(Relation rel)
+RelationBuildPartitionDesc(Relation rel, bool include_detached)
{
PartitionDesc partdesc;
PartitionBoundInfo boundinfo = NULL;
@@ -110,7 +112,8 @@ RelationBuildPartitionDesc(Relation rel)
* concurrently, whatever this function returns will be accurate as of
* some well-defined point in time.
*/
- inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
+ inhoids = find_inheritance_children(RelationGetRelid(rel), include_detached,
+ NoLock);
nparts = list_length(inhoids);
/* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
@@ -238,6 +241,7 @@ RelationBuildPartitionDesc(Relation rel)
partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
partdesc->oids = (Oid *) palloc(nparts * sizeof(Oid));
partdesc->is_leaf = (bool *) palloc(nparts * sizeof(bool));
+ partdesc->includes_detached = include_detached;
/*
* Assign OIDs from the original array into mapped indexes of the
@@ -280,7 +284,7 @@ RelationBuildPartitionDesc(Relation rel)
* Create a new partition directory object.
*/
PartitionDirectory
-CreatePartitionDirectory(MemoryContext mcxt)
+CreatePartitionDirectory(MemoryContext mcxt, bool include_detached)
{
MemoryContext oldcontext = MemoryContextSwitchTo(mcxt);
PartitionDirectory pdir;
@@ -295,6 +299,7 @@ CreatePartitionDirectory(MemoryContext mcxt)
pdir->pdir_hash = hash_create("partition directory", 256, &ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+ pdir->include_detached = include_detached;
MemoryContextSwitchTo(oldcontext);
return pdir;
@@ -327,7 +332,7 @@ PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
*/
RelationIncrementReferenceCount(rel);
pde->rel = rel;
- pde->pd = RelationGetPartitionDesc(rel);
+ pde->pd = RelationGetPartitionDesc(rel, pdir->include_detached);
Assert(pde->pd != NULL);
}
return pde->pd;
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 05bb698cf45..729274b330f 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1236,6 +1236,25 @@ ProcessUtilitySlow(ParseState *pstate,
AlterTableStmt *atstmt = (AlterTableStmt *) parsetree;
Oid relid;
LOCKMODE lockmode;
+ ListCell *cell;
+
+ /*
+ * Disallow ALTER TABLE .. DETACH CONCURRENTLY in a
+ * transaction block or function. (Perhaps it could be
+ * allowed in a procedure, but don't hold your breath.)
+ */
+ foreach(cell, atstmt->cmds)
+ {
+ AlterTableCmd *cmd = (AlterTableCmd *) lfirst(cell);
+
+ /* Disallow DETACH CONCURRENTLY in a transaction block */
+ if (cmd->subtype == AT_DetachPartition)
+ {
+ if (((PartitionCmd *) cmd->def)->concurrent)
+ PreventInTransactionBlock(isTopLevel,
+ "ALTER TABLE ... DETACH CONCURRENTLY");
+ }
+ }
/*
* Figure out lock mode, and acquire lock. This also does
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 09a2ad28814..7c77c338cec 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -392,11 +392,15 @@ RI_FKey_check(TriggerData *trigdata)
/*
* Now check that foreign key exists in PK table
+ *
+ * XXX detectNewRows must be true when a partitioned table is on the
+ * referenced side. The reason is that our snapshot must be fresh
+ * in order for the hack in find_inheritance_children() to work.
*/
ri_PerformCheck(riinfo, &qkey, qplan,
fk_rel, pk_rel,
NULL, newslot,
- false,
+ pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE,
SPI_OK_SELECT);
if (SPI_finish() != SPI_OK_FINISH)
diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c
index a6388d980ed..21e60f0c5e8 100644
--- a/src/backend/utils/cache/partcache.c
+++ b/src/backend/utils/cache/partcache.c
@@ -341,6 +341,7 @@ generate_partition_qual(Relation rel)
bool isnull;
List *my_qual = NIL,
*result = NIL;
+ Oid parentrelid;
Relation parent;
/* Guard against stack overflow due to overly deep partition tree */
@@ -350,9 +351,14 @@ generate_partition_qual(Relation rel)
if (rel->rd_partcheckvalid)
return copyObject(rel->rd_partcheck);
- /* Grab at least an AccessShareLock on the parent table */
- parent = relation_open(get_partition_parent(RelationGetRelid(rel)),
- AccessShareLock);
+ /*
+ * Grab at least an AccessShareLock on the parent table. Must do this
+ * even if the partition has been partially detached, because transactions
+ * concurrent with the detach might still be trying to use a partition
+ * descriptor that includes it.
+ */
+ parentrelid = get_partition_parent(RelationGetRelid(rel), true);
+ parent = relation_open(parentrelid, AccessShareLock);
/* Get pg_class.relpartbound */
tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));