diff options
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/catalog/pg_inherits.c | 52 | ||||
-rw-r--r-- | src/backend/commands/tablecmds.c | 66 | ||||
-rw-r--r-- | src/backend/commands/trigger.c | 3 | ||||
-rw-r--r-- | src/backend/partitioning/partdesc.c | 101 | ||||
-rw-r--r-- | src/backend/utils/cache/relcache.c | 33 |
5 files changed, 186 insertions, 69 deletions
diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index 6447b528546..c373faf2d64 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -52,6 +52,22 @@ typedef struct SeenRelsEntry * then no locks are acquired, but caller must beware of race conditions * against possible DROPs of child relations. * + * Partitions marked as being detached are omitted; see + * find_inheritance_children_extended for details. + */ +List * +find_inheritance_children(Oid parentrelId, LOCKMODE lockmode) +{ + return find_inheritance_children_extended(parentrelId, true, lockmode, + NULL, NULL); +} + +/* + * find_inheritance_children_extended + * + * As find_inheritance_children, with more options regarding detached + * partitions. + * * If a partition's pg_inherits row is marked "detach pending", * *detached_exist (if not null) is set true. * @@ -60,11 +76,13 @@ typedef struct SeenRelsEntry * marked "detach pending" is visible to that snapshot, then that partition is * omitted from the output list. This makes partitions invisible depending on * whether the transaction that marked those partitions as detached appears - * committed to the active snapshot. + * committed to the active snapshot. In addition, *detached_xmin (if not null) + * is set to the xmin of the row of the detached partition. */ List * -find_inheritance_children(Oid parentrelId, bool omit_detached, - LOCKMODE lockmode, bool *detached_exist) +find_inheritance_children_extended(Oid parentrelId, bool omit_detached, + LOCKMODE lockmode, bool *detached_exist, + TransactionId *detached_xmin) { List *list = NIL; Relation relation; @@ -132,7 +150,32 @@ find_inheritance_children(Oid parentrelId, bool omit_detached, snap = GetActiveSnapshot(); if (!XidInMVCCSnapshot(xmin, snap)) + { + if (detached_xmin) + { + /* + * Two detached partitions should not occur (see + * checks in MarkInheritDetached), but if they do, + * track the newer of the two. Make sure to warn the + * user, so that they can clean up. Since this is + * just a cross-check against potentially corrupt + * catalogs, we don't make it a full-fledged error + * message. + */ + if (*detached_xmin != InvalidTransactionId) + { + elog(WARNING, "more than one partition pending detach found for table with OID %u", + parentrelId); + if (TransactionIdFollows(xmin, *detached_xmin)) + *detached_xmin = xmin; + } + else + *detached_xmin = xmin; + } + + /* Don't add the partition to the output list */ continue; + } } } @@ -247,8 +290,7 @@ find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents) ListCell *lc; /* Get the direct children of this rel */ - currentchildren = find_inheritance_children(currentrel, true, - lockmode, NULL); + currentchildren = find_inheritance_children(currentrel, lockmode); /* * Add to the queue only those children not already seen. This avoids diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 8e717ada28d..d9ba87a2a3a 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -3492,7 +3492,7 @@ renameatt_internal(Oid myrelid, * expected_parents will only be 0 if we are not already recursing. */ if (expected_parents == 0 && - find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) + find_inheritance_children(myrelid, NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("inherited column \"%s\" must be renamed in child tables too", @@ -3691,7 +3691,7 @@ rename_constraint_internal(Oid myrelid, else { if (expected_parents == 0 && - find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) + find_inheritance_children(myrelid, NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("inherited constraint \"%s\" must be renamed in child tables too", @@ -6565,7 +6565,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, */ if (colDef->identity && recurse && - find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) + find_inheritance_children(myrelid, NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("cannot recursively add identity column to table that has child tables"))); @@ -6811,7 +6811,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, * use find_all_inheritors to do it in one pass. */ children = - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); + find_inheritance_children(RelationGetRelid(rel), lockmode); /* * If we are told not to recurse, there had better not be any child @@ -7674,7 +7674,7 @@ ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recurs * resulting state can be properly dumped and restored. */ if (!recurse && - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL)) + find_inheritance_children(RelationGetRelid(rel), lockmode)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too"))); @@ -8282,7 +8282,7 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName, * use find_all_inheritors to do it in one pass. */ children = - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); + find_inheritance_children(RelationGetRelid(rel), lockmode); if (children) { @@ -8770,7 +8770,7 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, * use find_all_inheritors to do it in one pass. */ children = - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); + find_inheritance_children(RelationGetRelid(rel), lockmode); /* * Check if ONLY was specified with ALTER TABLE. If so, allow the @@ -11303,8 +11303,7 @@ ATExecDropConstraint(Relation rel, const char *constrName, * use find_all_inheritors to do it in one pass. */ if (!is_no_inherit_constraint) - children = find_inheritance_children(RelationGetRelid(rel), true, - lockmode, NULL); + children = find_inheritance_children(RelationGetRelid(rel), lockmode); else children = NIL; @@ -11688,8 +11687,7 @@ ATPrepAlterColumnType(List **wqueue, } } else if (!recursing && - find_inheritance_children(RelationGetRelid(rel), true, - NoLock, NULL) != NIL) + find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("type of inherited column \"%s\" must be changed in child tables too", @@ -14601,7 +14599,8 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) * MarkInheritDetached * * Set inhdetachpending for a partition, for ATExecDetachPartition - * in concurrent mode. + * in concurrent mode. While at it, verify that no other partition is + * already pending detach. */ static void MarkInheritDetached(Relation child_rel, Relation parent_rel) @@ -14617,32 +14616,45 @@ MarkInheritDetached(Relation child_rel, Relation parent_rel) Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); /* - * Find pg_inherits entries by inhrelid. + * Find pg_inherits entries by inhparent. (We need to scan them all in + * order to verify that no other partition is pending detach.) */ catalogRelation = table_open(InheritsRelationId, RowExclusiveLock); ScanKeyInit(&key, - Anum_pg_inherits_inhrelid, + Anum_pg_inherits_inhparent, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(RelationGetRelid(child_rel))); - scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, + ObjectIdGetDatum(RelationGetRelid(parent_rel))); + scan = systable_beginscan(catalogRelation, InheritsParentIndexId, true, NULL, 1, &key); while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan))) { - HeapTuple newtup; + Form_pg_inherits inhForm; - if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent != - RelationGetRelid(parent_rel)) - elog(ERROR, "bad parent tuple found for partition %u", - RelationGetRelid(child_rel)); + inhForm = (Form_pg_inherits) GETSTRUCT(inheritsTuple); + if (inhForm->inhdetachpending) + ereport(ERROR, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("partition \"%s\" already pending detach in partitioned table \"%s.%s\"", + get_rel_name(inhForm->inhrelid), + get_namespace_name(parent_rel->rd_rel->relnamespace), + RelationGetRelationName(parent_rel)), + errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the detach operation.")); + + if (inhForm->inhrelid == RelationGetRelid(child_rel)) + { + HeapTuple newtup; - newtup = heap_copytuple(inheritsTuple); - ((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true; + newtup = heap_copytuple(inheritsTuple); + ((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true; - CatalogTupleUpdate(catalogRelation, - &inheritsTuple->t_self, - newtup); - found = true; + CatalogTupleUpdate(catalogRelation, + &inheritsTuple->t_self, + newtup); + found = true; + heap_freetuple(newtup); + /* keep looking, to ensure we catch others pending detach */ + } } /* Done */ diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index d8393aa4de3..f305f8bc0f2 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -1141,8 +1141,7 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, ListCell *l; List *idxs = NIL; - idxs = find_inheritance_children(indexOid, true, - ShareRowExclusiveLock, NULL); + idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock); foreach(l, idxs) childTbls = lappend_oid(childTbls, IndexGetRelation(lfirst_oid(l), diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c index 2305dff4077..fa9729283a0 100644 --- a/src/backend/partitioning/partdesc.c +++ b/src/backend/partitioning/partdesc.c @@ -54,6 +54,12 @@ static PartitionDesc RelationBuildPartitionDesc(Relation rel, /* * RelationGetPartitionDesc -- get partition descriptor, if relation is partitioned * + * We keep two partdescs in relcache: rd_partdesc includes all partitions + * (even those being concurrently marked detached), while rd_partdesc_nodetach + * omits (some of) those. We store the pg_inherits.xmin value for the latter, + * to determine whether it can be validly reused in each case, since that + * depends on the active snapshot. + * * Note: we arrange for partition descriptors to not get freed until the * relcache entry's refcount goes to zero (see hacks in RelationClose, * RelationClearRelation, and RelationBuildPartitionDesc). Therefore, even @@ -61,13 +67,6 @@ static PartitionDesc RelationBuildPartitionDesc(Relation rel, * for callers to continue to use that pointer as long as (a) they hold the * relation open, and (b) they hold a relation lock strong enough to ensure * that the data doesn't become stale. - * - * The above applies to partition descriptors that are complete regarding - * partitions concurrently being detached. When a descriptor that omits - * partitions being detached is requested (and such partitions are present), - * said descriptor is not part of relcache and so it isn't freed by - * invalidations either. Caller must not use such a descriptor beyond the - * current Portal. */ PartitionDesc RelationGetPartitionDesc(Relation rel, bool omit_detached) @@ -78,11 +77,36 @@ RelationGetPartitionDesc(Relation rel, bool omit_detached) * If relcache has a partition descriptor, use that. However, we can only * do so when we are asked to include all partitions including detached; * and also when we know that there are no detached partitions. + * + * If there is no active snapshot, detached partitions aren't omitted + * either, so we can use the cached descriptor too in that case. */ if (likely(rel->rd_partdesc && - (!rel->rd_partdesc->detached_exist || !omit_detached))) + (!rel->rd_partdesc->detached_exist || !omit_detached || + !ActiveSnapshotSet()))) return rel->rd_partdesc; + /* + * If we're asked to omit detached partitions, we may be able to use a + * cached descriptor too. We determine that based on the pg_inherits.xmin + * that was saved alongside that descriptor: if the xmin that was not in + * progress for that active snapshot is also not in progress for the + * current active snapshot, then we can use use it. Otherwise build one + * from scratch. + */ + if (omit_detached && + rel->rd_partdesc_nodetached && + TransactionIdIsValid(rel->rd_partdesc_nodetached_xmin) && + ActiveSnapshotSet()) + { + Snapshot activesnap; + + activesnap = GetActiveSnapshot(); + + if (!XidInMVCCSnapshot(rel->rd_partdesc_nodetached_xmin, activesnap)) + return rel->rd_partdesc_nodetached; + } + return RelationBuildPartitionDesc(rel, omit_detached); } @@ -117,6 +141,8 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached) Oid *oids = NULL; bool *is_leaf = NULL; bool detached_exist; + bool is_omit; + TransactionId detached_xmin; ListCell *cell; int i, nparts; @@ -132,8 +158,11 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached) * some well-defined point in time. */ detached_exist = false; - inhoids = find_inheritance_children(RelationGetRelid(rel), omit_detached, - NoLock, &detached_exist); + detached_xmin = InvalidTransactionId; + inhoids = find_inheritance_children_extended(RelationGetRelid(rel), + omit_detached, NoLock, + &detached_exist, + &detached_xmin); nparts = list_length(inhoids); /* Allocate working arrays for OIDs, leaf flags, and boundspecs. */ @@ -281,37 +310,49 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached) } /* + * Are we working with the partition that omits detached partitions, or + * the one that includes them? + */ + is_omit = omit_detached && detached_exist && ActiveSnapshotSet(); + + /* * We have a fully valid partdesc. Reparent it so that it has the right - * lifespan, and if appropriate put it into the relation's relcache entry. + * lifespan. */ - if (omit_detached && detached_exist) + MemoryContextSetParent(new_pdcxt, CacheMemoryContext); + + /* + * Store it into relcache. + * + * But first, a kluge: if there's an old context for this type of + * descriptor, it contains an old partition descriptor that may still be + * referenced somewhere. Preserve it, while not leaking it, by + * reattaching it as a child context of the new one. Eventually it will + * get dropped by either RelationClose or RelationClearRelation. + * (We keep the regular partdesc in rd_pdcxt, and the partdesc-excluding- + * detached-partitions in rd_pddcxt.) + */ + if (is_omit) { + if (rel->rd_pddcxt != NULL) + MemoryContextSetParent(rel->rd_pddcxt, new_pdcxt); + rel->rd_pddcxt = new_pdcxt; + rel->rd_partdesc_nodetached = partdesc; + /* - * A transient partition descriptor is only good for the current - * statement, so make it a child of the current portal's context. + * For partdescs built excluding detached partitions, which we save + * separately, we also record the pg_inherits.xmin of the detached + * partition that was omitted; this informs a future potential user of + * such a cached partdescs. (This might be InvalidXid; see comments + * in struct RelationData). */ - MemoryContextSetParent(new_pdcxt, PortalContext); + rel->rd_partdesc_nodetached_xmin = detached_xmin; } else { - /* - * This partdesc goes into relcache. - */ - - MemoryContextSetParent(new_pdcxt, CacheMemoryContext); - - /* - * But first, a kluge: if there's an old rd_pdcxt, it contains an old - * partition descriptor that may still be referenced somewhere. - * Preserve it, while not leaking it, by reattaching it as a child - * context of the new rd_pdcxt. Eventually it will get dropped by - * either RelationClose or RelationClearRelation. - */ if (rel->rd_pdcxt != NULL) MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt); rel->rd_pdcxt = new_pdcxt; - - /* Store it into relcache */ rel->rd_partdesc = partdesc; } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 466c28d5287..31c8e07f2a7 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1157,7 +1157,10 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) relation->rd_partkey = NULL; relation->rd_partkeycxt = NULL; relation->rd_partdesc = NULL; + relation->rd_partdesc_nodetached = NULL; + relation->rd_partdesc_nodetached_xmin = InvalidTransactionId; relation->rd_pdcxt = NULL; + relation->rd_pddcxt = NULL; relation->rd_partcheck = NIL; relation->rd_partcheckvalid = false; relation->rd_partcheckcxt = NULL; @@ -2103,10 +2106,16 @@ RelationClose(Relation relation) * stale partition descriptors it has. This is unlikely, so check to see * if there are child contexts before expending a call to mcxt.c. */ - if (RelationHasReferenceCountZero(relation) && - relation->rd_pdcxt != NULL && - relation->rd_pdcxt->firstchild != NULL) - MemoryContextDeleteChildren(relation->rd_pdcxt); + if (RelationHasReferenceCountZero(relation)) + { + if (relation->rd_pdcxt != NULL && + relation->rd_pdcxt->firstchild != NULL) + MemoryContextDeleteChildren(relation->rd_pdcxt); + + if (relation->rd_pddcxt != NULL && + relation->rd_pddcxt->firstchild != NULL) + MemoryContextDeleteChildren(relation->rd_pddcxt); + } #ifdef RELCACHE_FORCE_RELEASE if (RelationHasReferenceCountZero(relation) && @@ -2390,6 +2399,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) MemoryContextDelete(relation->rd_partkeycxt); if (relation->rd_pdcxt) MemoryContextDelete(relation->rd_pdcxt); + if (relation->rd_pddcxt) + MemoryContextDelete(relation->rd_pddcxt); if (relation->rd_partcheckcxt) MemoryContextDelete(relation->rd_partcheckcxt); pfree(relation); @@ -2644,7 +2655,7 @@ RelationClearRelation(Relation relation, bool rebuild) SWAPFIELD(PartitionKey, rd_partkey); SWAPFIELD(MemoryContext, rd_partkeycxt); } - if (newrel->rd_pdcxt != NULL) + if (newrel->rd_pdcxt != NULL || newrel->rd_pddcxt != NULL) { /* * We are rebuilding a partitioned relation with a non-zero @@ -2672,13 +2683,22 @@ RelationClearRelation(Relation relation, bool rebuild) * newrel. */ relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */ + relation->rd_partdesc_nodetached = NULL; + relation->rd_partdesc_nodetached_xmin = InvalidTransactionId; if (relation->rd_pdcxt != NULL) /* probably never happens */ MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt); else relation->rd_pdcxt = newrel->rd_pdcxt; + if (relation->rd_pddcxt != NULL) + MemoryContextSetParent(newrel->rd_pddcxt, relation->rd_pddcxt); + else + relation->rd_pddcxt = newrel->rd_pddcxt; /* drop newrel's pointers so we don't destroy it below */ newrel->rd_partdesc = NULL; + newrel->rd_partdesc_nodetached = NULL; + newrel->rd_partdesc_nodetached_xmin = InvalidTransactionId; newrel->rd_pdcxt = NULL; + newrel->rd_pddcxt = NULL; } #undef SWAPFIELD @@ -6017,7 +6037,10 @@ load_relcache_init_file(bool shared) rel->rd_partkey = NULL; rel->rd_partkeycxt = NULL; rel->rd_partdesc = NULL; + rel->rd_partdesc_nodetached = NULL; + rel->rd_partdesc_nodetached_xmin = InvalidTransactionId; rel->rd_pdcxt = NULL; + rel->rd_pddcxt = NULL; rel->rd_partcheck = NIL; rel->rd_partcheckvalid = false; rel->rd_partcheckcxt = NULL; |