aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/execPartition.c
diff options
context:
space:
mode:
author Alvaro Herrera <alvherre@alvh.no-ip.org> 2020-09-08 19:35:15 -0300
committer Alvaro Herrera <alvherre@alvh.no-ip.org> 2020-09-08 19:35:15 -0300
commit d0230a43fcae6f923fcedfe6f27db7fca8760d95 (patch)
tree afd90e37f8ddcac7ecb414560937faa2c8fe5fa7 /src/backend/executor/execPartition.c
parent b61d048e0d480f4311c62bf3026879c83ba9aaad (diff)
download postgresql-d0230a43fcae6f923fcedfe6f27db7fca8760d95.tar.gz
download postgresql-d0230a43fcae6f923fcedfe6f27db7fca8760d95.zip
Check default partitions constraints while descending

Partitioning tuple route code assumes that the partition chosen while
descending the partition hierarchy is always the correct one. This is
true except when the partition is the default partition and another
partition has been added concurrently: the partition constraint changes
and we don't recheck it. This can lead to tuples mistakenly being added
to the default partition that should have been rejected.

Fix by rechecking the default partition constraint while descending the
hierarchy.

An isolation test based on the reproduction steps described by Hao Wu
(with tweaks for extra coverage) is included.

Backpatch to 12, where this bug came in with 898e5e3290a7.

Reported by: Hao Wu <hawu@vmware.com>
Author: Amit Langote <amitlangote09@gmail.com>
Author: Álvaro Herrera <alvherre@alvh.no-ip.org>
Discussion: https://postgr.es/m/CA+HiwqFqBmcSSap4sFnCBUEL_VfOMmEKaQ3gwUhyfa4c7J_-nA@mail.gmail.com
Discussion: https://postgr.es/m/DM5PR0501MB3910E97A9EDFB4C775CF3D75A42F0@DM5PR0501MB3910.namprd05.prod.outlook.com

Diffstat (limited to 'src/backend/executor/execPartition.c')
-rw-r--r-- src/backend/executor/execPartition.c | 127
1 file changed, 102 insertions, 25 deletions
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 79fcbd6b066..bd2ea258047 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -51,6 +51,11 @@
* PartitionDispatchData->indexes for details on how this array is
* indexed.
*
+ * nonleaf_partitions
+ * Array of 'max_dispatch' elements containing pointers to fake
+ * ResultRelInfo objects for nonleaf partitions, useful for checking
+ * the partition constraint.
+ *
* num_dispatch
* The current number of items stored in the 'partition_dispatch_info'
* array. Also serves as the index of the next free array element for
@@ -89,6 +94,7 @@ struct PartitionTupleRouting
{
Relation partition_root;
PartitionDispatch *partition_dispatch_info;
+ ResultRelInfo **nonleaf_partitions;
int num_dispatch;
int max_dispatch;
ResultRelInfo **partitions;
@@ -280,9 +286,11 @@ ExecFindPartition(ModifyTableState *mtstate,
PartitionDispatch dispatch;
PartitionDesc partdesc;
ExprContext *ecxt = GetPerTupleExprContext(estate);
- TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
+ TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
+ TupleTableSlot *rootslot = slot;
TupleTableSlot *myslot = NULL;
MemoryContext oldcxt;
+ ResultRelInfo *rri = NULL;
/* use per-tuple context here to avoid leaking memory */
oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
@@ -296,9 +304,8 @@ ExecFindPartition(ModifyTableState *mtstate,
/* start with the root partitioned table */
dispatch = pd[0];
- while (true)
+ while (dispatch != NULL)
{
- AttrMap *map = dispatch->tupmap;
int partidx = -1;
CHECK_FOR_INTERRUPTS();
@@ -307,17 +314,6 @@ ExecFindPartition(ModifyTableState *mtstate,
partdesc = dispatch->partdesc;
/*
- * Convert the tuple to this parent's layout, if different from the
- * current relation.
- */
- myslot = dispatch->tupslot;
- if (myslot != NULL)
- {
- Assert(map != NULL);
- slot = execute_attr_map_slot(map, slot, myslot);
- }
-
- /*
* Extract partition key from tuple. Expression evaluation machinery
* that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
* point to the correct tuple slot. The slot might have changed from
@@ -352,11 +348,9 @@ ExecFindPartition(ModifyTableState *mtstate,
if (partdesc->is_leaf[partidx])
{
- ResultRelInfo *rri;
-
/*
- * Look to see if we've already got a ResultRelInfo for this
- * partition.
+ * We've reached the leaf -- hurray, we're done. Look to see if
+ * we've already got a ResultRelInfo for this partition.
*/
if (likely(dispatch->indexes[partidx] >= 0))
{
@@ -400,14 +394,10 @@ ExecFindPartition(ModifyTableState *mtstate,
dispatch,
rootResultRelInfo, partidx);
}
+ Assert(rri != NULL);
- /* Release the tuple in the lowest parent's dedicated slot. */
- if (slot == myslot)
- ExecClearTuple(myslot);
-
- MemoryContextSwitchTo(oldcxt);
- ecxt->ecxt_scantuple = ecxt_scantuple_old;
- return rri;
+ /* Signal to terminate the loop */
+ dispatch = NULL;
}
else
{
@@ -419,6 +409,8 @@ ExecFindPartition(ModifyTableState *mtstate,
/* Already built. */
Assert(dispatch->indexes[partidx] < proute->num_dispatch);
+ rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
+
/*
* Move down to the next partition level and search again
* until we find a leaf partition that matches this tuple
@@ -440,10 +432,75 @@ ExecFindPartition(ModifyTableState *mtstate,
dispatch, partidx);
Assert(dispatch->indexes[partidx] >= 0 &&
dispatch->indexes[partidx] < proute->num_dispatch);
+
+ rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
dispatch = subdispatch;
}
+
+ /*
+ * Convert the tuple to the new parent's layout, if different from
+ * the previous parent.
+ */
+ if (dispatch->tupslot)
+ {
+ AttrMap *map = dispatch->tupmap;
+ TupleTableSlot *tempslot = myslot;
+
+ myslot = dispatch->tupslot;
+ slot = execute_attr_map_slot(map, slot, myslot);
+
+ if (tempslot != NULL)
+ ExecClearTuple(tempslot);
+ }
+ }
+
+ /*
+ * If this partition is the default one, we must check its partition
+ * constraint now, which may have changed concurrently due to
+ * partitions being added to the parent.
+ *
+ * (We do this here, and do not rely on ExecInsert doing it, because
+ * we don't want to miss doing it for non-leaf partitions.)
+ */
+ if (partidx == partdesc->boundinfo->default_index)
+ {
+ PartitionRoutingInfo *partrouteinfo = rri->ri_PartitionInfo;
+
+ /*
+ * The tuple must match the partition's layout for the constraint
+ * expression to be evaluated successfully. If the partition is
+ * sub-partitioned, that would already be the case due to the code
+ * above, but for a leaf partition the tuple still matches the
+ * parent's layout.
+ *
+ * Note that we have a map to convert from root to current
+ * partition, but not from immediate parent to current partition.
+ * So if we have to convert, do it from the root slot; if not, use
+ * the root slot as-is.
+ */
+ if (partrouteinfo)
+ {
+ TupleConversionMap *map = partrouteinfo->pi_RootToPartitionMap;
+
+ if (map)
+ slot = execute_attr_map_slot(map->attrMap, rootslot,
+ partrouteinfo->pi_PartitionTupleSlot);
+ else
+ slot = rootslot;
+ }
+
+ ExecPartitionCheck(rri, slot, estate, true);
}
}
+
+ /* Release the tuple in the lowest parent's dedicated slot. */
+ if (myslot != NULL)
+ ExecClearTuple(myslot);
+ /* and restore ecxt's scantuple */
+ ecxt->ecxt_scantuple = ecxt_scantuple_saved;
+ MemoryContextSwitchTo(oldcxt);
+
+ return rri;
}
/*
@@ -1060,6 +1117,8 @@ ExecInitPartitionDispatchInfo(EState *estate,
proute->max_dispatch = 4;
proute->partition_dispatch_info = (PartitionDispatch *)
palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
+ proute->nonleaf_partitions = (ResultRelInfo **)
+ palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
}
else
{
@@ -1067,11 +1126,29 @@ ExecInitPartitionDispatchInfo(EState *estate,
proute->partition_dispatch_info = (PartitionDispatch *)
repalloc(proute->partition_dispatch_info,
sizeof(PartitionDispatch) * proute->max_dispatch);
+ proute->nonleaf_partitions = (ResultRelInfo **)
+ repalloc(proute->nonleaf_partitions,
+ sizeof(ResultRelInfo *) * proute->max_dispatch);
}
}
proute->partition_dispatch_info[dispatchidx] = pd;
/*
+ * If setting up a PartitionDispatch for a sub-partitioned table, we may
+ * also need a minimally valid ResultRelInfo for checking the partition
+ * constraint later; set that up now.
+ */
+ if (parent_pd)
+ {
+ ResultRelInfo *rri = makeNode(ResultRelInfo);
+
+ InitResultRelInfo(rri, rel, 1, proute->partition_root, 0);
+ proute->nonleaf_partitions[dispatchidx] = rri;
+ }
+ else
+ proute->nonleaf_partitions[dispatchidx] = NULL;
+
+ /*
* Finally, if setting up a PartitionDispatch for a sub-partitioned table,
* install a downlink in the parent to allow quick descent.
*/