aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/backend/executor/execPartition.c 127
-rw-r--r-- src/test/isolation/expected/partition-concurrent-attach.out 49
-rw-r--r-- src/test/isolation/isolation_schedule 1
-rw-r--r-- src/test/isolation/specs/partition-concurrent-attach.spec 43
4 files changed, 195 insertions, 25 deletions
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 79fcbd6b066..bd2ea258047 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -51,6 +51,11 @@
* PartitionDispatchData->indexes for details on how this array is
* indexed.
*
+ * nonleaf_partitions
+ * Array of 'max_dispatch' elements containing pointers to fake
+ * ResultRelInfo objects for nonleaf partitions, useful for checking
+ * the partition constraint.
+ *
* num_dispatch
* The current number of items stored in the 'partition_dispatch_info'
* array. Also serves as the index of the next free array element for
@@ -89,6 +94,7 @@ struct PartitionTupleRouting
{
Relation partition_root;
PartitionDispatch *partition_dispatch_info;
+ ResultRelInfo **nonleaf_partitions;
int num_dispatch;
int max_dispatch;
ResultRelInfo **partitions;
@@ -280,9 +286,11 @@ ExecFindPartition(ModifyTableState *mtstate,
PartitionDispatch dispatch;
PartitionDesc partdesc;
ExprContext *ecxt = GetPerTupleExprContext(estate);
- TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
+ TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
+ TupleTableSlot *rootslot = slot;
TupleTableSlot *myslot = NULL;
MemoryContext oldcxt;
+ ResultRelInfo *rri = NULL;
/* use per-tuple context here to avoid leaking memory */
oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
@@ -296,9 +304,8 @@ ExecFindPartition(ModifyTableState *mtstate,
/* start with the root partitioned table */
dispatch = pd[0];
- while (true)
+ while (dispatch != NULL)
{
- AttrMap *map = dispatch->tupmap;
int partidx = -1;
CHECK_FOR_INTERRUPTS();
@@ -307,17 +314,6 @@ ExecFindPartition(ModifyTableState *mtstate,
partdesc = dispatch->partdesc;
/*
- * Convert the tuple to this parent's layout, if different from the
- * current relation.
- */
- myslot = dispatch->tupslot;
- if (myslot != NULL)
- {
- Assert(map != NULL);
- slot = execute_attr_map_slot(map, slot, myslot);
- }
-
- /*
* Extract partition key from tuple. Expression evaluation machinery
* that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
* point to the correct tuple slot. The slot might have changed from
@@ -352,11 +348,9 @@ ExecFindPartition(ModifyTableState *mtstate,
if (partdesc->is_leaf[partidx])
{
- ResultRelInfo *rri;
-
/*
- * Look to see if we've already got a ResultRelInfo for this
- * partition.
+ * We've reached the leaf -- hurray, we're done. Look to see if
+ * we've already got a ResultRelInfo for this partition.
*/
if (likely(dispatch->indexes[partidx] >= 0))
{
@@ -400,14 +394,10 @@ ExecFindPartition(ModifyTableState *mtstate,
dispatch,
rootResultRelInfo, partidx);
}
+ Assert(rri != NULL);
- /* Release the tuple in the lowest parent's dedicated slot. */
- if (slot == myslot)
- ExecClearTuple(myslot);
-
- MemoryContextSwitchTo(oldcxt);
- ecxt->ecxt_scantuple = ecxt_scantuple_old;
- return rri;
+ /* Signal to terminate the loop */
+ dispatch = NULL;
}
else
{
@@ -419,6 +409,8 @@ ExecFindPartition(ModifyTableState *mtstate,
/* Already built. */
Assert(dispatch->indexes[partidx] < proute->num_dispatch);
+ rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
+
/*
* Move down to the next partition level and search again
* until we find a leaf partition that matches this tuple
@@ -440,10 +432,75 @@ ExecFindPartition(ModifyTableState *mtstate,
dispatch, partidx);
Assert(dispatch->indexes[partidx] >= 0 &&
dispatch->indexes[partidx] < proute->num_dispatch);
+
+ rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
dispatch = subdispatch;
}
+
+ /*
+ * Convert the tuple to the new parent's layout, if different from
+ * the previous parent.
+ */
+ if (dispatch->tupslot)
+ {
+ AttrMap *map = dispatch->tupmap;
+ TupleTableSlot *tempslot = myslot;
+
+ myslot = dispatch->tupslot;
+ slot = execute_attr_map_slot(map, slot, myslot);
+
+ if (tempslot != NULL)
+ ExecClearTuple(tempslot);
+ }
+ }
+
+ /*
+ * If this partition is the default one, we must check its partition
+ * constraint now, which may have changed concurrently due to
+ * partitions being added to the parent.
+ *
+ * (We do this here, and do not rely on ExecInsert doing it, because
+ * we don't want to miss doing it for non-leaf partitions.)
+ */
+ if (partidx == partdesc->boundinfo->default_index)
+ {
+ PartitionRoutingInfo *partrouteinfo = rri->ri_PartitionInfo;
+
+ /*
+ * The tuple must match the partition's layout for the constraint
+ * expression to be evaluated successfully. If the partition is
+ * sub-partitioned, that would already be the case due to the code
+ * above, but for a leaf partition the tuple still matches the
+ * parent's layout.
+ *
+ * Note that we have a map to convert from root to current
+ * partition, but not from immediate parent to current partition.
+ * So if we have to convert, do it from the root slot; if not, use
+ * the root slot as-is.
+ */
+ if (partrouteinfo)
+ {
+ TupleConversionMap *map = partrouteinfo->pi_RootToPartitionMap;
+
+ if (map)
+ slot = execute_attr_map_slot(map->attrMap, rootslot,
+ partrouteinfo->pi_PartitionTupleSlot);
+ else
+ slot = rootslot;
+ }
+
+ ExecPartitionCheck(rri, slot, estate, true);
}
}
+
+ /* Release the tuple in the lowest parent's dedicated slot. */
+ if (myslot != NULL)
+ ExecClearTuple(myslot);
+ /* and restore ecxt's scantuple */
+ ecxt->ecxt_scantuple = ecxt_scantuple_saved;
+ MemoryContextSwitchTo(oldcxt);
+
+ return rri;
}
/*
@@ -1060,6 +1117,8 @@ ExecInitPartitionDispatchInfo(EState *estate,
proute->max_dispatch = 4;
proute->partition_dispatch_info = (PartitionDispatch *)
palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
+ proute->nonleaf_partitions = (ResultRelInfo **)
+ palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
}
else
{
@@ -1067,11 +1126,29 @@ ExecInitPartitionDispatchInfo(EState *estate,
proute->partition_dispatch_info = (PartitionDispatch *)
repalloc(proute->partition_dispatch_info,
sizeof(PartitionDispatch) * proute->max_dispatch);
+ proute->nonleaf_partitions = (ResultRelInfo **)
+ repalloc(proute->nonleaf_partitions,
+ sizeof(ResultRelInfo *) * proute->max_dispatch);
}
}
proute->partition_dispatch_info[dispatchidx] = pd;
/*
+ * If setting up a PartitionDispatch for a sub-partitioned table, we may
+ * also need a minimally valid ResultRelInfo for checking the partition
+ * constraint later; set that up now.
+ */
+ if (parent_pd)
+ {
+ ResultRelInfo *rri = makeNode(ResultRelInfo);
+
+ InitResultRelInfo(rri, rel, 1, proute->partition_root, 0);
+ proute->nonleaf_partitions[dispatchidx] = rri;
+ }
+ else
+ proute->nonleaf_partitions[dispatchidx] = NULL;
+
+ /*
* Finally, if setting up a PartitionDispatch for a sub-partitioned table,
* install a downlink in the parent to allow quick descent.
*/
diff --git a/src/test/isolation/expected/partition-concurrent-attach.out b/src/test/isolation/expected/partition-concurrent-attach.out
new file mode 100644
index 00000000000..17fac399898
--- /dev/null
+++ b/src/test/isolation/expected/partition-concurrent-attach.out
@@ -0,0 +1,49 @@
+Parsed test spec with 2 sessions
+
+starting permutation: s1b s1a s2b s2i s1c s2c s2s
+step s1b: begin;
+step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200);
+step s2b: begin;
+step s2i: insert into tpart values (110,'xxx'), (120, 'yyy'), (150, 'zzz'); <waiting ...>
+step s1c: commit;
+step s2i: <... completed>
+error in steps s1c s2i: ERROR: new row for relation "tpart_default" violates partition constraint
+step s2c: commit;
+step s2s: select tableoid::regclass, * from tpart;
+tableoid i j
+
+tpart_2 110 xxx
+tpart_2 120 yyy
+tpart_2 150 zzz
+
+starting permutation: s1b s1a s2b s2i2 s1c s2c s2s
+step s1b: begin;
+step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200);
+step s2b: begin;
+step s2i2: insert into tpart_default (i, j) values (110, 'xxx'), (120, 'yyy'), (150, 'zzz'); <waiting ...>
+step s1c: commit;
+step s2i2: <... completed>
+error in steps s1c s2i2: ERROR: new row for relation "tpart_default" violates partition constraint
+step s2c: commit;
+step s2s: select tableoid::regclass, * from tpart;
+tableoid i j
+
+tpart_2 110 xxx
+tpart_2 120 yyy
+tpart_2 150 zzz
+
+starting permutation: s1b s2b s2i s1a s2c s1c s2s
+step s1b: begin;
+step s2b: begin;
+step s2i: insert into tpart values (110,'xxx'), (120, 'yyy'), (150, 'zzz');
+step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200); <waiting ...>
+step s2c: commit;
+step s1a: <... completed>
+error in steps s2c s1a: ERROR: updated partition constraint for default partition "tpart_default_default" would be violated by some row
+step s1c: commit;
+step s2s: select tableoid::regclass, * from tpart;
+tableoid i j
+
+tpart_default_default110 xxx
+tpart_default_default120 yyy
+tpart_default_default150 zzz
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 218c87b24bf..a4144127add 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -80,6 +80,7 @@ test: vacuum-skip-locked
test: predicate-hash
test: predicate-gist
test: predicate-gin
+test: partition-concurrent-attach
test: partition-key-update-1
test: partition-key-update-2
test: partition-key-update-3
diff --git a/src/test/isolation/specs/partition-concurrent-attach.spec b/src/test/isolation/specs/partition-concurrent-attach.spec
new file mode 100644
index 00000000000..48c3f83e0c8
--- /dev/null
+++ b/src/test/isolation/specs/partition-concurrent-attach.spec
@@ -0,0 +1,43 @@
+# Verify that default partition constraint is enforced correctly
+# in light of partitions being added concurrently to its parent
+setup {
+ drop table if exists tpart;
+ create table tpart(i int, j text) partition by range(i);
+ create table tpart_1(like tpart);
+ create table tpart_2(like tpart);
+ create table tpart_default (a int, j text, i int) partition by list (j);
+ create table tpart_default_default (a int, i int, b int, j text);
+ alter table tpart_default_default drop b;
+ alter table tpart_default attach partition tpart_default_default default;
+ alter table tpart_default drop a;
+ alter table tpart attach partition tpart_default default;
+ alter table tpart attach partition tpart_1 for values from(0) to (100);
+ insert into tpart_2 values (110,'xxx'), (120, 'yyy'), (150, 'zzz');
+}
+
+session "s1"
+step "s1b" { begin; }
+step "s1a" { alter table tpart attach partition tpart_2 for values from (100) to (200); }
+step "s1c" { commit; }
+
+session "s2"
+step "s2b" { begin; }
+step "s2i" { insert into tpart values (110,'xxx'), (120, 'yyy'), (150, 'zzz'); }
+step "s2i2" { insert into tpart_default (i, j) values (110, 'xxx'), (120, 'yyy'), (150, 'zzz'); }
+step "s2c" { commit; }
+step "s2s" { select tableoid::regclass, * from tpart; }
+
+teardown { drop table tpart; }
+
+# insert into tpart by s2, which routes to tpart_default due to not seeing
+# the concurrently added tpart_2, should fail because the partition constraint
+# of tpart_default would have changed due to tpart_2 having been added
+permutation "s1b" "s1a" "s2b" "s2i" "s1c" "s2c" "s2s"
+
+# similar to above, but now insert into sub-partitioned tpart_default
+permutation "s1b" "s1a" "s2b" "s2i2" "s1c" "s2c" "s2s"
+
+# reverse: now the insert into tpart_default by s2 occurs first, followed by
+# attach in s1, which should fail when it scans the leaf default partition
+# and finds the violating rows
+permutation "s1b" "s2b" "s2i" "s1a" "s2c" "s1c" "s2s"