diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2018-08-01 19:42:46 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2018-08-01 19:42:52 -0400 |
commit | 1c2cb2744bf3d8ad751cd5cf3b347f10f48492b3 (patch) | |
tree | 78cbab7db4dacbac9f47b4c78974ba3c8f6c9184 /src/backend/executor/execPartition.c | |
parent | c40489e449ea08e154cd62fa055785873f7bdac8 (diff) | |
download | postgresql-1c2cb2744bf3d8ad751cd5cf3b347f10f48492b3.tar.gz postgresql-1c2cb2744bf3d8ad751cd5cf3b347f10f48492b3.zip |
Fix run-time partition pruning for appends with multiple source rels.
The previous coding here supposed that if run-time partition pruning applied to
a particular Append/MergeAppend plan, then all child plans of that node
must be members of a single partitioning hierarchy. This is totally wrong,
since an Append could be formed from a UNION ALL: we could have multiple
hierarchies sharing the same Append, or child plans that aren't part of any
hierarchy.
To fix, restructure the related plan-time and execution-time data
structures so that we can have a separate list or array for each
partitioning hierarchy. Also track subplans that are not part of any
hierarchy, and make sure they don't get pruned.
Per reports from Phil Florent and others. Back-patch to v11, since
the bug originated there.
David Rowley, with a lot of cosmetic adjustments by me; thanks also
to Amit Langote for review.
Discussion: https://postgr.es/m/HE1PR03MB17068BB27404C90B5B788BCABA7B0@HE1PR03MB1706.eurprd03.prod.outlook.com
Diffstat (limited to 'src/backend/executor/execPartition.c')
-rw-r--r-- | src/backend/executor/execPartition.c | 399 |
1 files changed, 239 insertions, 160 deletions
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 11b7e67e4d6..d13be4145f8 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -48,8 +48,8 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, bool *isnull, int maxfieldlen); static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map); -static void find_matching_subplans_recurse(PartitionPruneState *prunestate, - PartitionPruningData *pprune, +static void find_matching_subplans_recurse(PartitionPruningData *prunedata, + PartitionedRelPruningData *pprune, bool initial_prune, Bitmapset **validsubplans); @@ -1417,34 +1417,42 @@ adjust_partition_tlist(List *tlist, TupleConversionMap *map) * * 'planstate' is the parent plan node's execution state. * - * 'partitionpruneinfo' is a List of PartitionPruneInfos as generated by + * 'partitionpruneinfo' is a PartitionPruneInfo as generated by * make_partition_pruneinfo. Here we build a PartitionPruneState containing a - * PartitionPruningData for each item in that List. This data can be re-used - * each time we re-evaluate which partitions match the pruning steps provided - * in each PartitionPruneInfo. + * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of + * partitionpruneinfo->prune_infos), each of which contains a + * PartitionedRelPruningData for each PartitionedRelPruneInfo appearing in + * that sublist. This two-level system is needed to keep from confusing the + * different hierarchies when a UNION ALL contains multiple partitioned tables + * as children. The data stored in each PartitionedRelPruningData can be + * re-used each time we re-evaluate which partitions match the pruning steps + * provided in each PartitionedRelPruneInfo. 
*/ PartitionPruneState * -ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo) +ExecCreatePartitionPruneState(PlanState *planstate, + PartitionPruneInfo *partitionpruneinfo) { PartitionPruneState *prunestate; - PartitionPruningData *prunedata; + int n_part_hierarchies; ListCell *lc; int i; - Assert(partitionpruneinfo != NIL); + n_part_hierarchies = list_length(partitionpruneinfo->prune_infos); + Assert(n_part_hierarchies > 0); /* * Allocate the data structure */ - prunestate = (PartitionPruneState *) palloc(sizeof(PartitionPruneState)); - prunedata = (PartitionPruningData *) - palloc(sizeof(PartitionPruningData) * list_length(partitionpruneinfo)); + prunestate = (PartitionPruneState *) + palloc(offsetof(PartitionPruneState, partprunedata) + + sizeof(PartitionPruningData *) * n_part_hierarchies); - prunestate->partprunedata = prunedata; - prunestate->num_partprunedata = list_length(partitionpruneinfo); + prunestate->execparamids = NULL; + /* other_subplans can change at runtime, so we need our own copy */ + prunestate->other_subplans = bms_copy(partitionpruneinfo->other_subplans); prunestate->do_initial_prune = false; /* may be set below */ prunestate->do_exec_prune = false; /* may be set below */ - prunestate->execparamids = NULL; + prunestate->num_partprunedata = n_part_hierarchies; /* * Create a short-term memory context which we'll use when making calls to @@ -1458,110 +1466,128 @@ ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo) ALLOCSET_DEFAULT_SIZES); i = 0; - foreach(lc, partitionpruneinfo) + foreach(lc, partitionpruneinfo->prune_infos) { - PartitionPruneInfo *pinfo = castNode(PartitionPruneInfo, lfirst(lc)); - PartitionPruningData *pprune = &prunedata[i]; - PartitionPruneContext *context = &pprune->context; - PartitionDesc partdesc; - PartitionKey partkey; - int partnatts; - int n_steps; + List *partrelpruneinfos = lfirst_node(List, lc); + int npartrelpruneinfos = list_length(partrelpruneinfos); + 
PartitionPruningData *prunedata; ListCell *lc2; + int j; - /* - * We must copy the subplan_map rather than pointing directly to the - * plan's version, as we may end up making modifications to it later. - */ - pprune->subplan_map = palloc(sizeof(int) * pinfo->nparts); - memcpy(pprune->subplan_map, pinfo->subplan_map, - sizeof(int) * pinfo->nparts); + prunedata = (PartitionPruningData *) + palloc(offsetof(PartitionPruningData, partrelprunedata) + + npartrelpruneinfos * sizeof(PartitionedRelPruningData)); + prunestate->partprunedata[i] = prunedata; + prunedata->num_partrelprunedata = npartrelpruneinfos; - /* We can use the subpart_map verbatim, since we never modify it */ - pprune->subpart_map = pinfo->subpart_map; - - /* present_parts is also subject to later modification */ - pprune->present_parts = bms_copy(pinfo->present_parts); - - /* - * We need to hold a pin on the partitioned table's relcache entry so - * that we can rely on its copies of the table's partition key and - * partition descriptor. We need not get a lock though; one should - * have been acquired already by InitPlan or - * ExecLockNonLeafAppendTables. 
- */ - context->partrel = relation_open(pinfo->reloid, NoLock); - - partkey = RelationGetPartitionKey(context->partrel); - partdesc = RelationGetPartitionDesc(context->partrel); - n_steps = list_length(pinfo->pruning_steps); - - context->strategy = partkey->strategy; - context->partnatts = partnatts = partkey->partnatts; - context->nparts = pinfo->nparts; - context->boundinfo = partdesc->boundinfo; - context->partcollation = partkey->partcollation; - context->partsupfunc = partkey->partsupfunc; - - /* We'll look up type-specific support functions as needed */ - context->stepcmpfuncs = (FmgrInfo *) - palloc0(sizeof(FmgrInfo) * n_steps * partnatts); - - context->ppccontext = CurrentMemoryContext; - context->planstate = planstate; - - /* Initialize expression state for each expression we need */ - context->exprstates = (ExprState **) - palloc0(sizeof(ExprState *) * n_steps * partnatts); - foreach(lc2, pinfo->pruning_steps) + j = 0; + foreach(lc2, partrelpruneinfos) { - PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc2); + PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2); + PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j]; + PartitionPruneContext *context = &pprune->context; + PartitionDesc partdesc; + PartitionKey partkey; + int partnatts; + int n_steps; ListCell *lc3; - int keyno; - /* not needed for other step kinds */ - if (!IsA(step, PartitionPruneStepOp)) - continue; + /* + * We must copy the subplan_map rather than pointing directly to + * the plan's version, as we may end up making modifications to it + * later. 
+ */ + pprune->subplan_map = palloc(sizeof(int) * pinfo->nparts); + memcpy(pprune->subplan_map, pinfo->subplan_map, + sizeof(int) * pinfo->nparts); - Assert(list_length(step->exprs) <= partnatts); + /* We can use the subpart_map verbatim, since we never modify it */ + pprune->subpart_map = pinfo->subpart_map; - keyno = 0; - foreach(lc3, step->exprs) + /* present_parts is also subject to later modification */ + pprune->present_parts = bms_copy(pinfo->present_parts); + + /* + * We need to hold a pin on the partitioned table's relcache entry + * so that we can rely on its copies of the table's partition key + * and partition descriptor. We need not get a lock though; one + * should have been acquired already by InitPlan or + * ExecLockNonLeafAppendTables. + */ + context->partrel = relation_open(pinfo->reloid, NoLock); + + partkey = RelationGetPartitionKey(context->partrel); + partdesc = RelationGetPartitionDesc(context->partrel); + n_steps = list_length(pinfo->pruning_steps); + + context->strategy = partkey->strategy; + context->partnatts = partnatts = partkey->partnatts; + context->nparts = pinfo->nparts; + context->boundinfo = partdesc->boundinfo; + context->partcollation = partkey->partcollation; + context->partsupfunc = partkey->partsupfunc; + + /* We'll look up type-specific support functions as needed */ + context->stepcmpfuncs = (FmgrInfo *) + palloc0(sizeof(FmgrInfo) * n_steps * partnatts); + + context->ppccontext = CurrentMemoryContext; + context->planstate = planstate; + + /* Initialize expression state for each expression we need */ + context->exprstates = (ExprState **) + palloc0(sizeof(ExprState *) * n_steps * partnatts); + foreach(lc3, pinfo->pruning_steps) { - Expr *expr = (Expr *) lfirst(lc3); + PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc3); + ListCell *lc4; + int keyno; + + /* not needed for other step kinds */ + if (!IsA(step, PartitionPruneStepOp)) + continue; - /* not needed for Consts */ - if (!IsA(expr, Const)) + 
Assert(list_length(step->exprs) <= partnatts); + + keyno = 0; + foreach(lc4, step->exprs) { - int stateidx = PruneCxtStateIdx(partnatts, - step->step.step_id, - keyno); + Expr *expr = (Expr *) lfirst(lc4); + + /* not needed for Consts */ + if (!IsA(expr, Const)) + { + int stateidx = PruneCxtStateIdx(partnatts, + step->step.step_id, + keyno); - context->exprstates[stateidx] = - ExecInitExpr(expr, context->planstate); + context->exprstates[stateidx] = + ExecInitExpr(expr, context->planstate); + } + keyno++; } - keyno++; } - } - /* Array is not modified at runtime, so just point to plan's copy */ - context->exprhasexecparam = pinfo->hasexecparam; + /* Array is not modified at runtime, so just point to plan's copy */ + context->exprhasexecparam = pinfo->hasexecparam; - pprune->pruning_steps = pinfo->pruning_steps; - pprune->do_initial_prune = pinfo->do_initial_prune; - pprune->do_exec_prune = pinfo->do_exec_prune; + pprune->pruning_steps = pinfo->pruning_steps; + pprune->do_initial_prune = pinfo->do_initial_prune; + pprune->do_exec_prune = pinfo->do_exec_prune; - /* Record if pruning would be useful at any level */ - prunestate->do_initial_prune |= pinfo->do_initial_prune; - prunestate->do_exec_prune |= pinfo->do_exec_prune; + /* Record if pruning would be useful at any level */ + prunestate->do_initial_prune |= pinfo->do_initial_prune; + prunestate->do_exec_prune |= pinfo->do_exec_prune; - /* - * Accumulate the IDs of all PARAM_EXEC Params affecting the - * partitioning decisions at this plan node. - */ - prunestate->execparamids = bms_add_members(prunestate->execparamids, - pinfo->execparamids); + /* + * Accumulate the IDs of all PARAM_EXEC Params affecting the + * partitioning decisions at this plan node. 
+ */ + prunestate->execparamids = bms_add_members(prunestate->execparamids, + pinfo->execparamids); + j++; + } i++; } @@ -1578,13 +1604,17 @@ ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo) void ExecDestroyPartitionPruneState(PartitionPruneState *prunestate) { + PartitionPruningData **partprunedata = prunestate->partprunedata; int i; for (i = 0; i < prunestate->num_partprunedata; i++) { - PartitionPruningData *pprune = &prunestate->partprunedata[i]; + PartitionPruningData *prunedata = partprunedata[i]; + PartitionedRelPruningData *pprune = prunedata->partrelprunedata; + int j; - relation_close(pprune->context.partrel, NoLock); + for (j = 0; j < prunedata->num_partrelprunedata; j++) + relation_close(pprune[j].context.partrel, NoLock); } } @@ -1604,31 +1634,46 @@ ExecDestroyPartitionPruneState(PartitionPruneState *prunestate) Bitmapset * ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) { - PartitionPruningData *pprune; - MemoryContext oldcontext; Bitmapset *result = NULL; + MemoryContext oldcontext; + int i; Assert(prunestate->do_initial_prune); - pprune = prunestate->partprunedata; - /* * Switch to a temp context to avoid leaking memory in the executor's * memory context. */ oldcontext = MemoryContextSwitchTo(prunestate->prune_context); - /* Perform pruning without using PARAM_EXEC Params */ - find_matching_subplans_recurse(prunestate, pprune, true, &result); + /* + * For each hierarchy, do the pruning tests, and add nondeletable subplans' + * indexes to "result". 
+ */ + for (i = 0; i < prunestate->num_partprunedata; i++) + { + PartitionPruningData *prunedata; + PartitionedRelPruningData *pprune; + + prunedata = prunestate->partprunedata[i]; + pprune = &prunedata->partrelprunedata[0]; + + /* Perform pruning without using PARAM_EXEC Params */ + find_matching_subplans_recurse(prunedata, pprune, true, &result); + + /* Expression eval may have used space in node's ps_ExprContext too */ + ResetExprContext(pprune->context.planstate->ps_ExprContext); + } MemoryContextSwitchTo(oldcontext); /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + /* Add in any subplans that partition pruning didn't account for */ + result = bms_add_members(result, prunestate->other_subplans); + MemoryContextReset(prunestate->prune_context); - /* Expression eval may have used space in node's ps_ExprContext too */ - ResetExprContext(pprune->context.planstate->ps_ExprContext); /* * If any subplans were pruned, we must re-sequence the subplan indexes so @@ -1638,14 +1683,17 @@ ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) if (bms_num_members(result) < nsubplans) { int *new_subplan_indexes; + Bitmapset *new_other_subplans; int i; int newidx; /* * First we must build a temporary array which maps old subplan - * indexes to new ones. + * indexes to new ones. While we're at it, also recompute the + * other_subplans set, since indexes in it may change. 
*/ new_subplan_indexes = (int *) palloc(sizeof(int) * nsubplans); + new_other_subplans = NULL; newidx = 0; for (i = 0; i < nsubplans; i++) { @@ -1653,58 +1701,74 @@ ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) new_subplan_indexes[i] = newidx++; else new_subplan_indexes[i] = -1; /* Newly pruned */ + + if (bms_is_member(i, prunestate->other_subplans)) + new_other_subplans = bms_add_member(new_other_subplans, + new_subplan_indexes[i]); } + bms_free(prunestate->other_subplans); + prunestate->other_subplans = new_other_subplans; /* - * Now we can update each PartitionPruneInfo's subplan_map with new - * subplan indexes. We must also recompute its present_parts bitmap. - * We perform this loop in back-to-front order so that we determine - * present_parts for the lowest-level partitioned tables first. This - * way we can tell whether a sub-partitioned table's partitions were - * entirely pruned so we can exclude that from 'present_parts'. + * Now we can update each PartitionedRelPruneInfo's subplan_map with + * new subplan indexes. We must also recompute its present_parts + * bitmap. */ - for (i = prunestate->num_partprunedata - 1; i >= 0; i--) + for (i = 0; i < prunestate->num_partprunedata; i++) { - int nparts; + PartitionPruningData *prunedata = prunestate->partprunedata[i]; int j; - pprune = &prunestate->partprunedata[i]; - nparts = pprune->context.nparts; - /* We just rebuild present_parts from scratch */ - bms_free(pprune->present_parts); - pprune->present_parts = NULL; - - for (j = 0; j < nparts; j++) + /* + * Within each hierarchy, we perform this loop in back-to-front + * order so that we determine present_parts for the lowest-level + * partitioned tables first. This way we can tell whether a + * sub-partitioned table's partitions were entirely pruned so we + * can exclude that from 'present_parts'. 
+ */ + for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--) { - int oldidx = pprune->subplan_map[j]; - int subidx; + PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j]; + int nparts = pprune->context.nparts; + int k; - /* - * If this partition existed as a subplan then change the old - * subplan index to the new subplan index. The new index may - * become -1 if the partition was pruned above, or it may just - * come earlier in the subplan list due to some subplans being - * removed earlier in the list. If it's a subpartition, add - * it to present_parts unless it's entirely pruned. - */ - if (oldidx >= 0) - { - Assert(oldidx < nsubplans); - pprune->subplan_map[j] = new_subplan_indexes[oldidx]; + /* We just rebuild present_parts from scratch */ + bms_free(pprune->present_parts); + pprune->present_parts = NULL; - if (new_subplan_indexes[oldidx] >= 0) - pprune->present_parts = - bms_add_member(pprune->present_parts, j); - } - else if ((subidx = pprune->subpart_map[j]) >= 0) + for (k = 0; k < nparts; k++) { - PartitionPruningData *subprune; + int oldidx = pprune->subplan_map[k]; + int subidx; - subprune = &prunestate->partprunedata[subidx]; + /* + * If this partition existed as a subplan then change the + * old subplan index to the new subplan index. The new + * index may become -1 if the partition was pruned above, + * or it may just come earlier in the subplan list due to + * some subplans being removed earlier in the list. If + * it's a subpartition, add it to present_parts unless + * it's entirely pruned. 
+ */ + if (oldidx >= 0) + { + Assert(oldidx < nsubplans); + pprune->subplan_map[k] = new_subplan_indexes[oldidx]; - if (!bms_is_empty(subprune->present_parts)) - pprune->present_parts = - bms_add_member(pprune->present_parts, j); + if (new_subplan_indexes[oldidx] >= 0) + pprune->present_parts = + bms_add_member(pprune->present_parts, k); + } + else if ((subidx = pprune->subpart_map[k]) >= 0) + { + PartitionedRelPruningData *subprune; + + subprune = &prunedata->partrelprunedata[subidx]; + + if (!bms_is_empty(subprune->present_parts)) + pprune->present_parts = + bms_add_member(pprune->present_parts, k); + } } } } @@ -1725,11 +1789,9 @@ ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate) { - PartitionPruningData *pprune; - MemoryContext oldcontext; Bitmapset *result = NULL; - - pprune = prunestate->partprunedata; + MemoryContext oldcontext; + int i; /* * Switch to a temp context to avoid leaking memory in the executor's @@ -1737,16 +1799,33 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate) */ oldcontext = MemoryContextSwitchTo(prunestate->prune_context); - find_matching_subplans_recurse(prunestate, pprune, false, &result); + /* + * For each hierarchy, do the pruning tests, and add nondeletable subplans' + * indexes to "result". 
+ */ + for (i = 0; i < prunestate->num_partprunedata; i++) + { + PartitionPruningData *prunedata; + PartitionedRelPruningData *pprune; + + prunedata = prunestate->partprunedata[i]; + pprune = &prunedata->partrelprunedata[0]; + + find_matching_subplans_recurse(prunedata, pprune, false, &result); + + /* Expression eval may have used space in node's ps_ExprContext too */ + ResetExprContext(pprune->context.planstate->ps_ExprContext); + } MemoryContextSwitchTo(oldcontext); /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + /* Add in any subplans that partition pruning didn't account for */ + result = bms_add_members(result, prunestate->other_subplans); + MemoryContextReset(prunestate->prune_context); - /* Expression eval may have used space in node's ps_ExprContext too */ - ResetExprContext(pprune->context.planstate->ps_ExprContext); return result; } @@ -1759,8 +1838,8 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate) * Adds valid (non-prunable) subplan IDs to *validsubplans */ static void -find_matching_subplans_recurse(PartitionPruneState *prunestate, - PartitionPruningData *pprune, +find_matching_subplans_recurse(PartitionPruningData *prunedata, + PartitionedRelPruningData *pprune, bool initial_prune, Bitmapset **validsubplans) { @@ -1802,8 +1881,8 @@ find_matching_subplans_recurse(PartitionPruneState *prunestate, int partidx = pprune->subpart_map[i]; if (partidx >= 0) - find_matching_subplans_recurse(prunestate, - &prunestate->partprunedata[partidx], + find_matching_subplans_recurse(prunedata, + &prunedata->partrelprunedata[partidx], initial_prune, validsubplans); else { |