aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/execPartition.c
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2018-04-07 17:54:31 -0300
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2018-04-07 17:54:39 -0300
commit499be013de65242235ebdde06adb08db887f0ea5 (patch)
treec1f69b818f917379fb4f72e80535fef899a40b5b /src/backend/executor/execPartition.c
parent5c0675215e153ba1297fd494b34af2fdebd645d1 (diff)
downloadpostgresql-499be013de65242235ebdde06adb08db887f0ea5.tar.gz
postgresql-499be013de65242235ebdde06adb08db887f0ea5.zip
Support partition pruning at execution time
Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
Diffstat (limited to 'src/backend/executor/execPartition.c')
-rw-r--r--src/backend/executor/execPartition.c419
1 files changed, 419 insertions, 0 deletions
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index ac94f9f3374..57a24d48783 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -40,6 +40,10 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
bool *isnull,
int maxfieldlen);
static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map);
+static void find_subplans_for_params_recurse(PartitionPruneState *prunestate,
+ PartitionPruningData *pprune,
+ bool allparams,
+ Bitmapset **validsubplans);
/*
@@ -1293,3 +1297,418 @@ adjust_partition_tlist(List *tlist, TupleConversionMap *map)
return new_tlist;
}
+
+/*-------------------------------------------------------------------------
+ * Run-Time Partition Pruning Support.
+ *
+ * The following series of functions exist to support the removal of unneeded
+ * subnodes for queries against partitioned tables. The supporting functions
+ * here are designed to work with any node type which supports an arbitrary
+ * number of subnodes, e.g. Append, MergeAppend.
+ *
+ * Normally this pruning work is performed by the query planner's partition
+ * pruning code, however, the planner is limited to only being able to prune
+ * away unneeded partitions using quals which compare the partition key to a
+ * value which is known to be Const during planning. To allow the same
+ * pruning to be performed for values which are only determined during
+ * execution, we must make an additional pruning attempt during execution.
+ *
+ * Here we support pruning using both external and exec Params. The main
+ * difference between these that we need to concern ourselves with is the
+ * time when the values of the Params are known. External Param values are
+ * known at any time of execution, including executor startup, but exec Param
+ * values are only known when the executor is running.
+ *
+ * For external Params we may be able to prune away unneeded partitions
+ * during executor startup. This has the added benefit of not having to
+ * initialize the unneeded subnodes at all. This is useful as it can save
+ * quite a bit of effort during executor startup.
+ *
+ * For exec Params, we must delay pruning until the executor is running.
+ *
+ * Functions:
+ *
+ * ExecSetupPartitionPruneState:
+ * This must be called by nodes before any partition pruning is
+ * attempted. Normally executor startup is a good time. This function
+ * creates the PartitionPruneState details which are required by each
+ * of the two pruning functions, details include information about
+ * how to map the partition index details which are returned by the
+ * planner's partition prune function into subnode indexes.
+ *
+ * ExecFindInitialMatchingSubPlans:
+ * Returns indexes of matching subnodes utilizing only external Params
+ * to eliminate subnodes. The function must only be called during
+ * executor startup for the given node before the subnodes themselves
+ * are initialized. Subnodes which are found not to match by this
+ * function must not be included in the node's list of subnodes as this
+ * function performs a remap of the partition index to subplan index map
+ * and the newly created map provides indexes only for subnodes which
+ * remain after calling this function.
+ *
+ * ExecFindMatchingSubPlans:
+ * Returns indexes of matching subnodes utilizing all Params to eliminate
+ * subnodes which can't possibly contain matching tuples. This function
+ * can only be called while the executor is running.
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * ExecSetupPartitionPruneState
+ * Setup the required data structure which is required for calling
+ * ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans.
+ *
+ * 'partitionpruneinfo' is a List of PartitionPruneInfos as generated by
+ * make_partition_pruneinfo. Here we build a PartitionPruneContext for each
+ * item in the List. These contexts can be re-used each time we re-evaulate
+ * which partitions match the pruning steps provided in each
+ * PartitionPruneInfo.
+ */
+PartitionPruneState *
+ExecSetupPartitionPruneState(PlanState *planstate, List *partitionpruneinfo)
+{
+ PartitionPruningData *prunedata;
+ PartitionPruneState *prunestate;
+ ListCell *lc;
+ int i;
+
+ Assert(partitionpruneinfo != NIL);
+
+ prunestate = (PartitionPruneState *) palloc(sizeof(PartitionPruneState));
+ prunedata = (PartitionPruningData *)
+ palloc(sizeof(PartitionPruningData) * list_length(partitionpruneinfo));
+
+ /*
+ * The first item in the array contains the details for the query's target
+ * partition, so record that as the root of the partition hierarchy.
+ */
+ prunestate->partprunedata = prunedata;
+ prunestate->num_partprunedata = list_length(partitionpruneinfo);
+ prunestate->extparams = NULL;
+ prunestate->execparams = NULL;
+
+ /*
+ * Create a sub memory context which we'll use when making calls to the
+ * query planner's function to determine which partitions will match. The
+ * planner is not too careful about freeing memory, so we'll ensure we
+ * call the function in this context to avoid any memory leaking in the
+ * executor's memory context.
+ */
+ prunestate->prune_context =
+ AllocSetContextCreate(CurrentMemoryContext,
+ "Partition Prune",
+ ALLOCSET_DEFAULT_SIZES);
+
+ i = 0;
+ foreach(lc, partitionpruneinfo)
+ {
+ PartitionPruneInfo *pinfo = (PartitionPruneInfo *) lfirst(lc);
+ PartitionPruningData *pprune = &prunedata[i];
+ PartitionPruneContext *context = &pprune->context;
+ PartitionDesc partdesc;
+ Relation rel;
+ PartitionKey partkey;
+ int partnatts;
+
+ pprune->present_parts = bms_copy(pinfo->present_parts);
+ pprune->subnode_map = palloc(sizeof(int) * pinfo->nparts);
+
+ /*
+ * We must make a copy of this rather than pointing directly to the
+ * plan's version as we may end up making modifications to it later.
+ */
+ memcpy(pprune->subnode_map, pinfo->subnode_map,
+ sizeof(int) * pinfo->nparts);
+
+ /* We can use the subpart_map verbatim, since we never modify it */
+ pprune->subpart_map = pinfo->subpart_map;
+
+ /*
+ * Grab some info from the table's relcache; lock was already obtained
+ * by ExecLockNonLeafAppendTables.
+ */
+ rel = relation_open(pinfo->reloid, NoLock);
+
+ partkey = RelationGetPartitionKey(rel);
+ partdesc = RelationGetPartitionDesc(rel);
+
+ context->strategy = partkey->strategy;
+ context->partnatts = partnatts = partkey->partnatts;
+ context->partopfamily = partkey->partopfamily;
+ context->partopcintype = partkey->partopcintype;
+ context->partcollation = partkey->partcollation;
+ context->partsupfunc = partkey->partsupfunc;
+ context->nparts = pinfo->nparts;
+ context->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey);
+ context->planstate = planstate;
+ context->safeparams = NULL; /* empty for now */
+
+ pprune->pruning_steps = pinfo->pruning_steps;
+ pprune->extparams = bms_copy(pinfo->extparams);
+ pprune->allparams = bms_union(pinfo->extparams, pinfo->execparams);
+
+ /*
+ * Accumulate the paramids which match the partitioned keys of all
+ * partitioned tables.
+ */
+ prunestate->extparams = bms_add_members(prunestate->extparams,
+ pinfo->extparams);
+
+ prunestate->execparams = bms_add_members(prunestate->execparams,
+ pinfo->execparams);
+
+ relation_close(rel, NoLock);
+
+ i++;
+ }
+
+ /*
+ * Cache the union of the paramids of both types. This saves having to
+ * recalculate it everytime we need to know what they are.
+ */
+ prunestate->allparams = bms_union(prunestate->extparams,
+ prunestate->execparams);
+
+ return prunestate;
+}
+
+/*
+ * ExecFindInitialMatchingSubPlans
+ * Determine which subset of subplan nodes we need to initialize based
+ * on the details stored in 'prunestate'. Here we only determine the
+ * matching partitions using values known during plan startup, which is
+ * only external Params. Exec Params will be unknown at this time. We
+ * must delay pruning using exec Params until the actual executor run.
+ *
+ * It is expected that callers of this function do so only once during their
+ * init plan. The caller must only initialize the subnodes which are returned
+ * by this function. The remaining subnodes should be discarded. Once this
+ * function has been called, future calls to ExecFindMatchingSubPlans will
+ * return its matching subnode indexes assuming that the caller discarded
+ * the original non-matching subnodes.
+ *
+ * This function must only be called if 'prunestate' has any extparams.
+ *
+ * 'nsubnodes' must be passed as the total number of unpruned subnodes.
+ */
+Bitmapset *
+ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubnodes)
+{
+ PartitionPruningData *pprune;
+ MemoryContext oldcontext;
+ Bitmapset *result = NULL;
+
+ /*
+ * Ensure there's actually external params, or we've not been called
+ * already.
+ */
+ Assert(!bms_is_empty(prunestate->extparams));
+
+ pprune = prunestate->partprunedata;
+
+ /*
+ * Switch to a temp context to avoid leaking memory in the executor's
+ * memory context.
+ */
+ oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
+
+ /* Determine which subnodes match the external params */
+ find_subplans_for_params_recurse(prunestate, pprune, false, &result);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Move to the correct memory context */
+ result = bms_copy(result);
+
+ MemoryContextReset(prunestate->prune_context);
+
+ /*
+ * Record that partition pruning has been performed for external params.
+ * This partly also serves to ensure we never call this function twice
+ * with the same input and also so that ExecFindMatchingSubPlans is aware
+ * that pruning has already been performed for external Params.
+ */
+ bms_free(prunestate->extparams);
+ prunestate->extparams = NULL;
+
+ /*
+ * If any subnodes were pruned, we must re-sequence the subnode indexes so
+ * that ExecFindMatchingSubPlans properly returns the indexes from the
+ * subnodes which will remain after execution of this function.
+ */
+ if (bms_num_members(result) < nsubnodes)
+ {
+ int *new_subnode_indexes;
+ int i;
+ int newidx;
+
+ /*
+ * First we must build an array which we can use to adjust the
+ * existing subnode_map so that it contains the new subnode indexes.
+ */
+ new_subnode_indexes = (int *) palloc(sizeof(int) * nsubnodes);
+ newidx = 0;
+ for (i = 0; i < nsubnodes; i++)
+ {
+ if (bms_is_member(i, result))
+ new_subnode_indexes[i] = newidx++;
+ else
+ new_subnode_indexes[i] = -1; /* Newly pruned */
+ }
+
+ /*
+ * Now we can re-sequence each PartitionPruneInfo's subnode_map so
+ * that they point to the new index of the subnode.
+ */
+ for (i = 0; i < prunestate->num_partprunedata; i++)
+ {
+ int nparts;
+ int j;
+
+ pprune = &prunestate->partprunedata[i];
+ nparts = pprune->context.nparts;
+
+ /*
+ * We also need to reset the present_parts field so that it only
+ * contains partition indexes that we actually still have subnodes
+ * for. It seems easier to build a fresh one, rather than trying
+ * to update the existing one.
+ */
+ bms_free(pprune->present_parts);
+ pprune->present_parts = NULL;
+
+ for (j = 0; j < nparts; j++)
+ {
+ int oldidx = pprune->subnode_map[j];
+
+ /*
+ * If this partition existed as a subnode then change the old
+ * subnode index to the new subnode index. The new index may
+ * become -1 if the partition was pruned above, or it may just
+ * come earlier in the subnode list due to some subnodes being
+ * removed earlier in the list.
+ */
+ if (oldidx >= 0)
+ {
+ pprune->subnode_map[j] = new_subnode_indexes[oldidx];
+
+ if (new_subnode_indexes[oldidx] >= 0)
+ pprune->present_parts =
+ bms_add_member(pprune->present_parts, j);
+ }
+ }
+ }
+
+ pfree(new_subnode_indexes);
+ }
+
+ return result;
+}
+
+/*
+ * ExecFindMatchingSubPlans
+ * Determine which subplans match the the pruning steps detailed in
+ * 'pprune' for the current Param values.
+ *
+ * Here we utilize both external and exec Params for pruning.
+ */
+Bitmapset *
+ExecFindMatchingSubPlans(PartitionPruneState *prunestate)
+{
+ PartitionPruningData *pprune;
+ MemoryContext oldcontext;
+ Bitmapset *result = NULL;
+
+ pprune = prunestate->partprunedata;
+
+ /*
+ * Switch to a temp context to avoid leaking memory in the executor's
+ * memory context.
+ */
+ oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
+
+ find_subplans_for_params_recurse(prunestate, pprune, true, &result);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Move to the correct memory context */
+ result = bms_copy(result);
+
+ MemoryContextReset(prunestate->prune_context);
+
+ return result;
+}
+
+/*
+ * find_subplans_for_params_recurse
+ * Recursive worker function for ExecFindMatchingSubPlans and
+ * ExecFindInitialMatchingSubPlans
+ */
+static void
+find_subplans_for_params_recurse(PartitionPruneState *prunestate,
+ PartitionPruningData *pprune,
+ bool allparams,
+ Bitmapset **validsubplans)
+{
+ PartitionPruneContext *context = &pprune->context;
+ Bitmapset *partset;
+ Bitmapset *pruneparams;
+ int i;
+
+ /* Guard against stack overflow due to overly deep partition hierarchy. */
+ check_stack_depth();
+
+ /*
+ * Use only external params unless we've been asked to also use exec
+ * params too.
+ */
+ if (allparams)
+ pruneparams = pprune->allparams;
+ else
+ pruneparams = pprune->extparams;
+
+ /*
+ * We only need to determine the matching partitions if there are any
+ * params matching the partition key at this level. If there are no
+ * matching params, then we can simply return all subnodes which belong to
+ * this parent partition. The planner should have already determined
+ * these to be the minimum possible set. We must still recursively visit
+ * any subpartitioned tables as we may find their partition keys match
+ * some Params at their level.
+ */
+ if (!bms_is_empty(pruneparams))
+ {
+ context->safeparams = pruneparams;
+ partset = get_matching_partitions(context,
+ pprune->pruning_steps);
+ }
+ else
+ partset = pprune->present_parts;
+
+ /* Translate partset into subnode indexes */
+ i = -1;
+ while ((i = bms_next_member(partset, i)) >= 0)
+ {
+ if (pprune->subnode_map[i] >= 0)
+ *validsubplans = bms_add_member(*validsubplans,
+ pprune->subnode_map[i]);
+ else
+ {
+ int partidx = pprune->subpart_map[i];
+
+ if (partidx != -1)
+ find_subplans_for_params_recurse(prunestate,
+ &prunestate->partprunedata[partidx],
+ allparams, validsubplans);
+ else
+ {
+ /*
+ * This could only happen if clauses used in planning where
+ * more restrictive than those used here, or if the maps are
+ * somehow corrupt.
+ */
+ elog(ERROR, "partition missing from subplans");
+ }
+ }
+ }
+}