aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2000-12-14 22:30:45 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2000-12-14 22:30:45 +0000
commitea166f11462c863d91378fcbb15d4d3140002413 (patch)
treeef157dad5b07081aae231ab9691f2ef2d5b625a4 /src/backend/optimizer/path
parentdb11f4382abad09d42e784c1fa19dfbcd68038ac (diff)
downloadpostgresql-ea166f11462c863d91378fcbb15d4d3140002413.tar.gz
postgresql-ea166f11462c863d91378fcbb15d4d3140002413.zip
Planner speedup hacking. Avoid saving useless pathkeys, so that path
comparison does not consider paths different when they differ only in uninteresting aspects of sort order. (We had a special case of this consideration for indexscans already, but generalize it to apply to ordered join paths too.) Be stricter about what is a canonical pathkey to allow faster pathkey comparison. Cache canonical pathkeys and dispersion stats for left and right sides of a RestrictInfo's clause, to avoid repeated computation. Total speedup will depend on number of tables in a query, but I see about 4x speedup of planning phase for a sample seven-table query.
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/allpaths.c6
-rw-r--r--src/backend/optimizer/path/indxpath.c176
-rw-r--r--src/backend/optimizer/path/joinpath.c182
-rw-r--r--src/backend/optimizer/path/pathkeys.c411
4 files changed, 490 insertions, 285 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 12fc576612f..4f7a0e570f5 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.67 2000/11/12 00:36:58 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.68 2000/12/14 22:30:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -162,9 +162,7 @@ set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
create_tidscan_paths(root, rel);
/* Consider index paths for both simple and OR index clauses */
- create_index_paths(root, rel, indices,
- rel->baserestrictinfo,
- rel->joininfo);
+ create_index_paths(root, rel, indices);
/*
* Note: create_or_index_paths depends on create_index_paths to
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 63e3a53af5a..28437e6c56d 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.99 2000/11/25 20:33:51 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.100 2000/12/14 22:30:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -87,11 +87,6 @@ static void indexable_joinclauses(RelOptInfo *rel, IndexOptInfo *index,
List **clausegroups, List **outerrelids);
static List *index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
List *clausegroup_list, List *outerrelids_list);
-static bool useful_for_mergejoin(RelOptInfo *rel, IndexOptInfo *index,
- List *joininfo_list);
-static bool useful_for_ordering(Query *root, RelOptInfo *rel,
- IndexOptInfo *index,
- ScanDirection scandir);
static bool match_index_to_operand(int indexkey, Var *operand,
RelOptInfo *rel, IndexOptInfo *index);
static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel,
@@ -125,31 +120,31 @@ static Const *string_to_const(const char *str, Oid datatype);
* attributes are available and fixed during any one scan of the indexpath.
*
* An IndexPath is generated and submitted to add_path() for each index
- * this routine deems potentially interesting for the current query
- * (at most one IndexPath per index on the given relation). An innerjoin
- * path is also generated for each interesting combination of outer join
- * relations. The innerjoin paths are *not* passed to add_path(), but are
- * appended to the "innerjoin" list of the relation for later consideration
- * in nested-loop joins.
+ * this routine deems potentially interesting for the current query.
+ * An innerjoin path is also generated for each interesting combination of
+ * outer join relations. The innerjoin paths are *not* passed to add_path(),
+ * but are appended to the "innerjoin" list of the relation for later
+ * consideration in nested-loop joins.
*
* 'rel' is the relation for which we want to generate index paths
* 'indices' is a list of available indexes for 'rel'
- * 'restrictinfo_list' is a list of restrictinfo nodes for 'rel'
- * 'joininfo_list' is a list of joininfo nodes for 'rel'
*/
void
create_index_paths(Query *root,
RelOptInfo *rel,
- List *indices,
- List *restrictinfo_list,
- List *joininfo_list)
+ List *indices)
{
+ List *restrictinfo_list = rel->baserestrictinfo;
+ List *joininfo_list = rel->joininfo;
List *ilist;
foreach(ilist, indices)
{
IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
List *restrictclauses;
+ List *index_pathkeys;
+ List *useful_pathkeys;
+ bool index_is_ordered;
List *joinclausegroups;
List *joinouterrelids;
@@ -179,9 +174,7 @@ create_index_paths(Query *root,
match_index_orclauses(rel, index, restrictinfo_list);
/*
- * 2. If the keys of this index match any of the available
- * non-'or' restriction clauses, then create a path using those
- * clauses as indexquals.
+ * 2. Match the index against non-'or' restriction clauses.
*/
restrictclauses = group_clauses_by_indexkey(rel,
index,
@@ -189,43 +182,50 @@ create_index_paths(Query *root,
index->classlist,
restrictinfo_list);
- if (restrictclauses != NIL)
- add_path(rel, (Path *) create_index_path(root, rel, index,
- restrictclauses,
- NoMovementScanDirection));
-
/*
- * 3. If this index can be used for a mergejoin, then create an
- * index path for it even if there were no restriction clauses.
- * (If there were, there is no need to make another index path.)
- * This will allow the index to be considered as a base for a
- * mergejoin in later processing. Similarly, if the index matches
- * the ordering that is needed for the overall query result, make
- * an index path for it even if there is no other reason to do so.
+ * 3. Compute pathkeys describing index's ordering, if any,
+ * then see how many of them are actually useful for this query.
*/
- if (restrictclauses == NIL)
- {
- if (useful_for_mergejoin(rel, index, joininfo_list) ||
- useful_for_ordering(root, rel, index, ForwardScanDirection))
- add_path(rel, (Path *)
- create_index_path(root, rel, index,
- restrictclauses,
- ForwardScanDirection));
- }
+ index_pathkeys = build_index_pathkeys(root, rel, index,
+ ForwardScanDirection);
+ index_is_ordered = (index_pathkeys != NIL);
+ useful_pathkeys = truncate_useless_pathkeys(root, rel,
+ index_pathkeys);
/*
- * Currently, backwards scan is never considered except for the
- * case of matching a query result ordering. Possibly should
- * consider it in other places?
+ * 4. Generate an indexscan path if there are relevant restriction
+ * clauses OR the index ordering is potentially useful for later
+ * merging or final output ordering.
*/
- if (useful_for_ordering(root, rel, index, BackwardScanDirection))
+ if (restrictclauses != NIL || useful_pathkeys != NIL)
add_path(rel, (Path *)
create_index_path(root, rel, index,
restrictclauses,
- BackwardScanDirection));
+ useful_pathkeys,
+ index_is_ordered ?
+ ForwardScanDirection :
+ NoMovementScanDirection));
+
+ /*
+ * 5. If the index is ordered, a backwards scan might be interesting.
+ * Currently this is only possible for a DESC query result ordering.
+ */
+ if (index_is_ordered)
+ {
+ index_pathkeys = build_index_pathkeys(root, rel, index,
+ BackwardScanDirection);
+ useful_pathkeys = truncate_useless_pathkeys(root, rel,
+ index_pathkeys);
+ if (useful_pathkeys != NIL)
+ add_path(rel, (Path *)
+ create_index_path(root, rel, index,
+ restrictclauses,
+ useful_pathkeys,
+ BackwardScanDirection));
+ }
/*
- * 4. Create an innerjoin index path for each combination of other
+ * 6. Create an innerjoin index path for each combination of other
* rels used in available join clauses. These paths will be
* considered as the inner side of nestloop joins against those
* sets of other rels. indexable_joinclauses() finds sets of
@@ -904,88 +904,6 @@ indexable_operator(Expr *clause, Oid opclass, Oid relam,
return InvalidOid;
}
-/*
- * useful_for_mergejoin
- * Determine whether the given index can support a mergejoin based
- * on any available join clause.
- *
- * We look to see whether the first indexkey of the index matches the
- * left or right sides of any of the mergejoinable clauses and provides
- * the ordering needed for that side. If so, the index is useful.
- * Matching a second or later indexkey is not useful unless there is
- * also a mergeclause for the first indexkey, so we need not consider
- * secondary indexkeys at this stage.
- *
- * 'rel' is the relation for which 'index' is defined
- * 'joininfo_list' is the list of JoinInfo nodes for 'rel'
- */
-static bool
-useful_for_mergejoin(RelOptInfo *rel,
- IndexOptInfo *index,
- List *joininfo_list)
-{
- int *indexkeys = index->indexkeys;
- Oid *ordering = index->ordering;
- List *i;
-
- if (!indexkeys || indexkeys[0] == 0 ||
- !ordering || ordering[0] == InvalidOid)
- return false; /* unordered index is not useful */
-
- foreach(i, joininfo_list)
- {
- JoinInfo *joininfo = (JoinInfo *) lfirst(i);
- List *j;
-
- foreach(j, joininfo->jinfo_restrictinfo)
- {
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
-
- if (restrictinfo->mergejoinoperator)
- {
- if (restrictinfo->left_sortop == ordering[0] &&
- match_index_to_operand(indexkeys[0],
- get_leftop(restrictinfo->clause),
- rel, index))
- return true;
- if (restrictinfo->right_sortop == ordering[0] &&
- match_index_to_operand(indexkeys[0],
- get_rightop(restrictinfo->clause),
- rel, index))
- return true;
- }
- }
- }
- return false;
-}
-
-/*
- * useful_for_ordering
- * Determine whether the given index can produce an ordering matching
- * the order that is wanted for the query result.
- *
- * 'rel' is the relation for which 'index' is defined
- * 'scandir' is the contemplated scan direction
- */
-static bool
-useful_for_ordering(Query *root,
- RelOptInfo *rel,
- IndexOptInfo *index,
- ScanDirection scandir)
-{
- List *index_pathkeys;
-
- if (root->query_pathkeys == NIL)
- return false; /* no special ordering requested */
-
- index_pathkeys = build_index_pathkeys(root, rel, index, scandir);
-
- if (index_pathkeys == NIL)
- return false; /* unordered index */
-
- return pathkeys_contained_in(root->query_pathkeys, index_pathkeys);
-}
-
/****************************************************************************
* ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ----
****************************************************************************/
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 6096f8c3f26..81a5431db97 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.59 2000/11/23 03:57:31 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.60 2000/12/14 22:30:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -152,6 +152,7 @@ sort_inner_and_outer(Query *root,
List *mergeclause_list,
JoinType jointype)
{
+ List *all_pathkeys;
List *i;
/*
@@ -159,36 +160,57 @@ sort_inner_and_outer(Query *root,
* generate a differently-sorted result path at essentially the same
* cost. We have no basis for choosing one over another at this level
* of joining, but some sort orders may be more useful than others for
- * higher-level mergejoins. Generating a path here for *every*
- * permutation of mergejoin clauses doesn't seem like a winning
- * strategy, however; the cost in planning time is too high.
+ * higher-level mergejoins, so it's worth considering multiple orderings.
*
- * For now, we generate one path for each mergejoin clause, listing that
- * clause first and the rest in random order. This should allow at
+ * Actually, it's not quite true that every mergeclause ordering will
+ * generate a different path order, because some of the clauses may be
+ * redundant. Therefore, what we do is convert the mergeclause list to
+ * a list of canonical pathkeys, and then consider different orderings
+ * of the pathkeys.
+ *
+ * Generating a path for *every* permutation of the pathkeys doesn't
+ * seem like a winning strategy; the cost in planning time is too high.
+ * For now, we generate one path for each pathkey, listing that pathkey
+ * first and the rest in random order. This should allow at
* least a one-clause mergejoin without re-sorting against any other
* possible mergejoin partner path. But if we've not guessed the
- * right ordering of secondary clauses, we may end up evaluating
+ * right ordering of secondary keys, we may end up evaluating
* clauses as qpquals when they could have been done as mergeclauses.
* We need to figure out a better way. (Two possible approaches: look
* at all the relevant index relations to suggest plausible sort
* orders, or make just one output path and somehow mark it as having
* a sort-order that can be rearranged freely.)
*/
- foreach(i, mergeclause_list)
+ all_pathkeys = make_pathkeys_for_mergeclauses(root,
+ mergeclause_list,
+ outerrel);
+
+ foreach(i, all_pathkeys)
{
- RestrictInfo *restrictinfo = lfirst(i);
- List *curclause_list;
+ List *front_pathkey = lfirst(i);
+ List *cur_pathkeys;
+ List *cur_mergeclauses;
List *outerkeys;
List *innerkeys;
List *merge_pathkeys;
- /* Make a mergeclause list with this guy first. */
- if (i != mergeclause_list)
- curclause_list = lcons(restrictinfo,
- lremove(restrictinfo,
- listCopy(mergeclause_list)));
+ /* Make a pathkey list with this guy first. */
+ if (i != all_pathkeys)
+ cur_pathkeys = lcons(front_pathkey,
+ lremove(front_pathkey,
+ listCopy(all_pathkeys)));
else
- curclause_list = mergeclause_list; /* no work at first one... */
+ cur_pathkeys = all_pathkeys; /* no work at first one... */
+
+ /*
+ * Select mergeclause(s) that match this sort ordering. If we had
+ * redundant merge clauses then we will get a subset of the original
+ * clause list. There had better be some match, however...
+ */
+ cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
+ cur_pathkeys,
+ mergeclause_list);
+ Assert(cur_mergeclauses != NIL);
/*
* Build sort pathkeys for both sides.
@@ -198,15 +220,13 @@ sort_inner_and_outer(Query *root,
* suppress an explicit sort step, so we needn't do so here.
*/
outerkeys = make_pathkeys_for_mergeclauses(root,
- curclause_list,
+ cur_mergeclauses,
outerrel);
innerkeys = make_pathkeys_for_mergeclauses(root,
- curclause_list,
+ cur_mergeclauses,
innerrel);
/* Build pathkeys representing output sort order. */
- merge_pathkeys = build_join_pathkeys(outerkeys,
- joinrel->targetlist,
- root->equi_key_list);
+ merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys);
/*
* And now we can make the path. We only consider the cheapest-
@@ -221,7 +241,7 @@ sort_inner_and_outer(Query *root,
innerrel->cheapest_total_path,
restrictlist,
merge_pathkeys,
- curclause_list,
+ cur_mergeclauses,
outerkeys,
innerkeys));
}
@@ -301,17 +321,16 @@ match_unsorted_outer(Query *root,
List *trialsortkeys;
Path *cheapest_startup_inner;
Path *cheapest_total_inner;
- int num_mergeclauses;
- int clausecnt;
+ int num_sortkeys;
+ int sortkeycnt;
/*
* The result will have this sort order (even if it is implemented
* as a nestloop, and even if some of the mergeclauses are
* implemented by qpquals rather than as true mergeclauses):
*/
- merge_pathkeys = build_join_pathkeys(outerpath->pathkeys,
- joinrel->targetlist,
- root->equi_key_list);
+ merge_pathkeys = build_join_pathkeys(root, joinrel,
+ outerpath->pathkeys);
if (nestjoinOK)
{
@@ -347,7 +366,8 @@ match_unsorted_outer(Query *root,
}
/* Look for useful mergeclauses (if any) */
- mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys,
+ mergeclauses = find_mergeclauses_for_pathkeys(root,
+ outerpath->pathkeys,
mergeclause_list);
/* Done with this outer path if no chance for a mergejoin */
@@ -362,7 +382,8 @@ match_unsorted_outer(Query *root,
/*
* Generate a mergejoin on the basis of sorting the cheapest
* inner. Since a sort will be needed, only cheapest total cost
- * matters.
+ * matters. (But create_mergejoin_path will do the right thing
+ * if innerrel->cheapest_total_path is already correctly sorted.)
*/
add_path(joinrel, (Path *)
create_mergejoin_path(joinrel,
@@ -376,38 +397,49 @@ match_unsorted_outer(Query *root,
innersortkeys));
/*
- * Look for presorted inner paths that satisfy the mergeclause
+ * Look for presorted inner paths that satisfy the innersortkey
* list or any truncation thereof. Here, we consider both cheap
- * startup cost and cheap total cost.
+ * startup cost and cheap total cost. Ignore
+ * innerrel->cheapest_total_path, since we already made a path with it.
*/
- trialsortkeys = listCopy(innersortkeys); /* modifiable copy */
+ num_sortkeys = length(innersortkeys);
+ if (num_sortkeys > 1)
+ trialsortkeys = listCopy(innersortkeys); /* need modifiable copy */
+ else
+ trialsortkeys = innersortkeys; /* won't really truncate */
cheapest_startup_inner = NULL;
cheapest_total_inner = NULL;
- num_mergeclauses = length(mergeclauses);
- for (clausecnt = num_mergeclauses; clausecnt > 0; clausecnt--)
+ for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
{
Path *innerpath;
List *newclauses = NIL;
/*
- * Look for an inner path ordered well enough to merge with
- * the first 'clausecnt' mergeclauses. NB: trialsortkeys list
+ * Look for an inner path ordered well enough for the first
+ * 'sortkeycnt' innersortkeys. NB: trialsortkeys list
* is modified destructively, which is why we made a copy...
*/
- trialsortkeys = ltruncate(clausecnt, trialsortkeys);
+ trialsortkeys = ltruncate(sortkeycnt, trialsortkeys);
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
TOTAL_COST);
if (innerpath != NULL &&
+ innerpath != innerrel->cheapest_total_path &&
(cheapest_total_inner == NULL ||
compare_path_costs(innerpath, cheapest_total_inner,
TOTAL_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
- if (clausecnt < num_mergeclauses)
- newclauses = ltruncate(clausecnt,
- listCopy(mergeclauses));
+ /* Select the right mergeclauses, if we didn't already */
+ if (sortkeycnt < num_sortkeys)
+ {
+ newclauses =
+ find_mergeclauses_for_pathkeys(root,
+ trialsortkeys,
+ mergeclauses);
+ Assert(newclauses != NIL);
+ }
else
newclauses = mergeclauses;
add_path(joinrel, (Path *)
@@ -427,6 +459,7 @@ match_unsorted_outer(Query *root,
trialsortkeys,
STARTUP_COST);
if (innerpath != NULL &&
+ innerpath != innerrel->cheapest_total_path &&
(cheapest_startup_inner == NULL ||
compare_path_costs(innerpath, cheapest_startup_inner,
STARTUP_COST) < 0))
@@ -441,9 +474,14 @@ match_unsorted_outer(Query *root,
*/
if (newclauses == NIL)
{
- if (clausecnt < num_mergeclauses)
- newclauses = ltruncate(clausecnt,
- listCopy(mergeclauses));
+ if (sortkeycnt < num_sortkeys)
+ {
+ newclauses =
+ find_mergeclauses_for_pathkeys(root,
+ trialsortkeys,
+ mergeclauses);
+ Assert(newclauses != NIL);
+ }
else
newclauses = mergeclauses;
}
@@ -501,7 +539,8 @@ match_unsorted_inner(Query *root,
Path *startupouterpath;
/* Look for useful mergeclauses (if any) */
- mergeclauses = find_mergeclauses_for_pathkeys(innerpath->pathkeys,
+ mergeclauses = find_mergeclauses_for_pathkeys(root,
+ innerpath->pathkeys,
mergeclause_list);
if (mergeclauses == NIL)
continue;
@@ -516,9 +555,7 @@ match_unsorted_inner(Query *root,
* outer. Since a sort will be needed, only cheapest total cost
* matters.
*/
- merge_pathkeys = build_join_pathkeys(outersortkeys,
- joinrel->targetlist,
- root->equi_key_list);
+ merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys);
add_path(joinrel, (Path *)
create_mergejoin_path(joinrel,
jointype,
@@ -545,9 +582,8 @@ match_unsorted_inner(Query *root,
continue; /* there won't be a startup-cost path
* either */
- merge_pathkeys = build_join_pathkeys(totalouterpath->pathkeys,
- joinrel->targetlist,
- root->equi_key_list);
+ merge_pathkeys = build_join_pathkeys(root, joinrel,
+ totalouterpath->pathkeys);
add_path(joinrel, (Path *)
create_mergejoin_path(joinrel,
jointype,
@@ -564,9 +600,8 @@ match_unsorted_inner(Query *root,
STARTUP_COST);
if (startupouterpath != NULL && startupouterpath != totalouterpath)
{
- merge_pathkeys = build_join_pathkeys(startupouterpath->pathkeys,
- joinrel->targetlist,
- root->equi_key_list);
+ merge_pathkeys = build_join_pathkeys(root, joinrel,
+ startupouterpath->pathkeys);
add_path(joinrel, (Path *)
create_mergejoin_path(joinrel,
jointype,
@@ -637,10 +672,9 @@ hash_inner_and_outer(Query *root,
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
Expr *clause;
Var *left,
- *right,
- *inner;
- List *hashclauses;
+ *right;
Selectivity innerdispersion;
+ List *hashclauses;
if (restrictinfo->hashjoinoperator == InvalidOid)
continue; /* not hashjoinable */
@@ -657,26 +691,48 @@ hash_inner_and_outer(Query *root,
left = get_leftop(clause);
right = get_rightop(clause);
- /* check if clause is usable with these sub-rels, find inner var */
+ /*
+ * Check if clause is usable with these sub-rels, find inner side,
+ * estimate dispersion of inner var for costing purposes.
+ *
+ * Since we tend to visit the same clauses over and over when
+ * planning a large query, we cache the dispersion estimates in the
+ * RestrictInfo node to avoid repeated lookups of statistics.
+ */
if (intMember(left->varno, outerrelids) &&
intMember(right->varno, innerrelids))
- inner = right;
+ {
+ /* righthand side is inner */
+ innerdispersion = restrictinfo->right_dispersion;
+ if (innerdispersion < 0)
+ {
+ /* not cached yet */
+ innerdispersion = estimate_dispersion(root, right);
+ restrictinfo->right_dispersion = innerdispersion;
+ }
+ }
else if (intMember(left->varno, innerrelids) &&
intMember(right->varno, outerrelids))
- inner = left;
+ {
+ /* lefthand side is inner */
+ innerdispersion = restrictinfo->left_dispersion;
+ if (innerdispersion < 0)
+ {
+ /* not cached yet */
+ innerdispersion = estimate_dispersion(root, left);
+ restrictinfo->left_dispersion = innerdispersion;
+ }
+ }
else
continue; /* no good for these input relations */
/* always a one-element list of hash clauses */
hashclauses = makeList1(restrictinfo);
- /* estimate dispersion of inner var for costing purposes */
- innerdispersion = estimate_dispersion(root, inner);
-
/*
* We consider both the cheapest-total-cost and
* cheapest-startup-cost outer paths. There's no need to consider
- * any but the cheapest- total-cost inner path, however.
+ * any but the cheapest-total-cost inner path, however.
*/
add_path(joinrel, (Path *)
create_hashjoin_path(joinrel,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index f94d2e4037b..9c4e537d557 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.27 2000/11/12 00:36:58 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.28 2000/12/14 22:30:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -195,9 +195,8 @@ generate_implied_equalities(Query *root)
* Given a PathKeyItem, find the equi_key_list subset it is a member of,
* if any. If so, return a pointer to that sublist, which is the
* canonical representation (for this query) of that PathKeyItem's
- * equivalence set. If it is not found, return a single-element list
- * containing the PathKeyItem (when the item has no equivalence peers,
- * we just allow it to be a standalone list).
+ * equivalence set. If it is not found, add a singleton "equivalence set"
+ * to the equi_key_list and return that --- see compare_pathkeys.
*
* Note that this function must not be used until after we have completed
* scanning the WHERE clause for equijoin operators.
@@ -206,6 +205,7 @@ static List *
make_canonical_pathkey(Query *root, PathKeyItem *item)
{
List *cursetlink;
+ List *newset;
foreach(cursetlink, root->equi_key_list)
{
@@ -214,7 +214,9 @@ make_canonical_pathkey(Query *root, PathKeyItem *item)
if (member(item, curset))
return curset;
}
- return lcons(item, NIL);
+ newset = makeList1(item);
+ root->equi_key_list = lcons(newset, root->equi_key_list);
+ return newset;
}
/*
@@ -234,6 +236,7 @@ canonicalize_pathkeys(Query *root, List *pathkeys)
{
List *pathkey = (List *) lfirst(i);
PathKeyItem *item;
+ List *cpathkey;
/*
* It's sufficient to look at the first entry in the sublist; if
@@ -242,8 +245,15 @@ canonicalize_pathkeys(Query *root, List *pathkeys)
*/
Assert(pathkey != NIL);
item = (PathKeyItem *) lfirst(pathkey);
- new_pathkeys = lappend(new_pathkeys,
- make_canonical_pathkey(root, item));
+ cpathkey = make_canonical_pathkey(root, item);
+ /*
+ * Eliminate redundant ordering requests --- ORDER BY A,A
+ * is the same as ORDER BY A. We want to check this only
+ * after we have canonicalized the keys, so that equivalent-key
+ * knowledge is used when deciding if an item is redundant.
+ */
+ if (!ptrMember(cpathkey, new_pathkeys))
+ new_pathkeys = lappend(new_pathkeys, cpathkey);
}
return new_pathkeys;
}
@@ -257,19 +267,9 @@ canonicalize_pathkeys(Query *root, List *pathkeys)
* Compare two pathkeys to see if they are equivalent, and if not whether
* one is "better" than the other.
*
- * A pathkey can be considered better than another if it is a superset:
- * it contains all the keys of the other plus more. For example, either
- * ((A) (B)) or ((A B)) is better than ((A)).
- *
- * Because we actually only expect to see canonicalized pathkey sublists,
- * we don't have to do the full two-way-subset-inclusion test on each
- * pair of sublists that is implied by the above statement. Instead we
- * just do an equal(). In the normal case where multi-element sublists
- * are pointers into the root's equi_key_list, equal() will be very fast:
- * it will recognize pointer equality when the sublists are the same,
- * and will fail at the first sublist element when they are not.
- *
- * Yes, this gets called enough to be worth coding it this tensely.
+ * This function may only be applied to canonicalized pathkey lists.
+ * In the canonical representation, sublists can be checked for equality
+ * by simple pointer comparison.
*/
PathKeysComparison
compare_pathkeys(List *keys1, List *keys2)
@@ -285,10 +285,70 @@ compare_pathkeys(List *keys1, List *keys2)
List *subkey2 = lfirst(key2);
/*
+ * XXX would like to check that we've been given canonicalized input,
+ * but query root not accessible here...
+ */
+#ifdef NOT_USED
+ Assert(ptrMember(subkey1, root->equi_key_list));
+ Assert(ptrMember(subkey2, root->equi_key_list));
+#endif
+
+ /*
* We will never have two subkeys where one is a subset of the
- * other, because of the canonicalization explained above. Either
- * they are equal or they ain't.
+ * other, because of the canonicalization process. Either they
+ * are equal or they ain't. Furthermore, we only need pointer
+ * comparison to detect equality.
*/
+ if (subkey1 != subkey2)
+ return PATHKEYS_DIFFERENT; /* no need to keep looking */
+ }
+
+ /*
+ * If we reached the end of only one list, the other is longer and
+ * therefore not a subset. (We assume the additional sublist(s) of
+ * the other list are not NIL --- no pathkey list should ever have a
+ * NIL sublist.)
+ */
+ if (key1 == NIL && key2 == NIL)
+ return PATHKEYS_EQUAL;
+ if (key1 != NIL)
+ return PATHKEYS_BETTER1;/* key1 is longer */
+ return PATHKEYS_BETTER2; /* key2 is longer */
+}
+
+/*
+ * compare_noncanonical_pathkeys
+ * Compare two pathkeys to see if they are equivalent, and if not whether
+ * one is "better" than the other. This is used when we must compare
+ * non-canonicalized pathkeys.
+ *
+ * A pathkey can be considered better than another if it is a superset:
+ * it contains all the keys of the other plus more. For example, either
+ * ((A) (B)) or ((A B)) is better than ((A)).
+ *
+ * Currently, the only user of this routine is grouping_planner(),
+ * and it will only pass single-element sublists (from
+ * make_pathkeys_for_sortclauses). Therefore we don't have to do the
+ * full two-way-subset-inclusion test on each pair of sublists that is
+ * implied by the above statement. Instead we just verify they are
+ * singleton lists and then do an equal(). This could be improved if
+ * necessary.
+ */
+PathKeysComparison
+compare_noncanonical_pathkeys(List *keys1, List *keys2)
+{
+ List *key1,
+ *key2;
+
+ for (key1 = keys1, key2 = keys2;
+ key1 != NIL && key2 != NIL;
+ key1 = lnext(key1), key2 = lnext(key2))
+ {
+ List *subkey1 = lfirst(key1);
+ List *subkey2 = lfirst(key2);
+
+ Assert(length(subkey1) == 1);
+ Assert(length(subkey2) == 1);
if (!equal(subkey1, subkey2))
return PATHKEYS_DIFFERENT; /* no need to keep looking */
}
@@ -326,6 +386,24 @@ pathkeys_contained_in(List *keys1, List *keys2)
}
/*
+ * noncanonical_pathkeys_contained_in
+ * The same, when we don't have canonical pathkeys.
+ */
+bool
+noncanonical_pathkeys_contained_in(List *keys1, List *keys2)
+{
+ switch (compare_noncanonical_pathkeys(keys1, keys2))
+ {
+ case PATHKEYS_EQUAL:
+ case PATHKEYS_BETTER2:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/*
* get_cheapest_path_for_pathkeys
* Find the cheapest path (according to the specified criterion) that
* satisfies the given pathkeys. Return NULL if no such path.
@@ -464,6 +542,7 @@ build_index_pathkeys(Query *root,
while (*indexkeys != 0 && *ordering != InvalidOid)
{
Var *relvar = find_indexkey_var(root, rel, *indexkeys);
+ List *cpathkey;
sortop = *ordering;
if (ScanDirectionIsBackward(scandir))
@@ -475,8 +554,13 @@ build_index_pathkeys(Query *root,
/* OK, make a sublist for this sort key */
item = makePathKeyItem((Node *) relvar, sortop);
- retval = lappend(retval, make_canonical_pathkey(root, item));
-
+ cpathkey = make_canonical_pathkey(root, item);
+ /*
+ * Eliminate redundant ordering info; could happen if query
+ * is such that index keys are equijoined...
+ */
+ if (!ptrMember(cpathkey, retval))
+ retval = lappend(retval, cpathkey);
indexkeys++;
ordering++;
}
@@ -526,21 +610,20 @@ find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno)
* outer path (since the join will retain the ordering of the outer path)
* plus any vars of the inner path that are equijoined to the outer vars.
*
- * Per the discussion at the top of this file, equijoined inner vars
+ * Per the discussion in backend/optimizer/README, equijoined inner vars
* can be considered path keys of the result, just the same as the outer
* vars they were joined with; furthermore, it doesn't matter what kind
* of join algorithm is actually used.
*
- * 'outer_pathkeys' is the list of the outer path's path keys
- * 'join_rel_tlist' is the target list of the join relation
- * 'equi_key_list' is the query's list of pathkeyitem equivalence sets
+ * 'joinrel' is the join relation that paths are being formed for
+ * 'outer_pathkeys' is the list of the current outer path's path keys
*
* Returns the list of new path keys.
*/
List *
-build_join_pathkeys(List *outer_pathkeys,
- List *join_rel_tlist,
- List *equi_key_list)
+build_join_pathkeys(Query *root,
+ RelOptInfo *joinrel,
+ List *outer_pathkeys)
{
/*
@@ -549,9 +632,11 @@ build_join_pathkeys(List *outer_pathkeys,
* a darn thing here! The inner-rel vars we used to need to add are
* *already* part of the outer pathkey!
*
- * I'd remove the routine entirely, but maybe someday we'll need it...
+ * We do, however, need to truncate the pathkeys list, since it may
+ * contain pathkeys that were useful for forming this joinrel but are
+ * uninteresting to higher levels.
*/
- return outer_pathkeys;
+ return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
}
/****************************************************************************
@@ -603,6 +688,39 @@ make_pathkeys_for_sortclauses(List *sortclauses,
****************************************************************************/
/*
+ * cache_mergeclause_pathkeys
+ * Make the cached pathkeys valid in a mergeclause restrictinfo.
+ *
+ * RestrictInfo contains fields in which we may cache the result
+ * of looking up the canonical pathkeys for the left and right sides
+ * of the mergeclause. (Note that in normal cases they will be the
+ * same, but not if the mergeclause appears above an OUTER JOIN.)
+ * This is a worthwhile savings because these routines will be invoked
+ * many times when dealing with a many-relation query.
+ */
+static void
+cache_mergeclause_pathkeys(Query *root, RestrictInfo *restrictinfo)
+{
+ Node *key;
+ PathKeyItem *item;
+
+ Assert(restrictinfo->mergejoinoperator != InvalidOid);
+
+ if (restrictinfo->left_pathkey == NIL)
+ {
+ key = (Node *) get_leftop(restrictinfo->clause);
+ item = makePathKeyItem(key, restrictinfo->left_sortop);
+ restrictinfo->left_pathkey = make_canonical_pathkey(root, item);
+ }
+ if (restrictinfo->right_pathkey == NIL)
+ {
+ key = (Node *) get_rightop(restrictinfo->clause);
+ item = makePathKeyItem(key, restrictinfo->right_sortop);
+ restrictinfo->right_pathkey = make_canonical_pathkey(root, item);
+ }
+}
+
+/*
* find_mergeclauses_for_pathkeys
* This routine attempts to find a set of mergeclauses that can be
* used with a specified ordering for one of the input relations.
@@ -618,11 +736,13 @@ make_pathkeys_for_sortclauses(List *sortclauses,
*
* XXX Ideally we ought to be considering context, ie what path orderings
* are available on the other side of the join, rather than just making
- * an arbitrary choice among the mergeclause orders that will work for
- * this side of the join.
+ * an arbitrary choice among the mergeclauses that will work for this side
+ * of the join.
*/
List *
-find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
+find_mergeclauses_for_pathkeys(Query *root,
+ List *pathkeys,
+ List *restrictinfos)
{
List *mergeclauses = NIL;
List *i;
@@ -634,38 +754,28 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
List *j;
/*
- * We can match any of the keys in this pathkey sublist, since
- * they're all equivalent. And we can match against either left
- * or right side of any mergejoin clause we haven't used yet. For
- * the moment we use a dumb "greedy" algorithm with no
- * backtracking. Is it worth being any smarter to make a longer
- * list of usable mergeclauses? Probably not.
+ * We can match a pathkey against either left or right side of any
+ * mergejoin clause we haven't used yet. For the moment we use a
+ * dumb "greedy" algorithm with no backtracking. Is it worth being
+ * any smarter to make a longer list of usable mergeclauses?
+ * Probably not.
*/
- foreach(j, pathkey)
+ foreach(j, restrictinfos)
{
- PathKeyItem *keyitem = lfirst(j);
- Node *key = keyitem->key;
- Oid keyop = keyitem->sortop;
- List *k;
+ RestrictInfo *restrictinfo = lfirst(j);
- foreach(k, restrictinfos)
+ cache_mergeclause_pathkeys(root, restrictinfo);
+ /*
+ * We can compare canonical pathkey sublists by simple
+ * pointer equality; see compare_pathkeys.
+ */
+ if ((pathkey == restrictinfo->left_pathkey ||
+ pathkey == restrictinfo->right_pathkey) &&
+ !ptrMember(restrictinfo, mergeclauses))
{
- RestrictInfo *restrictinfo = lfirst(k);
-
- Assert(restrictinfo->mergejoinoperator != InvalidOid);
-
- if (((keyop == restrictinfo->left_sortop &&
- equal(key, get_leftop(restrictinfo->clause))) ||
- (keyop == restrictinfo->right_sortop &&
- equal(key, get_rightop(restrictinfo->clause)))) &&
- !member(restrictinfo, mergeclauses))
- {
- matched_restrictinfo = restrictinfo;
- break;
- }
- }
- if (matched_restrictinfo)
+ matched_restrictinfo = restrictinfo;
break;
+ }
}
/*
@@ -715,47 +825,170 @@ make_pathkeys_for_mergeclauses(Query *root,
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
Node *key;
- Oid sortop;
- PathKeyItem *item;
List *pathkey;
- Assert(restrictinfo->mergejoinoperator != InvalidOid);
+ cache_mergeclause_pathkeys(root, restrictinfo);
- /*
- * Which key and sortop is needed for this relation?
- */
key = (Node *) get_leftop(restrictinfo->clause);
- sortop = restrictinfo->left_sortop;
- if (!IsA(key, Var) ||
- !intMember(((Var *) key)->varno, rel->relids))
+ if (IsA(key, Var) && intMember(((Var *) key)->varno, rel->relids))
+ {
+ /* Rel is left side of mergeclause */
+ pathkey = restrictinfo->left_pathkey;
+ }
+ else
{
key = (Node *) get_rightop(restrictinfo->clause);
- sortop = restrictinfo->right_sortop;
- if (!IsA(key, Var) ||
- !intMember(((Var *) key)->varno, rel->relids))
+ if (IsA(key, Var) && intMember(((Var *) key)->varno, rel->relids))
+ {
+ /* Rel is right side of mergeclause */
+ pathkey = restrictinfo->right_pathkey;
+ }
+ else
+ {
elog(ERROR, "make_pathkeys_for_mergeclauses: can't identify which side of mergeclause to use");
+ pathkey = NIL; /* keep compiler quiet */
+ }
}
/*
- * Find or create canonical pathkey sublist for this sort item.
+ * When we are given multiple merge clauses, it's possible that some
+ * clauses refer to the same vars as earlier clauses. There's no
+ * reason for us to specify sort keys like (A,B,A) when (A,B) will
+ * do --- and adding redundant sort keys makes add_path think that
+ * this sort order is different from ones that are really the same,
+ * so don't do it. Since we now have a canonicalized pathkey,
+ * a simple ptrMember test is sufficient to detect redundant keys.
*/
- item = makePathKeyItem(key, sortop);
- pathkey = make_canonical_pathkey(root, item);
+ if (!ptrMember(pathkey, pathkeys))
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+
+ return pathkeys;
+}
+
+/****************************************************************************
+ * PATHKEY USEFULNESS CHECKS
+ *
+ * We only want to remember as many of the pathkeys of a path as have some
+ * potential use, either for subsequent mergejoins or for meeting the query's
+ * requested output ordering. This ensures that add_path() won't consider
+ * a path to have a usefully different ordering unless it really is useful.
+ * These routines check for usefulness of given pathkeys.
+ ****************************************************************************/
+
+/*
+ * pathkeys_useful_for_merging
+ * Count the number of pathkeys that may be useful for mergejoins
+ * above the given relation (by looking at its joininfo lists).
+ *
+ * We consider a pathkey potentially useful if it corresponds to the merge
+ * ordering of either side of any joinclause for the rel. This might be
+ * overoptimistic, since joinclauses that appear in different join lists
+ * might never be usable at the same time, but trying to be exact is likely
+ * to be more trouble than it's worth.
+ */
+int
+pathkeys_useful_for_merging(Query *root, RelOptInfo *rel, List *pathkeys)
+{
+ int useful = 0;
+ List *i;
+
+ foreach(i, pathkeys)
+ {
+ List *pathkey = lfirst(i);
+ bool matched = false;
+ List *j;
+
+ foreach(j, rel->joininfo)
+ {
+ JoinInfo *joininfo = (JoinInfo *) lfirst(j);
+ List *k;
+
+ foreach(k, joininfo->jinfo_restrictinfo)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(k);
+
+ if (restrictinfo->mergejoinoperator == InvalidOid)
+ continue;
+ cache_mergeclause_pathkeys(root, restrictinfo);
+ /*
+ * We can compare canonical pathkey sublists by simple
+ * pointer equality; see compare_pathkeys.
+ */
+ if (pathkey == restrictinfo->left_pathkey ||
+ pathkey == restrictinfo->right_pathkey)
+ {
+ matched = true;
+ break;
+ }
+ }
+
+ if (matched)
+ break;
+ }
/*
- * Most of the time we will get back a canonical pathkey set
- * including both the mergeclause's left and right sides (the only
- * case where we don't is if the mergeclause appeared in an OUTER
- * JOIN, which causes us not to generate an equijoin set from it).
- * Therefore, most of the time the item we just made is not part
- * of the returned structure, and we can free it. This check
- * saves a useful amount of storage in a big join tree.
+ * If we didn't find a mergeclause, we're done --- any additional
+ * sort-key positions in the pathkeys are useless. (But we can
+ * still mergejoin if we found at least one mergeclause.)
*/
- if (item != (PathKeyItem *) lfirst(pathkey))
- pfree(item);
+ if (matched)
+ useful++;
+ else
+ break;
+ }
- pathkeys = lappend(pathkeys, pathkey);
+ return useful;
+}
+
+/*
+ * pathkeys_useful_for_ordering
+ * Count the number of pathkeys that are useful for meeting the
+ * query's requested output ordering.
+ *
+ * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
+ * no good to order by just the first key(s) of the requested ordering.
+ * So the result is always either 0 or length(root->query_pathkeys).
+ */
+int
+pathkeys_useful_for_ordering(Query *root, List *pathkeys)
+{
+ if (root->query_pathkeys == NIL)
+ return 0; /* no special ordering requested */
+
+ if (pathkeys == NIL)
+ return 0; /* unordered path */
+
+ if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
+ {
+ /* It's useful ... or at least the first N keys are */
+ return length(root->query_pathkeys);
}
- return pathkeys;
+ return 0; /* path ordering not useful */
+}
+
+/*
+ * truncate_useless_pathkeys
+ * Shorten the given pathkey list to just the useful pathkeys.
+ */
+List *
+truncate_useless_pathkeys(Query *root,
+ RelOptInfo *rel,
+ List *pathkeys)
+{
+ int nuseful;
+ int nuseful2;
+
+ nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
+ nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+ if (nuseful2 > nuseful)
+ nuseful = nuseful2;
+ /* Note: not safe to modify input list destructively, but we can avoid
+ * copying the list if we're not actually going to change it
+ */
+ if (nuseful == length(pathkeys))
+ return pathkeys;
+ else
+ return ltruncate(nuseful, listCopy(pathkeys));
}