aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/optimizer/path/pathkeys.c149
1 files changed, 53 insertions, 96 deletions
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 1cde4a686cf..09a5b8a51f1 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.8 1999/04/30 03:59:06 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.9 1999/05/17 00:26:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -29,30 +29,34 @@ static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
int outer_or_inner);
static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
List *joinclauses);
-static List *get_joinvars_for_var(Var *pathkey, List **considered_pathkeys,
- List *join_rel_tlist, List *joinclauses);
-/*
+/*--------------------
* Explanation of Path.pathkeys
*
* Path.pathkeys is a List of List of Var nodes that represent the sort
* order of the result generated by the Path.
*
* In single/base relation RelOptInfo's, the Path's represent various ways
- * of generate the relation. Sequential scan Paths have a NIL pathkeys.
- * Index scans have Path.pathkeys that represent the chosen index. A
- * single-key index pathkeys would be { {tab1_indexkey1} }. The pathkeys
- * entry for a multi-key index would be { {tab1_indexkey1}, {tab1_indexkey2} }.
+ * of generating the relation and the resulting ordering of the tuples.
+ * Sequential scan Paths have NIL pathkeys, indicating no known ordering.
+ * Index scans have Path.pathkeys that represent the chosen index.
+ * A single-key index pathkeys would be { {tab1_indexkey1} }. For a
+ * multi-key index pathkeys would be { {tab1_indexkey1}, {tab1_indexkey2} },
+ * indicating major sort by indexkey1 and minor sort by indexkey2.
*
* Multi-relation RelOptInfo Path's are more complicated. Mergejoins are
- * only performed with equajoins("="). Because of this, the multi-relation
+ * only performed with equijoins ("="). Because of this, the multi-relation
* path actually has more than one primary Var key. For example, a
* mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of
- * { {tab1.col1, tab2.col1} }. This allows future joins to use either Var
- * as a pre-sorted key to prevent Mergejoins from having to re-sort the Path.
- * They are equal, so they are both primary sort keys. This is why pathkeys
- * is a List of Lists.
+ * { {tab1.col1, tab2.col1} }, indicating that the major sort order of the
+ * Path can be taken to be *either* tab1.col1 or tab2.col1.
+ * They are equal, so they are both primary sort keys. This allows future
+ * joins to use either Var as a pre-sorted key to prevent upper Mergejoins
+ * from having to re-sort the Path. This is why pathkeys is a List of Lists.
+ *
+ * Note that while the order of the top list is meaningful (primary vs.
+ * secondary sort key), the order of each sublist is arbitrary.
*
* For multi-key sorts, if the outer is sorted by a multi-key index, the
* multi-key index remains after the join. If the inner has a multi-key
@@ -60,11 +64,16 @@ static List *get_joinvars_for_var(Var *pathkey, List **considered_pathkeys,
* Mergejoins only join on the primary key. Currently, non-primary keys
* in the pathkeys List are of limited value.
*
- * HashJoin has similar functionality. NestJoin does not perform sorting,
- * and allows non-equajoins, so it does not allow useful pathkeys.
+ * Although Hashjoins also work only with equijoin operators, it is *not*
+ * safe to consider the output of a Hashjoin to be sorted in any particular
+ * order --- not even the outer path's order. This is true because the
+ * executor might have to split the join into multiple batches.
+ *
+ * NestJoin does not perform sorting, and allows non-equijoins, so it does
+ * not allow useful pathkeys. (But couldn't we use the outer path's order?)
*
* -- bjm
- *
+ *--------------------
*/
/****************************************************************************
@@ -228,7 +237,7 @@ get_cheapest_path_for_joinkeys(List *joinkeys,
int outer_or_inner)
{
Path *matched_path = NULL;
- List *i = NIL;
+ List *i;
foreach(i, paths)
{
@@ -337,16 +346,16 @@ new_join_pathkeys(List *outer_pathkeys,
List *join_rel_tlist,
List *joinclauses)
{
- List *outer_pathkey = NIL;
List *final_pathkeys = NIL;
- List *new_pathkey;
- List *i = NIL;
+ List *i;
foreach(i, outer_pathkeys)
{
- outer_pathkey = lfirst(i);
+ List *outer_pathkey = lfirst(i);
+ List *new_pathkey;
+
new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist,
- joinclauses);
+ joinclauses);
if (new_pathkey != NIL)
final_pathkeys = lappend(final_pathkeys, new_pathkey);
}
@@ -355,19 +364,18 @@ new_join_pathkeys(List *outer_pathkeys,
/*
* new_join_pathkey
- * Finds new vars that become pathkeys due to qualification clauses that
- * contain any previously considered pathkeys. These new pathkeys plus the
- * pathkeys from 'pathkeys' form a new pathkey for the join relation.
+ * Generate an individual pathkey sublist, consisting of the outer vars
+ * already mentioned in 'pathkey' plus any inner vars that are joined to
+ * them (and thus can now also be considered path keys, per discussion
+ * at the top of this file).
*
* Note that each returned pathkey is the var node found in
* 'join_rel_tlist' rather than the joinclause var node.
+ * (Is this important?) Also, we return a fully copied list
+ * that does not share any subnodes with existing data structures.
+ * (Is that important, either?)
*
- * 'pathkeys' is a list of pathkeys for which matching pathkeys are to be
- * found
- * 'considered_pathkeys' is the current list of all pathkeys corresponding
- * to a given pathkey
- *
- * Returns a new pathkey(list of pathkeys).
+ * Returns a new pathkey (list of pathkey variables).
*
*/
static List *
@@ -375,84 +383,33 @@ new_join_pathkey(List *pathkey,
List *join_rel_tlist,
List *joinclauses)
{
- List *final_pathkey = NIL;
- List *i = NIL;
- List *considered_pathkeys = NIL;
+ List *new_pathkey = NIL;
+ List *i,
+ *j;
foreach(i, pathkey)
{
Var *key = (Var *) lfirst(i);
- List *joined_keys;
Expr *tlist_key;
Assert(key);
- joined_keys = get_joinvars_for_var(key, &considered_pathkeys,
- join_rel_tlist, joinclauses);
- if (joined_keys)
- {
- considered_pathkeys = nconc(considered_pathkeys, joined_keys);
- final_pathkey = nconc(final_pathkey, joined_keys);
- }
tlist_key = matching_tlist_var(key, join_rel_tlist);
- if (tlist_key && !member(tlist_key, considered_pathkeys))
- {
- /*
- * If pathkey is in the target list, and not considered,
- * add it
- */
- considered_pathkeys = lcons(tlist_key, considered_pathkeys);
- final_pathkey = lcons(tlist_key, final_pathkey);
- }
- }
- return copyObject(final_pathkey);
-}
+ if (tlist_key && !member(tlist_key, new_pathkey))
+ new_pathkey = lcons(copyObject(tlist_key), new_pathkey);
-/*
- * get_joinvars_for_var
- * Returns a list of new pathkeys:
- * (1) which are not listed in 'considered_pathkeys'
- * (2) for which the "other" variable in some clause in 'joinclauses' is
- * 'pathkey'
- * (3) which are mentioned in 'join_rel_tlist'
- *
- * Note that each returned pathkey is the var node found in
- * 'join_rel_tlist' rather than the joinclause var node.
- *
- * 'pathkey' is the var node for which we are trying to find matching
- * clauses
- *
- * Returns a list of new pathkeys.
- *
- */
-static List *
-get_joinvars_for_var(Var *key,
- List **considered_pathkeys,
- List *join_rel_tlist,
- List *joinclauses)
-{
- List *final_pathkey = NIL;
- Expr *joinclause;
- List *i;
- Expr *tlist_other_var;
-
- foreach(i, joinclauses)
- {
- joinclause = lfirst(i);
+ foreach(j, joinclauses)
+ {
+ Expr *joinclause = lfirst(j);
+ Expr *tlist_other_var;
- tlist_other_var = matching_tlist_var(
+ tlist_other_var = matching_tlist_var(
other_join_clause_var(key, joinclause),
join_rel_tlist);
- if (tlist_other_var &&
- !member(tlist_other_var, *considered_pathkeys))
- {
- /*
- * The key has a join variable that is in the target list,
- * and has not been considered.
- */
- *considered_pathkeys = lcons(tlist_other_var, *considered_pathkeys);
- final_pathkey = lcons(tlist_other_var, final_pathkey);
+ if (tlist_other_var && !member(tlist_other_var, new_pathkey))
+ new_pathkey = lcons(copyObject(tlist_other_var), new_pathkey);
}
}
- return final_pathkey;
+
+ return new_pathkey;
}