1 files changed, 177 insertions, 129 deletions
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 4db2e9cea44..91fa85dd25e 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: relation.h,v 1.37 1999/07/27 03:51:09 tgl Exp $
+ * $Id: relation.h,v 1.38 1999/08/16 02:17:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -18,65 +18,75 @@
 /*
  * Relids
  *		List of relation identifiers (indexes into the rangetable).
+ *
+ *		Note: these are lists of integers, not Nodes.
  */
 
 typedef List *Relids;
 
 /*
  * RelOptInfo
- *		Per-base-relation information
+ *		Per-relation information for planning/optimization
  *
  *		Parts of this data structure are specific to various scan and join
  *		mechanisms.  It didn't seem worth creating new node types for them.
  *
- *		relids - List of relation indentifiers
+ *		relids - List of base-relation identifiers; it is a base relation
+ *				if there is just one, a join relation if more than one
  *		indexed - true if the relation has secondary indices
  *		pages - number of pages in the relation
  *		tuples - number of tuples in the relation
- *		size - number of tuples in the relation after restrictions clauses
- *			   have been applied
- *		width - number of bytes per tuple in the relation after the
+ *		size - estimated number of tuples in the relation after restriction
+ *			   clauses have been applied
+ *		width - avg. number of bytes per tuple in the relation after the
  *				appropriate projections have been done
  *		targetlist - List of TargetList nodes
- *		pathlist - List of Path nodes, one for each possible method of
- *				   generating the relation
- *		cheapestpath -	least expensive Path (regardless of final order)
- *		pruneable - flag to let the planner know whether it can prune the plan
- *					space of this RelOptInfo or not.
+ *		pathlist - List of Path nodes, one for each potentially useful
+ *				   method of generating the relation
+ *		cheapestpath -	least expensive Path (regardless of ordering)
+ *		pruneable - flag to let the planner know whether it can prune the
+ *					pathlist of this RelOptInfo or not.
  *
  *	 * If the relation is a (secondary) index it will have the following
- *		three fields:
+ *		fields set:
  *
  *		classlist - List of PG_AMOPCLASS OIDs for the index
  *		indexkeys - List of base-relation attribute numbers that are index keys
  *		ordering - List of PG_OPERATOR OIDs which order the indexscan result
  *		relam	  - the OID of the pg_am of the index
  *
+ *		NB. the last element of the arrays classlist, indexkeys and ordering
+ *			is always 0.
+ *
+ *		Index relations do not participate in the join tree in the way
+ *		that regular base relations do, but it is still convenient to
+ *		represent them by RelOptInfos.
+ *
  *	 * The presence of the remaining fields depends on the restrictions
- *		and joins which the relation participates in:
+ *		and joins that the relation participates in:
  *
  *		restrictinfo - List of RestrictInfo nodes, containing info about each
  *					 qualification clause in which this relation participates
  *		joininfo  - List of JoinInfo nodes, containing info about each join
  *					clause in which this relation participates
  *		innerjoin - List of Path nodes that represent indices that may be used
- *					as inner paths of nestloop joins
- *
- * NB. the last element of the arrays classlist, indexkeys and ordering
- *	   is always 0.								2/95 - ay
+ *					as inner paths of nestloop joins. This field is non-null
+ *					only for base rels, since join rels have no indices.
  */
 
 typedef struct RelOptInfo
 {
 	NodeTag		type;
 
-	/* all relations: */
-	Relids		relids;			/* integer list of base relids involved */
+	/* all relations included in this RelOptInfo */
+	Relids		relids;			/* integer list of base relids */
 
 	/* catalog statistics information */
 	bool		indexed;
 	int			pages;
 	int			tuples;
+
+	/* estimates generated by planner.  XXX int is probably too small... */
 	int			size;
 	int			width;
 
@@ -89,65 +99,66 @@ typedef struct RelOptInfo
 	/* used solely by indices: */
 	Oid		   *classlist;		/* classes of AM operators */
 	int		   *indexkeys;		/* keys over which we're indexing */
+	Oid		   *ordering;		/* OIDs of sort operators for each key */
 	Oid			relam;			/* OID of the access method (in pg_am) */
 
-	Oid			indproc;
-	List	   *indpred;
+	Oid			indproc;		/* if a functional index */
+	List	   *indpred;		/* if a partial index */
 
 	/* used by various scans and joins: */
-	Oid		   *ordering;		/* OID of operators in sort order */
 	List	   *restrictinfo;	/* RestrictInfo structures */
 	List	   *joininfo;		/* JoinInfo structures */
-	List	   *innerjoin;
+	List	   *innerjoin;		/* potential indexscans for nestloop joins */
+	/* innerjoin indexscans are not in the main pathlist because they are
+	 * not usable except in specific join contexts; we have to test before
+	 * seeing whether they can be used.
+	 */
 } RelOptInfo;
 
-extern Var *get_expr(TargetEntry *foo);
-extern Var *get_groupclause_expr(GroupClause *groupClause, List *targetList);
+/*
+ * PathKeys
+ *
+ *	The sort ordering of a path is represented by a list of sublists of
+ *	PathKeyItem nodes.  An empty list implies no known ordering.  Otherwise
+ *	the first sublist represents the primary sort key, the second the
+ *	first secondary sort key, etc.  Each sublist contains one or more
+ *	PathKeyItem nodes, each of which can be taken as the attribute that
+ *	appears at that sort position.  (See the top of optimizer/path/pathkeys.c
+ *	for more information.)
+ */
 
-typedef struct MergeOrder
+typedef struct PathKeyItem
 {
 	NodeTag		type;
-	Oid			join_operator;
-	Oid			left_operator;
-	Oid			right_operator;
-	Oid			left_type;
-	Oid			right_type;
-} MergeOrder;
-
-typedef enum OrderType
-{
-	MERGE_ORDER, SORTOP_ORDER
-} OrderType;
 
-typedef struct PathOrder
-{
-	NodeTag		type;
+	Node	   *key;			/* the item that is ordered */
+	Oid			sortop;			/* the ordering operator ('<' op) */
+	/*
+	 * key typically points to a Var node, ie a relation attribute,
+	 * but it can also point to a Func clause representing the value
+	 * indexed by a functional index.  Someday we might allow arbitrary
+	 * expressions as path keys, so don't assume more than you must.
+	 */
+} PathKeyItem;
 
-	OrderType	ordtype;
-	union
-	{
-		Oid		   *sortop;
-		MergeOrder *merge;
-	}			ord;
-} PathOrder;
+/*
+ * Type "Path" is used as-is for sequential-scan paths.  For other
+ * path types it is the first component of a larger struct.
+ */
 
 typedef struct Path
 {
 	NodeTag		type;
 
-	RelOptInfo *parent;
-	Cost		path_cost;
+	RelOptInfo *parent;			/* the relation this path can build */
 
-	NodeTag		pathtype;
+	Cost		path_cost;		/* estimated execution cost of path */
 
-	PathOrder  *pathorder;
+	NodeTag		pathtype;		/* tag identifying scan/join method */
+	/* XXX why is pathtype separate from the NodeTag? */
 
-	List	   *pathkeys;		/* This is a List of List of Var nodes.
-								 * See the top of
-								 * optimizer/path/pathkeys.c for more
-								 * information. */
-	Cost		outerjoincost;
-	Relids		joinid;
+	List	   *pathkeys;		/* sort ordering of path's output */
+	/* pathkeys is a List of Lists of PathKeyItem nodes; see above */
 } Path;
 
 /*----------
@@ -156,129 +167,163 @@ typedef struct Path
  * different index during each scan.  The result is the union (OR) of all the
  * tuples matched during any scan.  (The executor is smart enough not to return
  * the same tuple more than once, even if it is matched in multiple scans.)
+ *
  * 'indexid' is a list of index relation OIDs, one per scan to be performed.
  * 'indexqual' is a list of index qualifications, also one per scan.
  * Each entry in 'indexqual' is a sublist of qualification expressions with
- * implicit AND semantics across the sublist items.  Each one of the sublist
- * items must be an operator expression of the form (var op something) or
- * (something op var), where the var is a field the associated index keys on
- * and the op is a member of the operator class of the index.
+ * implicit AND semantics across the sublist items.  Only expressions that
+ * are usable as indexquals (as determined by indxpath.c) may appear here.
+ *
  * NOTE that the semantics of the top-level list in 'indexqual' is OR
  * combination, while the sublists are implicitly AND combinations!
  *----------
  */
+
 typedef struct IndexPath
 {
 	Path		path;
 	List	   *indexid;
 	List	   *indexqual;
-	int		   *indexkeys;		/* to transform heap attnos into index
-								 * ones */
+	/*
+	 * joinrelids is only used in IndexPaths that are constructed for use
+	 * as the inner path of a nestloop join.  These paths have indexquals
+	 * that refer to values of other rels, so those other rels must be
+	 * included in the outer joinrel in order to make a usable join.
+	 */
+	Relids		joinrelids;			/* other rels mentioned in indexqual */
 } IndexPath;
 
-typedef struct NestPath
+/*
+ * All join-type paths share these fields.
+ */
+
+typedef struct JoinPath
 {
 	Path		path;
-	List	   *pathinfo;
-	Path	   *outerjoinpath;
-	Path	   *innerjoinpath;
-} NestPath;
 
-typedef NestPath JoinPath;
+	List	   *pathinfo;		/* copy of parent->restrictinfo; REMOVE? */
+
+	Path	   *outerjoinpath;	/* path for the outer side of the join */
+	Path	   *innerjoinpath;	/* path for the inner side of the join */
+} JoinPath;
+
+/*
+ * A nested-loop path needs no special fields.
+ */
+
+typedef JoinPath NestPath;
+
+/*
+ * A mergejoin path has these fields.
+ *
+ * Note that the mergeclauses are a subset of the parent relation's
+ * restriction-clause list.  Any join clauses that are not mergejoinable
+ * appear only in the parent's restrict list, and must be checked by a
+ * qpqual at execution time.
+ */
 
 typedef struct MergePath
 {
 	JoinPath	jpath;
-	List	   *path_mergeclauses;
+	List	   *path_mergeclauses; /* join clauses used for merge */
+	/*
+	 * outersortkeys (resp. innersortkeys) is NIL if the outer path
+	 * (resp. inner path) is already ordered appropriately for the
+	 * mergejoin.  If it is not NIL then it is a PathKeys list describing
+	 * the ordering that must be created by an explicit sort step.
+	 */
 	List	   *outersortkeys;
 	List	   *innersortkeys;
 } MergePath;
 
-typedef struct HashPath
-{
-	JoinPath	jpath;
-	List	   *path_hashclauses;
-	List	   *outerhashkeys;
-	List	   *innerhashkeys;
-} HashPath;
-
 /*
- * Keys
+ * A hashjoin path has these fields.
+ *
+ * The remarks above for mergeclauses apply for hashclauses as well.
+ * However, hashjoin does not care what order its inputs appear in,
+ * so we have no need for sortkeys.
  */
 
-typedef struct OrderKey
-{
-	NodeTag		type;
-	int			attribute_number;
-	Index		array_index;
-} OrderKey;
-
-typedef struct JoinKey
+typedef struct HashPath
 {
-	NodeTag		type;
-	Var		   *outer;
-	Var		   *inner;
-} JoinKey;
+	JoinPath	jpath;
+	List	   *path_hashclauses; /* join clauses used for hashing */
+} HashPath;
 
 /*
  * Restriction clause info.
+ *
  * We create one of these for each AND sub-clause of a restriction condition
  * (WHERE clause).  Since the restriction clauses are logically ANDed, we
  * can use any one of them or any subset of them to filter out tuples,
  * without having to evaluate the rest.  The RestrictInfo node itself stores
  * data used by the optimizer while choosing the best query plan.
+ *
+ * A restriction clause will appear in the restrictinfo list of a RelOptInfo
+ * that describes exactly the set of base relations referenced by the
+ * restriction clause.  It is not possible to apply the clause at any lower
+ * nesting level, and there is little point in delaying its evaluation to
+ * higher nesting levels.  (The "predicate migration" code was once intended
+ * to push restriction clauses up and down the plan tree, but it's dead code
+ * and is unlikely to be resurrected in the foreseeable future.)
+ *
+ * If a restriction clause references more than one base rel, it will also
+ * appear in the JoinInfo list of every RelOptInfo that describes a strict
+ * subset of the base rels mentioned in the clause.  The JoinInfo lists are
+ * used to drive join tree building by selecting plausible join candidates.
+ *
+ * In general, the referenced clause might be arbitrarily complex.  The
+ * kinds of clauses we can handle as indexscan quals, mergejoin clauses,
+ * or hashjoin clauses are fairly limited --- the code for each kind of
+ * path is responsible for identifying the restrict clauses it can use
+ * and ignoring the rest.  Clauses not implemented by an indexscan,
+ * mergejoin, or hashjoin will be placed in the qpqual field of the
+ * final Plan node, where they will be enforced by general-purpose
+ * qual-expression-evaluation code.  (But we are still entitled to count
+ * their selectivity when estimating the result tuple count, if we
+ * can guess what it is...)
  */
 
 typedef struct RestrictInfo
 {
 	NodeTag		type;
-	Expr	   *clause;			/* the represented subclause of WHERE cond */
-	Cost		selectivity;	/* estimated selectivity */
-	List	   *indexids;		/* subclause index IDs if clause is an OR */
 
-	/* mergejoin only */
-	MergeOrder *mergejoinorder;
+	Expr	   *clause;			/* the represented clause of WHERE cond */
+	Cost		selectivity;	/* estimated selectivity */
 
-	/* hashjoin only */
-	Oid			hashjoinoperator;
-} RestrictInfo;
+	/* only used if clause is an OR clause: */
+	List	   *subclauseindices;	/* lists of indexes matching subclauses */
 
-typedef struct JoinMethod
-{
-	NodeTag		type;
-	List	   *jmkeys;
-	List	   *clauses;
-} JoinMethod;
+	/* valid if clause is mergejoinable, else InvalidOid: */
+	Oid			mergejoinoperator;	/* copy of clause operator */
+	Oid			left_sortop;	/* leftside sortop needed for mergejoin */
+	Oid			right_sortop;	/* rightside sortop needed for mergejoin */
 
-typedef struct HashInfo
-{
-	JoinMethod	jmethod;
-	Oid			hashop;
-} HashInfo;
+	/* valid if clause is hashjoinable, else InvalidOid: */
+	Oid			hashjoinoperator;	/* copy of clause operator */
+} RestrictInfo;
 
-typedef struct MergeInfo
-{
-	JoinMethod	jmethod;
-	MergeOrder *m_ordering;
-} MergeInfo;
+/*
+ * Join clause info.
+ *
+ * We make a list of these for each RelOptInfo, containing info about
+ * all the join clauses this RelOptInfo participates in.  (For this
+ * purpose, a "join clause" is a WHERE clause that mentions both vars
+ * belonging to this relation and vars belonging to relations not yet
+ * joined to it.)  We group these clauses according to the set of
+ * other base relations (unjoined relations) mentioned in them.
+ * There is one JoinInfo for each distinct set of unjoined_relids,
+ * and its jinfo_restrictinfo lists the clause(s) that use that set
+ * of other relations.
+ */
 
 typedef struct JoinInfo
 {
 	NodeTag		type;
-	Relids		unjoined_relids;
-	List	   *jinfo_restrictinfo;
-	bool		mergejoinable;
-	bool		hashjoinable;
+	Relids		unjoined_relids; /* some rels not yet part of my RelOptInfo */
+	List	   *jinfo_restrictinfo;	/* relevant RestrictInfos */
 } JoinInfo;
 
-typedef struct Iter
-{
-	NodeTag		type;
-	Node	   *iterexpr;
-	Oid			itertype;		/* type of the iter expr (use for type
-								 * checking) */
-} Iter;
-
 /*
  *	Stream:
  *	A stream represents a root-to-leaf path in a plan tree (i.e. a tree of
@@ -287,6 +332,9 @@ typedef struct Iter
  *	is used to make Path nodes and clauses look similar, so that Predicate
  *	Migration can run.
  *
+ *	XXX currently, Predicate Migration is dead code, and so is this node type.
+ *	Probably should remove support for it.
+ *
  *	pathptr -- pointer to the current path node
  *	cinfo -- if NULL, this stream node referes to the path node.
  *			  Otherwise this is a pointer to the current clause.
@@ -306,8 +354,8 @@ typedef struct Stream
 	Path	   *pathptr;
 	RestrictInfo *cinfo;
 	int		   *clausetype;
-	struct Stream *upstream;
-	struct Stream *downstream;
+	StreamPtr	upstream;
+	StreamPtr	downstream;
 	bool		groupup;
 	Cost		groupcost;
 	Cost		groupsel;