1 files changed, 109 insertions, 83 deletions
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 94657d2aaa1..6ba920a479e 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -146,6 +146,13 @@ typedef struct PlannerInfo
 	RangeTblEntry **simple_rte_array;	/* rangetable as an array */
 
 	/*
+	 * all_baserels is a Relids set of all base relids (but not "other"
+	 * relids) in the query; that is, the Relids identifier of the final
+	 * join we need to form.
+	 */
+	Relids		all_baserels;
+
+	/*
 	 * join_rel_list is a list of all join-relation RelOptInfos we have
 	 * considered in this planning run.  For small problems we just scan the
 	 * list to do lookups, but when there are many join relations we build a
@@ -298,11 +305,16 @@ typedef struct PlannerInfo
  *		pathlist - List of Path nodes, one for each potentially useful
  *				   method of generating the relation
  *		cheapest_startup_path - the pathlist member with lowest startup cost
- *								(regardless of its ordering)
+ *								(regardless of its ordering; but must be
+ *								 unparameterized)
  *		cheapest_total_path - the pathlist member with lowest total cost
- *							  (regardless of its ordering)
+ *							  (regardless of its ordering; but must be
+ *							   unparameterized)
  *		cheapest_unique_path - for caching cheapest path to produce unique
  *							   (no duplicates) output from relation
+ *		cheapest_parameterized_paths - paths with cheapest total costs for
+ *								 their parameterizations; always includes
+ *								 cheapest_total_path
  *
  * If the relation is a base relation it will have these fields set:
  *
@@ -343,11 +355,6 @@ typedef struct PlannerInfo
  *					note this excludes clauses that might be derivable from
  *					EquivalenceClasses)
  *		has_eclass_joins - flag that EquivalenceClass joins are possible
- *		index_outer_relids - only used for base rels; set of outer relids
- *					that participate in indexable joinclauses for this rel
- *		index_inner_paths - only used for base rels; list of InnerIndexscanInfo
- *					nodes showing best indexpaths for various subsets of
- *					index_outer_relids.
  *
  * Note: Keeping a restrictinfo list in the RelOptInfo is useful only for
  * base rels, because for a join rel the set of clauses that are treated as
@@ -393,6 +400,7 @@ typedef struct RelOptInfo
 	struct Path *cheapest_startup_path;
 	struct Path *cheapest_total_path;
 	struct Path *cheapest_unique_path;
+	List	   *cheapest_parameterized_paths;
 
 	/* information about a base rel (not set for join rels!) */
 	Index		relid;
@@ -416,18 +424,6 @@ typedef struct RelOptInfo
 	List	   *joininfo;		/* RestrictInfo structures for join clauses
 								 * involving this rel */
 	bool		has_eclass_joins;		/* T means joininfo is incomplete */
-
-	/* cached info about inner indexscan paths for relation: */
-	Relids		index_outer_relids;		/* other relids in indexable join
-										 * clauses */
-	List	   *index_inner_paths;		/* InnerIndexscanInfo nodes */
-
-	/*
-	 * Inner indexscans are not in the main pathlist because they are not
-	 * usable except in specific join contexts.  We use the index_inner_paths
-	 * list just to avoid recomputing the best inner indexscan repeatedly for
-	 * similar outer relations.  See comments for InnerIndexscanInfo.
-	 */
 } RelOptInfo;
 
 /*
@@ -609,7 +605,6 @@ typedef struct EquivalenceMember
  * BTGreaterStrategyNumber (for DESC).	We assume that all ordering-capable
  * index types will use btree-compatible strategy numbers.
  */
-
 typedef struct PathKey
 {
 	NodeTag		type;
@@ -625,12 +620,31 @@ typedef struct PathKey
  * simple plan types that we don't need any extra information in the path for.
  * For other path types it is the first component of a larger struct.
  *
- * Note: "pathtype" is the NodeTag of the Plan node we could build from this
- * Path.  It is partially redundant with the Path's NodeTag, but allows us
- * to use the same Path type for multiple Plan types where there is no need
- * to distinguish the Plan type during path processing.
+ * "pathtype" is the NodeTag of the Plan node we could build from this Path.
+ * It is partially redundant with the Path's NodeTag, but allows us to use
+ * the same Path type for multiple Plan types when there is no need to
+ * distinguish the Plan type during path processing.
+ *
+ * "rows" is the same as parent->rows in simple paths, but in parameterized
+ * paths and UniquePaths it can be less than parent->rows, reflecting the
+ * fact that we've filtered by extra join conditions or removed duplicates.
+ *
+ * "pathkeys" is a List of PathKey nodes (see above), describing the sort
+ * ordering of the path's output rows.
+ *
+ * "required_outer", if not NULL, contains the relids of one or more relations
+ * that must provide parameter values to each scan of this path, because the
+ * path relies on join clauses using those rels. That means this path can only
+ * be joined to those rels by means of nestloop joins with this path on the
+ * inside.  Note: for a normal unparameterized path, required_outer must be
+ * NULL, not an empty-but-not-null Bitmapset.
+ *
+ * "param_clauses" is a List of RestrictInfo nodes, containing the join
+ * clauses used by a parameterized path.  Ideally param_clauses should be NIL
+ * if and only if required_outer is NULL.  XXX for the moment, however, we do
+ * not compute param_clauses for Append and MergeAppend paths, so the list
+ * is inaccurate in those paths and possibly paths above them.
  */
-
 typedef struct Path
 {
 	NodeTag		type;
@@ -639,12 +653,15 @@ typedef struct Path
 
 	RelOptInfo *parent;			/* the relation this path can build */
 
-	/* estimated execution costs for path (see costsize.c for more info) */
+	/* estimated size/costs for path (see costsize.c for more info) */
+	double		rows;			/* estimated number of result tuples */
 	Cost		startup_cost;	/* cost expended before fetching any tuples */
 	Cost		total_cost;		/* total cost (assuming all tuples fetched) */
 
 	List	   *pathkeys;		/* sort ordering of path's output */
-	/* pathkeys is a List of PathKey nodes; see above */
+
+	Relids		required_outer;	/* rels supplying parameters used by path */
+	List	   *param_clauses;	/* join clauses that use such parameters */
 } Path;
 
 /*----------
@@ -685,12 +702,6 @@ typedef struct Path
  * ORDER BY expression is meant to be used with.  (There is no restriction
  * on which index column each ORDER BY can be used with.)
  *
- * 'isjoininner' is TRUE if the path is a nestloop inner scan (that is,
- * some of the index conditions are join rather than restriction clauses).
- * Note that the path costs will be calculated differently from a plain
- * indexscan in this case, and in addition there's a special 'rows' value
- * different from the parent RelOptInfo's (see below).
- *
  * 'indexscandir' is one of:
  *		ForwardScanDirection: forward scan of an ordered index
  *		BackwardScanDirection: backward scan of an ordered index
@@ -703,12 +714,6 @@ typedef struct Path
  * we need not recompute them when considering using the same index in a
  * bitmap index/heap scan (see BitmapHeapPath).  The costs of the IndexPath
  * itself represent the costs of an IndexScan or IndexOnlyScan plan type.
- *
- * 'rows' is the estimated result tuple count for the indexscan.  This
- * is the same as path.parent->rows for a simple indexscan, but it is
- * different for a nestloop inner scan, because the additional indexquals
- * coming from join clauses make the scan more selective than the parent
- * rel's restrict clauses alone would do.
  *----------
  */
 typedef struct IndexPath
@@ -720,11 +725,9 @@ typedef struct IndexPath
 	List	   *indexqualcols;
 	List	   *indexorderbys;
 	List	   *indexorderbycols;
-	bool		isjoininner;
 	ScanDirection indexscandir;
 	Cost		indextotalcost;
 	Selectivity indexselectivity;
-	double		rows;			/* estimated number of result tuples */
 } IndexPath;
 
 /*
@@ -743,16 +746,11 @@ typedef struct IndexPath
  * always represent the costs to use it as a regular (or index-only)
  * IndexScan.  The costs of a BitmapIndexScan can be computed using the
  * IndexPath's indextotalcost and indexselectivity.
- *
- * BitmapHeapPaths can be nestloop inner indexscans.  The isjoininner and
- * rows fields serve the same purpose as for plain IndexPaths.
  */
 typedef struct BitmapHeapPath
 {
 	Path		path;
 	Path	   *bitmapqual;		/* IndexPath, BitmapAndPath, BitmapOrPath */
-	bool		isjoininner;	/* T if it's a nestloop inner scan */
-	double		rows;			/* estimated number of result tuples */
 } BitmapHeapPath;
 
 /*
@@ -822,6 +820,11 @@ typedef struct AppendPath
 #define IS_DUMMY_PATH(p) \
 	(IsA((p), AppendPath) && ((AppendPath *) (p))->subpaths == NIL)
 
+/* A relation that's been proven empty will have one path that is dummy */
+#define IS_DUMMY_REL(r) \
+	((r)->cheapest_total_path != NULL && \
+	 IS_DUMMY_PATH((r)->cheapest_total_path))
+
 /*
  * MergeAppendPath represents a MergeAppend plan, ie, the merging of sorted
  * results from several member plans to produce similarly-sorted output.
@@ -885,7 +888,6 @@ typedef struct UniquePath
 	UniquePathMethod umethod;
 	List	   *in_operators;	/* equality operators of the IN clause */
 	List	   *uniq_exprs;		/* expressions to be made unique */
-	double		rows;			/* estimated number of result tuples */
 } UniquePath;
 
 /*
@@ -1173,42 +1175,6 @@ typedef struct MergeScanSelCache
 } MergeScanSelCache;
 
 /*
- * Inner indexscan info.
- *
- * An inner indexscan is one that uses one or more joinclauses as index
- * conditions (perhaps in addition to plain restriction clauses).  So it
- * can only be used as the inner path of a nestloop join where the outer
- * relation includes all other relids appearing in those joinclauses.
- * The set of usable joinclauses, and thus the best inner indexscan,
- * thus varies depending on which outer relation we consider; so we have
- * to recompute the best such paths for every join.  To avoid lots of
- * redundant computation, we cache the results of such searches.  For
- * each relation we compute the set of possible otherrelids (all relids
- * appearing in joinquals that could become indexquals for this table).
- * Two outer relations whose relids have the same intersection with this
- * set will have the same set of available joinclauses and thus the same
- * best inner indexscans for the inner relation.  By taking the intersection
- * before scanning the cache, we avoid recomputing when considering
- * join rels that differ only by the inclusion of irrelevant other rels.
- *
- * The search key also includes a bool showing whether the join being
- * considered is an outer join.  Since we constrain the join order for
- * outer joins, I believe that this bool can only have one possible value
- * for any particular lookup key; but store it anyway to avoid confusion.
- */
-
-typedef struct InnerIndexscanInfo
-{
-	NodeTag		type;
-	/* The lookup key: */
-	Relids		other_relids;	/* a set of relevant other relids */
-	bool		isouterjoin;	/* true if join is outer */
-	/* Best paths for this lookup key (NULL if no available indexscans): */
-	Path	   *cheapest_startup_innerpath;		/* cheapest startup cost */
-	Path	   *cheapest_total_innerpath;		/* cheapest total cost */
-} InnerIndexscanInfo;
-
-/*
  * Placeholder node for an expression to be evaluated below the top level
  * of a plan tree.	This is used during planning to represent the contained
  * expression.	At the end of the planning process it is replaced by either
@@ -1490,4 +1456,64 @@ typedef struct PlannerParamItem
 	Index		abslevel;		/* its absolute query level */
 } PlannerParamItem;
 
+/*
+ * When making cost estimates for a SEMI or ANTI join, there are some
+ * correction factors that are needed in both nestloop and hash joins
+ * to account for the fact that the executor can stop scanning inner rows
+ * as soon as it finds a match to the current outer row.  These numbers
+ * depend only on the selected outer and inner join relations, not on the
+ * particular paths used for them, so it's worthwhile to calculate them
+ * just once per relation pair not once per considered path.  This struct
+ * is filled by compute_semi_anti_join_factors and must be passed along
+ * to the join cost estimation functions.
+ *
+ * outer_match_frac is the fraction of the outer tuples that are
+ *		expected to have at least one match.
+ * match_count is the average number of matches expected for
+ *		outer tuples that have at least one match.
+ */
+typedef struct SemiAntiJoinFactors
+{
+	Selectivity outer_match_frac;
+	Selectivity match_count;
+} SemiAntiJoinFactors;
+
+/*
+ * For speed reasons, cost estimation for join paths is performed in two
+ * phases: the first phase tries to quickly derive a lower bound for the
+ * join cost, and then we check if that's sufficient to reject the path.
+ * If not, we come back for a more refined cost estimate.  The first phase
+ * fills a JoinCostWorkspace struct with its preliminary cost estimates
+ * and possibly additional intermediate values.  The second phase takes
+ * these values as inputs to avoid repeating work.
+ *
+ * (Ideally we'd declare this in cost.h, but it's also needed in pathnode.h,
+ * so seems best to put it here.)
+ */
+typedef struct JoinCostWorkspace
+{
+	/* Preliminary cost estimates --- must not be larger than final ones! */
+	Cost		startup_cost;	/* cost expended before fetching any tuples */
+	Cost		total_cost;		/* total cost (assuming all tuples fetched) */
+
+	/* Fields below here should be treated as private to costsize.c */
+	Cost		run_cost;		/* non-startup cost components */
+
+	/* private for cost_nestloop code */
+	Cost		inner_rescan_run_cost;
+	double		outer_matched_rows;
+	Selectivity inner_scan_frac;
+
+	/* private for cost_mergejoin code */
+	Cost		inner_run_cost;
+	double		outer_rows;
+	double		inner_rows;
+	double		outer_skip_rows;
+	double		inner_skip_rows;
+
+	/* private for cost_hashjoin code */
+	int			numbuckets;
+	int			numbatches;
+} JoinCostWorkspace;
+
 #endif   /* RELATION_H */