aboutsummaryrefslogtreecommitdiff
path: root/src/include/nodes/relation.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/nodes/relation.h')
-rw-r--r--src/include/nodes/relation.h192
1 files changed, 109 insertions, 83 deletions
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 94657d2aaa1..6ba920a479e 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -146,6 +146,13 @@ typedef struct PlannerInfo
RangeTblEntry **simple_rte_array; /* rangetable as an array */
/*
+ * all_baserels is a Relids set of all base relids (but not "other"
+ * relids) in the query; that is, the Relids identifier of the final
+ * join we need to form.
+ */
+ Relids all_baserels;
+
+ /*
* join_rel_list is a list of all join-relation RelOptInfos we have
* considered in this planning run. For small problems we just scan the
* list to do lookups, but when there are many join relations we build a
@@ -298,11 +305,16 @@ typedef struct PlannerInfo
* pathlist - List of Path nodes, one for each potentially useful
* method of generating the relation
* cheapest_startup_path - the pathlist member with lowest startup cost
- * (regardless of its ordering)
+ * (regardless of its ordering; but must be
+ * unparameterized)
* cheapest_total_path - the pathlist member with lowest total cost
- * (regardless of its ordering)
+ * (regardless of its ordering; but must be
+ * unparameterized)
* cheapest_unique_path - for caching cheapest path to produce unique
* (no duplicates) output from relation
+ * cheapest_parameterized_paths - paths with cheapest total costs for
+ * their parameterizations; always includes
+ * cheapest_total_path
*
* If the relation is a base relation it will have these fields set:
*
@@ -343,11 +355,6 @@ typedef struct PlannerInfo
* note this excludes clauses that might be derivable from
* EquivalenceClasses)
* has_eclass_joins - flag that EquivalenceClass joins are possible
- * index_outer_relids - only used for base rels; set of outer relids
- * that participate in indexable joinclauses for this rel
- * index_inner_paths - only used for base rels; list of InnerIndexscanInfo
- * nodes showing best indexpaths for various subsets of
- * index_outer_relids.
*
* Note: Keeping a restrictinfo list in the RelOptInfo is useful only for
* base rels, because for a join rel the set of clauses that are treated as
@@ -393,6 +400,7 @@ typedef struct RelOptInfo
struct Path *cheapest_startup_path;
struct Path *cheapest_total_path;
struct Path *cheapest_unique_path;
+ List *cheapest_parameterized_paths;
/* information about a base rel (not set for join rels!) */
Index relid;
@@ -416,18 +424,6 @@ typedef struct RelOptInfo
List *joininfo; /* RestrictInfo structures for join clauses
* involving this rel */
bool has_eclass_joins; /* T means joininfo is incomplete */
-
- /* cached info about inner indexscan paths for relation: */
- Relids index_outer_relids; /* other relids in indexable join
- * clauses */
- List *index_inner_paths; /* InnerIndexscanInfo nodes */
-
- /*
- * Inner indexscans are not in the main pathlist because they are not
- * usable except in specific join contexts. We use the index_inner_paths
- * list just to avoid recomputing the best inner indexscan repeatedly for
- * similar outer relations. See comments for InnerIndexscanInfo.
- */
} RelOptInfo;
/*
@@ -609,7 +605,6 @@ typedef struct EquivalenceMember
* BTGreaterStrategyNumber (for DESC). We assume that all ordering-capable
* index types will use btree-compatible strategy numbers.
*/
-
typedef struct PathKey
{
NodeTag type;
@@ -625,12 +620,31 @@ typedef struct PathKey
* simple plan types that we don't need any extra information in the path for.
* For other path types it is the first component of a larger struct.
*
- * Note: "pathtype" is the NodeTag of the Plan node we could build from this
- * Path. It is partially redundant with the Path's NodeTag, but allows us
- * to use the same Path type for multiple Plan types where there is no need
- * to distinguish the Plan type during path processing.
+ * "pathtype" is the NodeTag of the Plan node we could build from this Path.
+ * It is partially redundant with the Path's NodeTag, but allows us to use
+ * the same Path type for multiple Plan types when there is no need to
+ * distinguish the Plan type during path processing.
+ *
+ * "rows" is the same as parent->rows in simple paths, but in parameterized
+ * paths and UniquePaths it can be less than parent->rows, reflecting the
+ * fact that we've filtered by extra join conditions or removed duplicates.
+ *
+ * "pathkeys" is a List of PathKey nodes (see above), describing the sort
+ * ordering of the path's output rows.
+ *
+ * "required_outer", if not NULL, contains the relids of one or more relations
+ * that must provide parameter values to each scan of this path, because the
+ * path relies on join clauses using those rels. That means this path can only
+ * be joined to those rels by means of nestloop joins with this path on the
+ * inside. Note: for a normal unparameterized path, required_outer must be
+ * NULL, not an empty-but-not-null Bitmapset.
+ *
+ * "param_clauses" is a List of RestrictInfo nodes, containing the join
+ * clauses used by a parameterized path. Ideally param_clauses should be NIL
+ * if and only if required_outer is NULL. XXX for the moment, however, we do
+ * not compute param_clauses for Append and MergeAppend paths, so the list
+ * is inaccurate in those paths and possibly paths above them.
*/
-
typedef struct Path
{
NodeTag type;
@@ -639,12 +653,15 @@ typedef struct Path
RelOptInfo *parent; /* the relation this path can build */
- /* estimated execution costs for path (see costsize.c for more info) */
+ /* estimated size/costs for path (see costsize.c for more info) */
+ double rows; /* estimated number of result tuples */
Cost startup_cost; /* cost expended before fetching any tuples */
Cost total_cost; /* total cost (assuming all tuples fetched) */
List *pathkeys; /* sort ordering of path's output */
- /* pathkeys is a List of PathKey nodes; see above */
+
+ Relids required_outer; /* rels supplying parameters used by path */
+ List *param_clauses; /* join clauses that use such parameters */
} Path;
/*----------
@@ -685,12 +702,6 @@ typedef struct Path
* ORDER BY expression is meant to be used with. (There is no restriction
* on which index column each ORDER BY can be used with.)
*
- * 'isjoininner' is TRUE if the path is a nestloop inner scan (that is,
- * some of the index conditions are join rather than restriction clauses).
- * Note that the path costs will be calculated differently from a plain
- * indexscan in this case, and in addition there's a special 'rows' value
- * different from the parent RelOptInfo's (see below).
- *
* 'indexscandir' is one of:
* ForwardScanDirection: forward scan of an ordered index
* BackwardScanDirection: backward scan of an ordered index
@@ -703,12 +714,6 @@ typedef struct Path
* we need not recompute them when considering using the same index in a
* bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath
* itself represent the costs of an IndexScan or IndexOnlyScan plan type.
- *
- * 'rows' is the estimated result tuple count for the indexscan. This
- * is the same as path.parent->rows for a simple indexscan, but it is
- * different for a nestloop inner scan, because the additional indexquals
- * coming from join clauses make the scan more selective than the parent
- * rel's restrict clauses alone would do.
*----------
*/
typedef struct IndexPath
@@ -720,11 +725,9 @@ typedef struct IndexPath
List *indexqualcols;
List *indexorderbys;
List *indexorderbycols;
- bool isjoininner;
ScanDirection indexscandir;
Cost indextotalcost;
Selectivity indexselectivity;
- double rows; /* estimated number of result tuples */
} IndexPath;
/*
@@ -743,16 +746,11 @@ typedef struct IndexPath
* always represent the costs to use it as a regular (or index-only)
* IndexScan. The costs of a BitmapIndexScan can be computed using the
* IndexPath's indextotalcost and indexselectivity.
- *
- * BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and
- * rows fields serve the same purpose as for plain IndexPaths.
*/
typedef struct BitmapHeapPath
{
Path path;
Path *bitmapqual; /* IndexPath, BitmapAndPath, BitmapOrPath */
- bool isjoininner; /* T if it's a nestloop inner scan */
- double rows; /* estimated number of result tuples */
} BitmapHeapPath;
/*
@@ -822,6 +820,11 @@ typedef struct AppendPath
#define IS_DUMMY_PATH(p) \
(IsA((p), AppendPath) && ((AppendPath *) (p))->subpaths == NIL)
+/* A relation that's been proven empty will have one path that is dummy */
+#define IS_DUMMY_REL(r) \
+ ((r)->cheapest_total_path != NULL && \
+ IS_DUMMY_PATH((r)->cheapest_total_path))
+
/*
* MergeAppendPath represents a MergeAppend plan, ie, the merging of sorted
* results from several member plans to produce similarly-sorted output.
@@ -885,7 +888,6 @@ typedef struct UniquePath
UniquePathMethod umethod;
List *in_operators; /* equality operators of the IN clause */
List *uniq_exprs; /* expressions to be made unique */
- double rows; /* estimated number of result tuples */
} UniquePath;
/*
@@ -1173,42 +1175,6 @@ typedef struct MergeScanSelCache
} MergeScanSelCache;
/*
- * Inner indexscan info.
- *
- * An inner indexscan is one that uses one or more joinclauses as index
- * conditions (perhaps in addition to plain restriction clauses). So it
- * can only be used as the inner path of a nestloop join where the outer
- * relation includes all other relids appearing in those joinclauses.
- * The set of usable joinclauses, and thus the best inner indexscan,
- * thus varies depending on which outer relation we consider; so we have
- * to recompute the best such paths for every join. To avoid lots of
- * redundant computation, we cache the results of such searches. For
- * each relation we compute the set of possible otherrelids (all relids
- * appearing in joinquals that could become indexquals for this table).
- * Two outer relations whose relids have the same intersection with this
- * set will have the same set of available joinclauses and thus the same
- * best inner indexscans for the inner relation. By taking the intersection
- * before scanning the cache, we avoid recomputing when considering
- * join rels that differ only by the inclusion of irrelevant other rels.
- *
- * The search key also includes a bool showing whether the join being
- * considered is an outer join. Since we constrain the join order for
- * outer joins, I believe that this bool can only have one possible value
- * for any particular lookup key; but store it anyway to avoid confusion.
- */
-
-typedef struct InnerIndexscanInfo
-{
- NodeTag type;
- /* The lookup key: */
- Relids other_relids; /* a set of relevant other relids */
- bool isouterjoin; /* true if join is outer */
- /* Best paths for this lookup key (NULL if no available indexscans): */
- Path *cheapest_startup_innerpath; /* cheapest startup cost */
- Path *cheapest_total_innerpath; /* cheapest total cost */
-} InnerIndexscanInfo;
-
-/*
* Placeholder node for an expression to be evaluated below the top level
* of a plan tree. This is used during planning to represent the contained
* expression. At the end of the planning process it is replaced by either
@@ -1490,4 +1456,64 @@ typedef struct PlannerParamItem
Index abslevel; /* its absolute query level */
} PlannerParamItem;
+/*
+ * When making cost estimates for a SEMI or ANTI join, there are some
+ * correction factors that are needed in both nestloop and hash joins
+ * to account for the fact that the executor can stop scanning inner rows
+ * as soon as it finds a match to the current outer row. These numbers
+ * depend only on the selected outer and inner join relations, not on the
+ * particular paths used for them, so it's worthwhile to calculate them
+ * just once per relation pair not once per considered path. This struct
+ * is filled by compute_semi_anti_join_factors and must be passed along
+ * to the join cost estimation functions.
+ *
+ * outer_match_frac is the fraction of the outer tuples that are
+ * expected to have at least one match.
+ * match_count is the average number of matches expected for
+ * outer tuples that have at least one match.
+ */
+typedef struct SemiAntiJoinFactors
+{
+ Selectivity outer_match_frac;
+ Selectivity match_count;
+} SemiAntiJoinFactors;
+
+/*
+ * For speed reasons, cost estimation for join paths is performed in two
+ * phases: the first phase tries to quickly derive a lower bound for the
+ * join cost, and then we check if that's sufficient to reject the path.
+ * If not, we come back for a more refined cost estimate. The first phase
+ * fills a JoinCostWorkspace struct with its preliminary cost estimates
+ * and possibly additional intermediate values. The second phase takes
+ * these values as inputs to avoid repeating work.
+ *
+ * (Ideally we'd declare this in cost.h, but it's also needed in pathnode.h,
+ * so seems best to put it here.)
+ */
+typedef struct JoinCostWorkspace
+{
+ /* Preliminary cost estimates --- must not be larger than final ones! */
+ Cost startup_cost; /* cost expended before fetching any tuples */
+ Cost total_cost; /* total cost (assuming all tuples fetched) */
+
+ /* Fields below here should be treated as private to costsize.c */
+ Cost run_cost; /* non-startup cost components */
+
+ /* private for cost_nestloop code */
+ Cost inner_rescan_run_cost;
+ double outer_matched_rows;
+ Selectivity inner_scan_frac;
+
+ /* private for cost_mergejoin code */
+ Cost inner_run_cost;
+ double outer_rows;
+ double inner_rows;
+ double outer_skip_rows;
+ double inner_skip_rows;
+
+ /* private for cost_hashjoin code */
+ int numbuckets;
+ int numbatches;
+} JoinCostWorkspace;
+
#endif /* RELATION_H */