diff options
Diffstat (limited to 'src/include/nodes/relation.h')
-rw-r--r-- | src/include/nodes/relation.h | 192 |
1 files changed, 109 insertions, 83 deletions
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 94657d2aaa1..6ba920a479e 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -146,6 +146,13 @@ typedef struct PlannerInfo RangeTblEntry **simple_rte_array; /* rangetable as an array */ /* + * all_baserels is a Relids set of all base relids (but not "other" + * relids) in the query; that is, the Relids identifier of the final + * join we need to form. + */ + Relids all_baserels; + + /* * join_rel_list is a list of all join-relation RelOptInfos we have * considered in this planning run. For small problems we just scan the * list to do lookups, but when there are many join relations we build a @@ -298,11 +305,16 @@ typedef struct PlannerInfo * pathlist - List of Path nodes, one for each potentially useful * method of generating the relation * cheapest_startup_path - the pathlist member with lowest startup cost - * (regardless of its ordering) + * (regardless of its ordering; but must be + * unparameterized) * cheapest_total_path - the pathlist member with lowest total cost - * (regardless of its ordering) + * (regardless of its ordering; but must be + * unparameterized) * cheapest_unique_path - for caching cheapest path to produce unique * (no duplicates) output from relation + * cheapest_parameterized_paths - paths with cheapest total costs for + * their parameterizations; always includes + * cheapest_total_path * * If the relation is a base relation it will have these fields set: * @@ -343,11 +355,6 @@ typedef struct PlannerInfo * note this excludes clauses that might be derivable from * EquivalenceClasses) * has_eclass_joins - flag that EquivalenceClass joins are possible - * index_outer_relids - only used for base rels; set of outer relids - * that participate in indexable joinclauses for this rel - * index_inner_paths - only used for base rels; list of InnerIndexscanInfo - * nodes showing best indexpaths for various subsets of - * index_outer_relids. * * Note: Keeping a restrictinfo list in the RelOptInfo is useful only for * base rels, because for a join rel the set of clauses that are treated as @@ -393,6 +400,7 @@ typedef struct RelOptInfo struct Path *cheapest_startup_path; struct Path *cheapest_total_path; struct Path *cheapest_unique_path; + List *cheapest_parameterized_paths; /* information about a base rel (not set for join rels!) */ Index relid; @@ -416,18 +424,6 @@ typedef struct RelOptInfo List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ bool has_eclass_joins; /* T means joininfo is incomplete */ - - /* cached info about inner indexscan paths for relation: */ - Relids index_outer_relids; /* other relids in indexable join - * clauses */ - List *index_inner_paths; /* InnerIndexscanInfo nodes */ - - /* - * Inner indexscans are not in the main pathlist because they are not - * usable except in specific join contexts. We use the index_inner_paths - * list just to avoid recomputing the best inner indexscan repeatedly for - * similar outer relations. See comments for InnerIndexscanInfo. - */ } RelOptInfo; /* @@ -609,7 +605,6 @@ typedef struct EquivalenceMember * BTGreaterStrategyNumber (for DESC). We assume that all ordering-capable * index types will use btree-compatible strategy numbers. */ - typedef struct PathKey { NodeTag type; @@ -625,12 +620,31 @@ typedef struct PathKey * simple plan types that we don't need any extra information in the path for. * For other path types it is the first component of a larger struct. * - * Note: "pathtype" is the NodeTag of the Plan node we could build from this - * Path. It is partially redundant with the Path's NodeTag, but allows us - * to use the same Path type for multiple Plan types where there is no need - * to distinguish the Plan type during path processing. + * "pathtype" is the NodeTag of the Plan node we could build from this Path. + * It is partially redundant with the Path's NodeTag, but allows us to use + * the same Path type for multiple Plan types when there is no need to + * distinguish the Plan type during path processing. + * + * "rows" is the same as parent->rows in simple paths, but in parameterized + * paths and UniquePaths it can be less than parent->rows, reflecting the + * fact that we've filtered by extra join conditions or removed duplicates. + * + * "pathkeys" is a List of PathKey nodes (see above), describing the sort + * ordering of the path's output rows. + * + * "required_outer", if not NULL, contains the relids of one or more relations + * that must provide parameter values to each scan of this path, because the + * path relies on join clauses using those rels. That means this path can only + * be joined to those rels by means of nestloop joins with this path on the + * inside. Note: for a normal unparameterized path, required_outer must be + * NULL, not an empty-but-not-null Bitmapset. + * + * "param_clauses" is a List of RestrictInfo nodes, containing the join + * clauses used by a parameterized path. Ideally param_clauses should be NIL + * if and only if required_outer is NULL. XXX for the moment, however, we do + * not compute param_clauses for Append and MergeAppend paths, so the list + * is inaccurate in those paths and possibly paths above them. */ - typedef struct Path { NodeTag type; @@ -639,12 +653,15 @@ typedef struct Path RelOptInfo *parent; /* the relation this path can build */ - /* estimated execution costs for path (see costsize.c for more info) */ + /* estimated size/costs for path (see costsize.c for more info) */ + double rows; /* estimated number of result tuples */ Cost startup_cost; /* cost expended before fetching any tuples */ Cost total_cost; /* total cost (assuming all tuples fetched) */ List *pathkeys; /* sort ordering of path's output */ - /* pathkeys is a List of PathKey nodes; see above */ + + Relids required_outer; /* rels supplying parameters used by path */ + List *param_clauses; /* join clauses that use such parameters */ } Path; /*---------- @@ -685,12 +702,6 @@ typedef struct Path * ORDER BY expression is meant to be used with. (There is no restriction * on which index column each ORDER BY can be used with.) * - * 'isjoininner' is TRUE if the path is a nestloop inner scan (that is, - * some of the index conditions are join rather than restriction clauses). - * Note that the path costs will be calculated differently from a plain - * indexscan in this case, and in addition there's a special 'rows' value - * different from the parent RelOptInfo's (see below). - * * 'indexscandir' is one of: * ForwardScanDirection: forward scan of an ordered index * BackwardScanDirection: backward scan of an ordered index @@ -703,12 +714,6 @@ typedef struct Path * we need not recompute them when considering using the same index in a * bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath * itself represent the costs of an IndexScan or IndexOnlyScan plan type. - * - * 'rows' is the estimated result tuple count for the indexscan. This - * is the same as path.parent->rows for a simple indexscan, but it is - * different for a nestloop inner scan, because the additional indexquals - * coming from join clauses make the scan more selective than the parent - * rel's restrict clauses alone would do. *---------- */ typedef struct IndexPath @@ -720,11 +725,9 @@ typedef struct IndexPath List *indexqualcols; List *indexorderbys; List *indexorderbycols; - bool isjoininner; ScanDirection indexscandir; Cost indextotalcost; Selectivity indexselectivity; - double rows; /* estimated number of result tuples */ } IndexPath; /* @@ -743,16 +746,11 @@ typedef struct IndexPath * always represent the costs to use it as a regular (or index-only) * IndexScan. The costs of a BitmapIndexScan can be computed using the * IndexPath's indextotalcost and indexselectivity. - * - * BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and - * rows fields serve the same purpose as for plain IndexPaths. */ typedef struct BitmapHeapPath { Path path; Path *bitmapqual; /* IndexPath, BitmapAndPath, BitmapOrPath */ - bool isjoininner; /* T if it's a nestloop inner scan */ - double rows; /* estimated number of result tuples */ } BitmapHeapPath; /* @@ -822,6 +820,11 @@ typedef struct AppendPath #define IS_DUMMY_PATH(p) \ (IsA((p), AppendPath) && ((AppendPath *) (p))->subpaths == NIL) +/* A relation that's been proven empty will have one path that is dummy */ +#define IS_DUMMY_REL(r) \ + ((r)->cheapest_total_path != NULL && \ + IS_DUMMY_PATH((r)->cheapest_total_path)) + /* * MergeAppendPath represents a MergeAppend plan, ie, the merging of sorted * results from several member plans to produce similarly-sorted output. @@ -885,7 +888,6 @@ typedef struct UniquePath UniquePathMethod umethod; List *in_operators; /* equality operators of the IN clause */ List *uniq_exprs; /* expressions to be made unique */ - double rows; /* estimated number of result tuples */ } UniquePath; /* @@ -1173,42 +1175,6 @@ typedef struct MergeScanSelCache } MergeScanSelCache; /* - * Inner indexscan info. - * - * An inner indexscan is one that uses one or more joinclauses as index - * conditions (perhaps in addition to plain restriction clauses). So it - * can only be used as the inner path of a nestloop join where the outer - * relation includes all other relids appearing in those joinclauses. - * The set of usable joinclauses, and thus the best inner indexscan, - * thus varies depending on which outer relation we consider; so we have - * to recompute the best such paths for every join. To avoid lots of - * redundant computation, we cache the results of such searches. For - * each relation we compute the set of possible otherrelids (all relids - * appearing in joinquals that could become indexquals for this table). - * Two outer relations whose relids have the same intersection with this - * set will have the same set of available joinclauses and thus the same - * best inner indexscans for the inner relation. By taking the intersection - * before scanning the cache, we avoid recomputing when considering - * join rels that differ only by the inclusion of irrelevant other rels. - * - * The search key also includes a bool showing whether the join being - * considered is an outer join. Since we constrain the join order for - * outer joins, I believe that this bool can only have one possible value - * for any particular lookup key; but store it anyway to avoid confusion. - */ - -typedef struct InnerIndexscanInfo -{ - NodeTag type; - /* The lookup key: */ - Relids other_relids; /* a set of relevant other relids */ - bool isouterjoin; /* true if join is outer */ - /* Best paths for this lookup key (NULL if no available indexscans): */ - Path *cheapest_startup_innerpath; /* cheapest startup cost */ - Path *cheapest_total_innerpath; /* cheapest total cost */ -} InnerIndexscanInfo; - -/* * Placeholder node for an expression to be evaluated below the top level * of a plan tree. This is used during planning to represent the contained * expression. At the end of the planning process it is replaced by either @@ -1490,4 +1456,64 @@ typedef struct PlannerParamItem Index abslevel; /* its absolute query level */ } PlannerParamItem; +/* + * When making cost estimates for a SEMI or ANTI join, there are some + * correction factors that are needed in both nestloop and hash joins + * to account for the fact that the executor can stop scanning inner rows + * as soon as it finds a match to the current outer row. These numbers + * depend only on the selected outer and inner join relations, not on the + * particular paths used for them, so it's worthwhile to calculate them + * just once per relation pair not once per considered path. This struct + * is filled by compute_semi_anti_join_factors and must be passed along + * to the join cost estimation functions. + * + * outer_match_frac is the fraction of the outer tuples that are + * expected to have at least one match. + * match_count is the average number of matches expected for + * outer tuples that have at least one match. + */ +typedef struct SemiAntiJoinFactors +{ + Selectivity outer_match_frac; + Selectivity match_count; +} SemiAntiJoinFactors; + +/* + * For speed reasons, cost estimation for join paths is performed in two + * phases: the first phase tries to quickly derive a lower bound for the + * join cost, and then we check if that's sufficient to reject the path. + * If not, we come back for a more refined cost estimate. The first phase + * fills a JoinCostWorkspace struct with its preliminary cost estimates + * and possibly additional intermediate values. The second phase takes + * these values as inputs to avoid repeating work. + * + * (Ideally we'd declare this in cost.h, but it's also needed in pathnode.h, + * so seems best to put it here.) + */ +typedef struct JoinCostWorkspace +{ + /* Preliminary cost estimates --- must not be larger than final ones! */ + Cost startup_cost; /* cost expended before fetching any tuples */ + Cost total_cost; /* total cost (assuming all tuples fetched) */ + + /* Fields below here should be treated as private to costsize.c */ + Cost run_cost; /* non-startup cost components */ + + /* private for cost_nestloop code */ + Cost inner_rescan_run_cost; + double outer_matched_rows; + Selectivity inner_scan_frac; + + /* private for cost_mergejoin code */ + Cost inner_run_cost; + double outer_rows; + double inner_rows; + double outer_skip_rows; + double inner_skip_rows; + + /* private for cost_hashjoin code */ + int numbuckets; + int numbatches; +} JoinCostWorkspace; + #endif /* RELATION_H */ |