diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-11-30 05:21:03 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-11-30 05:21:03 +0000 |
commit | 935969415a90065d4bc4b2643b4c9c50518c934b (patch) | |
tree | 49488aee7f8c95a338e4b53024d4aaeb36d2abcc /src/backend/optimizer/path | |
parent | 829cedc8cf04801c8fce49afa5dd57b3833b969f (diff) | |
download | postgresql-935969415a90065d4bc4b2643b4c9c50518c934b.tar.gz postgresql-935969415a90065d4bc4b2643b4c9c50518c934b.zip |
Be more realistic about plans involving Materialize nodes: take their
cost into account while planning.
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 21 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 57 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 50 |
3 files changed, 96 insertions, 32 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 107641399d7..c0b3ab40da1 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.92 2002/11/13 00:39:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.93 2002/11/30 05:21:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -724,33 +724,34 @@ static void print_path(Query *root, Path *path, int indent) { const char *ptype; - bool join; + bool join = false; + Path *subpath = NULL; int i; switch (nodeTag(path)) { case T_Path: ptype = "SeqScan"; - join = false; break; case T_IndexPath: ptype = "IdxScan"; - join = false; break; case T_TidPath: ptype = "TidScan"; - join = false; break; case T_AppendPath: ptype = "Append"; - join = false; break; case T_ResultPath: ptype = "Result"; - join = false; + subpath = ((ResultPath *) path)->subpath; + break; + case T_MaterialPath: + ptype = "Material"; + subpath = ((MaterialPath *) path)->subpath; break; case T_NestPath: - ptype = "Nestloop"; + ptype = "NestLoop"; join = true; break; case T_MergePath: @@ -763,7 +764,6 @@ print_path(Query *root, Path *path, int indent) break; default: ptype = "???Path"; - join = false; break; } @@ -814,6 +814,9 @@ print_path(Query *root, Path *path, int indent) print_path(root, jp->outerjoinpath, indent + 1); print_path(root, jp->innerjoinpath, indent + 1); } + + if (subpath) + print_path(root, subpath, indent + 1); } void diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index fbdeea414c2..1db310fc52e 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.92 2002/11/30 00:08:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.93 2002/11/30 05:21:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -230,7 +230,7 @@ cost_index(Path *path, Query *root, Assert(length(baserel->relids) == 1); Assert(baserel->rtekind == RTE_RELATION); - if (!enable_indexscan && !is_injoin) + if (!enable_indexscan) startup_cost += disable_cost; /* @@ -514,6 +514,43 @@ cost_sort(Path *path, Query *root, } /* + * cost_material + * Determines and returns the cost of materializing a relation, including + * the cost of reading the input data. + * + * If the total volume of data to materialize exceeds SortMem, we will need + * to write it to disk, so the cost is much higher in that case. + */ +void +cost_material(Path *path, + Cost input_cost, double tuples, int width) +{ + Cost startup_cost = input_cost; + Cost run_cost = 0; + double nbytes = relation_byte_size(tuples, width); + long sortmembytes = SortMem * 1024L; + + /* disk costs */ + if (nbytes > sortmembytes) + { + double npages = ceil(nbytes / BLCKSZ); + + /* We'll write during startup and read during retrieval */ + startup_cost += npages; + run_cost += npages; + } + + /* + * Also charge a small amount per extracted tuple. We use cpu_tuple_cost + * so that it doesn't appear worthwhile to materialize a bare seqscan. + */ + run_cost += cpu_tuple_cost * tuples; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* * cost_agg * Determines and returns the cost of performing an Agg plan node, * including the cost of its input. @@ -630,19 +667,17 @@ cost_nestloop(Path *path, Query *root, * before we can start returning tuples, so the join's startup cost is * their sum. What's not so clear is whether the inner path's * startup_cost must be paid again on each rescan of the inner path. - * This is not true if the inner path is materialized, but probably is - * true otherwise. Since we don't yet have clean handling of the - * decision whether to materialize a path, we can't tell here which - * will happen. As a compromise, charge 50% of the inner startup cost - * for each restart. + * This is not true if the inner path is materialized or is a hashjoin, + * but probably is true otherwise. */ startup_cost += outer_path->startup_cost + inner_path->startup_cost; run_cost += outer_path->total_cost - outer_path->startup_cost; run_cost += outer_path->parent->rows * (inner_path->total_cost - inner_path->startup_cost); - if (outer_path->parent->rows > 1) - run_cost += (outer_path->parent->rows - 1) * - inner_path->startup_cost * 0.5; + if (!(IsA(inner_path, MaterialPath) || + IsA(inner_path, HashPath)) && + outer_path->parent->rows > 1) + run_cost += (outer_path->parent->rows - 1) * inner_path->startup_cost; /* * Number of tuples processed (not number emitted!). If inner path is @@ -1544,7 +1579,7 @@ set_rel_width(Query *root, RelOptInfo *rel) static double relation_byte_size(double tuples, int width) { - return tuples * ((double) MAXALIGN(width + sizeof(HeapTupleData))); + return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleData))); } /* diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 6069a34d879..65d0d8fa358 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.73 2002/11/30 00:08:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.74 2002/11/30 05:21:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -281,8 +281,9 @@ sort_inner_and_outer(Query *root, * only outer paths that are already ordered well enough for merging). * * We always generate a nestloop path for each available outer path. - * In fact we may generate as many as three: one on the cheapest-total-cost - * inner path, one on the cheapest-startup-cost inner path (if different), + * In fact we may generate as many as four: one on the cheapest-total-cost + * inner path, one on the same with materialization, one on the + * cheapest-startup-cost inner path (if different), * and one on the best inner-indexscan path (if any). * * We also consider mergejoins if mergejoin clauses are available. We have @@ -315,7 +316,8 @@ match_unsorted_outer(Query *root, { bool nestjoinOK; bool useallclauses; - Path *bestinnerjoin; + Path *matpath = NULL; + Path *bestinnerjoin = NULL; List *i; /* @@ -345,12 +347,26 @@ match_unsorted_outer(Query *root, break; } - /* - * Get the best innerjoin indexpath (if any) for this outer rel. It's - * the same for all outer paths. - */ - bestinnerjoin = best_inner_indexscan(root, innerrel, - outerrel->relids, jointype); + if (nestjoinOK) + { + /* + * If the cheapest inner path is a join or seqscan, we should consider + * materializing it. (This is a heuristic: we could consider it + * always, but for inner indexscans it's probably a waste of time.) + */ + if (!(IsA(innerrel->cheapest_total_path, IndexPath) || + IsA(innerrel->cheapest_total_path, TidPath))) + matpath = (Path *) + create_material_path(innerrel, + innerrel->cheapest_total_path); + + /* + * Get the best innerjoin indexpath (if any) for this outer rel. It's + * the same for all outer paths. + */ + bestinnerjoin = best_inner_indexscan(root, innerrel, + outerrel->relids, jointype); + } foreach(i, outerrel->pathlist) { @@ -376,8 +392,9 @@ match_unsorted_outer(Query *root, { /* * Always consider a nestloop join with this outer and - * cheapest-total-cost inner. Consider nestloops using the - * cheapest-startup-cost inner as well, and the best innerjoin + * cheapest-total-cost inner. When appropriate, also consider + * using the materialized form of the cheapest inner, the + * cheapest-startup-cost inner path, and the best innerjoin * indexpath. */ add_path(joinrel, (Path *) @@ -388,6 +405,15 @@ match_unsorted_outer(Query *root, innerrel->cheapest_total_path, restrictlist, merge_pathkeys)); + if (matpath != NULL) + add_path(joinrel, (Path *) + create_nestloop_path(root, + joinrel, + jointype, + outerpath, + matpath, + restrictlist, + merge_pathkeys)); if (innerrel->cheapest_startup_path != innerrel->cheapest_total_path) add_path(joinrel, (Path *) |