aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2002-11-30 05:21:03 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2002-11-30 05:21:03 +0000
commit935969415a90065d4bc4b2643b4c9c50518c934b (patch)
tree49488aee7f8c95a338e4b53024d4aaeb36d2abcc /src/backend/optimizer/path
parent829cedc8cf04801c8fce49afa5dd57b3833b969f (diff)
downloadpostgresql-935969415a90065d4bc4b2643b4c9c50518c934b.tar.gz
postgresql-935969415a90065d4bc4b2643b4c9c50518c934b.zip
Be more realistic about plans involving Materialize nodes: take their
cost into account while planning.
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/allpaths.c21
-rw-r--r--src/backend/optimizer/path/costsize.c57
-rw-r--r--src/backend/optimizer/path/joinpath.c50
3 files changed, 96 insertions, 32 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 107641399d7..c0b3ab40da1 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.92 2002/11/13 00:39:47 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -724,33 +724,34 @@ static void
print_path(Query *root, Path *path, int indent)
{
const char *ptype;
- bool join;
+ bool join = false;
+ Path *subpath = NULL;
int i;
switch (nodeTag(path))
{
case T_Path:
ptype = "SeqScan";
- join = false;
break;
case T_IndexPath:
ptype = "IdxScan";
- join = false;
break;
case T_TidPath:
ptype = "TidScan";
- join = false;
break;
case T_AppendPath:
ptype = "Append";
- join = false;
break;
case T_ResultPath:
ptype = "Result";
- join = false;
+ subpath = ((ResultPath *) path)->subpath;
+ break;
+ case T_MaterialPath:
+ ptype = "Material";
+ subpath = ((MaterialPath *) path)->subpath;
break;
case T_NestPath:
- ptype = "Nestloop";
+ ptype = "NestLoop";
join = true;
break;
case T_MergePath:
@@ -763,7 +764,6 @@ print_path(Query *root, Path *path, int indent)
break;
default:
ptype = "???Path";
- join = false;
break;
}
@@ -814,6 +814,9 @@ print_path(Query *root, Path *path, int indent)
print_path(root, jp->outerjoinpath, indent + 1);
print_path(root, jp->innerjoinpath, indent + 1);
}
+
+ if (subpath)
+ print_path(root, subpath, indent + 1);
}
void
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index fbdeea414c2..1db310fc52e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.92 2002/11/30 00:08:16 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -230,7 +230,7 @@ cost_index(Path *path, Query *root,
Assert(length(baserel->relids) == 1);
Assert(baserel->rtekind == RTE_RELATION);
- if (!enable_indexscan && !is_injoin)
+ if (!enable_indexscan)
startup_cost += disable_cost;
/*
@@ -514,6 +514,43 @@ cost_sort(Path *path, Query *root,
}
/*
+ * cost_material
+ * Determines and returns the cost of materializing a relation, including
+ * the cost of reading the input data.
+ *
+ * If the total volume of data to materialize exceeds SortMem, we will need
+ * to write it to disk, so the cost is much higher in that case.
+ */
+void
+cost_material(Path *path,
+ Cost input_cost, double tuples, int width)
+{
+ Cost startup_cost = input_cost;
+ Cost run_cost = 0;
+ double nbytes = relation_byte_size(tuples, width);
+ long sortmembytes = SortMem * 1024L;
+
+ /* disk costs */
+ if (nbytes > sortmembytes)
+ {
+ double npages = ceil(nbytes / BLCKSZ);
+
+ /* We'll write during startup and read during retrieval */
+ startup_cost += npages;
+ run_cost += npages;
+ }
+
+ /*
+ * Also charge a small amount per extracted tuple. We use cpu_tuple_cost
+ * so that it doesn't appear worthwhile to materialize a bare seqscan.
+ */
+ run_cost += cpu_tuple_cost * tuples;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
* cost_agg
* Determines and returns the cost of performing an Agg plan node,
* including the cost of its input.
@@ -630,19 +667,17 @@ cost_nestloop(Path *path, Query *root,
* before we can start returning tuples, so the join's startup cost is
* their sum. What's not so clear is whether the inner path's
* startup_cost must be paid again on each rescan of the inner path.
- * This is not true if the inner path is materialized, but probably is
- * true otherwise. Since we don't yet have clean handling of the
- * decision whether to materialize a path, we can't tell here which
- * will happen. As a compromise, charge 50% of the inner startup cost
- * for each restart.
+ * This is not true if the inner path is materialized or is a hashjoin,
+ * but probably is true otherwise.
*/
startup_cost += outer_path->startup_cost + inner_path->startup_cost;
run_cost += outer_path->total_cost - outer_path->startup_cost;
run_cost += outer_path->parent->rows *
(inner_path->total_cost - inner_path->startup_cost);
- if (outer_path->parent->rows > 1)
- run_cost += (outer_path->parent->rows - 1) *
- inner_path->startup_cost * 0.5;
+ if (!(IsA(inner_path, MaterialPath) ||
+ IsA(inner_path, HashPath)) &&
+ outer_path->parent->rows > 1)
+ run_cost += (outer_path->parent->rows - 1) * inner_path->startup_cost;
/*
* Number of tuples processed (not number emitted!). If inner path is
@@ -1544,7 +1579,7 @@ set_rel_width(Query *root, RelOptInfo *rel)
static double
relation_byte_size(double tuples, int width)
{
- return tuples * ((double) MAXALIGN(width + sizeof(HeapTupleData)));
+ return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleData)));
}
/*
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 6069a34d879..65d0d8fa358 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.73 2002/11/30 00:08:16 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.74 2002/11/30 05:21:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -281,8 +281,9 @@ sort_inner_and_outer(Query *root,
* only outer paths that are already ordered well enough for merging).
*
* We always generate a nestloop path for each available outer path.
- * In fact we may generate as many as three: one on the cheapest-total-cost
- * inner path, one on the cheapest-startup-cost inner path (if different),
+ * In fact we may generate as many as four: one on the cheapest-total-cost
+ * inner path, one on the same with materialization, one on the
+ * cheapest-startup-cost inner path (if different),
* and one on the best inner-indexscan path (if any).
*
* We also consider mergejoins if mergejoin clauses are available. We have
@@ -315,7 +316,8 @@ match_unsorted_outer(Query *root,
{
bool nestjoinOK;
bool useallclauses;
- Path *bestinnerjoin;
+ Path *matpath = NULL;
+ Path *bestinnerjoin = NULL;
List *i;
/*
@@ -345,12 +347,26 @@ match_unsorted_outer(Query *root,
break;
}
- /*
- * Get the best innerjoin indexpath (if any) for this outer rel. It's
- * the same for all outer paths.
- */
- bestinnerjoin = best_inner_indexscan(root, innerrel,
- outerrel->relids, jointype);
+ if (nestjoinOK)
+ {
+ /*
+ * If the cheapest inner path is a join or seqscan, we should consider
+ * materializing it. (This is a heuristic: we could consider it
+ * always, but for inner indexscans it's probably a waste of time.)
+ */
+ if (!(IsA(innerrel->cheapest_total_path, IndexPath) ||
+ IsA(innerrel->cheapest_total_path, TidPath)))
+ matpath = (Path *)
+ create_material_path(innerrel,
+ innerrel->cheapest_total_path);
+
+ /*
+ * Get the best innerjoin indexpath (if any) for this outer rel. It's
+ * the same for all outer paths.
+ */
+ bestinnerjoin = best_inner_indexscan(root, innerrel,
+ outerrel->relids, jointype);
+ }
foreach(i, outerrel->pathlist)
{
@@ -376,8 +392,9 @@ match_unsorted_outer(Query *root,
{
/*
* Always consider a nestloop join with this outer and
- * cheapest-total-cost inner. Consider nestloops using the
- * cheapest-startup-cost inner as well, and the best innerjoin
+ * cheapest-total-cost inner. When appropriate, also consider
+ * using the materialized form of the cheapest inner, the
+ * cheapest-startup-cost inner path, and the best innerjoin
* indexpath.
*/
add_path(joinrel, (Path *)
@@ -388,6 +405,15 @@ match_unsorted_outer(Query *root,
innerrel->cheapest_total_path,
restrictlist,
merge_pathkeys));
+ if (matpath != NULL)
+ add_path(joinrel, (Path *)
+ create_nestloop_path(root,
+ joinrel,
+ jointype,
+ outerpath,
+ matpath,
+ restrictlist,
+ merge_pathkeys));
if (innerrel->cheapest_startup_path !=
innerrel->cheapest_total_path)
add_path(joinrel, (Path *)