aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r--src/backend/optimizer/README3
-rw-r--r--src/backend/optimizer/path/allpaths.c21
-rw-r--r--src/backend/optimizer/path/costsize.c57
-rw-r--r--src/backend/optimizer/path/joinpath.c50
-rw-r--r--src/backend/optimizer/plan/createplan.c89
-rw-r--r--src/backend/optimizer/plan/subselect.c12
-rw-r--r--src/backend/optimizer/util/pathnode.c44
7 files changed, 180 insertions, 96 deletions
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 698b831a9cf..f4d64ebbcad 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -259,7 +259,8 @@ RelOptInfo - a relation or joined relations
IndexPath - index scans
TidPath - scan by CTID
AppendPath - append multiple subpaths together
- ResultPath - a Result plan (used for variable-free tlist or qual)
+ ResultPath - a Result plan node (used for variable-free tlist or qual)
+ MaterialPath - a Material plan node
NestPath - nested-loop joins
MergePath - merge joins
HashPath - hash joins
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 107641399d7..c0b3ab40da1 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.92 2002/11/13 00:39:47 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -724,33 +724,34 @@ static void
print_path(Query *root, Path *path, int indent)
{
const char *ptype;
- bool join;
+ bool join = false;
+ Path *subpath = NULL;
int i;
switch (nodeTag(path))
{
case T_Path:
ptype = "SeqScan";
- join = false;
break;
case T_IndexPath:
ptype = "IdxScan";
- join = false;
break;
case T_TidPath:
ptype = "TidScan";
- join = false;
break;
case T_AppendPath:
ptype = "Append";
- join = false;
break;
case T_ResultPath:
ptype = "Result";
- join = false;
+ subpath = ((ResultPath *) path)->subpath;
+ break;
+ case T_MaterialPath:
+ ptype = "Material";
+ subpath = ((MaterialPath *) path)->subpath;
break;
case T_NestPath:
- ptype = "Nestloop";
+ ptype = "NestLoop";
join = true;
break;
case T_MergePath:
@@ -763,7 +764,6 @@ print_path(Query *root, Path *path, int indent)
break;
default:
ptype = "???Path";
- join = false;
break;
}
@@ -814,6 +814,9 @@ print_path(Query *root, Path *path, int indent)
print_path(root, jp->outerjoinpath, indent + 1);
print_path(root, jp->innerjoinpath, indent + 1);
}
+
+ if (subpath)
+ print_path(root, subpath, indent + 1);
}
void
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index fbdeea414c2..1db310fc52e 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.92 2002/11/30 00:08:16 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -230,7 +230,7 @@ cost_index(Path *path, Query *root,
Assert(length(baserel->relids) == 1);
Assert(baserel->rtekind == RTE_RELATION);
- if (!enable_indexscan && !is_injoin)
+ if (!enable_indexscan)
startup_cost += disable_cost;
/*
@@ -514,6 +514,43 @@ cost_sort(Path *path, Query *root,
}
/*
+ * cost_material
+ * Determines and returns the cost of materializing a relation, including
+ * the cost of reading the input data.
+ *
+ * If the total volume of data to materialize exceeds SortMem, we will need
+ * to write it to disk, so the cost is much higher in that case.
+ */
+void
+cost_material(Path *path,
+ Cost input_cost, double tuples, int width)
+{
+ Cost startup_cost = input_cost;
+ Cost run_cost = 0;
+ double nbytes = relation_byte_size(tuples, width);
+ long sortmembytes = SortMem * 1024L;
+
+ /* disk costs */
+ if (nbytes > sortmembytes)
+ {
+ double npages = ceil(nbytes / BLCKSZ);
+
+ /* We'll write during startup and read during retrieval */
+ startup_cost += npages;
+ run_cost += npages;
+ }
+
+ /*
+ * Also charge a small amount per extracted tuple. We use cpu_tuple_cost
+ * so that it doesn't appear worthwhile to materialize a bare seqscan.
+ */
+ run_cost += cpu_tuple_cost * tuples;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
* cost_agg
* Determines and returns the cost of performing an Agg plan node,
* including the cost of its input.
@@ -630,19 +667,17 @@ cost_nestloop(Path *path, Query *root,
* before we can start returning tuples, so the join's startup cost is
* their sum. What's not so clear is whether the inner path's
* startup_cost must be paid again on each rescan of the inner path.
- * This is not true if the inner path is materialized, but probably is
- * true otherwise. Since we don't yet have clean handling of the
- * decision whether to materialize a path, we can't tell here which
- * will happen. As a compromise, charge 50% of the inner startup cost
- * for each restart.
+ * This is not true if the inner path is materialized or is a hashjoin,
+ * but probably is true otherwise.
*/
startup_cost += outer_path->startup_cost + inner_path->startup_cost;
run_cost += outer_path->total_cost - outer_path->startup_cost;
run_cost += outer_path->parent->rows *
(inner_path->total_cost - inner_path->startup_cost);
- if (outer_path->parent->rows > 1)
- run_cost += (outer_path->parent->rows - 1) *
- inner_path->startup_cost * 0.5;
+ if (!(IsA(inner_path, MaterialPath) ||
+ IsA(inner_path, HashPath)) &&
+ outer_path->parent->rows > 1)
+ run_cost += (outer_path->parent->rows - 1) * inner_path->startup_cost;
/*
* Number of tuples processed (not number emitted!). If inner path is
@@ -1544,7 +1579,7 @@ set_rel_width(Query *root, RelOptInfo *rel)
static double
relation_byte_size(double tuples, int width)
{
- return tuples * ((double) MAXALIGN(width + sizeof(HeapTupleData)));
+ return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleData)));
}
/*
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 6069a34d879..65d0d8fa358 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.73 2002/11/30 00:08:16 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.74 2002/11/30 05:21:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -281,8 +281,9 @@ sort_inner_and_outer(Query *root,
* only outer paths that are already ordered well enough for merging).
*
* We always generate a nestloop path for each available outer path.
- * In fact we may generate as many as three: one on the cheapest-total-cost
- * inner path, one on the cheapest-startup-cost inner path (if different),
+ * In fact we may generate as many as four: one on the cheapest-total-cost
+ * inner path, one on the same with materialization, one on the
+ * cheapest-startup-cost inner path (if different),
* and one on the best inner-indexscan path (if any).
*
* We also consider mergejoins if mergejoin clauses are available. We have
@@ -315,7 +316,8 @@ match_unsorted_outer(Query *root,
{
bool nestjoinOK;
bool useallclauses;
- Path *bestinnerjoin;
+ Path *matpath = NULL;
+ Path *bestinnerjoin = NULL;
List *i;
/*
@@ -345,12 +347,26 @@ match_unsorted_outer(Query *root,
break;
}
- /*
- * Get the best innerjoin indexpath (if any) for this outer rel. It's
- * the same for all outer paths.
- */
- bestinnerjoin = best_inner_indexscan(root, innerrel,
- outerrel->relids, jointype);
+ if (nestjoinOK)
+ {
+ /*
+ * If the cheapest inner path is a join or seqscan, we should consider
+ * materializing it. (This is a heuristic: we could consider it
+ * always, but for inner indexscans it's probably a waste of time.)
+ */
+ if (!(IsA(innerrel->cheapest_total_path, IndexPath) ||
+ IsA(innerrel->cheapest_total_path, TidPath)))
+ matpath = (Path *)
+ create_material_path(innerrel,
+ innerrel->cheapest_total_path);
+
+ /*
+ * Get the best innerjoin indexpath (if any) for this outer rel. It's
+ * the same for all outer paths.
+ */
+ bestinnerjoin = best_inner_indexscan(root, innerrel,
+ outerrel->relids, jointype);
+ }
foreach(i, outerrel->pathlist)
{
@@ -376,8 +392,9 @@ match_unsorted_outer(Query *root,
{
/*
* Always consider a nestloop join with this outer and
- * cheapest-total-cost inner. Consider nestloops using the
- * cheapest-startup-cost inner as well, and the best innerjoin
+ * cheapest-total-cost inner. When appropriate, also consider
+ * using the materialized form of the cheapest inner, the
+ * cheapest-startup-cost inner path, and the best innerjoin
* indexpath.
*/
add_path(joinrel, (Path *)
@@ -388,6 +405,15 @@ match_unsorted_outer(Query *root,
innerrel->cheapest_total_path,
restrictlist,
merge_pathkeys));
+ if (matpath != NULL)
+ add_path(joinrel, (Path *)
+ create_nestloop_path(root,
+ joinrel,
+ jointype,
+ outerpath,
+ matpath,
+ restrictlist,
+ merge_pathkeys));
if (innerrel->cheapest_startup_path !=
innerrel->cheapest_total_path)
add_path(joinrel, (Path *)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index d43e3271fbf..148bd86b85a 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.125 2002/11/30 00:08:17 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.126 2002/11/30 05:21:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -35,6 +35,7 @@ static Scan *create_scan_plan(Query *root, Path *best_path);
static Join *create_join_plan(Query *root, JoinPath *best_path);
static Append *create_append_plan(Query *root, AppendPath *best_path);
static Result *create_result_plan(Query *root, ResultPath *best_path);
+static Material *create_material_plan(Query *root, MaterialPath *best_path);
static SeqScan *create_seqscan_plan(Path *best_path, List *tlist,
List *scan_clauses);
static IndexScan *create_indexscan_plan(Query *root, IndexPath *best_path,
@@ -141,6 +142,10 @@ create_plan(Query *root, Path *best_path)
plan = (Plan *) create_result_plan(root,
(ResultPath *) best_path);
break;
+ case T_Material:
+ plan = (Plan *) create_material_plan(root,
+ (MaterialPath *) best_path);
+ break;
default:
elog(ERROR, "create_plan: unknown pathtype %d",
best_path->pathtype);
@@ -383,6 +388,28 @@ create_result_plan(Query *root, ResultPath *best_path)
return plan;
}
+/*
+ * create_material_plan
+ * Create a Material plan for 'best_path' and (recursively) plans
+ * for its subpaths.
+ *
+ * Returns a Plan node.
+ */
+static Material *
+create_material_plan(Query *root, MaterialPath *best_path)
+{
+ Material *plan;
+ Plan *subplan;
+
+ subplan = create_plan(root, best_path->subpath);
+
+ plan = make_material(best_path->path.parent->targetlist, subplan);
+
+ copy_path_costsize(&plan->plan, (Path *) best_path);
+
+ return plan;
+}
+
/*****************************************************************************
*
@@ -739,18 +766,6 @@ create_nestloop_plan(Query *root,
inner_tlist,
innerscan->scan.scanrelid);
}
- else if (IsA_Join(inner_plan))
- {
- /*
- * Materialize the inner join for speed reasons.
- *
- * XXX It is probably *not* always fastest to materialize an inner
- * join --- how can we estimate whether this is a good thing to
- * do?
- */
- inner_plan = (Plan *) make_material(inner_tlist,
- inner_plan);
- }
/*
* Set quals to contain INNER/OUTER var references.
@@ -843,44 +858,6 @@ create_mergejoin_plan(Query *root,
best_path->innersortkeys);
/*
- * The executor requires the inner side of a mergejoin to support
- * "mark" and "restore" operations. Not all plan types do, so we must
- * be careful not to generate an invalid plan. If necessary, an
- * invalid inner plan can be handled by inserting a Materialize node.
- *
- * Since the inner side must be ordered, and only Sorts and IndexScans
- * can create order to begin with, you might think there's no problem
- * --- but you'd be wrong. Nestloop and merge joins can *preserve*
- * the order of their inputs, so they can be selected as the input of
- * a mergejoin, and that won't work in the present executor.
- *
- * Doing this here is a bit of a kluge since the cost of the Materialize
- * wasn't taken into account in our earlier decisions. But
- * Materialize is hard to estimate a cost for, and the above
- * consideration shows that this is a rare case anyway, so this seems
- * an acceptable way to proceed.
- *
- * This check must agree with ExecMarkPos/ExecRestrPos in
- * executor/execAmi.c!
- */
- switch (nodeTag(inner_plan))
- {
- case T_SeqScan:
- case T_IndexScan:
- case T_FunctionScan:
- case T_Material:
- case T_Sort:
- /* OK, these inner plans support mark/restore */
- break;
-
- default:
- /* Ooops, need to materialize the inner plan */
- inner_plan = (Plan *) make_material(inner_tlist,
- inner_plan);
- break;
- }
-
- /*
* Now we can build the mergejoin node.
*/
join_plan = make_mergejoin(tlist,
@@ -1668,15 +1645,7 @@ make_material(List *tlist, Plan *lefttree)
Material *node = makeNode(Material);
Plan *plan = &node->plan;
- copy_plan_costsize(plan, lefttree);
-
- /*
- * For plausibility, make startup & total costs equal total cost of
- * input plan; this only affects EXPLAIN display not decisions.
- *
- * XXX shouldn't we charge some additional cost for materialization?
- */
- plan->startup_cost = plan->total_cost;
+ /* cost should be inserted by caller */
plan->state = (EState *) NULL;
plan->targetlist = tlist;
plan->qual = NIL;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 61476a65604..e4bbd29105e 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.57 2002/11/30 00:08:18 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.58 2002/11/30 05:21:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -328,9 +328,17 @@ make_subplan(SubLink *slink)
if (use_material)
{
Plan *matplan;
+ Path matpath; /* dummy for result of cost_material */
matplan = (Plan *) make_material(plan->targetlist, plan);
- /* kluge --- see comments above */
+ /* need to calculate costs */
+ cost_material(&matpath,
+ plan->total_cost,
+ plan->plan_rows,
+ plan->plan_width);
+ matplan->startup_cost = matpath.startup_cost;
+ matplan->total_cost = matpath.total_cost;
+ /* parameter kluge --- see comments above */
matplan->extParam = listCopy(plan->extParam);
matplan->locParam = listCopy(plan->locParam);
node->plan = plan = matplan;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 98227355605..84896b939fd 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.81 2002/11/30 00:08:20 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.82 2002/11/30 05:21:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,6 +16,7 @@
#include <math.h>
+#include "executor/executor.h"
#include "nodes/plannodes.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
@@ -450,6 +451,7 @@ create_result_path(RelOptInfo *rel, Path *subpath, List *constantqual)
pathnode->subpath = subpath;
pathnode->constantqual = constantqual;
+ /* Ideally should define cost_result(), but I'm too lazy */
if (subpath)
{
pathnode->path.startup_cost = subpath->startup_cost;
@@ -465,6 +467,31 @@ create_result_path(RelOptInfo *rel, Path *subpath, List *constantqual)
}
/*
+ * create_material_path
+ * Creates a path corresponding to a Material plan, returning the
+ * pathnode.
+ */
+MaterialPath *
+create_material_path(RelOptInfo *rel, Path *subpath)
+{
+ MaterialPath *pathnode = makeNode(MaterialPath);
+
+ pathnode->path.pathtype = T_Material;
+ pathnode->path.parent = rel;
+
+ pathnode->path.pathkeys = subpath->pathkeys;
+
+ pathnode->subpath = subpath;
+
+ cost_material(&pathnode->path,
+ subpath->total_cost,
+ rel->rows,
+ rel->width);
+
+ return pathnode;
+}
+
+/*
* create_subqueryscan_path
* Creates a path corresponding to a sequential scan of a subquery,
* returning the pathnode.
@@ -583,6 +610,21 @@ create_mergejoin_path(Query *root,
if (innersortkeys &&
pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
innersortkeys = NIL;
+ /*
+ * If we are not sorting the inner path, we may need a materialize
+ * node to ensure it can be marked/restored. (Sort does support
+ * mark/restore, so no materialize is needed in that case.)
+ *
+ * Since the inner side must be ordered, and only Sorts and IndexScans
+ * can create order to begin with, you might think there's no problem
+ * --- but you'd be wrong. Nestloop and merge joins can *preserve*
+ * the order of their inputs, so they can be selected as the input of
+ * a mergejoin, and they don't support mark/restore at present.
+ */
+ if (innersortkeys == NIL &&
+ !ExecSupportsMarkRestore(inner_path->pathtype))
+ inner_path = (Path *)
+ create_material_path(inner_path->parent, inner_path);
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;